![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/healthcare/entity_resolution/NLU_atc_resolver_pipeline.ipynb)

# Pipeline for Anatomical Therapeutic Chemical (ATC) Sentence Entity Resolver

This advanced pipeline extracts `DRUG` entities from clinical texts and uses the `sbiobert_base_cased_mli` Sentence BERT embeddings to map these entities to their corresponding Anatomical Therapeutic Chemical (ATC) codes.

In [1]:
import json, os
from google.colab import files

# Ask for the license file only when 'spark_jsl.json' is not already present
# in the working directory, so re-running the cell does not prompt again.
if 'spark_jsl.json' not in os.listdir():
    uploaded = files.upload()
    if not uploaded:
        # An empty result (presumably a cancelled upload dialog - confirm)
        # would otherwise fail below with an opaque IndexError.
        raise FileNotFoundError(
            "No license file was uploaded; upload the Spark NLP for "
            "Healthcare license JSON and re-run this cell."
        )
    # Normalise whatever name the first uploaded file had to 'spark_jsl.json'.
    os.rename(next(iter(uploaded)), 'spark_jsl.json')

with open('spark_jsl.json') as f:
    license_keys = json.load(f)

# Expose every license entry as a notebook-level variable; the install cell
# references $PUBLIC_VERSION, $JSL_VERSION and $SECRET.
# globals() is the same mapping as locals() at module scope, but it states the
# intent explicitly and does not rely on mutating the result of locals().
globals().update(license_keys)
# Also export the entries as environment variables.
os.environ.update(license_keys)

In [None]:
# Installing pyspark and spark-nlp
# ($PUBLIC_VERSION, $JSL_VERSION and $SECRET come from the license file loaded in the first cell.)
! pip install --upgrade -q pyspark==3.1.2 spark-nlp==$PUBLIC_VERSION

# Installing NLU
# --no-dependencies: presumably so the pyspark / spark-nlp versions pinned above are not replaced - confirm.
! pip install --upgrade -q nlu --no-dependencies

# Installing Spark NLP Healthcare
! pip install --upgrade -q spark-nlp-jsl==$JSL_VERSION --extra-index-url https://pypi.johnsnowlabs.com/$SECRET

# Installing Spark NLP Display Library for visualization
! pip install -q spark-nlp-display

In [2]:
import json
import os

import sparknlp
import sparknlp_jsl
import nlu

from sparknlp.base import *
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *

from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.ml import Pipeline,PipelineModel

import pandas as pd
# Allow up to 200 characters per cell when frames are displayed, so long
# text columns are truncated later than with the pandas default.
pd.set_option('display.max_colwidth', 200)

import warnings
# Suppress every warning raised through the `warnings` module from here on.
# NOTE(review): this also hides deprecation notices - confirm that is intended.
warnings.filterwarnings('ignore')

# Spark configuration passed to the session start call below: driver memory,
# Kryo serializer buffer ceiling and maximum size of results sent to the driver.
params = {
    "spark.driver.memory": "16G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M",
}

# Report the installed library versions before the session is created.
print("Spark NLP Version :", sparknlp.version())
print("Spark NLP_JSL Version :", sparknlp_jsl.version())

# Start the session through sparknlp_jsl, passing the SECRET entry of the
# loaded license together with the configuration above.
spark = sparknlp_jsl.start(license_keys['SECRET'], params=params)

# Bare expression: shows the session object as the cell output.
spark

Spark NLP Version : 5.3.1
Spark NLP_JSL Version : 5.3.1


In [3]:
# Load the ATC resolver pipeline by its NLU reference. The captured output
# below shows a download of approximately 2 GB when the model is fetched.
pipe = nlu.load("en.resolve.atc_pipeline")

atc_resolver_pipeline download started this may take some time.
Approx size to download 2 GB
[OK!]


In [4]:
# Sample clinical note (two sentences, three drug mentions) wrapped in a
# single-element list, which is the form handed to the pipeline.
text = [
    "She was immediately given hydrogen peroxide 30 mg and amoxicillin twice daily "
    "for 10 days to treat the infection on her leg. "
    "She has a history of taking magnesium hydroxide."
]

In [5]:
# Run the loaded pipeline on the sample text. In the captured output further
# down, the result is a table with one row per extracted entity chunk.
df = pipe.predict(text)

[91m🚨 Your Spark-Healthcare is outdated, installed==5.3.1 but latest version==5.3.0
You can run [92m nlp.install() [39mto update Spark-Healthcare


In [6]:
# Display the complete prediction table, including every intermediate column
# (embeddings, candidate resolutions and distances).
df

Unnamed: 0,document,entities_ner_chunk,entities_ner_chunk_class,entities_ner_chunk_confidence,entities_ner_chunk_origin_chunk,entities_ner_chunk_origin_sentence,resolution_atc_code,resolution_atc_code_confidence,resolution_atc_code_distance,resolution_atc_code_k_aux_labels,...,resolution_atc_code_k_cos_distances,resolution_atc_code_k_distances,resolution_atc_code_k_resolution,resolution_atc_code_origin_sentence,resolution_atc_code_resolved_text,resolution_atc_code_target_text,resolution_atc_code_token,sentence_dl,sentence_embeddings,word_embedding_word_embeddings
0,She was immediately given hydrogen peroxide 30 mg and amoxicillin twice daily for 10 days to treat the infection on her leg. She has a history of taking magnesium hydroxide.,hydrogen peroxide,DRUG,0.90765,0.0,0.0,A01AB02,0.9985,0.0,"[[ATC 5th, ATC 5th, ATC 4th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 4th, ATC 5th, ATC 5th], [ATC 5th, ATC 5th, ATC...",...,"[[0.0000, 0.0893, 0.1199, 0.1254, 0.1337, 0.1608, 0.1858, 0.1948, 0.2023, 0.2076, 0.1955, 0.2121, 0.2130, 0.2139, 0.2291, 0.2172, 0.2239, 0.2208, 0.2142], [0.0000, 0.0820, 0.0967, 0.1006, 0.1082, ...","[[0.0000, 7.2638, 8.5359, 8.6054, 8.8623, 9.8050, 10.4785, 10.6252, 10.8362, 10.8516, 10.8802, 11.0478, 11.2217, 11.2228, 11.3405, 11.3486, 11.3811, 11.4295, 11.4679], [0.0000, 6.9141, 7.4772, 7.6...","[[hydrogen peroxide , hydrogen peroxide; otic, Peroxides, ethanol / hydrogen peroxide , benzoyl peroxide , oxybate , diazoxide , magnesium peroxide, hydroxyzine , sucroferric oxyhydroxide , carbon...",0,hydrogen peroxide,hydrogen peroxide,hydrogen peroxide,"[She was immediately given hydrogen peroxide 30 mg and amoxicillin twice daily for 10 days to treat the infection on her leg., She has a history of taking magnesium hydroxide.]","[[0.23269009590148926, -0.2680894732475281, -0.7400643229484558, -0.3144417405128479, 0.5825711488723755, -0.10474076122045517, -0.01933845318853855, 0.24036844074726105, 0.3761536180973053, 0.856...","[[-0.21964989602565765, -0.2844458520412445, -0.10418396443128586, -0.5357521772384644, -0.06646879762411118, -0.444497287273407, -0.5000978708267212, -0.5944756269454956, 0.1369660645723343, 0.04..."
0,She was immediately given hydrogen peroxide 30 mg and amoxicillin twice daily for 10 days to treat the infection on her leg. She has a history of taking magnesium hydroxide.,amoxicillin,,0.9995,,,J01CA04,0.9961,0.0,"[[ATC 5th, ATC 5th, ATC 4th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 4th, ATC 5th, ATC 5th], [ATC 5th, ATC 5th, ATC...",...,"[[0.0000, 0.0893, 0.1199, 0.1254, 0.1337, 0.1608, 0.1858, 0.1948, 0.2023, 0.2076, 0.1955, 0.2121, 0.2130, 0.2139, 0.2291, 0.2172, 0.2239, 0.2208, 0.2142], [0.0000, 0.0820, 0.0967, 0.1006, 0.1082, ...","[[0.0000, 7.2638, 8.5359, 8.6054, 8.8623, 9.8050, 10.4785, 10.6252, 10.8362, 10.8516, 10.8802, 11.0478, 11.2217, 11.2228, 11.3405, 11.3486, 11.3811, 11.4295, 11.4679], [0.0000, 6.9141, 7.4772, 7.6...","[[hydrogen peroxide , hydrogen peroxide; otic, Peroxides, ethanol / hydrogen peroxide , benzoyl peroxide , oxybate , diazoxide , magnesium peroxide, hydroxyzine , sucroferric oxyhydroxide , carbon...",0,amoxicillin,amoxicillin,amoxicillin,"[She was immediately given hydrogen peroxide 30 mg and amoxicillin twice daily for 10 days to treat the infection on her leg., She has a history of taking magnesium hydroxide.]","[[0.23269009590148926, -0.2680894732475281, -0.7400643229484558, -0.3144417405128479, 0.5825711488723755, -0.10474076122045517, -0.01933845318853855, 0.24036844074726105, 0.3761536180973053, 0.856...","[[-0.21964989602565765, -0.2844458520412445, -0.10418396443128586, -0.5357521772384644, -0.06646879762411118, -0.444497287273407, -0.5000978708267212, -0.5944756269454956, 0.1369660645723343, 0.04..."
0,She was immediately given hydrogen peroxide 30 mg and amoxicillin twice daily for 10 days to treat the infection on her leg. She has a history of taking magnesium hydroxide.,magnesium hydroxide,,0.9382,,,A02AA04,0.9963,0.0,"[[ATC 5th, ATC 5th, ATC 4th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 5th, ATC 4th, ATC 5th, ATC 5th], [ATC 5th, ATC 5th, ATC...",...,"[[0.0000, 0.0893, 0.1199, 0.1254, 0.1337, 0.1608, 0.1858, 0.1948, 0.2023, 0.2076, 0.1955, 0.2121, 0.2130, 0.2139, 0.2291, 0.2172, 0.2239, 0.2208, 0.2142], [0.0000, 0.0820, 0.0967, 0.1006, 0.1082, ...","[[0.0000, 7.2638, 8.5359, 8.6054, 8.8623, 9.8050, 10.4785, 10.6252, 10.8362, 10.8516, 10.8802, 11.0478, 11.2217, 11.2228, 11.3405, 11.3486, 11.3811, 11.4295, 11.4679], [0.0000, 6.9141, 7.4772, 7.6...","[[hydrogen peroxide , hydrogen peroxide; otic, Peroxides, ethanol / hydrogen peroxide , benzoyl peroxide , oxybate , diazoxide , magnesium peroxide, hydroxyzine , sucroferric oxyhydroxide , carbon...",1,magnesium hydroxide,magnesium hydroxide,magnesium hydroxide,"[She was immediately given hydrogen peroxide 30 mg and amoxicillin twice daily for 10 days to treat the infection on her leg., She has a history of taking magnesium hydroxide.]","[[0.23269009590148926, -0.2680894732475281, -0.7400643229484558, -0.3144417405128479, 0.5825711488723755, -0.10474076122045517, -0.01933845318853855, 0.24036844074726105, 0.3761536180973053, 0.856...","[[-0.21964989602565765, -0.2844458520412445, -0.10418396443128586, -0.5357521772384644, -0.06646879762411118, -0.444497287273407, -0.5000978708267212, -0.5944756269454956, 0.1369660645723343, 0.04..."


In [9]:
# Narrow the table to the two columns of interest: the extracted chunk text
# and the ATC code it was resolved to.
df[["entities_ner_chunk", "resolution_atc_code"]]

Unnamed: 0,entities_ner_chunk,resolution_atc_code
0,hydrogen peroxide,A01AB02
0,amoxicillin,J01CA04
0,magnesium hydroxide,A02AA04
