![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/24.Improved_Entity_Resolvers_in_SparkNLP_with_sBert.ipynb)

# 24. Improved Entity Resolvers in Spark NLP with sBert

In [None]:
# NOTE: `$secret` is filled in from the Python variable `secret`, which this notebook
# only assigns in a later cell (from the license key file) — run that cell first.
!python3 -m pip install --upgrade spark-nlp-jsl==2.7.2 --user --extra-index-url https://pypi.johnsnowlabs.com/$secret

In [135]:
import sparknlp

# Display the version string reported by the installed sparknlp package.
sparknlp.version()

'2.6.5'

In [136]:
import sparknlp_jsl

# Display the version string reported by the installed sparknlp_jsl package.
sparknlp_jsl.version()

'2.7.2'

In [1]:
# JSON file holding the license entries that are read into `license_keys` below.
keyfile = 'workshop_license_keys_365.json'

import os
import json
import pandas as pd
import numpy as np

# Point JAVA_HOME at the JDK directory and make sure its bin/ comes first on PATH.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
_java_bin = os.environ["JAVA_HOME"] + "/bin"
# Only prepend when it is not already the leading entry, so re-running this cell
# does not keep stacking duplicate copies onto PATH.
if not os.environ["PATH"].startswith(_java_bin + ":"):
    os.environ["PATH"] = _java_bin + ":" + os.environ["PATH"]

# Parse the key file once; the next cell reads individual entries from this dict.
with open(keyfile, 'r') as f:
    license_keys = json.load(f)


In [2]:
# Secret taken from the license dict; it is passed to sparknlp_jsl.start() below.
secret = license_keys['SECRET']

# Publish the license and AWS entries as environment variables.
os.environ['SPARK_NLP_LICENSE'] = license_keys['SPARK_NLP_LICENSE']
os.environ['AWS_ACCESS_KEY_ID']= license_keys['AWS_ACCESS_KEY_ID']
os.environ['AWS_SECRET_ACCESS_KEY'] = license_keys['AWS_SECRET_ACCESS_KEY']
jsl_version = license_keys['JSL_VERSION']
version = license_keys['PUBLIC_VERSION']

import json
from pyspark.sql import SparkSession

import sparknlp_jsl
import sys, os, time
# Each module is star-imported once. The relative order (base, util, annotator,
# sparknlp_jsl.annotator) matches the last occurrence of each in the original
# cell, so whichever module wins a name clash is unchanged.
from sparknlp.base import *
from sparknlp.util import *
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *

from pyspark.sql import functions as F
# Imported after the star imports so these two names always come from pyspark.ml.
from pyspark.ml import Pipeline, PipelineModel


# Spark configuration entries passed to sparknlp_jsl.start().
params = {"spark.driver.memory":"32G",
"spark.kryoserializer.buffer.max":"2000M",
"spark.driver.maxResultSize":"2000M"}
 
spark = sparknlp_jsl.start(secret, params=params)


In [3]:
spark

## SNOMED pipeline

In [163]:
# DocumentAssembler reads the "text" column and writes its output under the name
# "ner_chunk", which is the column the embedder and resolver below take as input.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")
)

# Pretrained sentence-BERT embedder applied to the "ner_chunk" column.
sbert_embedder = (
    BertSentenceEmbeddings
    .pretrained('sbiobert_base_cased_mli', 'en', 'clinical/models')
    .setInputCols(["ner_chunk"])
    .setOutputCol("sbert_embeddings")
)

# Pretrained resolver that writes its result to "snomed_code",
# configured with the EUCLIDEAN distance function.
snomed_ct_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_snomed_findings", "en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("snomed_code")
    .setDistanceFunction("EUCLIDEAN")
)

# The stages are handed straight to PipelineModel (no fit step in this cell).
snomed_pipelineModel = PipelineModel(
    stages=[documentAssembler, sbert_embedder, snomed_ct_resolver]
)

# LightPipeline wrapper; later cells pass it to get_codes().
snomed_lp = LightPipeline(snomed_pipelineModel)


sbiobert_base_cased_mli download started this may take some time.
Approximate size to download 384.3 MB
[OK!]
sbiobertresolve_snomed_findings download started this may take some time.
Approximate size to download 1.4 GB
[OK!]


## ICD10CM pipeline

In [16]:
# DocumentAssembler reads the "text" column and writes its output under the name
# "ner_chunk", which is the column the embedder and resolver below take as input.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")
)

# Pretrained sentence-BERT embedder applied to the "ner_chunk" column.
sbert_embedder = (
    BertSentenceEmbeddings
    .pretrained('sbiobert_base_cased_mli', 'en', 'clinical/models')
    .setInputCols(["ner_chunk"])
    .setOutputCol("sbert_embeddings")
)

# Pretrained resolver that writes its result to "icd10cm_code",
# configured with the EUCLIDEAN distance function.
icd10_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_icd10cm_augmented", "en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("icd10cm_code")
    .setDistanceFunction("EUCLIDEAN")
)

# The stages are handed straight to PipelineModel (no fit step in this cell).
icd_pipelineModel = PipelineModel(
    stages=[documentAssembler, sbert_embedder, icd10_resolver]
)

# LightPipeline wrapper; later cells pass it to get_codes().
icd_lp = LightPipeline(icd_pipelineModel)


sbiobert_base_cased_mli download started this may take some time.
Approximate size to download 384.3 MB
[OK!]
sbiobertresolve_icd10cm_augmented download started this may take some time.
Approximate size to download 1.2 GB
[OK!]


## RxNorm pipeline

In [23]:
# DocumentAssembler reads the "text" column and writes its output under the name
# "ner_chunk", which is the column the embedder and resolver below take as input.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")
)

# Pretrained sentence-BERT embedder applied to the "ner_chunk" column.
sbert_embedder = (
    BertSentenceEmbeddings
    .pretrained('sbiobert_base_cased_mli', 'en', 'clinical/models')
    .setInputCols(["ner_chunk"])
    .setOutputCol("sbert_embeddings")
)

# Pretrained resolver that writes its result to "rxnorm_code",
# configured with the EUCLIDEAN distance function.
rxnorm_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_rxnorm", "en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("rxnorm_code")
    .setDistanceFunction("EUCLIDEAN")
)

# The stages are handed straight to PipelineModel (no fit step in this cell).
rxnorm_pipelineModel = PipelineModel(
    stages=[documentAssembler, sbert_embedder, rxnorm_resolver]
)

# LightPipeline wrapper; later cells pass it to get_codes().
rxnorm_lp = LightPipeline(rxnorm_pipelineModel)


sbiobert_base_cased_mli download started this may take some time.
Approximate size to download 384.3 MB
[OK!]
sbiobertresolve_rxnorm download started this may take some time.
Approximate size to download 810.7 MB
[OK!]


## CPT pipeline

In [153]:
# DocumentAssembler reads the "text" column and writes its output under the name
# "ner_chunk", which is the column the embedder and resolver below take as input.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")
)

# Pretrained sentence-BERT embedder applied to the "ner_chunk" column.
sbert_embedder = (
    BertSentenceEmbeddings
    .pretrained('sbiobert_base_cased_mli', 'en', 'clinical/models')
    .setInputCols(["ner_chunk"])
    .setOutputCol("sbert_embeddings")
)

# Pretrained resolver that writes its result to "cpt_code",
# configured with the EUCLIDEAN distance function.
cpt_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_cpt_procedures_augmented", "en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("cpt_code")
    .setDistanceFunction("EUCLIDEAN")
)

# The stages are handed straight to PipelineModel (no fit step in this cell).
cpt_pipelineModel = PipelineModel(
    stages=[documentAssembler, sbert_embedder, cpt_resolver]
)

# LightPipeline wrapper; later cells pass it to get_codes().
cpt_lp = LightPipeline(cpt_pipelineModel)


sbiobert_base_cased_mli download started this may take some time.
Approximate size to download 384.3 MB
[OK!]
sbiobertresolve_cpt_procedures_augmented download started this may take some time.
Approximate size to download 112.2 MB
[OK!]


## All the resolvers in the same pipeline
### (Shown only to demonstrate how it is done; this pipeline will not be used in this notebook.)

In [None]:

# DocumentAssembler reads the "text" column and writes its output under the name
# "ner_chunk", which is the column the embedder and every resolver below read.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")
)

# One shared sentence-BERT embedder; all four resolvers consume "sbert_embeddings".
sbert_embedder = (
    BertSentenceEmbeddings
    .pretrained('sbiobert_base_cased_mli', 'en', 'clinical/models')
    .setInputCols(["ner_chunk"])
    .setOutputCol("sbert_embeddings")
)

# Four pretrained resolvers with identical inputs; each writes to its own output column.
snomed_ct_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_snomed_findings", "en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("snomed_code")
    .setDistanceFunction("EUCLIDEAN")
)

icd10_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_icd10cm_augmented", "en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("icd10cm_code")
    .setDistanceFunction("EUCLIDEAN")
)

rxnorm_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_rxnorm", "en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("rxnorm_code")
    .setDistanceFunction("EUCLIDEAN")
)

cpt_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_cpt_procedures_augmented", "en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("cpt_code")
    .setDistanceFunction("EUCLIDEAN")
)

# The stages are handed straight to PipelineModel (no fit step in this cell).
resolver_pipelineModel = PipelineModel(
    stages=[
        documentAssembler,
        sbert_embedder,
        snomed_ct_resolver,
        icd10_resolver,
        rxnorm_resolver,
        cpt_resolver,
    ]
)

resolver_lp = LightPipeline(resolver_pipelineModel)


## utility functions

In [154]:
import pandas as pd

# Sets pandas' column-width display option for the tables shown below.
# NOTE(review): presumably 0 is meant to disable truncation of the long candidate
# lists; newer pandas versions document None for that — confirm against the
# pandas version in use.
pd.set_option('display.max_colwidth', 0)


def get_codes(lp, text, vocab='icd10cm_code'):
    """Run ``text`` through a light pipeline and tabulate the resolver output.

    Only the first element returned by ``lp.fullAnnotate(text)`` is read. Its
    ``'ner_chunk'`` annotations are paired positionally with the annotations
    stored under ``vocab``; each pair becomes one row.

    Parameters
    ----------
    lp : object with a ``fullAnnotate(text)`` method
        The pipeline to query (the notebook passes LightPipeline objects).
    text : str
        Text handed to ``lp.fullAnnotate``.
    vocab : str, default 'icd10cm_code'
        Key of the resolver output to read from the result,
        e.g. ``'snomed_code'``, ``'rxnorm_code'`` or ``'cpt_code'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``chunks``, ``begin``, ``end``, ``entity``, ``code``,
        ``all_codes``, ``resolutions`` and ``all_distances``. The last three
        hold lists obtained by splitting the ``all_k_*`` metadata strings on
        ``':::'``. ``entity`` is ``None`` when the chunk metadata carries no
        ``'entity'`` entry. An empty frame with the same columns is returned
        when ``fullAnnotate`` yields no result.

    Raises
    ------
    KeyError
        If the result lacks ``'ner_chunk'`` or ``vocab``.
    """
    columns = ['chunks', 'begin', 'end', 'entity', 'code',
               'all_codes', 'resolutions', 'all_distances']

    full_light_result = lp.fullAnnotate(text)
    if not full_light_result:
        return pd.DataFrame([], columns=columns)

    annotations = full_light_result[0]

    records = []
    for chunk, code in zip(annotations['ner_chunk'], annotations[vocab]):
        code_meta = code.metadata
        records.append({
            'chunks': chunk.result,
            'begin': chunk.begin,
            'end': chunk.end,
            # The pipelines in this notebook feed the raw document in as the
            # chunk, so there may be no 'entity' key; .get() avoids a KeyError
            # when metadata is a plain dict.
            'entity': chunk.metadata.get('entity'),
            'code': code.result,
            'all_codes': code_meta['all_k_results'].split(':::'),
            'resolutions': code_meta['all_k_resolutions'].split(':::'),
            'all_distances': code_meta['all_k_distances'].split(':::'),
        })

    return pd.DataFrame(records, columns=columns)



## getting some predictions from resolvers

In [50]:
text = 'bladder cancer'

# Look the phrase up with the ICD-10-CM pipeline; %time reports how long the call takes.
%time get_codes (icd_lp, text, vocab='icd10cm_code')

CPU times: user 9.88 ms, sys: 5.1 ms, total: 15 ms
Wall time: 704 ms


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,bladder cancer,0,13,,C679,"[C679, Z126, D090, D494, C7911]","[bladder cancer, suspected bladder cancer, cancer in situ of urinary bladder, tumor of bladder neck, malignant tumour of bladder neck]","[0.0000, 7.4914, 7.9359, 8.2425, 9.0119]"


In [138]:
text = 'severe stomach pain'

# Look the phrase up with the ICD-10-CM pipeline; %time reports how long the call takes.
%time get_codes (icd_lp, text, vocab='icd10cm_code')

CPU times: user 8.01 ms, sys: 6.76 ms, total: 14.8 ms
Wall time: 708 ms


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,severe stomach pain,0,18,,R101,"[R101, R109, R1082, M791, R104, R1084, K388, R529, F454, R079, R53, R1013, R52, M7918, R51, R520, R4589, R1033, R072, R103, R1010]","[upper abdominal pain, intractable abdominal pain, epigastric pain, sore pain, generalized colicky abdominal pain, generalized abdominal pain, colicky abdominal pain, severe pain, psychosomatic abdominal pain, upper chest pain, exhausting with pain, burning epigastric pain, heavy pain, abdominal muscle pain, head pain, horrible present pain, cruel with pain, central abdominal pain, esophageal pain, o/e - epigastric pain, ulcer-type pain]","[6.0181, 6.1794, 6.2316, 6.3038, 6.3213, 6.3808, 6.4652, 6.5255, 6.6239, 6.6474, 6.6767, 6.7574, 6.7577, 6.7744, 6.8206, 6.8932, 6.9044, 7.0089, 7.0235, 7.0339, 7.0490]"


In [51]:
text = 'bladder cancer'

# Same phrase as the ICD-10-CM example above, this time sent through the SNOMED pipeline.
%time get_codes (snomed_lp, text, vocab='snomed_code')

CPU times: user 6.3 ms, sys: 8.86 ms, total: 15.2 ms
Wall time: 773 ms


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,bladder cancer,0,13,,399326009,"[399326009, 363455001, 425066001, 255108000, 269607003, 154540000, 425231005, 255110003, 393562002, 255109008, 255111004, 92546004, 254932004]","[bladder cancer, bladder cancer, invasive bladder cancer, carcinoma of bladder, carcinoma of bladder, carcinoma of bladder, superficial bladder cancer, adenocarcinoma of bladder, transitional cell carcinoma of bladder, transitional cell carcinoma of bladder, squamous cell carcinoma of bladder, cancer in situ of urinary bladder, tumor of bladder neck]","[0.0000, 0.0000, 5.8576, 6.4952, 6.4952, 6.4952, 7.1698, 7.4762, 7.5159, 7.5159, 7.6426, 7.9359, 8.2425]"


In [52]:
text = '67 yrs-old'

# An age expression sent through the SNOMED pipeline.
%time get_codes (snomed_lp, text, vocab='snomed_code')

CPU times: user 5.77 ms, sys: 9.53 ms, total: 15.3 ms
Wall time: 771 ms


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,67 yrs-old,0,9,,102527003,"[102527003, 102528008, 2391000124102, 111522004, 28288005, 49808004, 260278007, 248280005, 102891000, 152003002, 183774000, 308508003, 249860006, 246944005]","[age 60 to 64 years (finding), age more than 65 years (finding), universal designation 67, gerontoxon, middle-age, geriatric state, 6/60 (finding), aging, age-related cognitive decline (finding), listed for geriatrics admiss'n, listed for geriatrics admiss'n, geriatric monitoring status (finding), posture appropriate for age, pseudo-gerontoxon]","[11.4971, 11.5051, 11.7625, 12.4103, 12.6651, 12.9846, 13.0190, 13.1351, 13.2770, 13.3563, 13.3563, 13.3586, 13.3714, 13.3961]"


In [53]:
text = 'metformin 100 mg'

# A drug-and-strength phrase sent through the RxNorm pipeline.
%time get_codes (rxnorm_lp, text, vocab='rxnorm_code')

CPU times: user 11.7 ms, sys: 3.59 ms, total: 15.3 ms
Wall time: 468 ms


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,metformin 100 mg,0,15,,406081,"[406081, 576612, 403968, 861024, 404727, 334738, 332848, 861026, 333262, 439563, 450523, 1744000, 484793, 402346, 1726496, 316350, 858858, 336846, 316844, 1946837, 451225, 328507, 437723, 385601, 315677]","[metformin 100 mg/ml, metformin 100 mg/ml [riomet], metformin 100 mg/ml oral solution, metformin hydrochloride 100 mg/ml, metformin 100 mg/ml oral solution [riomet], fenofibrate 100 mg, ciprofibrate 100 mg, metformin hydrochloride 100 mg/ml [riomet], rutin 100 mg, fendiline 100 mg, perazine 100 mg, emtricitabine 100 mg, solifenacin 100 mg, miglustat 100 mg, azacitidine 100 mg, niacin 100 mg, carnosine 100 mg, trimebutine 100 mg, torsemide 100 mg, abemaciclib 100 mg, pyrantel 100 mg, rimantadine 100 mg, azintamide 100 mg, mebeverine 100 mg, cimetidine 100 mg]","[3.8387, 4.8433, 5.6433, 6.3835, 6.4686, 6.5293, 6.5728, 6.9061, 6.9297, 6.9512, 7.0943, 7.1014, 7.1202, 7.1323, 7.1327, 7.1425, 7.2076, 7.2118, 7.2518, 7.2531, 7.2533, 7.2761, 7.2802, 7.2915, 7.3286]"


In [137]:
text = 'heart surgery'

# A procedure phrase sent through the CPT pipeline.
%time get_codes (cpt_lp, text, vocab='cpt_code')

CPU times: user 6.68 ms, sys: 9.15 ms, total: 15.8 ms
Wall time: 103 ms


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,heart surgery,0,12,,33257,"[33257, 33258, 33259, 33999, 0306T, 0304T, 0302T, 0167T, 93583, 1006057, 62319]","[Cardiac surgery procedure, Cardiac surgery procedure, Cardiac surgery procedure, Cardiac surgery procedure, Heart procedure, Heart procedure, Heart procedure, Heart implantation, Operation on cardiac septum, Surgical Procedures on the Heart and Pericardium, Insertion of catheter into heart chamber]","[8.1433, 8.1433, 8.1433, 8.1433, 9.3368, 9.3368, 9.3368, 10.0148, 10.0324, 10.6450, 10.8858]"


In [156]:
text = 'ct abdomen'

# An abbreviated procedure phrase sent through the CPT pipeline.
%time get_codes (cpt_lp, text, vocab='cpt_code')

CPU times: user 12.2 ms, sys: 2.56 ms, total: 14.8 ms
Wall time: 99.8 ms


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,ct abdomen,0,9,,62284,"[62284, 74183, 74181, 70481, 70480, 76700, 76705, 0157T, 49329, 49999, 61108, 70540, 70543, 71555, 49080, 49083, 33891, 34051, 34001, 35002]","[CT of spine, MRI of abdomen, MRI of abdomen, CT of orbits, CT of orbits, US abdominal scan, US abdominal scan, Opening of abdomen, Procedure on abdomen, Procedure on abdomen, Centesis, MRI of neck, MRI of neck, MRI of chest, Drainage of abdomen, Drainage of abdomen, Incision of neck, Incision of neck, Incision of neck, Incision of neck]","[8.8537, 9.1304, 9.1304, 9.5856, 9.5856, 9.9397, 9.9397, 9.9516, 10.2509, 10.2509, 10.6676, 10.7085, 10.7085, 10.7766, 10.8118, 10.8118, 10.9787, 10.9787, 10.9787, 10.9787]"


In [157]:
text = 'Left heart cath'

# Another abbreviated procedure phrase sent through the CPT pipeline.
%time get_codes (cpt_lp, text, vocab='cpt_code')

CPU times: user 13.4 ms, sys: 4.48 ms, total: 17.9 ms
Wall time: 105 ms


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,Left heart cath,0,14,,93462,"[93462, 93607, 0281T, 93620, 93514, 93452, 93565, 93568, 93564, 93567, 93566, 62319]","[Cardiac catheterisation, left heart, Left ventricular recording (Deprecated), LAA - Plication of left atrial appendage, Left ventricular recording, Left heart catheterization by left ventricular puncture, Ventriculography of left ventricle, CCA - Cardiac catheterisation, CCA - Cardiac catheterisation, CCA - Cardiac catheterisation, CCA - Cardiac catheterisation, CCA - Cardiac catheterisation, CCA - Cardiac catheterisation]","[8.6548, 9.8134, 9.8843, 10.0954, 10.3075, 10.5515, 10.6741, 10.6741, 10.6741, 10.6741, 10.6741, 10.6741]"


In [165]:
# Sample phrases that the loop cells below send through the ICD-10-CM pipeline.
icd_chunks = [
    'advanced liver disease',
    'advanced lung disease',
    'basal cell carcinoma of skin',
    'acute maxillary sinusitis',
    'chronic kidney disease stage',
    'diabetes mellitus type 2',
    'lymph nodes of multiple sites',
    'other chronic pain',
    'severe abdominal pain',
    'squamous cell carcinoma of skin',
    'type 2 diabetes mellitus',
]

# Sample phrases that the loop cells below send through the SNOMED pipeline.
snomed_chunks = [
    'anemia',
    'adenocarcinoma',
    'aortic valve stenosis',
    'atherosclerosis',
    'atrial fibrillation',
    'dyspnea',
    'hypertension',
    'lung cancer',
    'seizure',
    'squamous cell carcinoma',
    'stage IIIB',
    'mediastinal lymph nodes',
]

In [61]:
from IPython.display import display

# For every sample phrase: print a marker line, then render its ICD-10-CM table.
for chunk in icd_chunks:
    print('>> ', chunk)
    display(get_codes(icd_lp, chunk, vocab='icd10cm_code'))

>>  advanced liver disease


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,advanced liver disease,0,21,,K7460,"[K7460, K769, K721, K746, R945, Z871, K7402, K729, K7290, K7689, D489, D499, R748, K719]","[advanced cirrhosis, chronic liver disease, chronic hepatic failure, cirrhosis and chronic liver disease, abnormal liver function, h/o: liver disease, hepatic fibrosis, advanced fibrosis, liver function failure, liver failure, abnormal hepatic function, tumor of advanced extent, tumour of advanced extent, elevated liver enzymes level, toxic liver disease]","[6.8937, 8.3979, 8.5299, 9.1728, 9.3652, 9.3766, 9.4469, 9.4751, 9.5000, 9.8048, 9.8914, 9.8951, 10.0151, 10.0640]"


>>  advanced lung disease


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,advanced lung disease,0,20,,D499,"[D499, D489, J449, H359, J849, J628, K7460, J399, I5083, J961, J989, I509, J8410, R54, J984, I279]","[tumour of advanced extent, tumor of advanced extent, severe chronic obstructive pulmonary disease, advanced retinal disease, chronic infiltrative lung disease, chronic lung disease, advanced cirrhosis, upper respiratory disease, high output heart failure, chronic respiratory failure, chronic respiratory system disease, cardiac failure, progressive lung fibrosis, advanced age, chronic lung disease (disorder), chronic pulmonary heart disease]","[9.2256, 9.4250, 9.5090, 9.5274, 9.5762, 9.5848, 9.9092, 9.9598, 9.9863, 10.1878, 10.2669, 10.2964, 10.3895, 10.4332, 10.4493, 10.4542]"


>>  basal cell carcinoma of skin


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,basal cell carcinoma of skin,0,27,,C4491,"[C4491, C449, C4451, D049, C4461, C4431, C4441, C4499, C4A70, C4481, C445, C4401, C444]","[basal cell carcinoma of skin, nodular basal cell carcinoma of skin, basal cell carcinoma of skin of trunk, basal cell carcinoma of skin in situ, basal cell carcinoma of arm, basal cell carcinoma of face, basal cell carcinoma of scalp, metatypical basal cell carcinoma of skin, trabecular cell carcinoma of skin, basal cell carcinoma of overlapping sites of skin, basal cell carcinoma of truncal skin, basal cell carcinoma of skin of lip, basal cell carcinoma of neck]","[0.0000, 3.3611, 4.1577, 4.5994, 4.6837, 4.9820, 5.4873, 5.5525, 6.0951, 6.1410, 6.1487, 6.3073, 6.4116]"


>>  acute maxillary sinusitis


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,acute maxillary sinusitis,0,24,,J0100,"[J0100, J010, J0101, J320, R220, J349, T170]","[acute maxillary sinusitis, acute maxillary sinusitis (disorder), acute recurrent maxillary sinusitis, chronic maxillary sinusitis, swelling over maxillary sinus, disease of maxillary sinus, foreign body in maxillary sinus (disorder)]","[0.0000, 2.9165, 4.5045, 5.7618, 8.1363, 8.2624, 8.3244]"


>>  chronic kidney disease stage


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,chronic kidney disease stage,0,27,,N189,"[N189, D638, N289, Q619, D631, I120, N18, P2930, I139, N185, N186, Z9189, Z874, Z992, Z8744, T509]","[chronic kidney disease, anaemia in chronic kidney disease, kidney disease, cystic kidney disease, anemia in chronic kidney disease, end stage kidney disease, chronic kidney disease (ckd), chronic renal failure, cardiovascular renal disease, end-stage renal disease, end stage renal disease, at risk of chronic kidney disease, history of chronic kidney disease, end-stage renal disease (disorder), history of chronic renal impairment (situation), chronic drug-induced renal disease]","[4.1353, 6.6097, 6.6225, 6.7265, 6.7782, 6.8781, 7.0392, 7.1090, 7.1230, 7.1691, 7.3866, 7.4167, 7.6542, 7.6851, 7.7092, 7.7946]"


>>  diabetes mellitus type 2


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,diabetes mellitus type 2,0,23,,E119,"[E119, E118, O2411, E113, E139, E1143, E1144, E114, E117, E112, Z8639, E1151, Z863]","[diabetes mellitus type 2, disorder associated with type 2 diabetes mellitus, pre-existing type 2 diabetes mellitus, disorder of eye with type 2 diabetes mellitus, secondary diabetes mellitus, neurological disorder with type 2 diabetes mellitus, disorder of nervous system due to type 2 diabetes mellitus, neurological disorder with diabetes type 2, multiple complications of type 2 diabetes mellitus, kidney disorder associated with type 2 diabetes mellitus, history of diabetes mellitus type 2, peripheral circulatory disorder associated with type 2 diabetes mellitus, history of diabetes mellitus type 2 (situation)]","[0.0000, 4.6853, 5.5377, 5.5751, 5.8812, 6.0231, 6.0334, 6.0512, 6.2205, 6.3302, 6.4521, 6.5249, 6.5542]"


>>  lymph nodes of multiple sites


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,lymph nodes of multiple sites,0,28,,C8408,"[C8408, M6749, R238, T009, R591, M259, R590, J984, L029, C779, R229, L918, C801, R6889, M2540, H531, C8598, R198, C8418]","[neoplasm of lymph nodes of multiple sites, ganglion, multiple sites, multiple lesions, blisters of multiple sites, o/e - lymph nodes tethered, multiple joint involvement, lymphadenopathy of head and/or neck, multiple lesions (finding), boils of multiple sites, regional lymph node involvement present, multiple lumps, multiple skin tags, multiple malignancy, multiple symptoms, effusion of joint of multiple sites, multiple visual images, lymphoma involves multiple lymph node regions, abdominal movements, sezary disease, lymph nodes of multiple sites]","[8.5275, 8.5320, 9.0397, 9.4686, 9.6408, 9.6801, 9.8005, 9.9042, 10.0567, 10.0941, 10.1249, 10.1299, 10.2893, 10.3288, 10.3392, 10.4121, 10.4892, 10.5064, 10.5098]"


>>  other chronic pain


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,other chronic pain,0,17,,G8929,"[G8929, G8928, R52, R108, R078, R522, I999, M2550, R51, M2662, F4541, R521, M2559, R5383, G894, M5499]","[other chronic pain, other chronic postprocedural pain, chronic pain, other abdominal pain, other chest pain, generalized chronic body pains, chronic visceral pain (disorder), multiple joint pain, chronic secondary facial pain, chronic pain in face, chronic psychogenic pain, chronic intractable pain (finding), pain in other specified joint, other fatigue, chronic pain syndrome (disorder), back pain]","[0.0000, 6.3950, 6.4459, 6.5697, 6.6665, 7.2255, 7.5337, 7.5934, 7.7937, 7.7954, 7.8493, 7.8575, 7.8742, 7.9303, 7.9360, 7.9382]"


>>  severe abdominal pain


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,severe abdominal pain,0,20,,R1084,"[R1084, R101, R109, K388, R104, R1033, R102, F454, K805, R529, R52, M2551]","[generalized abdominal pain, upper abdominal pain, intractable abdominal pain, colicky abdominal pain, generalized colicky abdominal pain, central abdominal pain, acute abdominal pain, psychosomatic abdominal pain, recurrent abdominal pain, severe pain, heavy pain, shoulder pain from abdomen]","[3.6894, 3.8009, 3.8049, 4.6934, 4.7209, 4.9342, 5.0550, 5.0671, 5.3116, 5.4290, 5.8415, 5.9116]"


>>  squamous cell carcinoma of skin


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,squamous cell carcinoma of skin,0,30,,C449,"[C449, C443, D049, C4492, C4452, C4499, C4432, C809, C4442, C4A70, C444, C4491, C799]","[squamous cell carcinoma of skin, squamous cell carcinoma of skin of face, squamous cell carcinoma in situ of skin, scc - squamous cell carcinoma of skin, squamous cell carcinoma of skin of trunk, mucoepidermoid carcinoma of skin, squamous cell carcinoma of skin of and unsp parts of face, squamous cell carcinoma, squamous cell carcinoma of skin of neck, trabecular cell carcinoma of skin, squamous cell carcinoma of scalp, basal cell carcinoma of skin, squamous cell carcinomatosis]","[0.0000, 3.9417, 4.3020, 4.5867, 4.6218, 5.1354, 5.6903, 5.8818, 5.9843, 6.1362, 6.2912, 6.2998, 6.3445]"


>>  type 2 diabetes mellitus


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,type 2 diabetes mellitus,0,23,,E119,"[E119, E118, E113, O2411, E139, E117, E1143, E109, E1169, E114, E112, E1151, E1162]","[type 2 diabetes mellitus, disorder associated with type 2 diabetes mellitus, disorder of eye with type 2 diabetes mellitus, pre-existing type 2 diabetes mellitus, secondary diabetes mellitus, multiple complications of type 2 diabetes mellitus, neurological disorder with type 2 diabetes mellitus, diabetes mellitus, diabetic dyslipidemia associated with type 2 diabetes mellitus, neurological disorder with diabetes type 2, kidney disorder associated with type 2 diabetes mellitus, peripheral circulatory disorder associated with type 2 diabetes mellitus, type 2 diabetes mellitus with skin complications]","[0.0000, 3.7729, 4.8808, 4.9391, 5.1732, 5.2452, 5.5212, 5.5994, 5.7916, 5.9043, 5.9998, 6.0564, 6.0600]"


In [166]:

# For every sample phrase: print a marker line, then render its SNOMED table.
for chunk in snomed_chunks:
    print('>> ', chunk)
    display(get_codes(snomed_lp, chunk, vocab='snomed_code'))

>>  anemia


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,anemia,0,5,,271737000,"[271737000, 64593003, 154786001, 11503009, 191268006, 35372003, 191125001, 267513007, 234347009, 737220002, 234360003, 141336002, 164139008, 84027009, 191139001, 154789008]","[anemia, anemia, anemia, relative anemia, chronic anemia, aids with anemia, deficiency anemias, deficiency anemias, secondary anemia, metabolic anemia, vegan anemia, o/e - anemia, o/e - anemia, pernicious anemia, pernicious anemia, pernicious anemia]","[0.0000, 0.0000, 0.0000, 5.3735, 5.3737, 5.6006, 5.7604, 5.7604, 6.5642, 6.6888, 6.6901, 6.7129, 6.7129, 7.0680, 7.0680, 7.0680]"


>>  adenocarcinoma


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,adenocarcinoma,0,13,,443961001,"[443961001, 403902008, 423595004, 189578007, 189614008, 423607006, 722688002, 154433003, 269513004, 269546005, 443416007, 403951006, 307599002, 441535001, 307593001, 154576004, 269624009, 188476000, 443261008]","[adenocarcinoma, adenosquamous carcinoma, adenocarcinoma carcinomatosis, [m]adenocarcinomas, adenocarcinoid tumor, adenocarcinoma of anus, carcinoma, carcinoma, carcinoma, anus carcinoma, adenoma, ceruminous gland adenocarcinoma, sebaceous adenocarcinoma, adenocarcinoma of head and neck, carcinomatosis, carcinomatosis, carcinomatosis, carcinomatosis, oxyphilic adenocarcinoma]","[0.0000, 4.9457, 5.7756, 5.9483, 6.0696, 6.8884, 6.9016, 6.9016, 6.9016, 6.9074, 7.1031, 7.3334, 7.4297, 7.4467, 7.5127, 7.5127, 7.5127, 7.5127, 7.6008]"


>>  aortic valve stenosis


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,aortic valve stenosis,0,20,,60573004,"[60573004, 472783006, 390722003, 67754003, 204436002, 703170006, 194987006, 8722008, 44241007, 19833008, 194733006, 427515002, 60234000, 93384001, 268185002, 703297006]","[aortic valve stenosis, subneoaortic valve stenosis, aortic stenosis, aortic valve sclerosis, supra-valvular aortic stenosis, prosthetic aortic valve stenosis, aortic valve stenosis with insufficiency, aortic valve disorder, heart valve stenosis, nodular calcific aortic valve stenosis, mitral and aortic stenosis, critical stenosis of aortic valve (disorder), aortic valve regurgitation, stenosis of aortic arch, supravalvar aortic stenosis, prosthetic aortic valve stenosis and regurgitation]","[0.0000, 3.7787, 4.1363, 4.4184, 4.5088, 4.5466, 4.8286, 5.2871, 5.3160, 5.7015, 5.7199, 5.7988, 5.8731, 5.8812, 5.9133, 5.9140]"


>>  atherosclerosis


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,atherosclerosis,0,14,,155382007,"[155382007, 155414001, 195251000, 266318005, 194848007, 441574008, 443502000, 41702007, 266231003, 155316000, 194841001, 300920004, 39468009, 155415000, 195252007, 129573006, 266258005]","[atherosclerosis, atherosclerosis, atherosclerosis, atherosclerosis, atherosclerosis, atherosclerosis artery, coronary atherosclerosis, coronary atherosclerosis, coronary atherosclerosis, coronary atherosclerosis, coronary atherosclerosis, carotid atherosclerosis, cardiovascular arteriosclerosis, aortic atherosclerosis, aortic atherosclerosis, atherosclerotic ischemic disease, cerebral atherosclerosis]","[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 4.2508, 5.4051, 5.4051, 5.4051, 5.4051, 5.4051, 5.5311, 5.5726, 5.6470, 5.6470, 5.6516, 6.2210]"


>>  atrial fibrillation


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,atrial fibrillation,0,18,,266306001,"[266306001, 49436004, 155364009, 282825002, 440059007, 426749004, 195081002, 762247006, 164889003, 300996004, 142049000, 440028005, 195080001, 81216002]","[atrial fibrillation, atrial fibrillation, atrial fibrillation, intermittent atrial fibrillation, persistent atrial fibrillation, chronic atrial fibrillation, paroxysmal atrial fibrillation, preexcited atrial fibrillation, electrocardiographic atrial fibrillation, controlled atrial fibrillation (disorder), ecg: atrial fibrillation, permanent atrial fibrillation (disorder), atrial fibrillation and flutter, atrial fibrillation and flutter]","[0.0000, 0.0000, 0.0000, 3.5172, 3.8361, 4.2111, 4.2605, 4.8571, 5.1669, 5.6137, 5.7312, 5.9392, 5.9895, 5.9895]"


>>  dyspnea


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,dyspnea,0,6,,49233005,"[49233005, 267036007, 139201002, 161946002, 60845006, 25209001, 34560001, 59265000, 20112008, 161948001, 139204005, 71646001]","[dyspnea, dyspnea, dyspnea, dyspnea, exertional dyspnea, inspiratory dyspnea, expiratory dyspnea, paroxysmal dyspnea, dyspnea leaning over, dyspnea on exertion, dyspnea on exertion, aids with dyspnea]","[0.0000, 0.0000, 0.0000, 0.0000, 3.4121, 3.6237, 3.6448, 4.3325, 4.3846, 4.5155, 4.5155, 4.5608]"


>>  hypertension


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,hypertension,0,11,,38341003,"[38341003, 155295004, 266287006, 194756002, 194757006, 59621000, 155296003, 270440008, 185725003, 31992008, 155300002, 48146000, 697929007, 64715009, 155297007, 276789009]","[hypertension, hypertension, hypertension, hypertensive disease, essential hypertension, essential hypertension, essential hypertension, hypertension monitored, hypertension monitored, secondary hypertension, secondary hypertension, diastolic hypertension, intermittent hypertension, hypertensive heart disease, hypertensive heart disease, labile hypertension]","[0.0000, 0.0000, 0.0000, 3.5649, 4.7479, 4.7479, 4.7479, 4.9030, 4.9030, 5.2689, 5.2689, 5.5507, 5.8350, 6.1363, 6.1363, 6.3069]"


>>  lung cancer


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,lung cancer,0,10,,187875007,"[187875007, 93880001, 363358000, 154485001, 269561006, 94391008, 448993007, 254637007, 92649001, 354701000119107, 707410008, 254626006, 723301009, 254632001, 126713003, 707596000, 254634000, 363429002, 93859007, 254628007]","[lung cancer, lung cancer, ca - lung cancer, lung carcinoma, lung carcinoma, secondary cancer of lung, carcinoma of lung, non-small cell lung cancer (disorder), cancer in situ of lung, cancer of left lung, solid carcinoma of lung, adenocarcinoma of lung, squamous non-small cell lung cancer, sclc - small cell lung cancer, tumour of lung, carcinosarcoma of lung, squamous cell carcinoma of lung, laryngeal cancer, laryngeal cancer, carcinoma of lung parenchyma]","[0.0000, 0.0000, 6.4548, 6.5016, 6.5016, 7.3239, 7.3551, 7.6227, 8.1235, 8.1420, 8.2615, 8.4661, 8.5329, 8.6163, 8.6370, 8.7714, 8.8035, 9.0262, 9.0262, 9.0663]"


>>  seizure


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,seizure,0,6,,271788002,"[271788002, 91175000, 460681000124100, 163588007, 313287004, 246545002, 246530009, 128613002, 71427006, 246537007, 191714002, 14521008, 29753000, 363692008, 460731000124105, 29963001]","[seizure, seizure, serial seizure, o/e - a seizure, observations of seizure, generalized seizure, versive seizure (finding), seizure disorder, cursive seizure, affective seizure, factitious seizures, visual seizure, partial seizure, partial seizure, recurrent seizure (disorder), deja-vu seizure]","[0.0000, 0.0000, 6.8610, 6.9747, 7.2900, 7.6817, 7.8421, 7.8967, 7.9166, 8.0610, 8.0714, 8.1994, 8.2773, 8.2773, 8.2955, 8.3096]"


>>  squamous cell carcinoma


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,squamous cell carcinoma,0,22,,402815007,"[402815007, 154605007, 425303004, 254651007, 403904009, 189565007, 403905005, 403902008, 400066006, 403903003, 189568009, 403899005, 254656002, 254657006, 423284006, 403892001, 716659002]","[squamous cell carcinoma, squamous cell carcinoma, squamous cell carcinomatosis, cutaneous squamous cell carcinoma, verrucous squamous cell carcinoma, squamous cell carcinoma in situ, multiple squamous cell carcinomata, adenosquamous cell carcinoma, intraepithelial squamous cell carcinoma, signet ring squamous cell carcinoma, keratinising squamous cell carcinoma, squamous cell carcinoma of skin of trunk, squamous cell carcinoma in situ of skin, squamous cell carcinoma in situ of skin, squamous cell carcinoma of skin of neck, squamous cell carcinoma of skin of face, squamous cell carcinoma of head and neck]","[0.0000, 0.0000, 3.4540, 5.0525, 5.3144, 5.4448, 5.5520, 5.5603, 5.9656, 6.2072, 6.2416, 6.5220, 6.5347, 6.5347, 6.6065, 6.7793, 6.8173]"


>>  stage IIIB


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,stage IIIB,0,9,,64062008,"[64062008, 73082003, 45597001, 443815000, 50283003, 48105005, 64877001, 443962008, 55336000, 33907000, 433144002, 443817008, 61026006, 52708004, 434002]","[clinical stage 3 b, clinical stage 3 a, group a3b, pt3a,b category, clinical stage iii, clinical stage 3 c, clinical stage iii s, pn3b category, clinical stage iii d, clinical stage iii e, ckd stage 3, pt3b category, g3 stage, figo cc stage iii, ebr 3]","[6.8802, 8.4251, 8.9579, 8.9624, 9.0434, 9.1384, 9.4788, 9.5235, 9.5287, 9.5992, 9.7577, 9.8641, 10.7132, 10.7660, 10.7975]"


>>  mediastinal lymph nodes


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,mediastinal lymph nodes,0,22,,301296002,"[301296002, 127248001, 49483002, 94147001, 234317007, 52324001, 127245003, 30746006, 274711004, 19616004, 126725000, 301855007, 425061006, 127274007, 76616003, 127232002, 127235000, 127234001]","[mediastinal observation, neoplasm of mediastinal lymph nodes, mediastinal disease, mediastinal mass, lymphangiography of mediastinal lymph nodes, mediastinal lymphadenopathy, neoplasm of intrathoracic lymph nodes, swelling of lymph nodes, mediastinal shift, mediastinal shift, tumour of mediastinum, finding of lymph node, lymphadenopathy of head and/or neck, neoplasm of lymph nodes of multiple sites, disease of lymph node, neoplasm of lymph node, neoplasm of lymph nodes of neck, neoplasm of lymph nodes of face]","[7.3230, 7.3911, 7.8429, 8.5316, 8.9097, 9.0562, 9.1809, 9.1958, 9.2519, 9.2519, 9.4111, 9.4330, 9.4808, 9.5067, 9.5165, 9.6191, 9.6749, 9.6974]"


In [140]:
# Free-text clinical terms to resolve against both vocabularies.
clinical_chunks = [
    'bladder cancer',
    'anemia in chronic kidney disease',
    'castleman disease',
    'congestive heart failure',
    'diabetes mellitus type 2',
    'lymph nodes of multiple sites',
    'malignant melanoma of skin',
    'malignant neoplasm of lower lobe, bronchus',
    'metastatic lung cancer',
    'secondary malignant neoplasm of bone',
    'type 2 diabetes mellitus',
    'type 2 diabetes mellitus/insulin',
    'unsp malignant neoplasm of lymph node',
]

# For every chunk, show the ICD-10-CM result table first and the SNOMED one
# second. Each pair is (vocab label passed to get_codes, pipeline to query).
for chunk in clinical_chunks:
    print('>> ', chunk)

    for vocab_name, vocab_lp in (('icd10cm_code', icd_lp),
                                 ('snomed_code', snomed_lp)):
        print(vocab_name)
        display(get_codes(vocab_lp, chunk, vocab=vocab_name))

>>  bladder cancer
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,bladder cancer,0,13,,C679,"[C679, Z126, D090, D494, C7911]","[bladder cancer, suspected bladder cancer, cancer in situ of urinary bladder, tumor of bladder neck, malignant tumour of bladder neck]","[0.0000, 7.4914, 7.9359, 8.2425, 9.0119]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,bladder cancer,0,13,,399326009,"[399326009, 363455001, 425066001, 255108000, 269607003, 154540000, 425231005, 255110003, 393562002, 255109008, 255111004, 92546004, 254932004]","[bladder cancer, bladder cancer, invasive bladder cancer, carcinoma of bladder, carcinoma of bladder, carcinoma of bladder, superficial bladder cancer, adenocarcinoma of bladder, transitional cell carcinoma of bladder, transitional cell carcinoma of bladder, squamous cell carcinoma of bladder, cancer in situ of urinary bladder, tumor of bladder neck]","[0.0000, 0.0000, 5.8576, 6.4952, 6.4952, 6.4952, 7.1698, 7.4762, 7.5159, 7.5159, 7.6426, 7.9359, 8.2425]"


>>  anemia in chronic kidney disease
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,anemia in chronic kidney disease,0,31,,D631,"[D631, D638, N189, Q619, N185, P2930, Z874, E790, Z8744, N184]","[anemia in chronic kidney disease, anaemia in chronic kidney disease, anemia of chronic renal failure, cystic kidney disease, anemia in end stage renal disease, chronic renal failure, history of chronic kidney disease, chronic renal failure syndrome, history of chronic renal impairment, anemia in chronic kidney disease stage 4]","[0.0000, 3.3737, 5.2853, 6.7066, 6.8819, 7.1052, 7.3269, 7.3806, 7.5403, 7.6090]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,anemia in chronic kidney disease,0,31,,707323002,"[707323002, 49708008, 709044004, 236425005, 191270002, 234348004, 236439005, 156973002, 268332003, 707324008, 197654000, 155856009, 90688005]","[anemia in chronic kidney disease, anemia of chronic renal failure, chronic kidney disease, chronic kidney disease, anemia secondary to chronic renal failure, anemia of renal disease, cystic kidney disease, cystic kidney disease, cystic kidney disease, anemia in end stage renal disease, chronic renal failure, chronic renal failure, chronic renal failure]","[0.0000, 5.2853, 5.7326, 5.7326, 5.9749, 6.4353, 6.7066, 6.7066, 6.7066, 6.8819, 7.1052, 7.1052, 7.1052]"


>>  castleman disease
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,castleman disease,0,16,,D47Z2,"[D47Z2, D360, D1801, D477, M316, E755, E7152, L088, G5760, G950, K120, I808, L0889, D681, I311, A187, L817, D67, M898X, A498, M898, Q773, D763, T7029]","[castleman disease, castleman's disease, castleman's disease (disorder), [d] castleman's disease (disorder), horton's disease, chanarin-dorfman disease, addison's disease, knight disease, morton's disease, morvan's disease, sutton's disease, mondor's disease, knight's disease, rosenthal's disease, concato's disease, addison disease, majocchi's disease, christmas disease, medial coronoid process disease (disorder), hjarre's disease, medial coronoid process disease, conradi disease, rosai-dorfman disease, monge's disease]","[0.0000, 4.6389, 7.1305, 9.3531, 10.3352, 10.6785, 10.7004, 10.9132, 10.9655, 10.9841, 10.9996, 11.0178, 11.0602, 11.0672, 11.1010, 11.1543, 11.1766, 11.2100, 11.2162, 11.2402, 11.3004, 11.3282, 11.3318, 11.3924]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,castleman disease,0,16,,207036003,"[207036003, 87511001, 400130008, 203370008, 19604005, 154707007, 267483004, 363732003, 111562000, 31201001, 203367009, 30085007, 111496009, 80244009, 398676009, 69954004, 49762007, 204016007, 186578006, 37980001]","[castleman disease, horton's disease, horton's disease, pierson's disease, chanarin-dorfman disease, addison's disease, addison's disease, addison's disease, addison's disease, knight disease, calve - perthes' disease, morton's disease, morvan's disease, sutton's disease, sutton's disease, mondor's disease, rosenthal's disease, billroth's disease, duke-filatow's disease, concato's disease]","[0.0000, 10.3352, 10.3352, 10.6520, 10.6785, 10.7004, 10.7004, 10.7004, 10.7004, 10.9132, 10.9229, 10.9655, 10.9841, 10.9996, 10.9996, 11.0178, 11.0672, 11.0792, 11.0920, 11.1010]"


>>  congestive heart failure
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,congestive heart failure,0,23,,I500,"[I500, I5081, P290, I502, I503, I509, I5022, I5020, I504, I5082, I5032, I5042]","[congestive heart failure, chronic congestive heart failure, congestive cardiac failure, systolic (congestive) heart failure, diastolic (congestive) heart failure, chronic congestive heart failure (disorder), chronic systolic (congestive) heart failure, systolic heart failure, chronic systolic heart failure, biventricular congestive heart failure (disorder), chronic diastolic (congestive) heart failure, chronic systolic heart failure (disorder)]","[0.0000, 2.8459, 3.7332, 4.1255, 4.1951, 4.5853, 4.6271, 4.8750, 4.9274, 4.9635, 5.1880, 5.5686]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,congestive heart failure,0,23,,42343007,"[42343007, 195108009, 195110006, 88805009, 155375008, 266308000, 92506005, 698594003, 417996009, 418304008, 441481004, 67431000119105, 48447003, 314206003]","[congestive heart failure, congestive heart failure, chronic congestive heart failure, chronic congestive heart failure, congestive cardiac failure, congestive cardiac failure, biventricular congestive heart failure, symptomatic congestive heart failure, systolic heart failure, diastolic heart failure, chronic systolic heart failure, congestive heart failure stage d, chronic heart failure, refractory heart failure]","[0.0000, 0.0000, 2.8459, 2.8459, 3.7332, 3.7332, 3.7775, 4.3293, 4.8750, 4.8884, 4.9274, 5.2706, 5.4019, 5.7094]"


>>  diabetes mellitus type 2
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,diabetes mellitus type 2,0,23,,E119,"[E119, E118, O2411, E113, E139, E1143, E1144, E114, E117, E112, Z8639, E1151, Z863]","[diabetes mellitus type 2, disorder associated with type 2 diabetes mellitus, pre-existing type 2 diabetes mellitus, disorder of eye with type 2 diabetes mellitus, secondary diabetes mellitus, neurological disorder with type 2 diabetes mellitus, disorder of nervous system due to type 2 diabetes mellitus, neurological disorder with diabetes type 2, multiple complications of type 2 diabetes mellitus, kidney disorder associated with type 2 diabetes mellitus, history of diabetes mellitus type 2, peripheral circulatory disorder associated with type 2 diabetes mellitus, history of diabetes mellitus type 2 (situation)]","[0.0000, 4.6853, 5.5377, 5.5751, 5.8812, 6.0231, 6.0334, 6.0512, 6.2205, 6.3302, 6.4521, 6.5249, 6.5542]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,diabetes mellitus type 2,0,23,,44054006,"[44054006, 422014003, 199230006, 422099009, 190384004, 8801005, 421326000, 12811000119100, 420414003, 420279001, 422034002, 422166005, 1531000119102]","[diabetes mellitus type 2, disorder associated with type 2 diabetes mellitus, pre-existing type 2 diabetes mellitus, disorder of eye with type 2 diabetes mellitus, type ii diabetes mellitus, secondary diabetes mellitus, neurological disorder with type 2 diabetes mellitus, complication due to diabetes mellitus type 2, multiple complications of type 2 diabetes mellitus, kidney disorder associated with type 2 diabetes mellitus, retinopathy with type 2 diabetes mellitus, peripheral circulatory disorder associated with type 2 diabetes mellitus, diabetic dermopathy associated with diabetes mellitus type 2]","[0.0000, 4.6853, 5.5377, 5.5751, 5.6585, 5.8812, 6.0231, 6.2058, 6.2205, 6.3302, 6.3569, 6.5249, 6.5627]"


>>  lymph nodes of multiple sites
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,lymph nodes of multiple sites,0,28,,C8408,"[C8408, M6749, R238, T009, R591, M259, R590, J984, L029, C779, R229, L918, C801, R6889, M2540, H531, C8598, R198, C8418]","[neoplasm of lymph nodes of multiple sites, ganglion, multiple sites, multiple lesions, blisters of multiple sites, o/e - lymph nodes tethered, multiple joint involvement, lymphadenopathy of head and/or neck, multiple lesions (finding), boils of multiple sites, regional lymph node involvement present, multiple lumps, multiple skin tags, multiple malignancy, multiple symptoms, effusion of joint of multiple sites, multiple visual images, lymphoma involves multiple lymph node regions, abdominal movements, sezary disease, lymph nodes of multiple sites]","[8.5275, 8.5320, 9.0397, 9.4686, 9.6408, 9.6801, 9.8005, 9.9042, 10.0567, 10.0941, 10.1249, 10.1299, 10.2893, 10.3288, 10.3392, 10.4121, 10.4892, 10.5064, 10.5098]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,lymph nodes of multiple sites,0,28,,127274007,"[127274007, 300582001, 428176004, 302060005, 141354003, 164157009, 250099002, 425061006, 767188008, 238389005, 200626008, 399374009, 248528002, 403857009, 141355002, 164158004, 363500001, 269044004, 274303007, 141342003, 38276004]","[neoplasm of lymph nodes of multiple sites, multiple lesions, blisters of multiple sites, finding of lymph node and lymphatics, o/e - lymph nodes tethered, o/e - lymph nodes tethered, multiple joint involvement, lymphadenopathy of head and/or neck, ganglion of multiple joints, boils of multiple sites, boils of multiple sites, regional lymph node involvement present, multiple lumps, multiple skin tags, o/e - lymph nodes discrete, o/e - lymph nodes discrete, multiple malignancy, o/e - lymph nodes, o/e - lymph nodes, o/e - lymph nodes, multiple symptoms]","[8.5275, 9.0397, 9.4686, 9.5210, 9.6408, 9.6408, 9.6801, 9.8005, 9.8055, 10.0567, 10.0567, 10.0941, 10.1249, 10.1299, 10.1748, 10.1748, 10.2893, 10.3122, 10.3122, 10.3122, 10.3288]"


>>  malignant melanoma of skin
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,malignant melanoma of skin,0,25,,C439,"[C439, C433, D039, C435, C438, C436, C434, C4359]","[malignant melanoma of skin, malignant melanoma of skin of face, in situ malignant melanoma of skin, malignant melanoma of skin of trunk, overlapping malignant melanoma of skin, malignant melanoma of skin of hand, malignant melanoma of scalp, malignant melanoma of skin of trunk (disorder)]","[0.0000, 3.3690, 3.6932, 4.0700, 4.3714, 5.0833, 5.3582, 5.4795]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,malignant melanoma of skin,0,25,,93655004,"[93655004, 93225001, 254731001, 109266006, 93212009, 93651008, 109267002, 188081000, 372244006, 269577007, 154501005, 154503008, 269579005, 188082007, 154506000, 93636004]","[malignant melanoma of skin, malignant melanoma of skin of face, nodular malignant melanoma of skin, in situ malignant melanoma of skin, malignant melanoma of skin of arm, malignant melanoma of skin of trunk, overlapping malignant melanoma of skin, overlapping malignant melanoma of skin, malignant melanoma, malignant melanoma, malignant melanoma, malignant melanoma of trunk, malignant melanoma of trunk, malignant melanoma of skin nos, malignant melanoma of skin nos, malignant melanoma of skin of hand]","[0.0000, 3.3690, 3.5330, 3.6932, 4.0151, 4.0700, 4.3714, 4.3714, 4.4163, 4.4163, 4.4163, 4.5833, 4.5833, 5.0068, 5.0068, 5.0833]"


>>  malignant neoplasm of lower lobe, bronchus
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,"malignant neoplasm of lower lobe, bronchus",0,41,,C343,"[C343, C3432, C3430, C780, C3402, C399, D0222, C3431, C34, D381]","[malignant neoplasm of lower lobe bronchus, malignant neoplasm of lower lobe bronchus (disorder), malignant neoplasm of lower lobe, bronchus or lung (disorder), neoplasm of bronchus of left lower lobe, malignant neoplasm of left main bronchus, malignant neoplasm of lower respiratory tract, neoplasm of bronchus of left lower lobe (disorder), malignant neoplasm of lower lobe, right bronchus or lung, malignant neoplasm of bronchus and lung, neoplasm of left lower lobe of lung]","[2.6339, 3.7200, 5.4430, 5.7742, 6.1573, 6.2779, 6.3404, 6.3440, 6.9018, 6.9311]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,"malignant neoplasm of lower lobe, bronchus",0,41,,187869003,"[187869003, 187870002, 187868006, 126712008, 94228007, 187871003, 724059003, 430621000, 372110008, 93729006, 724056005, 126718007, 126709005, 187660002, 94375005]","[malignant neoplasm of lower lobe bronchus, malignant neoplasm of lower lobe of lung, malignant neoplasm of lower lobe, bronchus or lung, neoplasm of bronchus of left lower lobe, secondary malignant neoplasm of bronchus of left lower lobe, malignant neoplasm of lower lobe, bronchus or lung nos, malignant neoplasm of lower lobe of left lung, malignant neoplasm of lower respiratory tract, primary malignant neoplasm of lower lobe, bronchus or lung, primary malignant neoplasm of bronchus of left lower lobe, malignant neoplasm of lower lobe of right lung, neoplasm of left lower lobe of lung, neoplasm of bronchus of left upper lobe, malignant neoplasm of lower buccal sulcus, secondary malignant neoplasm of left lower lobe of lung]","[2.6339, 4.6868, 5.4302, 5.7742, 5.7767, 6.0076, 6.0192, 6.2779, 6.3899, 6.3965, 6.7675, 6.9311, 7.0035, 7.0463, 7.1298]"


>>  metastatic lung cancer
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,metastatic lung cancer,0,21,,C7800,"[C7800, C349, C7801, Z858, C800, Z8511, C780, C798, C7802, C799, C7830, C7B00]","[cancer metastatic to lung, metastasis from malignant tumor of lung, cancer metastatic to left lung, history of cancer metastatic to lung, metastatic cancer, history of cancer metastatic to lung (situation), metastatic adenocarcinoma to bilateral lungs, cancer metastatic to chest wall, metastatic malignant neoplasm to left lower lobe of lung, metastatic carcinoid tumour, cancer metastatic to respiratory tract, metastatic carcinoid tumor]","[5.4410, 7.2912, 7.3621, 7.4966, 7.6341, 7.9753, 8.4465, 8.7944, 8.8118, 8.8940, 8.9070, 8.9103]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,metastatic lung cancer,0,21,,94391008,"[94391008, 457721000124104, 315006004, 353741000119106, 302818005, 128462008, 269624009, 154576004, 154566002, 15956181000119102, 285604008, 1661000119106, 94253005, 94375005, 94376006, 94522007, 705176003]","[cancer metastatic to lung, metastatic non-small cell lung cancer, metastasis from malignant tumor of lung, cancer metastatic to left lung, metastatic cancer, metastatic cancer, metastatic carcinoma, metastatic carcinoma, metastasis to lung, metastatic adenocarcinoma to bilateral lungs, metastasis to lung of unknown primary, metastasis to lung from adenocarcinoma, cancer metastatic to chest wall, metastatic malignant neoplasm to left lower lobe of lung, metastatic malignant neoplasm to left upper lobe of lung, metastatic malignant neoplasm to right lower lobe of lung, metastatic carcinoid tumour]","[5.4410, 6.8656, 7.2912, 7.3621, 7.6341, 7.6341, 7.9786, 7.9786, 8.1071, 8.4465, 8.4940, 8.7024, 8.7944, 8.8118, 8.8241, 8.8436, 8.8940]"


>>  secondary malignant neoplasm of bone
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,secondary malignant neoplasm of bone,0,35,,C7951,"[C7951, C419, C795, D492, D480, Z8583]","[secondary malignant neoplasm of bone, malignant neoplasm of bone, secondary malignant neoplasm of bone of upper limb, neoplasm of bone, neoplasm of bone (disorder), history of malignant neoplasm of bone]","[0.0000, 4.3887, 4.7477, 5.4522, 6.0126, 6.0982]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,secondary malignant neoplasm of bone,0,35,,94222008,"[94222008, 428281000, 94218003, 94221001, 93725000, 126537000, 94478006, 94470004, 94620005, 94322006, 94447007, 94220000, 94219006, 363371002, 269466003, 94411006]","[secondary malignant neoplasm of bone, malignant neoplasm of bone, secondary malignant neoplasm of bone of face, secondary malignant neoplasm of bone of upper limb, primary malignant neoplasm of bone, neoplasm of bone, secondary malignant neoplasm of pelvic bone, secondary malignant neoplasm of parietal bone, secondary malignant neoplasm of tarsal bone, secondary malignant neoplasm of hamate bone, secondary malignant neoplasm of occipital bone, secondary malignant neoplasm of bone of skull, secondary malignant neoplasm of bone of lower limb, malignant bone tumour, malignant bone tumour, secondary malignant neoplasm of metacarpal bone]","[0.0000, 4.3887, 4.6097, 4.7477, 5.1638, 5.4522, 5.6374, 5.6429, 5.9023, 5.9930, 6.1143, 6.1351, 6.2438, 6.3628, 6.3628, 6.3851]"


>>  type 2 diabetes mellitus
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,type 2 diabetes mellitus,0,23,,E119,"[E119, E118, E113, O2411, E139, E117, E1143, E109, E1169, E114, E112, E1151, E1162]","[type 2 diabetes mellitus, disorder associated with type 2 diabetes mellitus, disorder of eye with type 2 diabetes mellitus, pre-existing type 2 diabetes mellitus, secondary diabetes mellitus, multiple complications of type 2 diabetes mellitus, neurological disorder with type 2 diabetes mellitus, diabetes mellitus, diabetic dyslipidemia associated with type 2 diabetes mellitus, neurological disorder with diabetes type 2, kidney disorder associated with type 2 diabetes mellitus, peripheral circulatory disorder associated with type 2 diabetes mellitus, type 2 diabetes mellitus with skin complications]","[0.0000, 3.7729, 4.8808, 4.9391, 5.1732, 5.2452, 5.5212, 5.5994, 5.7916, 5.9043, 5.9998, 6.0564, 6.0600]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,type 2 diabetes mellitus,0,23,,44054006,"[44054006, 422014003, 422099009, 199230006, 190384004, 8801005, 420414003, 421326000, 73211009, 191044006, 154671004, 267467004, 761000119102, 190388001]","[type 2 diabetes mellitus, disorder associated with type 2 diabetes mellitus, disorder of eye with type 2 diabetes mellitus, pre-existing type 2 diabetes mellitus, type ii diabetes mellitus, secondary diabetes mellitus, multiple complications of type 2 diabetes mellitus, neurological disorder with type 2 diabetes mellitus, diabetes mellitus, diabetes mellitus, diabetes mellitus, diabetes mellitus, diabetic dyslipidemia associated with type 2 diabetes mellitus, type 2 diabetes mellitus with multiple complications]","[0.0000, 3.7729, 4.8808, 4.9391, 5.0283, 5.1732, 5.2452, 5.5212, 5.5994, 5.5994, 5.5994, 5.5994, 5.7916, 5.8752]"


>>  type 2 diabetes mellitus/insulin
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,type 2 diabetes mellitus/insulin,0,31,,Z794,"[Z794, E119, E1169, E118, E1165, E113, E116, O2411, E1144, E139]","[insulin treated type 2 diabetes mellitus, type 2 diabetes mellitus, hyperglycemia due to type 2 diabetes mellitus (disorder), disorder associated with type 2 diabetes mellitus, hyperglycemia due to type 2 diabetes mellitus, disorder of eye with type 2 diabetes mellitus, hyperglycaemia due to type 2 diabetes mellitus, pre-existing type 2 diabetes mellitus (disorder), disorder of nervous system due to type 2 diabetes mellitus, secondary endocrine diabetes mellitus (disorder)]","[5.7951, 6.1022, 6.2798, 6.3629, 6.5885, 6.6068, 6.6896, 6.7764, 6.9427, 6.9581]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,type 2 diabetes mellitus/insulin,0,31,,237599002,"[237599002, 366909003, 44054006, 368051000119109, 422014003, 422099009, 199230006, 137931000119102, 190384004, 421326000]","[insulin treated type 2 diabetes mellitus, insulin treated type 2 diabetes mellitus, type 2 diabetes mellitus, hyperglycemia due to type 2 diabetes mellitus (disorder), disorder associated with type 2 diabetes mellitus, disorder of eye with type 2 diabetes mellitus, pre-existing type 2 diabetes mellitus (disorder), hyperlipidaemia due to type 2 diabetes mellitus, type ii diabetes mellitus, disorder of nervous system due to type 2 diabetes mellitus]","[5.7951, 5.7951, 6.1022, 6.2798, 6.3629, 6.6068, 6.7764, 6.8025, 6.8979, 6.9427]"


>>  unsp malignant neoplasm of lymph node
icd10cm_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,unsp malignant neoplasm of lymph node,0,36,,C779,"[C779, C77, D360, C772, C775, D492, D219]","[secondary and unsp malignant neoplasm of lymph node, unsp, secondary and unspecified malignant neoplasm of lymph nodes, benign neoplasm of lymph node, secondary and unsp malignant neoplasm of intra-abd nodes, secondary and unsp malignant neoplasm of intrapelv nodes, neoplasm of lymph node, benign neoplasm of lymph vessels]","[5.9379, 6.2701, 6.7806, 7.2553, 7.6323, 7.6462, 7.8888]"


snomed_code


Unnamed: 0,chunks,begin,end,entity,code,all_codes,resolutions,all_distances
0,unsp malignant neoplasm of lymph node,0,36,,188379006,"[188379006, 271526003, 188432007, 92197001, 189203003, 188706002, 92202008, 92156008, 188707006, 188713002, 92090002, 92203003, 188392009, 188393004, 188709009, 188389005, 188715009, 92199003, 92213006, 127232002, 188423005, 92201001]","[secondary and unspecified malignant neoplasm of lymph nodes, secondary and unspecified malignant neoplasm of lymph nodes, secondary and unspecified malignant neoplasm of lymph nodes nos, benign neoplasm of lymph node, benign neoplasm of lymph nodes, unspecified malignant neoplasm of lymphoid and histiocytic tissue of unspecified site, benign neoplasm of lymph nodes of neck, benign neoplasm of intrathoracic lymph nodes, unspecified malignant neoplasm of lymphoid and histiocytic tissue of lymph nodes of head, face and neck, unspecified malignant neoplasm of lymphoid and histiocytic tissue of intrapelvic lymph nodes, benign neoplasm of epitrochlear lymph nodes, benign neoplasm of lymph nodes of upper limb, secondary and unspecified malignant neoplasm of lymph nodes of head, face and neck nos, secondary and unspecified malignant neoplasm of intrathoracic lymph nodes, unspecified malignant neoplasm of lymphoid and histiocytic tissue of intrathoracic lymph nodes, secondary and unspecified malignant neoplasm of submental lymph nodes, unspecified malignant neoplasm of lymphoid and histiocytic tissue of lymph nodes of multiple sites, benign neoplasm of lymph nodes of head (disorder), benign neoplasm of mediastinal lymph nodes, neoplasm of lymph node, secondary and unspecified malignant neoplasm of intrapelvic lymph nodes, benign neoplasm of lymph nodes of multiple sites (disorder)]","[6.2701, 6.2701, 6.5341, 6.7806, 6.8029, 6.8955, 7.2334, 7.3035, 7.3339, 7.3721, 7.4309, 7.4429, 7.4456, 7.4954, 7.5084, 7.5154, 7.5664, 7.5726, 7.6336, 7.6462, 7.6835, 7.6909]"


##  How to integrate resolvers with NER models in the same pipeline


In [145]:
# --- Text -> document -> sentences -> tokens ---------------------------
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

sentenceDetector = (
    SentenceDetectorDLModel.pretrained()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# --- Clinical NER -------------------------------------------------------
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

clinical_ner = (
    NerDLModel.pretrained("ner_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# Only chunks labelled PROBLEM are kept and passed on to the resolver.
ner_converter = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
    .setWhiteList(['PROBLEM'])
)

# --- Chunk -> sentence-BERT embedding -> ICD-10-CM code -----------------
# The sBERT embedder reads the "ner_chunk_doc" column produced here, so each
# NER chunk is embedded on its own.
c2doc = (
    Chunk2Doc()
    .setInputCols("ner_chunk")
    .setOutputCol("ner_chunk_doc")
)

sbert_embedder = (
    BertSentenceEmbeddings
    .pretrained("sbiobert_base_cased_mli",'en','clinical/models')
    .setInputCols(["ner_chunk_doc"])
    .setOutputCol("sbert_embeddings")
)

icd10_resolver = (
    SentenceEntityResolverModel.pretrained("sbiobertresolve_icd10cm_augmented","en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("icd10cm_code")
    .setDistanceFunction("EUCLIDEAN")
)

# Stage order follows the column dependencies declared above.
sbert_resolver_pipeline = Pipeline(stages=[
    documentAssembler,
    sentenceDetector,
    tokenizer,
    word_embeddings,
    clinical_ner,
    ner_converter,
    c2doc,
    sbert_embedder,
    icd10_resolver,
])

# `text` is expected to come from an earlier cell; it only seeds the one-row
# DataFrame the pipeline is fitted on.
data_ner = spark.createDataFrame([[text]]).toDF("text")

sbert_models = sbert_resolver_pipeline.fit(data_ner)



sentence_detector_dl download started this may take some time.
Approximate size to download 307.2 KB
[OK!]
embeddings_clinical download started this may take some time.
Approximate size to download 1.6 GB
[OK!]
ner_clinical download started this may take some time.
Approximate size to download 13.8 MB
[OK!]
sbiobert_base_cased_mli download started this may take some time.
Approximate size to download 384.3 MB
[OK!]
sbiobertresolve_icd10cm_augmented download started this may take some time.
Approximate size to download 1.2 GB
[OK!]


In [158]:

# Sample clinical note (two paragraphs) used as input for the NER + resolver
# pipeline. The text is written as two adjacent string literals joined by an
# explicit "\n": a single-quoted literal cannot span physical lines, and the
# explicit newline keeps the paragraph break (and every character offset)
# exactly where it was.
clinical_note = (
    'A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and subsequent type two diabetes mellitus (T2DM), one prior episode of HTG-induced pancreatitis three years prior to presentation, associated with an acute hepatitis, and obesity with a body mass index (BMI) of 33.5 kg/m2, presented with a one-week history of polyuria, polydipsia, poor appetite, and vomiting. Two weeks prior to presentation, she was treated with a five-day course of amoxicillin for a respiratory tract infection. She was on metformin, glipizide, and dapagliflozin for T2DM and atorvastatin and gemfibrozil for HTG. She had been on dapagliflozin for six months at the time of presentation. Physical examination on presentation was significant for dry oral mucosa; significantly, her abdominal examination was benign with no tenderness, guarding, or rigidity. Pertinent laboratory findings on admission were: serum glucose 111 mg/dl, bicarbonate 18 mmol/l, anion gap 20, creatinine 0.4 mg/dL, triglycerides 508 mg/dL, total cholesterol 122 mg/dL, glycated hemoglobin (HbA1c) 10%, and venous pH 7.27. Serum lipase was normal at 43 U/L. Serum acetone levels could not be assessed as blood samples kept hemolyzing due to significant lipemia. The patient was initially admitted for starvation ketosis, as she reported poor oral intake for three days prior to admission. However, serum chemistry obtained six hours after presentation revealed her glucose was 186 mg/dL, the anion gap was still elevated at 21, serum bicarbonate was 16 mmol/L, triglyceride level peaked at 2050 mg/dL, and lipase was 52 U/L. The β-hydroxybutyrate level was obtained and found to be elevated at 5.29 mmol/L - the original sample was centrifuged and the chylomicron layer removed prior to analysis due to interference from turbidity caused by lipemia again. \n'
    'The patient was treated with an insulin drip for euDKA and HTG with a reduction in the anion gap to 13 and triglycerides to 1400 mg/dL, within 24 hours. Her euDKA was thought to be precipitated by her respiratory tract infection in the setting of SGLT2 inhibitor use. The patient was seen by the endocrinology service and she was discharged on 40 units of insulin glargine at night, 12 units of insulin lispro with meals, and metformin 1000 mg two times a day. It was determined that all SGLT2 inhibitors should be discontinued indefinitely. She had close follow-up with endocrinology post discharge.'
)

print(clinical_note)

# One-row DataFrame with the note in the "text" column.
clinical_note_df = spark.createDataFrame([[clinical_note]]).toDF("text")


A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and subsequent type two diabetes mellitus (T2DM), one prior episode of HTG-induced pancreatitis three years prior to presentation, associated with an acute hepatitis, and obesity with a body mass index (BMI) of 33.5 kg/m2, presented with a one-week history of polyuria, polydipsia, poor appetite, and vomiting. Two weeks prior to presentation, she was treated with a five-day course of amoxicillin for a respiratory tract infection. She was on metformin, glipizide, and dapagliflozin for T2DM and atorvastatin and gemfibrozil for HTG. She had been on dapagliflozin for six months at the time of presentation. Physical examination on presentation was significant for dry oral mucosa; significantly, her abdominal examination was benign with no tenderness, guarding, or rigidity. Pertinent laboratory findings on admission were: serum glucose 111 mg/dl, bicarbonate 18 mmol/l, anion gap 20

In [161]:

from pyspark.sql import functions as F

# Run the resolver pipeline on the note and flatten the annotations to one row per
# NER chunk. arrays_zip pairs the parallel annotation arrays element-wise; the fields
# of the zipped struct are then read by position ('0'..'5'), in argument order.
icd10_sdf = (
    sbert_models.transform(clinical_note_df)
    .select(
        F.explode(
            F.arrays_zip(
                "ner_chunk.result",       # '0' -> chunk
                "ner_chunk.metadata",     # '1' -> source of the entity label
                "icd10cm_code.result",    # '2' -> code
                "icd10cm_code.metadata",  # '3' -> confidence and all_k_* fields
                "ner_chunk.begin",        # '4' -> begin
                "ner_chunk.end",          # '5' -> end
            )
        ).alias("icd10cm_code")
    )
    .select(
        F.expr("icd10cm_code['0']").alias("chunk"),
        F.expr("icd10cm_code['4']").alias("begin"),
        F.expr("icd10cm_code['5']").alias("end"),
        F.expr("icd10cm_code['1'].entity").alias("entity"),
        F.expr("icd10cm_code['2']").alias("code"),
        F.expr("icd10cm_code['3'].confidence").alias("confidence"),
        F.expr("icd10cm_code['3'].all_k_resolutions").alias("all_k_resolutions"),
        F.expr("icd10cm_code['3'].all_k_results").alias("all_k_codes"),
    )
)

icd10_sdf.show()


+--------------------+-----+----+-------+-----+----------+--------------------+--------------------+
|               chunk|begin| end| entity| code|confidence|   all_k_resolutions|         all_k_codes|
+--------------------+-----+----+-------+-----+----------+--------------------+--------------------+
|gestational diabe...|   39|  67|PROBLEM|O2441|    0.9777|gestational diabe...|O2441:::O2443:::Z...|
|type two diabetes...|  128| 153|PROBLEM| E119|    0.6587|type 2 diabetes m...|E119:::E118:::O24...|
|              T2DM),|  156| 161|PROBLEM| E119|    0.1209|diabetes mellitus...|E119:::E109:::E88...|
|HTG-induced pancr...|  184| 207|PROBLEM|K8520|    0.2189|alcohol-induced p...|K8520:::K853:::K8...|
|  an acute hepatitis|  260| 277|PROBLEM| B159|    0.3662|acute hepatitis a...|B159:::K720:::B17...|
|             obesity|  284| 290|PROBLEM| E669|    0.9193|obesity:::abdomin...|E669:::E668:::Z68...|
|   a body mass index|  297| 313|PROBLEM|Z6841|    0.6263|finding of body m...|Z6841:::E669

In [None]:
# Same resolver pipeline, wrapped in a LightPipeline: further down it is called with
# the raw note string (light_model.fullAnnotate(clinical_note)) instead of a DataFrame.

light_model = LightPipeline(sbert_models)

In [None]:
!pip -q install spark-nlp-display

In [148]:
from sparknlp_display import EntityResolverVisualizer

# Annotate the raw note string with the light pipeline.
light_data_icd = light_model.fullAnnotate(clinical_note)

# Build the visualizer and override the highlight color for PROBLEM entities.
vis = EntityResolverVisualizer()
vis.set_label_colors({'PROBLEM':'#008080'})

# Render the first annotation result, reading labels from the 'ner_chunk' output
# and resolutions from the 'icd10cm_code' output.
vis.display(
    light_data_icd[0],
    'ner_chunk',
    'icd10cm_code',
)
