In [None]:
import json, os
from google.colab import files

# Make sure the John Snow Labs license file is available in the working
# directory; when it is missing, prompt for an upload (Colab widget) and
# store the uploaded file under the expected name.
if 'spark_jsl.json' not in os.listdir():
    uploaded_files = files.upload()
    if not uploaded_files:
        # Without this guard a cancelled upload fails with an opaque IndexError.
        raise RuntimeError(
            "No license file was uploaded; upload your spark_jsl.json to continue."
        )
    # Only the first uploaded file is used, matching the single-license workflow.
    os.rename(next(iter(uploaded_files)), 'spark_jsl.json')

with open('spark_jsl.json') as f:
    license_keys = json.load(f)

# Expose every license key/value pair as a notebook-level variable.
# globals() is used explicitly: writing through locals() only works because a
# notebook cell runs at module scope, and is unreliable anywhere else.
globals().update(license_keys)
# Also export the pairs as environment variables for child processes.
os.environ.update(license_keys)

In [None]:
# Installing pyspark and spark-nlp
# ($PUBLIC_VERSION is substituted at run time; presumably it is one of the keys
#  loaded from spark_jsl.json in the license cell -- TODO confirm key names)
! pip install --upgrade -q pyspark==3.4.1 spark-nlp==$PUBLIC_VERSION

# Installing Spark NLP Healthcare
# (pulled from the John Snow Labs package index; the URL path carries $SECRET,
#  so avoid sharing this cell's rendered output)
! pip install --upgrade -q spark-nlp-jsl==$JSL_VERSION  --extra-index-url https://pypi.johnsnowlabs.com/$SECRET

# Installing Spark NLP Display Library for visualization
# NOTE(review): this package is not version-pinned, unlike the two above.
! pip install -q spark-nlp-display

In [None]:
import sparknlp
import sparknlp_jsl

from sparknlp.base import *
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *

from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.ml import Pipeline,PipelineModel
from pyspark.sql.types import StringType, IntegerType

import pandas as pd
pd.set_option('display.max_colwidth', 200)

import warnings
warnings.filterwarnings('ignore')

# Spark configuration overrides handed to sparknlp_jsl.start() through `params`.
params = {
    "spark.driver.memory": "52G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M",
}

# Start the licensed session, authenticating with the SECRET entry of the license file.
spark = sparknlp_jsl.start(license_keys['SECRET'], params=params)

# Report the versions of both libraries so the run can be reproduced later.
for version_label, library in (
    ("Spark NLP Version :", sparknlp),
    ("Spark NLP_JSL Version :", sparknlp_jsl),
):
    print(version_label, library.version())

# Last expression of the cell: display the session object.
spark

Spark NLP Version : 5.4.0
Spark NLP_JSL Version : 5.4.0


## ner_stigmatization_wip

In [None]:
# Stage 1: read the "text" column and emit "document" annotations.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: pretrained sentence detector, "document" -> "sentence".
sentence_detector = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Stage 3: tokenizer, "sentence" -> "token".
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Stage 4: pretrained clinical word embeddings for every token.
clinical_embeddings = (
    WordEmbeddingsModel.pretrained('embeddings_clinical', "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Stage 5: the pretrained NER model under demonstration; writes tags to "ner".
ner_model = (
    MedicalNerModel.pretrained("ner_stigmatization_wip", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# Stage 6: turn the token-level "ner" output into "ner_chunk" annotations.
ner_converter = (
    NerConverterInternal()
    .setInputCols(['sentence', 'token', 'ner'])
    .setOutputCol('ner_chunk')
)

# Assemble the stages in execution order.
pipeline = Pipeline(
    stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        clinical_embeddings,
        ner_model,
        ner_converter,
    ]
)



sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]
embeddings_clinical download started this may take some time.
Approximate size to download 1.6 GB
[OK!]


In [None]:
# Show every tag the loaded NER model can emit (B-/I- prefixed labels plus 'O').
ner_model.getClasses()

['O',
 'I-Compliant',
 'B-Compliant',
 'B-Aggressive',
 'B-Positive_Assessment',
 'I-Positive_Assessment',
 'B-Noncompliant',
 'I-Credibility_Doubts',
 'I-Argumentative',
 'I-TREATMENT',
 'I-Suspected_DSB',
 'B-TREATMENT',
 'B-Argumentative',
 'B-Suspected_DSB',
 'I-PROBLEM',
 'I-Poor_Reasoning',
 'B-Disoriented',
 'B-Paternalistic_Tone',
 'B-Poor_Reasoning',
 'I-Noncompliant',
 'I-Other_Discriminatory_Language',
 'B-TEST',
 'B-Credibility_Doubts',
 'B-Calm',
 'I-Neglected_Appearance',
 'B-Neglected_Appearance',
 'I-Resistant',
 'B-Resistant',
 'I-Collaborative_Decision_Making',
 'B-Other_Discriminatory_Language',
 'B-Positive_Descriptors',
 'I-TEST',
 'I-Calm',
 'B-Collaborative_Decision_Making',
 'I-Aggressive',
 'I-Disoriented',
 'B-PROBLEM',
 'I-Poor_Decision_Making',
 'I-Paternalistic_Tone',
 'B-Poor_Decision_Making']

In [43]:
# Six clinical-note excerpts used as demo input for the NER pipeline.
# The last excerpt contains `___` placeholders (presumably de-identification
# gaps in the source note -- TODO confirm provenance).
sample_texts = [
"""The healthcare team observed that Mr. Smith exhibited somewhat aggressive behavior and was highly irritable, especially when discussing his treatment plan. He showed a full range of emotions and fixated on certain incorrect beliefs about his health. Concerns about his poor insight and judgment were frequently discussed in multidisciplinary team meetings. For example, he often insisted that his symptoms were purely due to stress.""",
"""Once stabilized, Mr. Smith was discharged with a comprehensive care plan emphasizing the importance of medication adherence and regular follow-up appointments. Despite extensive counseling on the risks associated with non-compliance, concerns about his judgment persisted. He expressed skepticism about the need for certain medications, particularly those for managing his diabetes and COPD.""",
"""David Brown's hospital stay underscored the significant impact of poor reasoning and judgment on his health outcomes. His initial reluctance to seek care and resistance to necessary treatments highlighted the crucial need for patient education and compliance. Moving forward, strict adherence to his treatment plan and regular follow-up are vital to preventing further complications and ensuring his ongoing well-being.""",
"""Despite his confrontational attitude, efforts were made to educate Mr. Brown on the importance of following his treatment plan and dietary restrictions. Multiple attempts to discuss his condition and the need for continuous care were met with defensiveness. He declined several recommendations, becoming agitated and tearful during discussions about his health.""",
"""Efforts to educate Ms. Martin on the importance of adhering to her asthma management plan were met with resistance. She frequently questioned the necessity of her medications and expressed dissatisfaction with her care. Despite these challenges, the team remained dedicated to providing thorough care, working to address her concerns and educate her on the importance of following her treatment regimen. Ms. Martin became particularly agitated when discussing her anxiety and the impact of her asthma on her quality of life. "No one understands how hard this is for me," she argued during a consultation with the psychiatrist. Despite her defensive attitude, the team continued to offer support and reassurance, acknowledging the complexity of her psychosocial barriers to care.""",
"""History of Present Illness: Ms. ___ is a very pleasant ___ female who underwent a left partial mastectomy and left axillary sentinel node biopsy on ___ for left invasive ductal carcinoma. Her surgical pathology report indicated that all six margins were either involved with or close to atypical or carcinoma cells. We decided to go with a global re-excision lumpectomy, which was then performed on ___."""
]

# Wrap the raw strings in a one-column Spark DataFrame whose column is named "text",
# the input column expected by the document assembler.
text_type = StringType()
data = spark.createDataFrame(sample_texts, text_type).toDF("text")

# Fit the pipeline on the sample data, then annotate that same data.
fitted_pipeline = pipeline.fit(data)
result = fitted_pipeline.transform(data)

In [44]:
# Zip the parallel per-chunk arrays (text, start offset, end offset, metadata)
# so that each chunk becomes one struct, then explode to one row per chunk.
chunk_col = result.ner_chunk
zipped_chunk_fields = F.arrays_zip(
    chunk_col.result,
    chunk_col.begin,
    chunk_col.end,
    chunk_col.metadata,
)
exploded_chunks = result.select(F.explode(zipped_chunk_fields).alias("cols"))

# Pull the positional struct fields out into named columns.
chunk_table = exploded_chunks.select(
    F.expr("cols['0']").alias("chunk"),
    F.expr("cols['1']").alias("begin"),
    F.expr("cols['2']").alias("end"),
    F.expr("cols['3']['entity']").alias("ner_label"),
)

# Drop anything labelled 'O' and print the table without truncating cell text.
chunk_table.filter("ner_label!='O'").show(truncate=False)

+---------------------------+-----+---+-----------------------------+
|chunk                      |begin|end|ner_label                    |
+---------------------------+-----+---+-----------------------------+
|aggressive                 |63   |72 |Aggressive                   |
|irritable                  |98   |106|Aggressive                   |
|poor insight and judgment  |269  |293|Poor_Reasoning               |
|discussed                  |311  |319|Collaborative_Decision_Making|
|insisted                   |379  |386|Credibility_Doubts           |
|his symptoms               |393  |404|PROBLEM                      |
|stress                     |425  |430|PROBLEM                      |
|adherence                  |114  |122|Compliant                    |
|non-compliance             |218  |231|Noncompliant                 |
|judgment                   |253  |260|Poor_Reasoning               |
|certain medications        |316  |334|TREATMENT                    |
|his diabetes       

In [45]:
# Fit the pipeline and wrap the fitted model in a LightPipeline, which is
# then applied directly to the Python list of strings.
light_fitted_model = pipeline.fit(data)
light_model = LightPipeline(light_fitted_model)

# One result dict per input text, in the same order as sample_texts.
light_result = light_model.fullAnnotate(sample_texts)


# Flatten the NER chunks of every annotated text into a single list,
# keeping document order and, within a document, chunk order.
ner_annotations = [
    annotation
    for annotated_doc in light_result
    for annotation in annotated_doc['ner_chunk']
]

# One list per output column, all aligned by position.
chunks = [annotation.result for annotation in ner_annotations]
entities = [annotation.metadata['entity'] for annotation in ner_annotations]
sentence = [annotation.metadata['sentence'] for annotation in ner_annotations]
begin = [annotation.begin for annotation in ner_annotations]
end = [annotation.end for annotation in ner_annotations]
confidence = [annotation.metadata["confidence"] for annotation in ner_annotations]

# Assemble the table; dict order fixes the column order.
df_clinical = pd.DataFrame(
    {
        'chunks': chunks,
        'begin': begin,
        'end': end,
        'sentence_id': sentence,
        'entities': entities,
        'confidence': confidence,
    }
)

df_clinical

Unnamed: 0,chunks,begin,end,sentence_id,entities,confidence
0,aggressive,63,72,0,Aggressive,0.9235
1,irritable,98,106,0,Aggressive,0.999
2,poor insight and judgment,269,293,2,Poor_Reasoning,0.9554
3,discussed,311,319,2,Collaborative_Decision_Making,0.9082
4,insisted,379,386,3,Credibility_Doubts,0.9976
5,his symptoms,393,404,3,PROBLEM,0.96815
6,stress,425,430,3,PROBLEM,0.9738
7,adherence,114,122,0,Compliant,1.0
8,non-compliance,218,231,1,Noncompliant,1.0
9,judgment,253,260,1,Poor_Reasoning,0.9924


In [46]:
from sparknlp_display import NerVisualizer

# Render each annotated text with highlighted entities and save an HTML copy.
visualiser = NerVisualizer()

for i, annotated_doc in enumerate(light_result):
    # Headings are 1-based while the saved file names keep the 0-based index.
    print(f"Example_{i+1}")
    visualiser.display(
        annotated_doc,
        label_col='ner_chunk',
        document_col='document',
        save_path=f"display_result_{i}.html",
    )
    print("\n")


Example_1




Example_2




Example_3




Example_4




Example_5




Example_6




