![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/named_entity_recognition_NER/NLU_explain_clinical_doc_vop_pipeline.ipynb)

# Explain Clinical Document - Oncology - Pipeline

This specialized oncology pipeline can:

- extract oncological entities,

- assign assertion status to the extracted entities,

- establish relations between the extracted entities from the clinical documents.

In [None]:
# Install NLU together with PySpark pinned to version 3.1.2.
! pip install nlu pyspark==3.1.2

In [None]:
# Install the johnsnowlabs package (latest available release; no version pin).
! pip install johnsnowlabs

In [1]:
import json, os
from google.colab import files

# Reuse a license file that is already present in the working directory.
# Otherwise open the Colab upload dialog and store the first uploaded file
# under the fixed name 'spark_jsl.json'.
if 'spark_jsl.json' not in os.listdir():
    license_keys = files.upload()
    os.rename(list(license_keys)[0], 'spark_jsl.json')

# Parse the license JSON into a dict of key/value pairs.
with open('spark_jsl.json') as f:
    license_keys = json.load(f)

# Publish every license entry twice: as a notebook-level variable and as an
# environment variable.
locals().update(license_keys)
os.environ.update(license_keys)

In [None]:
# Installing pyspark (pinned to 3.1.2) and spark-nlp at the release given by $PUBLIC_VERSION
# NOTE(review): $PUBLIC_VERSION, $JSL_VERSION and $SECRET are presumably entries of the
# license JSON that the license-loading cell exports as variables / environment
# variables -- confirm against the contents of spark_jsl.json.
! pip install --upgrade -q pyspark==3.1.2 spark-nlp==$PUBLIC_VERSION

# Installing NLU without pulling in its dependencies
! pip install --upgrade --q nlu --no-dependencies

# Installing Spark NLP Healthcare, using pypi.johnsnowlabs.com/$SECRET as an extra package index
! pip install --upgrade -q spark-nlp-jsl==$JSL_VERSION  --extra-index-url https://pypi.johnsnowlabs.com/$SECRET

# Installing Spark NLP Display Library for visualization
! pip install -q spark-nlp-display

In [2]:
import json
import os

import sparknlp
import sparknlp_jsl
import nlu

from sparknlp.base import *
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *

from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.ml import Pipeline,PipelineModel

import pandas as pd
pd.set_option('display.max_colwidth', 200)

import warnings
warnings.filterwarnings('ignore')

# Spark settings handed to the session start call below.
params = {
    "spark.driver.memory": "16G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M",
}

# Report the installed library versions before the session is created.
print(f"Spark NLP Version : {sparknlp.version()}")
print(f"Spark NLP_JSL Version : {sparknlp_jsl.version()}")

# Start the licensed Spark session using the SECRET entry of the license file.
spark = sparknlp_jsl.start(license_keys["SECRET"], params=params)

# Last expression of the cell: show the session object as the cell output.
spark

Spark NLP Version : 5.3.0
Spark NLP_JSL Version : 5.3.0


In [3]:
# Load the pretrained oncology explain-document pipeline with NLU, which this
# notebook imports as `nlu` (no module named `nlp` is imported anywhere).
model = nlu.load("en.explain_doc.clinical_oncology.pipeline")

explain_clinical_doc_oncology download started this may take some time.
Approx size to download 1.8 GB
[OK!]


In [4]:
# Sample clinical note, wrapped in a one-element list, that is fed to the pipeline.
text = ["""The Patient underwent a computed tomography (CT) scan of the abdomen and pelvis, which showed a complex ovarian mass. A Pap smear performed one month later was positive for atypical glandular cells suspicious for adenocarcinoma. The pathologic specimen showed extension of the tumor throughout the fallopian tubes, appendix, omentum, and 5 out of 5 enlarged lymph nodes. The final pathologic diagnosis of the tumor was stage IIIC papillary serous ovarian adenocarcinoma. Two months later, the patient was diagnosed with lung metastases."""]

In [5]:
# Run the loaded pipeline on the sample text and keep the resulting table in `df`.
df = model.predict(text)



In [6]:
# Display the full prediction table (a single row for the single input document).
df

Unnamed: 0,all_relations_result,assertion,assertion_confidence,document,entities_ner_jsl_chunk,entities_ner_jsl_chunk_class,entities_ner_jsl_chunk_confidence,entities_ner_jsl_chunk_origin_chunk,entities_ner_jsl_chunk_origin_sentence,entities_ner_oncology_anatomy_general_chunk,...,relation_re_oncology_granular_wip_entity1_class,relation_re_oncology_granular_wip_entity1_end,relation_re_oncology_granular_wip_entity2,relation_re_oncology_granular_wip_entity2_begin,relation_re_oncology_granular_wip_entity2_class,relation_re_oncology_granular_wip_entity2_end,relation_re_oncology_granular_wip_origin_sentence,sentence_dl,unlabeled_dependency,word_embedding_embeddings
0,O,"[Past, Past, Present, Past, Present, Possible, Past, Present, Present, Present, Present, Present, Present]","[0.9998, 0.9997, 0.9995, 0.9988, 0.957, 0.9251, 0.9993, 0.9978, 0.9992, 0.9787, 0.9994, 0.9962, 0.9975]","The Patient underwent a computed tomography (CT) scan of the abdomen and pelvis, which showed a complex ovarian mass. A Pap smear performed one month later was positive for atypical glandular cell...","[adenocarcinoma, tumor, tumor, papillary serous ovarian adenocarcinoma, lung metastases]","[Oncological, Oncological, Oncological, Oncological, Oncological]","[0.9974, 0.8333, 0.9892, 0.60825, 0.96220005]","[0, 1, 2, 3, 4]","[1, 2, 3, 3, 4]","[ovarian, fallopian tubes, appendix, omentum, lymph nodes, lung]",...,"[Site_Other_Body_Part, Site_Other_Body_Part, Site_Other_Body_Part, Pathology_Test, Tumor_Finding, Tumor_Finding, Tumor_Finding]","[67, 78, 110, 128, 281, 281, 281]","[mass, mass, mass, adenocarcinoma, fallopian tubes, appendix, omentum]","[112, 112, 112, 213, 298, 315, 325]","[Tumor_Finding, Tumor_Finding, Tumor_Finding, Cancer_Dx, Site_Other_Body_Part, Site_Other_Body_Part, Site_Other_Body_Part]","[115, 115, 115, 226, 312, 322, 331]","[0, 0, 0, 1, 2, 2, 2]","[The Patient underwent a computed tomography (CT) scan of the abdomen and pelvis, which showed a complex ovarian mass., A Pap smear performed one month later was positive for atypical glandular ce...","[underwent, underwent, ROOT, tomography, tomography, underwent, CT, tomography, CT, tomography, abdomen, abdomen, scan, pelvis, abdomen, abdomen, showed, abdomen, mass, mass, mass, showed, underwe...","[[-0.0823686420917511, -0.3178570866584778, 0.07588467001914978, -0.5767543911933899, -0.5021305084228516, -0.27459028363227844, -0.22776539623737335, 0.2133890837430954, 0.03893626481294632, -0.0..."


In [8]:
# Narrow the prediction table to the NER chunks, their labels and the
# assertion statuses.
# NOTE(review): in the recorded output the chunk list has 5 entries while the
# `assertion` list has 13, so these columns do not line up element-wise --
# confirm which NER chunk column the assertion statuses belong to.
df.loc[:, [
    "entities_ner_jsl_chunk",
    "entities_ner_jsl_chunk_class",
    "assertion",
]]

Unnamed: 0,entities_ner_jsl_chunk,entities_ner_jsl_chunk_class,assertion
0,"[adenocarcinoma, tumor, tumor, papillary serous ovarian adenocarcinoma, lung metastases]","[Oncological, Oncological, Oncological, Oncological, Oncological]","[Past, Past, Present, Past, Present, Possible, Past, Present, Present, Present, Present, Present, Present]"
