In [None]:
# Mount Google Drive inside the Colab VM; the notebook's data paths are
# all located under this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
# Base folder on the mounted Drive. The licence file and the SNOMED CT data
# are addressed relative to it by plain string concatenation, so the
# trailing slash is required.
colab_path = "/content/drive/My Drive/Colab Notebooks/"

In [None]:
import json

# Read the licence bundle (a JSON object) stored in the Colab folder on Drive.
keys_file = colab_path + 'keys.json'
with open(keys_file) as keys_handle:
    license_keys = json.load(keys_handle)

# Display only the key names, never the values.
license_keys.keys()


dict_keys(['version', 'secret', 'SPARK_NLP_LICENSE', 'JSL_OCR_LICENSE', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'JSL_OCR_SECRET'])

In [None]:
import os

# Install java
! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["PATH"] = os.environ["JAVA_HOME"] + "/bin:" + os.environ["PATH"]
! java -version

openjdk version "1.8.0_252"
OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)
OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)


In [None]:
# Values used by the install cell and by the Spark session builder.
secret = license_keys['secret']
version = license_keys['version']

# Copy the licence and AWS credentials from the bundle into the process
# environment under the same names.
os.environ.update({
    env_name: license_keys[env_name]
    for env_name in (
        'SPARK_NLP_LICENSE',
        'JSL_OCR_LICENSE',
        'AWS_ACCESS_KEY_ID',
        'AWS_SECRET_ACCESS_KEY',
    )
})

In [None]:
# Install the licensed spark-nlp-jsl build matching `version` from the private index.
# NOTE(review): `secret` is part of the index URL, so pip's output may echo it —
# confirm the cell output is cleared (or pass `-q`) before sharing the notebook.
! python -m pip install --upgrade spark-nlp-jsl==$version  --extra-index-url https://pypi.johnsnowlabs.com/$secret

In [None]:
import sparknlp

print (sparknlp.version())

import json
import os
from pyspark.ml import Pipeline
from pyspark.sql import SparkSession, functions as F
import pandas as pd


from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
from sparknlp.base import *
import sparknlp_jsl



def start(secret, jsl_version=None):
    """Create (or reuse) a local Spark session with the Spark NLP jars configured.

    Parameters
    ----------
    secret : str
        Token inserted into the private repository URL from which the
        licensed jar is fetched.
    jsl_version : str, optional
        Version used in the licensed jar file name. When omitted, the
        notebook-level ``version`` is used, which is what this function read
        implicitly before the parameter existed.

    Returns
    -------
    SparkSession
        Whatever ``builder.getOrCreate()`` returns; an already running
        session is reused rather than rebuilt.
    """
    if jsl_version is None:
        # Backward-compatible fallback to the notebook global.
        jsl_version = version

    licensed_jar = "https://pypi.johnsnowlabs.com/" + secret + "/spark-nlp-jsl-" + jsl_version + ".jar"

    builder = SparkSession.builder \
        .appName("Spark NLP Licensed") \
        .master("local[*]") \
        .config("spark.driver.memory", "16G") \
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
        .config("spark.kryoserializer.buffer.max", "2000M") \
        .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.11:2.5.2") \
        .config("spark.jars", licensed_jar)

    return builder.getOrCreate()


spark = start(secret)

2.5.2


# Train Snomed Entity Resolver

We will assume we have downloaded the SNOMED CT dataset and that its Terminology text files are available under `colab_path`. We will reshape them a bit to get the desired format and then train an Entity Resolution model from scratch using SparkNLP Enterprise.

With this trained model, SparkNLP Enterprise can then spot these entities in free-form text and normalize their appearances to a canonical representation.

## Step 1. Load the data, filter it and merge it

In [None]:
# Folder holding the SNOMED CT release and the three terminology files read below.
snomed_path = colab_path + "snomed_ct/"
concepts_path = "sct2_Concept_Full_US1000124_20190901.txt"
descs_path = "sct2_Description_Full-en_US1000124_20190901.txt"
rels_path = "sct2_Relationship_Full_US1000124_20190901.txt"


def _read_snomed_tsv(file_name):
    """Read one tab-separated file with a header row from `snomed_path`."""
    return spark.read.csv(os.path.join(snomed_path, file_name), sep="\t", header=True)


concepts_raw = _read_snomed_tsv(concepts_path)
descs_raw = _read_snomed_tsv(descs_path)
rels_raw = _read_snomed_tsv(rels_path)

### Let's get the latest valid concepts

In [None]:
from pyspark.sql import SparkSession, functions as F
# Per-group aggregates shared by the three filters below. The two maxima are
# computed independently; joining back to the raw table on both keeps only
# raw rows whose (active, effectiveTime) pair equals them.
latest_row_aggs = [
    F.expr("max(active) as active"),
    F.expr("max(effectiveTime) as effectiveTime"),
]

# Concepts: one group per concept id, restricted to active=1.
concept_keys = concepts_raw.groupby("id").agg(*latest_row_aggs).where("active=1")
concepts = concept_keys\
        .join(concepts_raw, on=["id","active","effectiveTime"])\
        .selectExpr("id as conceptId", "active as active_concept")

# Descriptions belonging to the retained concepts.
# NOTE(review): groups are keyed by conceptId (not by description id), so a
# description survives only if it carries that concept's maximum
# effectiveTime — confirm this is intended.
desc_keys = descs_raw.join(concepts, "conceptId")\
        .groupby("conceptId")\
        .agg(*latest_row_aggs)\
        .where("active=1")
descs = desc_keys\
        .join(descs_raw, on=["conceptId","active","effectiveTime"])\
        .selectExpr(*descs_raw.columns)

# Relationships with a retained concept on either end.
# NOTE(review): `rels` does not appear to be used by the later cells of this
# notebook — verify before relying on it.
rel_keys = rels_raw.join(concepts, F.expr("sourceId=conceptId or destinationId=conceptId"))\
        .groupby("sourceId","destinationId","typeId")\
        .agg(*latest_row_aggs)\
        .where("active=1")
rels = rel_keys\
        .join(rels_raw, on=["sourceId","destinationId","typeId","active","effectiveTime"])\
        .selectExpr(*rels_raw.columns)

### Let's filter the tree of concepts extracting unique `is_a` relationships (just one parent per concept)

In [None]:
# Relationships of type 116680003: per (source, destination, type) triple take
# max(active) / max(effectiveTime), join back to the raw rows on those values
# and keep the active ones.
isa_keys = rels_raw.where("typeId='116680003'")\
        .groupby("sourceId","destinationId","typeId")\
        .agg(F.expr("max(active) as active"), F.expr("max(effectiveTime) as effectiveTime"))
isa_rels_raw = isa_keys\
        .join(rels_raw, on=["sourceId","destinationId","typeId","active","effectiveTime"])\
        .where("active=1")

# Require both ends to be retained concepts; the concept columns are dropped
# again after each join so only relationship columns remain.
concept_cols = concepts.columns
isa_with_source = isa_rels_raw.join(concepts, F.expr("sourceId=conceptId")).drop(*concept_cols)
isa_rels_1 = isa_with_source\
        .join(concepts, F.expr("destinationId=conceptId"))\
        .drop(*concept_cols)\
        .distinct()

# One parent per concept: keep the row with the minimum destinationId per sourceId.
single_parent = isa_rels_1.groupby("sourceId")\
        .agg(F.expr("min(destinationId) as destinationId"))
isa_rels_2 = single_parent\
        .join(isa_rels_1, on=["sourceId","destinationId"])\
        .selectExpr(*rels_raw.columns)

#### Let's persist and reload just to speed things up downstream

In [None]:
# Persist the three cleaned tables under <snomed_path>interim/, replacing any earlier run.
for _table_name, _table in (("concepts", concepts), ("descs", descs), ("isa_rels", isa_rels_2)):
    _table.write.mode("overwrite").save(snomed_path + "interim/" + _table_name + ".parquet")

In [None]:
# Read the persisted tables back from the interim folder.
interim_dir = snomed_path + "interim/"
concepts = spark.read.load(interim_dir + "concepts.parquet")
descs = spark.read.load(interim_dir + "descs.parquet")
isa_rels = spark.read.load(interim_dir + "isa_rels.parquet")

In [None]:
# Sanity counts: description rows, distinct (conceptId, term) pairs, distinct concept ids.
concepts_count = concepts.select("conceptId").distinct().count()
distinct_idstr_count = descs.select("conceptId","term").distinct().count()
total_count = descs.count()

print(f"Total: {total_count} dist_str: {distinct_idstr_count} dist_id {concepts_count}")

Total: 809103 dist_str: 809103 dist_id 357406


#### Given SNOMED is a graph, let's try to build a tabular representation by starting at the top node and recursively going down to the leaves

In [None]:
# Top-level hierarchy nodes: direct children of 138875005 other than
# 404684003, plus the direct children of 404684003.
# NOTE(review): presumably these ids are the SNOMED root and "Clinical
# finding" — verify against the release.
children_of_root = isa_rels\
    .where("typeId='116680003' and destinationId='138875005' and sourceId!='404684003'")\
    .select("sourceId")
children_of_404684003 = isa_rels\
    .where("typeId='116680003' and destinationId='404684003'")\
    .select("sourceId")
top_node_ids = children_of_root.union(children_of_404684003).distinct()

# Attach each node's term from descriptions of type 900000000000003001.
typed_descs = descs.where("typeId='900000000000003001'")
hierarchies = top_node_ids\
    .join(concepts, on=F.col("sourceId")==F.col("conceptId"))\
    .join(typed_descs,"conceptId")\
    .select("conceptId","term")

In [None]:
# Level 0: every top-level node is its own direct parent and top parent.
current_level = hierarchies.selectExpr(
    "conceptId",
    "conceptId as directParentId",
    "conceptId as topParentId",
    "0 as level",
    "concat('/',conceptId) as path",
)
all_levels = current_level
level = 1
cont = True
while cont:
    # Children of the current level: relationships whose destination is one of its concepts.
    current_children = isa_rels.join(current_level, F.col("destinationId")==F.col("conceptId")).cache()
    child_columns = [
        "sourceId as conceptId",
        "conceptId as directParentId",
        "topParentId",
        f"{level} as level",
        "concat(path,'/',sourceId) as path",
    ]
    current_level = current_children.selectExpr(*child_columns)
    all_levels = all_levels.union(current_level)
    # Stop once a level produced no children (the empty level has already been unioned in).
    cont = current_children.count() != 0
    level += 1

In [None]:
# Term of each top-level node, renamed so the join key does not clash with conceptId.
top_terms = hierarchies.selectExpr("term as topTerm","conceptId as hConceptId")

# One row per (concept, description) with its position in the tree and the top node's term.
all_levels_write = all_levels\
    .join(descs, on="conceptId")\
    .join(top_terms, on=F.col("topParentId")==F.col("hConceptId"))\
    .drop("hConceptId")\
    .select("conceptId","directParentId","topParentId","level","term","topTerm")\
    .cache()

In [None]:
# Persist the flattened hierarchy table (overwriting any earlier run); Step 2 reloads it from this path.
all_levels_write.write.mode("overwrite").save(snomed_path+"interim/all_levels.parquet")

In [None]:
# List the columns of the flattened hierarchy table.
all_levels_write.columns

['conceptId', 'directParentId', 'topParentId', 'level', 'term', 'topTerm']

In [None]:
# Share of distinct concepts that were reached by the level-by-level traversal.
concepts_covered_count = all_levels_write.select("conceptId").distinct().count()
coverage_ratio = concepts_covered_count / concepts_count
print(f"dist_id_covered {concepts_covered_count} : {coverage_ratio}")

dist_id_covered 335192 : 0.9378465946290773


## Step 2: Preprocess it with a Spark NLP pipeline and train an Entity Resolver on it

In [None]:
# Start Step 2 from the flattened hierarchy table saved under interim/all_levels.parquet.
concepts_clean = spark.read.load(snomed_path+"interim/all_levels.parquet")

In [None]:
# Pretrained clinical word embeddings, wired to read `doc` and `token` and write `embeddings`.
clinical_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
we = clinical_embeddings.setInputCols("doc","token").setOutputCol("embeddings")

embeddings_clinical download started this may take some time.
Approximate size to download 1.6 GB
[OK!]


In [None]:
# Turn each `term` string into a `doc` annotation.
da = DocumentAssembler().setInputCol("term").setOutputCol("doc")

# Tokenizer with custom context and split characters.
# Fix: the split-char list used to contain two adjacent string literals (a
# lone backslash followed by a comma) with no separator between them; Python
# silently concatenates such literals into one entry, so the double quote was
# never registered as a split character. The double quote and the comma are
# now two separate entries.
tk = Tokenizer().setInputCols("doc").setOutputCol("token").setContextChars([".", ",", ";", ":", "!", "?", "*", "-", "(", ")", "\"", "'","(",")","+","-"])\
  .setSplitChars(["'","\"",",","/"," ",".","|","@","#","%","&","\\$","\\[","\\]","\\(","\\)","-",";"])

# Treat the whole document (one term) as a single chunk.
d2c = Doc2Chunk().setInputCols("doc").setOutputCol("chunk")

# Derive one embedding per chunk from the chunk and the word embeddings.
ce = ChunkEmbeddings().setInputCols("chunk","embeddings").setOutputCol("chunk_embeddings")

In [None]:
# Fit the preprocessing stages on the concept terms.
train_stages = [da, tk, we, d2c, ce]
trainPipelineModel = Pipeline(stages=train_stages).fit(concepts_clean)

In [None]:
# Run the preprocessing pipeline and persist the result without the
# `embeddings` column.
embedded_path = snomed_path+"interim/concepts_embedded.parquet"
concepts_embedded = trainPipelineModel.transform(concepts_clean)
concepts_embedded.drop("embeddings").write.mode("overwrite").save(embedded_path)

# Reload from disk; keep the original casing in `_term` and lower-case `term`.
concepts_embedded = spark.read.load(embedded_path)\
    .withColumn("_term", F.col("term"))\
    .withColumn("term", F.expr("lower(term)"))

In [None]:
# Number of rows per top-level term, collected to pandas and sorted from largest to smallest.
topterm_counts = concepts_embedded.groupby("topTerm").count().toPandas()
topterm_dist = topterm_counts.sort_values("count", ascending=False)

In [None]:
# Keep the top-level terms whose text ends with "finding)".
is_finding_term = topterm_dist["topTerm"].str.endswith("finding)")
nodes = topterm_dist.loc[is_finding_term, "topTerm"]

In [None]:
# Kept for backward compatibility with any cell that still reads it; the
# filter below no longer depends on this string.
nodes_filter = ",".join([f"'{tt}'" for tt in nodes])

# Fix: pass the selected terms to `Column.isin` instead of splicing them into
# a SQL `in (...)` string, so a term containing a quote or backslash cannot
# break or alter the filter expression, and an empty selection yields an
# empty training set rather than a SQL parse error.
nodes_list = [str(tt) for tt in nodes]
training_set = concepts_embedded.where(F.col("topTerm").isin(nodes_list))

In [None]:
# Columns available in the training set passed to the resolver.
training_set.columns

['conceptId',
 'directParentId',
 'topParentId',
 'level',
 'term',
 'topTerm',
 'doc',
 'token',
 'chunk',
 'chunk_embeddings',
 '_term']

In [None]:
# Resolver trainer: reads `token` and `chunk_embeddings`, writes
# `snomed_findings`, takes labels from `conceptId` and the normalized text
# from `_term` (the term with its original casing).
er = (
    ChunkEntityResolverApproach()
    .setInputCols("token", "chunk_embeddings")
    .setOutputCol("snomed_findings")
    .setNormalizedCol("_term")
    .setLabelCol("conceptId")
    .setNeighbours(250)
    .setAlternatives(10)
    .setExtramassPenalty(2)
    .setThreshold(1e6)
    .setEnableLevenshtein(True)
    .setCaseSensitive(False)
    .setDistanceWeights([1, 5, 5, 0, 0, 1])
)

In [None]:
# Fit the resolver on the filtered training set.
# NOTE(review): the name is plural but a single fitted object is assigned — confirm.
erModels = er.fit(training_set)

## Step 3: We now have a SNOMED model for Entity Resolution, let's test it on some real world data

In [None]:
# Fix: import from the public `IPython.display` module; importing `display`
# from `IPython.core.display` is a deprecated path.
from IPython.display import display, HTML

# Free-text clinical notes used to try out the trained resolver.
notes = []
notes.append("""A 35-year-old African-American man was referred to our urology clinic by his primary care physician for consultation about a large left scrotal mass. The patient reported a 3-month history of left scrotal swelling that had progressively increased in size and was associated with mild left scrotal pain. He also had complaints of mild constipation, with hard stools every other day. He denied any urinary complaints. On physical examination, a hard paratesticular mass could be palpated in the left hemiscrotum extending into the left groin, separate from the left testicle, and measuring approximately 10 × 7 cm in size. A hard, lower abdominal mass in the suprapubic region could also be palpated in the midline. The patient was admitted urgently to the hospital for further evaluation with cross-sectional imaging and blood work.
Laboratory results, including results of a complete blood cell count with differential, liver function tests, coagulation panel, and basic chemistry panel, were unremarkable except for a serum creatinine level of 2.6 mg/dL. Typical markers for a testicular germ cell tumor were within normal limits: the beta–human chorionic gonadotropin level was less than 1 mIU/mL and the alpha fetoprotein level was less than 2.8 ng/mL. A CT scan of the chest, abdomen, and pelvis with intravenous contrast was obtained, and it showed large multifocal intra-abdominal, retroperitoneal, and pelvic masses (Figure 1). On cross-sectional imaging, a 7.8-cm para-aortic mass was visualized compressing the proximal portion of the left ureter, creating moderate left hydroureteronephrosis. Additionally, three separate pelvic masses were present in the retrovesical space, each measuring approximately 5 to 10 cm at their largest diameter; these displaced the bladder anteriorly and the rectum posteriorly.
The patient underwent ultrasound-guided needle biopsy of one of the pelvic masses on hospital day 3 for definitive diagnosis. Microscopic examination of the tissue by our pathologist revealed cellular islands with oval to elongated, irregular, and hyperchromatic nuclei; scant cytoplasm; and invading fibrous tissue—as well as three mitoses per high-powered field (Figure 2). Immunohistochemical staining demonstrated strong positivity for cytokeratin AE1/AE3, vimentin, and desmin. Further mutational analysis of the cells detected the presence of an EWS-WT1 fusion transcript consistent with a diagnosis of desmoplastic small round cell tumor.""")
notes.append("""A 72-year-old man with a history of diabetes mellitus, hypertension, and hypercholesterolemia self-palpated a left submandibular lump in 2012. Complete blood count (CBC) in his internist’s office showed solitary leukocytosis (white count 22) with predominant lymphocytes for which he was referred to a hematologist. Peripheral blood flow cytometry on 04/11/12 confirmed chronic lymphocytic leukemia (CLL)/small lymphocytic lymphoma (SLL): abnormal cell population comprising 63% of CD45 positive leukocytes, co-expressing CD5 and CD23 in CD19-positive B cells. CD38 was negative but other prognostic markers were not assessed at that time. The patient was observed regularly for the next 3 years and his white count trend was as follows: 22.8 (4/2012) --> 28.5 (07/2012) --> 32.2 (12/2012) --> 36.5 (02/2013) --> 42 (09/2013) --> 44.9 (01/2014) --> 75.8 (2/2015). His other counts stayed normal until early 2015 when he also developed anemia (hemoglobin [HGB] 10.9) although platelets remained normal at 215. He had been noticing enlargement of his cervical, submandibular, supraclavicular, and axillary lymphadenopathy for several months since 2014 and a positron emission tomography (PET)/computed tomography (CT) scan done in 12/2014 had shown extensive diffuse lymphadenopathy within the neck, chest, abdomen, and pelvis. Maximum standardized uptake value (SUV max) was similar to low baseline activity within the vasculature of the neck and chest. In the abdomen and pelvis, however, there was mild to moderately hypermetabolic adenopathy measuring up to SUV of 4. The largest right neck nodes measured up to 2.3 x 3 cm and left neck nodes measured up to 2.3 x 1.5 cm. His right axillary lymphadenopathy measured up to 5.5 x 2.6 cm and on the left measured up to 4.8 x 3.4 cm. Lymph nodes on the right abdomen and pelvis measured up to 6.7 cm and seemed to have some mass effect with compression on the urinary bladder without symptoms. 
He underwent a bone marrow biopsy on 02/03/15, which revealed hypercellular marrow (60%) with involvement by CLL (30%); flow cytometry showed CD38 and ZAP-70 positivity; fluorescence in situ hybridization (FISH) analysis showed 13q deletion/monosomy 13; IgVH was unmutated; karyotype was 46XY.""")
notes.append("A 56-year-old woman began to experience vertigo, headaches, and frequent falls. A computed tomography (CT) scan of the brain revealed the presence of a 1.6 x 1.6 x 2.1 cm mass involving the fourth ventricle (Figure 14.1). A gadolinium-enhanced magnetic resonance imaging (MRI) scan confirmed the presence of the mass, and a stereotactic biopsy was performed that demonstrated a primary central nervous system lymphoma (PCNSL) with a diffuse large B-cell histology. Complete blood count (CBC), lactate dehydrogenase (LDH), and beta-2-microglobulin were normal. Systemic staging with a positron emission tomography (PET)/CT scan and bone marrow biopsy showed no evidence of lymphomatous involvement outside the CNS. An eye exam and lumbar puncture showed no evidence of either ocular or leptomeningeal involvement.") 
notes.append("An 83-year-old female presented with a progressing pruritic cutaneous rash that started 8 years ago. On clinical exam there were numerous coalescing, infiltrated, scaly, and partially crusted erythematous plaques distributed over her trunk and extremities and a large fungating ulcerated nodule on her right thigh covering 75% of her total body surface area (Figure 10.1). Lymphoma associated alopecia and a left axillary lymphadenopathy were also noted. For the past 3–4 months she reported fatigue, severe pruritus, night sweats, 20 pounds of weight loss, and loss of appetite.")
# Render every note as a labelled grey box; the note text is inserted as-is.
note_box_open = '<div style="border:2px solid #747474; background-color: #e3e3e3; margin: 5px; padding: 10px">'
note_box_close = '</div>'
output = "".join(
    f"Note {note_idx}\n" + note_box_open + note_text + note_box_close
    for note_idx, note_text in enumerate(notes)
)

HTML(output)

### Let's run the very same pipeline on these notes + the NERDL pretrained model for clinical Problems, Tests and Treatments. We are specifically interested in Problems today so we can link them to snomed findings.

In [None]:
# Column names of the notes frame.
docid_col = "doc_id"
note_col = "text_feed"

# One row per note: its position in `notes` and the lower-cased text.
note_rows = [(note_idx, note_text.lower()) for note_idx, note_text in enumerate(notes)]
data_notes = spark.createDataFrame(note_rows).toDF(docid_col, note_col)

In [None]:
# Fix: bind `da` to a fresh assembler instead of calling setInputCol on the
# existing object. That object is also the first stage of the training
# pipeline ([da, tk, we, d2c, ce]), so mutating it in place meant re-running
# the training cells after this one would read the notes column instead of
# `term`. The column name now comes from `note_col` rather than a repeated
# literal.
da = DocumentAssembler().setInputCol(note_col).setOutputCol("doc")

# Pretrained clinical NER model over the tokens and word embeddings.
ner = NerDLModel.pretrained("ner_clinical", "en", "clinical/models") \
  .setInputCols(["doc", "token", "embeddings"]) \
  .setOutputCol("ner")

# Convert NER tags into chunks, keeping only PROBLEM entities.
prob = NerConverter() \
  .setInputCols(["doc", "token", "ner"]) \
  .setOutputCol("chunk").setWhiteList(["PROBLEM"])

ner_clinical download started this may take some time.
Approximate size to download 13.8 MB
[OK!]


In [None]:
# Notes pipeline: the training preprocessing stages with NER chunks in place
# of whole-document chunks, followed by the fitted resolver.
notes_stages = [da, tk, we, ner, prob, ce, erModels]
notesPipelineModel = Pipeline().setStages(notes_stages).fit(data_notes)

In [None]:
# Annotate the notes and cache the result for the analysis that follows.
annotated = notesPipelineModel.transform(data_notes).cache()

In [None]:
def quick_metadata_analysis(df, doc_field, chunk_field, code_fields):
    """Flatten chunk annotations and their resolver metadata into one row per chunk.

    Parameters
    ----------
    df : DataFrame
        Annotated frame containing `doc_field`, `chunk_field` and every
        column named in `code_fields`.
    doc_field : str
        Document id column; used in `coords` and as the first sort key.
    chunk_field : str
        Annotation column whose begin / end / result / metadata arrays are
        zipped together.
    code_fields : list of str
        Resolver output columns. The text before the first underscore of each
        name becomes the prefix of its `<prefix>_conf` and `<prefix>_opts`
        output columns.

    Returns
    -------
    DataFrame
        Columns `coords`, `chunk`, `entity`, then one `_conf` / `_opts` pair
        per code field, ordered by document, chunk begin and chunk end.
    """
    # Zipped struct fields are addressed positionally as a['0'], a['1'], ...:
    # 0=begin, 1=end, 2=result, 3=chunk metadata, then one metadata entry per
    # code field. Building the argument list first also keeps the expression
    # well-formed when `code_fields` is empty.
    zipped_parts = [f"{chunk_field}.{attr}" for attr in ("begin", "end", "result", "metadata")]
    first_code_idx = len(zipped_parts)
    zipped_parts += [f"{cf}.metadata" for cf in code_fields]
    expression = "explode(arrays_zip(" + ", ".join(zipped_parts) + ")) as a"

    code_columns = []
    for offset, cf in enumerate(code_fields):
        idx = first_code_idx + offset
        prefix = cf.split('_')[0]
        code_columns.append(f"float(a['{idx}'].confidence) as {prefix}_conf")
        code_columns.append(
            f"arrays_zip(split(a['{idx}'].all_k_confidences,':::'),"
            f"split(a['{idx}'].all_k_results,':::'),"
            f"split(a['{idx}'].all_k_resolutions,':::')) as {prefix}_opts"
        )

    # Fix: sort by the `doc_field` argument; the previous code sorted by the
    # notebook global `docid_col`, ignoring the parameter.
    return df.selectExpr(doc_field, expression) \
        .orderBy(doc_field, F.expr("a['0']"), F.expr("a['1']"))\
        .selectExpr(f"concat_ws('::',{doc_field},a['0'],a['1']) as coords", "a['2'] as chunk","a['3'].entity as entity", *code_columns)

In [None]:
import pandas as pd
# Let pandas print long cell contents (the option lists) without truncation.
pd.set_option("display.max_colwidth", 10000)

# Collect the flattened resolver results for the annotated notes into pandas.
snomed_flat = quick_metadata_analysis(annotated, docid_col, "chunk", ["snomed_findings"])
snomed = snomed_flat.toPandas()

In [None]:
# Show only the chunks whose top resolution confidence exceeds the cut-off.
min_confidence = 0.11
snomed.loc[snomed["snomed_conf"] > min_confidence]

Unnamed: 0,coords,chunk,entity,snomed_conf,snomed_opts
1,0::192::212,left scrotal swelling,PROBLEM,0.1289,"[(0.1289, 390919006, On examination - left scrotal swelling), (0.1095, 268946005, On examination - scrotal swelling), (0.1089, 390918003, On examination - right scrotal swelling), (0.1015, 762916009, Swelling of left foot), (0.0978, 442648006, Swelling of left tonsil), (0.0923, 762896009, Swelling of left lower limb), (0.0919, 12242351000119109, Swelling of left upper limb), (0.0908, 15952141000119106, Left parotid gland swelling (finding)), (0.0900, 16696741000119103, Swelling of left ear structure), (0.0885, 10678471000119104, Localised swelling of left foot)]"
4,0::329::345,mild constipation,PROBLEM,0.1267,"[(0.1267, 21782001, Drug-induced constipation), (0.1056, 111360009, Intractable constipation), (0.1049, 430097009, Spastic constipation), (0.1015, 432414001, Atonic constipation), (0.0969, 197119006, Acute constipation (finding)), (0.0952, 40196000, Mild pain (finding)), (0.0951, 236070005, Simple constipation (finding)), (0.0937, 301380003, Mild present pain), (0.0908, 58230007, Intermittent constipation pattern), (0.0898, 370218001, Mild asthma (disorder))]"
5,0::353::363,hard stools,PROBLEM,0.1988,"[(0.1988, 75295004, Hard stools), (0.1067, 27731006, Semisolid stools), (0.1022, 398032003, Loose stools), (0.1000, 2901004, Tarry stools), (0.0997, 35064005, Black stools), (0.0888, 64412006, Red stools (finding)), (0.0809, 289775006, Cervix hard (finding)), (0.0795, 88351001, Hard water syndrome), (0.0722, 126806005, Neoplasm of hard palate), (0.0712, 426014000, Fistula of hard palate)]"
7,0::441::466,a hard paratesticular mass,PROBLEM,0.1541,"[(0.1541, 102031000119109, Paratesticular mass), (0.1203, 6370001000004104, Mass of hard palate), (0.0962, 14160001000004107, Vallecular mass), (0.0925, 126806005, Neoplasm of hard palate), (0.0923, 163292001, On examination - abdominal mass - hard (finding)), (0.0915, 69559004, Mass of retroperitoneal structure), (0.0911, 13420001000004101, Periaortic mass), (0.0889, 444905003, Mass of soft tissue (finding)), (0.0870, 723111007, Hypopharyngeal mass in neonate), (0.0861, 92129006, Benign tumor of hard palate)]"
10,0::1354::1402,"large multifocal intra-abdominal, retroperitoneal",PROBLEM,0.1152,"[(0.1152, 363420003, Malignant retroperitoneal tumor (disorder)), (0.1116, 236017004, Malignant retroperitoneal fibrosis (disorder)), (0.1051, 1090321000119103, Postprocedural retroperitoneal abscess (disorder)), (0.1014, 127253006, Neoplasm of retroperitoneal lymph nodes (disorder)), (0.0996, 385016002, Extraprostatic extension of tumor present, multifocal), (0.0967, 92170008, Benign neoplasm of large intestine), (0.0935, 405579008, Nontraumatic perforation of large intestine), (0.0933, 92324006, Benign neoplasm of retroperitoneal lymph nodes (disorder)), (0.0928, 247151007, Macula - multifocal placoid lesions (finding)), (0.0907, 93854002, Primary malignant neoplasm of large intestine)]"
11,0::1409::1427,pelvic masses (figu,PROBLEM,0.1107,"[(0.1107, 717717002, Non-cyclic pelvic pain (finding)), (0.1094, 783189008, Pelvic sideshift (finding)), (0.1001, 203508001, Bony pelvic pain (finding)), (0.0976, 283137007, Contusion of pelvic region), (0.0975, 239992008, Bursitis of pelvic region), (0.0974, 283129004, Blister of pelvic region), (0.0972, 74285003, Mass of pelvic structure (finding)), (0.0969, 57027007, Periostitis of pelvic region), (0.0966, 162147009, Complaining of pelvic pain (finding)), (0.0965, 126561007, Neoplasm of pelvic bone (disorder))]"
12,0::1463::1487,a 7.8-cm para-aortic mass,PROBLEM,0.1216,"[(0.1216, 300848003, Observation of a mass), (0.1142, 14160001000004107, Vallecular mass), (0.1102, 237047009, Tubo-ovarian mass (finding)), (0.1082, 13420001000004101, Periaortic mass), (0.0962, 450878002, Tonsillar mass), (0.0934, 87860000, Mass of testis), (0.0901, 443607001, Palpable mass (finding)), (0.0898, 102031000119109, Paratesticular mass), (0.0892, 281398003, Groin mass (finding)), (0.0871, 309082002, Epigastric mass (finding))]"
14,0::1753::1790,these displaced the bladder anteriorly,PROBLEM,0.1124,"[(0.1124, 271630007, Trachea displaced to left (disorder)), (0.1123, 271631006, Trachea displaced to right (disorder)), (0.1040, 784285002, Tenesmus of bladder (finding)), (0.1034, 197230002, Pseudodiverticulum of the rectum), (0.1015, 366267009, Finding of bladder neck position (finding)), (0.1005, 26797002, Hyperaemia of bladder), (0.0963, 289656006, Uterine fundus displaced below introitus with traction (finding)), (0.0917, 94743005, Neoplasm of uncertain behaviour of apex of urinary bladder), (0.0890, 95012001, Neoplasm of uncertain behaviour of posterior wall of urinary bladder), (0.0888, 202326005, Protrusio acetabuli of the pelvic region and thigh)]"
17,0::2068::2105,hyperchromatic nuclei; scant cytoplasm,PROBLEM,0.1126,"[(0.1126, 384735004, Nuclear pleomorphism: small regular nuclei (score=1)), (0.1053, 125161006, Unsatisfactory for evaluation due to scant squamous epithelial component), (0.0986, 165475005, Anisocytosis, red cells (finding)), (0.0979, 165473003, Normocytosis, red cells (finding)), (0.0978, 451038000, Haemorrhage in caudate nucleus), (0.0978, 250249008, Spiny prickle cells present), (0.0977, 50110003, No endocervical cells present), (0.0975, 250441008, Epithelial cells present (finding)), (0.0975, 240086009, Myopathy with cytoplasmic inclusions), (0.0973, 129734009, Oval shaped lesion (finding))]"
18,0::2112::2137,invading fibrous tissue—as,PROBLEM,0.1142,"[(0.1142, 416416006, Retrocorneal fibrous membrane), (0.1018, 424286001, Fibrous dysplasia of orbit), (0.1013, 402870005, Reactive fibrous tissue proliferation), (0.0993, 36517007, Polyostotic fibrous dysplasia of bone), (0.0988, 54422002, Fibrous obliteration of appendix), (0.0987, 254736006, Fibrous tissue neoplasm of skin), (0.0979, 404013002, Angiomatoid fibrous histiocytoma of skin), (0.0970, 404014008, Malignant fibrous histiocytoma of skin), (0.0958, 109787002, Inflammatory fibrous hyperplasia of mouth), (0.0951, 254750001, Benign fibrous histiocytoma of skin)]"
