# ***Install Libraries***

In [15]:
%%capture
import sys
!{sys.executable} -m pip install spacy
!{sys.executable} -m pip install scispacy


In [16]:
%%capture
import sys
#model trained on BCSCDR corpus for Disease and Chemical entities
!{sys.executable} -m pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.3/en_ner_bc5cdr_md-0.5.3.tar.gz

# **Import libraries**


In [17]:
import pandas as pd
import spacy
import scispacy
import en_ner_bc5cdr_md
from spacy.matcher import Matcher

## Importing the Dataset

In [18]:
medicalTranscript_df = pd.read_csv('med_transcripts.csv')
medicalTranscript_df.head()


Unnamed: 0,medical_specialty,transcription
0,Dentistry,"PREOPERATIVE DIAGNOSIS:, Completely bony impa..."
1,Dentistry,"CHIEF COMPLAINT:, Toothache.,HISTORY OF PRESE..."
2,Dentistry,"PREOPERATIVE DIAGNOSES: , Carious teeth #2 and..."
3,Dentistry,"PREOPERATIVE DIAGNOSES,1. Carious teeth #2, #..."
4,Dentistry,"PREOPERATIVE DIAGNOSES,1. Basal cell nevus sy..."


# Loading the Pre-Trained model

In [14]:
nlp = spacy.load("en_ner_bc5cdr_md")


  self.meta.update(data)


## Named Enttities Recognition

In [19]:
# Identify and Classify Entities
medical_transcripts = medicalTranscript_df['transcription']
print(len(medical_transcripts))


250


In [21]:
def extract_entities(data):
  entities = []
  for transcript in data:
    doc = nlp(transcript)
    for ent in doc.ents:
      entities.append((ent.text, ent.label_))
  return entities

named_entities = extract_entities(medical_transcripts)
for entity, category in named_entities:
  print(f"{entity}: {category}")

throat: DISEASE
lidocaine: CHEMICAL
epinephrine: CHEMICAL
bupivacaine: CHEMICAL
1:200,000 epinephrine: CHEMICAL
Tooth: DISEASE
throat: DISEASE
NG: CHEMICAL
tooth pain: DISEASE
toothache: DISEASE
OxyContin: CHEMICAL
Vicodin: CHEMICAL
knee pain: DISEASE
pain: DISEASE
toothache: DISEASE
fever: DISEASE
chills: DISEASE
fatigue: DISEASE
headache: DISEASE
pain: DISEASE
toothache pain: DISEASE
swelling: DISEASE
sore: DISEASE
sinus congestion: DISEASE
pain: DISEASE
pain: DISEASE
pain: DISEASE
chest pain: DISEASE
shortness of breath: DISEASE
cough: DISEASE
abdominal pain: DISEASE
nausea: DISEASE
pain: DISEASE
rashes: DISEASE
weakness: DISEASE
numbness: DISEASE
swelling: DISEASE
OxyContin: CHEMICAL
oxygen: CHEMICAL
icterus: DISEASE
infection: DISEASE
erythema: DISEASE
erythema: DISEASE
rhinorrhea: DISEASE
tenderness: DISEASE
meningismus: DISEASE
tenderness: DISEASE
erythema: DISEASE
abscess: DISEASE
fractures: DISEASE
infection: DISEASE
erythema: DISEASE
abscess: DISEASE
Ludwig's syndrome: DISEAS

# Visualize the Entities in the Document

In [22]:
medical_transcript = medicalTranscript_df['transcription'][100]
doc = nlp(medical_transcript)
colors = {"DISEASE": "yellow", "CHEMICAL": "red"}
color_options = {"ents": ["DISEASE", "CHEMICAL"], "colors": colors}
spacy.displacy.render(doc, style="ent", jupyter=True, options=color_options)
#

## **Ruled Based Relation Extraction**

In [23]:
pattern = [{'ENT_TYPE':'CHEMICAL'}, {'LIKE_NUM': True}, {'IS_ASCII': True}]
matcher = Matcher(nlp.vocab)
matcher.add("DRUG_DOSE", [pattern])

In [24]:
for transcription in medical_transcripts:
  doc = nlp(transcription)
  matches = matcher(doc)
  for match_id, start, end in matches:
    string_id = nlp.vocab.strings[match_id]
    span = doc[start:end]
    print(string_id,span.text)

DRUG_DOSE lidocaine 2%
DRUG_DOSE bupivacaine 0.5%
DRUG_DOSE lidocaine 2%
DRUG_DOSE Marcaine 1.7 mL
DRUG_DOSE lidocaine 2%
DRUG_DOSE Marcaine 0.5%
DRUG_DOSE Lidocaine 1%
DRUG_DOSE code 21470,
DRUG_DOSE lidocaine 1%
DRUG_DOSE clindamycin 900 mg
DRUG_DOSE Xylocaine 1%
DRUG_DOSE epinephrine 7 ml
DRUG_DOSE Motrin 400 mg
DRUG_DOSE lidocaine 2%
DRUG_DOSE bupivacaine 0.5%
DRUG_DOSE Depo-Medrol 80 mg
DRUG_DOSE Solu-Medrol 125 mg
DRUG_DOSE Solu-Medrol 125 mg
DRUG_DOSE prednisone 40 mg
DRUG_DOSE colchicine 0.6 mg
DRUG_DOSE colchicine 0.6 daily
DRUG_DOSE Colchicine 0.6 mg
DRUG_DOSE ranitidine 150 mg
DRUG_DOSE ergocalciferol 50,000 weekly
DRUG_DOSE ointment 0.1%
DRUG_DOSE Keflex 500 mg
DRUG_DOSE Xylocaine 1%
DRUG_DOSE Demerol 50 mg
DRUG_DOSE amoxicillin 500 mg
DRUG_DOSE cream 0.1,
DRUG_DOSE cream 0.1;
DRUG_DOSE Xylocaine 1%
DRUG_DOSE Adderall 5 mg
DRUG_DOSE Nexium 4 mg
DRUG_DOSE Propanolol 10 mg
DRUG_DOSE Spironolactone 100 mg
DRUG_DOSE Lactulose 60 cc
DRUG_DOSE Seroquel 25 mg
DRUG_DOSE Zyprexa 5 m