In [66]:
!pip install spacy
!pip install scispacy
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_ner_bc5cdr_md-0.5.4.tar.gz
!pip install wikipedia
!pip install nltk

Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_ner_bc5cdr_md-0.5.4.tar.gz
  Using cached https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_ner_bc5cdr_md-0.5.4.tar.gz (119.8 MB)
  Preparing metadata (setup.py) ... done


In [68]:
import nltk
# Fetch the WordNet corpus; the nltk.corpus.wordnet lookups later in the
# notebook read from this downloaded data.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [69]:
import spacy
import scispacy
from spacy import displacy
import wikipedia
from nltk.corpus import wordnet

In [70]:
# Load the 'en_ner_bc5cdr_md' pipeline that was installed from the model tarball above.
sci_nlp = spacy.load('en_ner_bc5cdr_md')

# Show the names of the pipeline's components as the cell output.
sci_nlp.component_names

['tok2vec', 'tagger', 'attribute_ruler', 'lemmatizer', 'parser', 'ner']

In [71]:
# List the entity labels the pipeline's NER component can assign.
sci_nlp.get_pipe('ner').labels

('CHEMICAL', 'DISEASE')

In [72]:
# Read the whole summary into one string. The encoding is stated explicitly so
# the file is decoded the same way regardless of the platform's default codec.
with open("./summary/summary_0.txt", "r", encoding="utf-8") as file:
    text = file.read()

# Echo the loaded summary so it can be inspected before entity extraction.
print(text)

He had been admitted to the St. Margaret's Center for Women & Infants multiple times early in 2013 for management of encephalopathy and ascites . The patient had been discharged from the St. Margaret's Center for Women & Infants on 2013-05-06 , but was readmitted on 2013-05-09 when noted to have worsening renal function . The patient's serum creatinine on the day of discharge , on 2013-05-06 , was 1.9 , but was noted to increase to 3.2 on 2013-05-08 , and was further elevated to 3.6 on 2013-05-09 .

The patient was continued on vancomycin therapy for his previously diagnosed Methicillin resistant , coagulase negative Staph bacteremia . The procedure proceeded without complications , and the patient was transferred to the intensive care unit while still intubated following the procedure . In order to aid in optimization of the patient's fluid status , the patient was on continuous Baker Baker dialysis during the procedure . Over the following 5 days , the patient's creatinine improved m

In [73]:
# Run the loaded pipeline over the summary text; the entities it recognises
# are read from docx.ents in the following cells.
docx = sci_nlp(text)

In [74]:
# Map each distinct entity text to its label. If the same text is recognised
# more than once, the label from its last occurrence is the one that is kept.
unique_entities = {ent.text: ent.label_ for ent in docx.ents}

# Show one "text : label" line per distinct entity.
for entity, label in unique_entities.items():
    print(f"{entity} : {label}")

encephalopathy : DISEASE
ascites : DISEASE
creatinine : CHEMICAL
vancomycin : CHEMICAL
Methicillin : CHEMICAL
bacteremia : DISEASE
Lasix : CHEMICAL
prednisone : CHEMICAL
Neoral : CHEMICAL
CellCept : CHEMICAL


In [75]:
# Render the processed document in the notebook using displaCy's entity ('ent') style.
displacy.render(docx, style = 'ent',jupyter=True)

In [76]:
def get_wordnet_definition(term):
    """Return the WordNet definition of the first synset found for ``term``.

    Args:
        term: The word to look up in WordNet.

    Returns:
        The definition string of the first synset, or the placeholder
        "No definition found" when WordNet returns no synsets for ``term``.
    """
    matches = wordnet.synsets(term)
    if not matches:
        return "No definition found"
    # Several senses may be returned; only the first one is used.
    return matches[0].definition()

def get_wikipedia_summary(term):
    """Return a one-sentence Wikipedia summary for ``term``.

    Lookup failures are never raised; they are reported through the returned
    string instead.

    Args:
        term: The title or phrase to look up on Wikipedia.

    Returns:
        The one-sentence summary on success. Otherwise "No Wikipedia page found"
        for a missing page, a "Multiple entries found: [...]" message listing
        up to three candidate titles for an ambiguous term, or the text of any
        other exception that was raised.
    """
    try:
        # sentences=1 restricts the summary to its first sentence.
        return wikipedia.summary(term, sentences=1)
    except wikipedia.exceptions.PageError:
        return "No Wikipedia page found"
    except wikipedia.exceptions.DisambiguationError as ambiguous:
        # Report only the first three candidate titles.
        candidates = ambiguous.options[:3]
        return f"Multiple entries found: {candidates}"
    except Exception as err:
        # Best effort: any other failure is surfaced as its message text.
        return str(err)


entity_meanings = {}

for entity in unique_entities:
    # Multi-word phrases are looked up on Wikipedia; single words go to WordNet.
    lookup = get_wikipedia_summary if ' ' in entity else get_wordnet_definition
    entity_meanings[entity] = lookup(entity)

# NOTE(review): single-word entities that WordNet does not know keep the
# "No definition found" placeholder; consider falling back to Wikipedia for them.
# Show one "entity : meaning" line per entity.
for entity, meaning in entity_meanings.items():
    print(f"{entity} : {meaning}")

encephalopathy : any disorder or disease of the brain
ascites : accumulation of serous fluid in peritoneal cavity
creatinine : No definition found
vancomycin : an antibiotic (trade name Vancocin) effective against some bacterial infections
Methicillin : antibiotic drug of the penicillin family used in the treatment of certain staphylococcal infections
bacteremia : transient presence of bacteria (or other microorganisms) in the blood
Lasix : commonly used diuretic (trade name Lasix) used to treat hypertension and edema
prednisone : a dehydrogenated analogue of cortisol (trade names Orasone or Deltasone or Liquid Pred or Meticorten); used as an anti-inflammatory drug in the treatment of arthritis and as an immunosuppressant
Neoral : No definition found
CellCept : No definition found


In [77]:
# Write the original summary followed by a glossary of the extracted entities.
# The encoding is explicit because the looked-up definitions may contain
# characters that a platform's default codec cannot encode, which would make
# the write fail with UnicodeEncodeError.
with open("./summary_with_meanings.txt", "w", encoding="utf-8") as file:
    file.write(text)
    file.write("\n\n")  # Blank line separating the summary from the glossary

    # One "entity : meaning" line per extracted entity.
    for entity, meaning in entity_meanings.items():
        file.write(f"{entity} : {meaning}\n")