In [2]:
import spacy
#!pip install scispacy
import scispacy
from spacy import displacy
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span
#!pip install negspacy
from negspacy.negation import Negex

import pandas as pd

#!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_bc5cdr_md-0.4.0.tar.gz
#!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_md-0.4.0.tar.gz

In [3]:
# Load the two scispaCy pipelines used in this notebook:
#   nlp0 ("en_core_sci_md")   -> passed to lemmatize() further down
#   nlp1 ("en_ner_bc5cdr_md") -> run on the lemmatized note to obtain entities
nlp0 = spacy.load("en_core_sci_md")
nlp1 = spacy.load("en_ner_bc5cdr_md")

# Alternative input (disabled): take the first patient note from a CSV file.
#patNotes = pd.read_csv("patient_notes.csv")
#p_note = patNotes.iloc[0].pn_history        # load first patient note

# Hard-coded sample note. It is a single string literal continued with
# trailing backslashes, so the line breaks below are NOT part of the text.
# NOTE(review): the continuations after "hypertension." and "unlikely." have no
# space before the backslash, so each of those sentences is joined to the next
# one without a separating space ("hypertension.Patient", "unlikely.Patient").
p_note = "Patient resting in bed. Patient given azithromycin without any difficulty. Patient has audible wheezing, \
states chest tightness. No evidence of hypertension.\
Patient denies nausea at this time. zofran declined. Patient is also having intermittent sweating associated with pneumonia. \
Patient refused pain but tylenol still given. Neither substance abuse nor alcohol use however cocaine once used in the last year. Alcoholism unlikely.\
Patient has headache and fever. Patient is not diabetic. \
No signs of diarrhea. Lab reports confirm lymphocytopenia. Cardaic rhythm is Sinus bradycardia. \
Patient also has a history of cardiac injury. No kidney injury reported. No abnormal rashes or ulcers. \
Patient might not have liver disease. Confirmed absence of hemoptysis. Although patient has severe pneumonia and fever \
, test reports are negative for COVID-19 infection. COVID-19 viral infection absent."

In [4]:
# Lemmatize the note so that inflected negation cues share one surface form
def lemmatize(note, nlp):
    """Return ``note`` with every token replaced by its lemma.

    Parameters
    ----------
    note : str
        Raw text to process.
    nlp : callable
        Pipeline called as ``nlp(note)``; it must yield tokens that expose a
        ``lemma_`` attribute.

    Returns
    -------
    str
        The lemmas joined by single spaces (original spacing is not kept).
    """
    return " ".join(token.lemma_ for token in nlp(note))

# Lemmatize the sample note with nlp0 and print the resulting text.
lem_note = lemmatize(p_note, nlp0)
print(lem_note)

# Run nlp1 on the lemmatized text; `doc` is reused by the rendering and relabelling cells below.
doc = nlp1(lem_note)

patient rest in bed . patient give azithromycin without any difficulty . patient have audible wheezing , state chest tightness . no evidence of hypertension . patient deny nausea at this time . zofran decline . patient be also have intermittent sweating associate with pneumonia . patient refuse pain but tylenol still give . neither substance abuse nor alcohol use however cocaine once use in the last year . alcoholism unlikely . patient have headache and fever . patient be not diabetic . no sign of diarrhea . lab report confirm lymphocytopenia . cardaic rhythm be sinus bradycardia . patient also have a history of cardiac injury . no kidney injury report . no abnormal rash or ulcer . patient might not have liver disease . confirmed absence of hemoptysis . although patient have severe pneumonia and fever , test report be negative for covid-19 infection . covid-19 viral infection absent .


In [5]:
# displaCy options for the entity visualizer
def get_entity_options():
    """Build the ``options`` dict passed to ``displacy.render``.

    Returns
    -------
    dict
        ``"ents"``: the set of entity labels to render;
        ``"colors"``: a mapping from each of those labels to a CSS gradient.
    """
    gradient_by_label = {
        'DISEASE': 'linear-gradient(180deg, #66ffcc, #abf763)',
        'CHEMICAL': 'linear-gradient(90deg, #aa9cfc, #fc9ce7)',
        "NEG_ENTITY": 'linear-gradient(90deg, #ffff66, #ff6600)',
    }
    # The rendered labels are exactly the ones that have a colour assigned.
    return {"ents": set(gradient_by_label), "colors": gradient_by_label}

# Build the displaCy options once; the same dict is reused for the final rendering.
options = get_entity_options()

# Render the entities of `doc` inline in the notebook (style="ent").
displacy.render(doc, jupyter=True, style="ent", options=options)        # visualize named entities in note

In [6]:
def neg_model(nlp_model):
    """Load ``nlp_model`` and extend it with sentence splitting and negation detection.

    Parameters
    ----------
    nlp_model : str
        Name (or path) handed to ``spacy.load``.

    Returns
    -------
    The loaded pipeline with the parser disabled and the ``sentencizer`` and
    ``negex`` components appended, in that order.
    """
    pipeline = spacy.load(nlp_model, disable=["parser"])
    # The parser is disabled, so add the "sentencizer" component explicitly.
    pipeline.add_pipe("sentencizer")
    # Restrict negation detection to the two entity labels of interest.
    negex_config = {"ent_types": ["DISEASE", "CHEMICAL"]}
    pipeline.add_pipe("negex", config=negex_config)
    return pipeline

# negspacy sets the custom attribute e._.negex to True on entities it finds negated

def negation_handling(nlp_model, note, neg_model):
    """Return the text of every negated entity found in ``note``.

    Parameters
    ----------
    nlp_model : str
        Model name handed to ``neg_model``.
    note : str
        Note text. It is split into sentences on the "." character.
    neg_model : callable
        Factory called as ``neg_model(nlp_model)``; it must return a pipeline
        whose entities expose ``_.negex``.

    Returns
    -------
    list of str
        Texts of the entities flagged as negated, in order of appearance.
    """
    nlp = neg_model(nlp_model)
    # Naive sentence split on the "." delimiter, with surrounding whitespace
    # removed. NOTE: this also splits inside decimals and abbreviations.
    sentences = [fragment.strip() for fragment in note.split(".")]
    results = []
    for sentence in sentences:
        # A trailing or doubled period leaves empty fragments; they cannot
        # contain entities, so do not run the pipeline on them.
        if not sentence:
            continue
        doc = nlp(sentence)
        # Test the flag directly rather than comparing its string form to "True".
        results.extend(e.text for e in doc.ents if e._.negex)
    return results

# Texts of the entities in the lemmatized note that negspacy flags as negated,
# using the en_ner_bc5cdr_md model; the bare `results0` shows the list as cell output.
results0 = negation_handling("en_ner_bc5cdr_md", lem_note, neg_model)
results0

['hypertension',
 'alcoholism',
 'diarrhea',
 'abnormal rash',
 'ulcer',
 'hemoptysis',
 'covid-19 infection']

In [7]:
# Build a matcher that finds the given phrases (e.g. negated entity texts) in a doc
def match(nlp, terms, label):
    """Create a ``PhraseMatcher`` that matches every phrase in ``terms``.

    Parameters
    ----------
    nlp :
        Pipeline supplying ``vocab`` and ``make_doc`` (tokenization only).
    terms : iterable of str
        Phrases to match.
    label : str
        Match key under which all phrases are registered.

    Returns
    -------
    PhraseMatcher
        Matcher with one pattern per term registered under ``label``.
    """
    patterns = [nlp.make_doc(text) for text in terms]
    matcher = PhraseMatcher(nlp.vocab)
    # Pass the patterns as a single list: this is the call form documented for
    # spaCy v3 (also accepted from v2.2.2 on). The previous
    # add(label, None, *patterns) call is the legacy v2 signature.
    matcher.add(label, patterns)
    return matcher

# replacing labels for identified negative entities
def overwrite_ent_label(matcher, doc):
    """Relabel the phrases found by ``matcher`` and return the same ``doc``.

    Every accepted match becomes a new ``Span`` labelled with the match id,
    and any existing entity overlapping it is dropped. ``doc`` is modified in
    place (its ``ents`` are reassigned) and also returned.

    Parameters
    ----------
    matcher : callable
        Called as ``matcher(doc)``; yields ``(match_id, start, end)`` triples
        with ``end`` exclusive.
    doc :
        Document whose ``ents`` are rewritten.

    Returns
    -------
    The ``doc`` that was passed in.
    """
    claimed_tokens = set()
    new_entities = []
    entities = list(doc.ents)

    for match_id, start, end in matcher(doc):
        token_range = range(start, end)
        # Reject a match sharing ANY token with an already accepted one.
        # Testing only the first and last token misses a match that strictly
        # encloses an accepted span, which would produce overlapping entities
        # (spaCy rejects overlapping spans when assigning doc.ents).
        if not claimed_tokens.isdisjoint(token_range):
            continue
        new_entities.append(Span(doc, start, end, label=match_id))
        # Drop the original entities that intersect the half-open range [start, end).
        entities = [e for e in entities if not (e.start < end and e.end > start)]
        claimed_tokens.update(token_range)

    doc.ents = tuple(entities) + tuple(new_entities)
    return doc

# Build a PhraseMatcher over the negated entity texts, registered under the "NEG_ENTITY" key.
matcher = match(nlp1, results0, "NEG_ENTITY")

# Relabel the matched phrases as NEG_ENTITY. overwrite_ent_label reassigns
# doc.ents and returns the doc it was given, so doc0 is the same object as `doc`.
doc0 = overwrite_ent_label(matcher, doc)

# visualize identified negative labels
displacy.render(doc0, jupyter=True, style="ent", options=options)