In [1]:
import spacy
from spacy import displacy
import IPython

from collections import defaultdict

In [2]:
from IPython.display import display, HTML

In [3]:
# Load the three pretrained spaCy pipelines whose entity output is compared
# later in the notebook. Each is loaded by package name.
# NOTE(review): presumably every model package must already be installed in the
# kernel's environment for spacy.load to resolve it — confirm.
mednlp = spacy.load("en_core_med7_lg")        # Med7 pipeline
bionlp = spacy.load("en_ner_bionlp13cg_md")   # BioNLP13CG pipeline
bc5cdr_nlp = spacy.load("en_ner_bc5cdr_md")   # BC5CDR pipeline

  deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(  # type: ignore[union-attr]


In [15]:
# Sample clinical note fed to every NER model in this notebook.
# The literal deliberately starts and ends with a newline (triple-quoted block).
text = """
This 23-year-old white female presents with complaint of allergies. She used to have allergies when she lived in Seattle but she thinks they are worse here. In the past, she has tried Claritin, and Zyrtec. Both worked for short time but then seemed to lose effectiveness. She has used Allegra also. She used that last summer and she began using it again two weeks ago. It does not appear to be working very well. She has used over-the-counter sprays but no prescription nasal sprays. She does have asthma but doest not require daily medication for this and does not think it is flaring up.
Her only medication currently is Ortho Tri-Cyclen and the Allegra.
She has no known medicine allergies.
Vitals: Weight was 130 pounds and blood pressure 124/78.
HEENT: Her throat was mildly erythematous without exudate. Nasal mucosa was erythematous and swollen. Only clear drainage was seen. TMs were clear.
Neck: Supple without adenopathy.
Lungs: Clear.
Allergic rhinitis.
1. She will try Zyrtec instead of Allegra again. Another option will be to use loratadine. She does not think she has prescription coverage so that might be cheaper.
2. Samples of Nasonex two sprays in each nostril given for three weeks. A prescription was written as well.
"""

In [16]:
# Run each spaCy pipeline over the same note; the resulting docs are what the
# side-by-side entity rendering consumes.
doc_med7 = mednlp(text)
doc_bionlp = bionlp(text)
doc_bc5cdr = bc5cdr_nlp(text)

In [17]:
def render_entities(doc, model_name):
    """Return the displaCy entity markup for ``doc`` under an ``<h3>`` heading.

    Args:
        doc: Object passed straight through to ``displacy.render``.
        model_name: Label placed inside the ``<h3>`` heading; it is inserted
            verbatim (no HTML escaping).

    Returns:
        The heading followed by whatever ``displacy.render`` returns.
    """
    # jupyter=False is passed so the markup comes back as a value that can be
    # concatenated here rather than being displayed immediately.
    entity_markup = displacy.render(doc, style="ent", jupyter=False)
    heading = f"<h3>{model_name}</h3>"
    return heading + entity_markup

In [18]:
# Lay the three entity renderings out as equal-width columns of one flex row.
# Each column line is generated from a (doc, title) pair; the assembled string
# is character-for-character the same as writing the three rows out by hand.
html_output = "".join(
    [
        '\n<div style="display: flex; gap: 30px;">\n',
        *(
            f'    <div style="flex: 1;">{render_entities(doc, title)}</div>\n'
            for doc, title in (
                (doc_med7, "Med7"),
                (doc_bionlp, "BioNLP13CG"),
                (doc_bc5cdr, "BC5CDR"),
            )
        ),
        "</div>\n",
    ]
)
display(HTML(html_output))

In [8]:
def get_entity_groups(t, models=None):
    """Group the entity mentions found in ``t`` by their NER label.

    Args:
        t: Text to annotate.
        models: Optional iterable of pipelines. Each one is called as
            ``model(t)`` and must return an object with an ``ents`` iterable
            whose items expose ``label_`` and ``text``. When omitted, the
            notebook-level ``bionlp`` and ``bc5cdr_nlp`` pipelines are used
            (Med7 is not part of the default set; pass it explicitly to
            include it).

    Returns:
        A list of ``(label, mentions)`` tuples sorted by label. ``mentions``
        keeps every occurrence in the order encountered (models in the given
        order, entities in document order); repeated mentions are not
        de-duplicated.
    """
    if models is None:
        # Looked up at call time so the default follows whatever pipelines are
        # currently bound to these notebook globals.
        models = (bionlp, bc5cdr_nlp)

    grouped = defaultdict(list)
    for model in models:
        for ent in model(t).ents:
            grouped[ent.label_].append(ent.text)

    return sorted(grouped.items())

In [9]:
# Collect label -> mentions for whatever `text` currently holds in the kernel.
entities = get_entity_groups(text)

In [10]:
# Bare expression so the notebook shows the grouped entities as the cell output.
# NOTE(review): the saved output under this cell lists mentions such as
# 'amoxicillin' and 'COPD' that do not occur in the `text` defined in this
# notebook, and the cell's execution count is out of sequence — the output
# looks stale; restart the kernel and run all cells to refresh it.
entities

[('CANCER', ['COPD']),
 ('CHEMICAL', ['amoxicillin', 'salbutamol', 'fluticasone']),
 ('DISEASE',
  ['throat infection',
   'asthma',
   'chronic obstructive pulmonary disease',
   'COPD']),
 ('IMMATERIAL_ANATOMICAL_ENTITY', ['inhaler']),
 ('ORGAN', ['pulmonary']),
 ('ORGANISM', ['patient']),
 ('SIMPLE_CHEMICAL', ['amoxicillin', 'salbutamol', 'fluticasone'])]

In [20]:
from transformers import pipeline
from IPython.display import display, HTML

# Hugging Face NER pipeline used as a fourth model alongside the spaCy ones.
# NOTE(review): highlight_entities reads 'start', 'end' and 'entity_group' from
# each result; presumably those keys come from aggregation_strategy="simple" —
# confirm against the transformers documentation.
ner_pipe = pipeline("ner", model="d4sdata/biomedical-ner-all", aggregation_strategy="simple")

# Same clinical note as the `text` assigned earlier in the notebook; it is
# re-assigned here, so this cell does not depend on that earlier cell for it.
text = """
This 23-year-old white female presents with complaint of allergies. She used to have allergies when she lived in Seattle but she thinks they are worse here. In the past, she has tried Claritin, and Zyrtec. Both worked for short time but then seemed to lose effectiveness. She has used Allegra also. She used that last summer and she began using it again two weeks ago. It does not appear to be working very well. She has used over-the-counter sprays but no prescription nasal sprays. She does have asthma but doest not require daily medication for this and does not think it is flaring up.
Her only medication currently is Ortho Tri-Cyclen and the Allegra.
She has no known medicine allergies.
Vitals: Weight was 130 pounds and blood pressure 124/78.
HEENT: Her throat was mildly erythematous without exudate. Nasal mucosa was erythematous and swollen. Only clear drainage was seen. TMs were clear.
Neck: Supple without adenopathy.
Lungs: Clear.
Allergic rhinitis.
1. She will try Zyrtec instead of Allegra again. Another option will be to use loratadine. She does not think she has prescription coverage so that might be cheaper.
2. Samples of Nasonex two sprays in each nostril given for three weeks. A prescription was written as well.
"""

# Run the pipeline once over the note; the result is passed to
# highlight_entities for rendering.
ner_results = ner_pipe(text)

def highlight_entities(text, entities):
    """Render ``text`` as HTML with each entity span wrapped in a ``<mark>``.

    Args:
        text: The string the entity offsets refer to.
        entities: Iterable of mappings, each with integer ``'start'`` and
            ``'end'`` character offsets into ``text`` and an
            ``'entity_group'`` label. Order does not matter; spans are
            processed by ascending ``'start'``.

    Returns:
        An HTML fragment: the original text with every rendered span replaced
        by ``<mark ...>{span} <b>{label}</b></mark>``. All text taken from
        ``text`` and every label is HTML-escaped, so characters such as ``<``
        or ``&`` in the input show up literally instead of corrupting the
        markup.

    Notes:
        A span that starts before the end of the previously rendered span
        (i.e. overlaps it) is skipped; rendering it would repeat text that has
        already been emitted.
    """
    # Function-scope import: the name `html` is commonly used for result
    # strings in this notebook, so only `escape` is pulled in, and locally.
    from html import escape

    mark_open = '<mark style="background-color: #dddddd; padding: 4px 4px; border-radius: 4px;">'

    pieces = []   # collected fragments, joined once at the end
    cursor = 0    # index in `text` up to which output has been produced

    for ent in sorted(entities, key=lambda e: e['start']):
        start, end = ent['start'], ent['end']
        if start < cursor:
            # Overlaps the span just rendered — skip to avoid duplicated text.
            continue

        label = escape(str(ent['entity_group']))
        pieces.append(escape(text[cursor:start]))
        pieces.append(f'{mark_open}{escape(text[start:end])} <b>{label}</b></mark>')
        cursor = end

    pieces.append(escape(text[cursor:]))
    return "".join(pieces)

# Build the highlighted markup and show it in the notebook.
# Note: this re-binds `html_output`, which the side-by-side spaCy cell also uses.
html_output = highlight_entities(text, ner_results)
display(HTML(f"<div style='font-family:Arial; font-size: 15px;'>{html_output}</div>"))

Device set to use cuda:0
