In [1]:
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_md-0.2.4.tar.gz

Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_md-0.2.4.tar.gz
  Using cached https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_md-0.2.4.tar.gz
Building wheels for collected packages: en-core-sci-md
  Building wheel for en-core-sci-md (setup.py) ... [?25ldone
[?25h  Created wheel for en-core-sci-md: filename=en_core_sci_md-0.2.4-cp37-none-any.whl size=70498247 sha256=1e0cb8739f9a988ea02f0701987095f8ced825186f744338cd3f89e8dc66ece9
  Stored in directory: /Users/alec/Library/Caches/pip/wheels/12/b3/89/7fbb30f56411e8b4002eac6d5568ab46da63191a2287aa17bf
Successfully built en-core-sci-md


In [17]:
import spacy
import medspacy
from medspacy.visualization import visualize_ent

# Overview
In this notebook, we'll look at how to extract clinical concepts and attributes from text.
- Target matching
- Section detection
- Context analysis

In [11]:
with open("./discharge_summary.txt") as f:
    text = f.read()

In [36]:
nlp = spacy.load("en_core_web_sm", disable=["ner"])

# Target extraction
In this step, we'll write rules to extract the main concepts we're interested in.

In this example, we'll use two utilities provided in `medspacy.ner` for rule-based matching: the `TargetMatcher` and `TargetRule`. However, you can use any spaCy components for adding spans to `doc.ents`, including pre-trained NER models or other [spaCy rule-based matching components](https://spacy.io/usage/rule-based-matching/).

## Target concepts
In our text, we'll extract the following concepts:
- Diagnoses 
- Medications
In addition, we'll show a few examples of how to add a custom spaCy attribute to a target rule to add an ICD-10 diagnosis code as an attribute of an entity.

In [37]:
from medspacy.ner import TargetMatcher, TargetRule

In [38]:
target_matcher = TargetMatcher(nlp)

In [39]:
nlp.add_pipe(target_matcher)

In [40]:
target_rules1 = [
    TargetRule("abdominal pain", "PROBLEM"),
    TargetRule("stroke", "PROBLEM"),
    TargetRule("hemicolectomy", "TREATMENT"),
    TargetRule("Hydrochlorothiazide", "TREATMENT"),
    TargetRule("colon cancer", "PROBLEM"),
    TargetRule("radiotherapy", "PROBLEM",
              pattern=[{"LOWER": "xrt"}]),
    
]

In [41]:
target_matcher.add(target_rules1)

In [42]:
doc = nlp(text)

In [43]:
visualize_ent(doc)

AttributeError: 'NoneType' object has no attribute 'edges'

In [44]:
for ent in doc.ents:
    print(ent, ent.label_, ent._.target_rule, sep="  |  ")
    print()

Hydrochlorothiazide  |  TREATMENT  |  TargetRule(literal="Hydrochlorothiazide", category="TREATMENT", pattern=None, attributes=None, on_match=None)

Abdominal pain  |  PROBLEM  |  TargetRule(literal="abdominal pain", category="PROBLEM", pattern=None, attributes=None, on_match=None)

stroke  |  PROBLEM  |  TargetRule(literal="stroke", category="PROBLEM", pattern=None, attributes=None, on_match=None)

abdominal pain  |  PROBLEM  |  TargetRule(literal="abdominal pain", category="PROBLEM", pattern=None, attributes=None, on_match=None)

Colon cancer  |  PROBLEM  |  TargetRule(literal="colon cancer", category="PROBLEM", pattern=None, attributes=None, on_match=None)

hemicolectomy  |  TREATMENT  |  TargetRule(literal="hemicolectomy", category="TREATMENT", pattern=None, attributes=None, on_match=None)

XRT  |  PROBLEM  |  TargetRule(literal="radiotherapy", category="PROBLEM", pattern=[{'LOWER': 'xrt'}], attributes=None, on_match=None)

stroke  |  PROBLEM  |  TargetRule(literal="stroke", catego

In [45]:
from spacy.tokens import Span

In [46]:
Span.set_extension("icd10", default="")

ValueError: [E090] Extension 'icd10' already exists on Span. To overwrite the existing extension, set `force=True` on `Span.set_extension`.

In [47]:
target_rules2 = [
    TargetRule("Type II Diabetes Mellitus", "PROBLEM", 
              pattern=[
                  {"LOWER": "type"},
                  {"LOWER": {"IN": ["2", "ii", "two"]}},
                  {"LOWER": {"IN": ["dm", "diabetes"]}},
                  {"LOWER": "mellitus", "OP": "?"}
              ],
              attributes={"icd10": "E11.9"}),
    TargetRule("Hypertension", "PROBLEM",
              pattern=[{"LOWER": {"IN": ["htn", "hypertension"]}}],
              attributes={"icd10": "I10"}),
    
    
]

In [50]:
target_matcher.add(target_rules2)

In [51]:
doc = nlp(text)

In [52]:
for ent in doc.ents:
    if ent._.icd10 != "":
        print(ent, ent._.icd10)

type 2 dm E11.9
Type II Diabetes Mellitus E11.9
Hypertension I10
HTN I10


# Context

In [53]:
from medspacy.context import ConTextComponent, ConTextItem

In [54]:
context = ConTextComponent(nlp, rules="default")

In [55]:
nlp.add_pipe(context)

In [56]:
nlp.pipe_names

['tagger', 'parser', 'target_matcher', 'context']

In [57]:
doc = nlp("Mother with stroke at age 82.")

In [58]:
visualize_ent(doc)

In [59]:
from medspacy.visualization import visualize_dep

In [60]:
visualize_dep(doc)

In [None]:
doc = nlp