# Using spaCy for Named Entity Recognition (NER)

In [None]:
import spacy
import medspacy
from spacy import displacy
import pymysql
import pandas as pd
import getpass
import random
from ipywidgets import interact
from IPython.display import HTML, display
import warnings

In [None]:
# Ignore every warning (no category or message filter is given) —
# presumably to keep the notebook output readable.
warnings.filterwarnings("ignore")

## Install a Default Language Model

The following cell downloads a default English language model. It was trained on web content, which, as we will see, does not work well for medical texts.

In [None]:
!python -m spacy download en_core_web_sm

## Load the Web (`wnlp`) and Medical (`mnlp`) Language Models


In [None]:
# General-purpose English pipeline downloaded in the previous cell.
wnlp = spacy.load("en_core_web_sm")

# medspaCy pipeline built on the i2b2 2012 model, with the listed
# components enabled.
mnlp = medspacy.load(
    "en_info_3700_i2b2_2012",
    enable=[
        "sentencizer",
        "tagger",
        "parser",
        "ner",
        "target_matcher",
        "context",
        "sectionizer",
    ],
)

In [None]:
# Open a connection to the MIMIC2 MySQL database. The username and password
# are prompted for at run time so they are never stored in the notebook.
# `password` / `database` are used instead of the deprecated `passwd` / `db`
# aliases.
conn = pymysql.connect(
    host="35.233.174.193",
    port=3306,
    user=input("Enter username for MIMIC2 database"),
    password=getpass.getpass("Enter password for MIMIC2 database"),
    database="mimic2",
)


### Get Text

Textual data is stored in the `noteevents` table

In [None]:
# Load the text and category columns of every row in `noteevents`
# into a DataFrame.
reports = pd.read_sql(sql="SELECT text, category FROM noteevents", con=conn)

### What Kind of Notes are Available?

In [None]:
# Distinct note categories, in order of first appearance.
reports["category"].unique()

### Split reports into a dictionary keyed by category type

In [None]:
# Map each note category to the list of its report texts.
# A single groupby pass replaces re-filtering the whole DataFrame once per
# category. `sort=False` keeps the categories in order of first appearance.
# Rows whose category is missing are left out, instead of producing an
# empty list under a null key.
cat_reports = {
    category: texts.tolist()
    for category, texts in reports.groupby("category", sort=False)["text"]
}

## Compare Web/Medical Language Markup

The following function takes a list of reports, randomly selects one, and identifies named entities using first the medical-specific language model in medspaCy and then the default web-based English language model of spaCy.

In [None]:
def view_ner_reports(txt):
    """Show one randomly chosen report and its entity markup from both models.

    The raw text is printed first, followed by the entity rendering produced
    by the medspaCy pipeline (`mnlp`) and then by the web-trained spaCy
    pipeline (`wnlp`).

    Args:
        txt: Sequence of report texts to pick from. It must not be empty,
            because `random.choice` raises `IndexError` on an empty sequence.
    """
    report = random.choice(txt)

    display(HTML("<h1> Original Text</h1>"))
    print(report)

    # Same heading + render step for each pipeline, medical model first.
    for heading, pipeline in (
        ("<h1> MedspaCy Markup</h1>", mnlp),
        ("<h1> Web-based spaCy Markup</h1>", wnlp),
    ):
        display(HTML(heading))
        displacy.render(pipeline(report), style="ent")

In [None]:
# Compare both models on a random report from the 'Nursing/Other' category.
view_ner_reports(cat_reports['Nursing/Other'])