Importing the required libraries and loading the spaCy model

In [3]:
import spacy

# Load the large English pipeline. The "en_core_web_lg" package has to be
# installed already; nothing in this cell downloads it.
# NOTE(review): `nlp` is rebound to spacy.blank("en") in the DocBin conversion
# cell and is not used anywhere in between, so this load looks redundant — confirm.
nlp = spacy.load("en_core_web_lg")

Loading training data from a JSON file

In [4]:
import json

# Read the annotated corpus into `data`.
# JSON is UTF-8 by specification, so the encoding is pinned explicitly rather
# than left to the platform default, which is not UTF-8 on every system and
# would garble non-ASCII text (and with it the character offsets of the
# annotations used later on).
with open('ner_dataset.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

Preparing training data in SpaCy format

In [None]:
# Convert every annotated example into a {'text', 'entities'} dict, where each
# entity is a (start, end, LABEL) tuple and the label is upper-cased.
training_data = []
for example in data['examples']:
    entities = [
        (annotation['start'], annotation['end'], annotation['label'].upper())
        for annotation in example['annotations']
    ]
    training_data.append({'text': example['text'], 'entities': entities})

# Sanity check: show the first converted example once, after the loop.
# (Printing inside the loop would emit this same first example once per input
# example.) The guard avoids an IndexError when there are no examples at all.
if training_data:
    print(training_data[0])


In [7]:
# Display the raw text of the first converted example.
training_data[0]['text']

'Intravenous azithromycin-induced ototoxicity.'

Converting training data to SpaCy DocBin format

In [8]:
from spacy.tokens import DocBin
from spacy.util import filter_spans
from tqdm import tqdm

# Only a tokenizer is needed to turn raw text into Doc objects, so a blank
# English pipeline is used. Note that this rebinds the notebook-level `nlp`.
nlp = spacy.blank("en")
doc_bin = DocBin()

for training_example in tqdm(training_data):
    text = training_example['text']
    labels = training_example['entities']
    doc = nlp.make_doc(text)
    ents = []
    for start, end, label in labels:
        # "contract" shrinks the character range to the tokens that lie fully
        # inside it; None means no whole token fits in [start, end).
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            # tqdm.write prints without breaking up the progress bar, and the
            # message identifies the annotation that was dropped.
            tqdm.write(
                f"Skipping entity {label} at chars {start}-{end}: {text[start:end]!r}"
            )
        else:
            ents.append(span)
    # doc.ents cannot hold overlapping spans; filter_spans drops overlaps,
    # preferring the longest span.
    doc.ents = filter_spans(ents)
    doc_bin.add(doc)

# Serialize all annotated docs to the file read by `spacy train`.
doc_bin.to_disk("train.spacy")


 11%|█▏        | 768/6793 [00:00<00:03, 1603.62it/s]

Skipping entity


 17%|█▋        | 1137/6793 [00:00<00:03, 1654.93it/s]

Skipping entity


100%|██████████| 6793/6793 [00:02<00:00, 2433.05it/s]


Initializing the training configuration

In [10]:
# Expand the partial base_config.cfg into a complete config.cfg.
# NOTE(review): base_config.cfg is not created anywhere in this notebook, so it
# has to exist in the working directory before this cell runs.
!python -m spacy init fill-config base_config.cfg config.cfg


[+] Auto-filled config with all values
[+] Saved config
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


Training the NER model

In [None]:
# Train the pipeline described by config.cfg, writing results to the current
# directory (--output ./).
# NOTE(review): --paths.dev points at the same file as --paths.train, so the
# scores reported during training are measured on the training data itself,
# not on held-out data. Consider exporting a separate dev.spacy and passing it here.
!python -m spacy train config.cfg --output ./ --paths.train ./train.spacy --paths.dev ./train.spacy


Loading the trained NER model and visualizing entities

In [None]:
# Load the pipeline stored under "model-best" (relative to the working
# directory) and run it over a single sample drug review.
nlp_ner = spacy.load("model-best")
sample_review = "Lipitor (atorvastatin) for High Cholesterol 							So I went to the doctor for peripheral neuropathy and back spasms.  She does my bloodwork and my cholesterol is extremely high.  She puts me on generic Lipitor and it made my back spasms even worse.  Now my upper back has a tremor that still hasn't gone away even after quitting the drug 2 months ago.  I have loss strength in my legs and I  have terrible short term memory. It would give me dizzy spells and I just felt sick all the time.  I'm still recovering from it and I told my doctor I quit taking it because it was making me shake. Now she wants me to start taking it again, but I'm not going to,"
doc = nlp_ner(sample_review)

# Render the recognised entities inline in the notebook, with a custom
# highlight colour configured for the SIDE_EFFECT label.
colors = dict(SIDE_EFFECT="#F67DE3")
options = dict(colors=colors)
spacy.displacy.render(doc, style="ent", options=options, jupyter=True)
