Annotating (Spacy training data fromat)

In [1]:
import spacy
from spacy.training import Example
nlp = spacy.load("en_core_web_md")

doc = nlp("I will visit you in Munich.")
annotations = {"entities": [(20, 26, "GPE")]}
example_sent = Example.from_dict(doc, annotations)

 Updating an existing pipeline component

In [2]:
#Disabling the other statistical models
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
nlp.disable_pipes(*other_pipes)

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer']

 Model Training Procedure

In [18]:
import random
import spacy
from spacy.training import Example

nlp = spacy.load("en_core_web_md")

train_set = [
    ("navigate home", {"entities": [(9, 13, "GPE")]}),
    ("navigate to office", {"entities": [(12, 18, "GPE")]}),
    ("navigate", {"entities": []}),
    ("navigate to Oxford Street", {"entities": [(12, 25, "GPE")]})
]

epochs = 20

#disable pipe
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
with nlp.disable_pipes(*other_pipes):
    #create optimizer obj
    optimizer = nlp.create_optimizer()
    
    #for each epoch, we will shuffle our dataset
    for i in range(epochs):
        random.shuffle(train_set)
        
        #create an example obj for the sents and its annotation
        example = Example.from_dict(doc, annotations)
        
        #feed the examp obj and optimi obj
        nlp.update([example], sgd=optimizer)

In [20]:

#save newly trained NER component to disk under a directory called navi_ner
ner = nlp.get_pipe("ner")
ner.to_disk("navi_ner")

In [36]:
doc = nlp("Drive me to Munich.")
doc.ents

()

In [23]:
# Save and loading the custom model
nlp = spacy.load('en_core_web_md', disable=['ner'])
ner = nlp.create_pipe("ner")
ner.from_disk("navi_ner")
nlp.add_pipe(ner, "navi_ner")
print(nlp.meta['pipeline'])

ValueError: Cannot deserialize model: mismatched structure

 Training A Pipeline component from Scratch

In [None]:
import random
import spacy
from spacy.training import Example

In [None]:
#TRAINING OF THREE EXAMPLES
train_set = [
    ("Methylphenidate is effectively used in treating children with epilepsy and ADHD.", 
    {"entities": [(0, 15, "DRUG"), (62, 70, "DISEASE"),
                 (75, 79, "DISEASE")]}),
    ("Patients were followed up for 6 months.", {"entities": []}),
    ("Antichlamydial antibiotics may be useful for curing coronary-artery disease.",
    {"entities": [(0, 26, "DRUG"), (52, 75, "DIS")]})
]

In [None]:
entities = ["DISEASE", "DIS", "DRUG"]

In [27]:
#CREATE BLANK MODEL
nlp = spacy.blank("en")

In [28]:
#create blank nER component
ner = nlp.add_pipe("ner")
ner

<spacy.pipeline.ner.EntityRecognizer at 0x1535944de80>

In [29]:
#add each medical label
for ent in entities:
    ner.add_label(ent)

In [30]:
#define number of epochs
epochs = 25

In [32]:
#next two lines disable the other components
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
with nlp.disable_pipes(*other_pipes):
    #optimi obj by calling begin_training to forget previous labels
    optimizer = nlp.begin_training()
    
    for i in range(25):
        random.shuffle(train_set)
        for text, annotation in train_set:
            doc = nlp.make_doc(text)
            example = Example.from_dict(doc, annotation)
            nlp.update([example], sgd=optimizer)

In [33]:
doc = nlp("I had a coronary disease.")
doc.ents

(I, coronary disease)

In [34]:
doc.ents[1].label_

'DIS'

In [35]:
doc = nlp("I met you at Trump Tower.")
doc.ents

(I,)

In [None]:
from spacy import displacy
doc = nlp("I had a coronary disease.")
displacy.serve(doc, style="ent")




Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

