### Importing The Libraries

In [1]:
import yaml
import spacy
import plac
import random
from pathlib import Path
from spacy.util import minibatch , compounding
from spacy.training import Example

### Train Data

In [2]:
data_file = open('..\data\entities.yaml').read()
entities = yaml.safe_load(data_file)


train_data = []


for entity in entities['train_data']:
    train_data.append(entity)

### Create the Model

In [3]:
# Define our Variables
ner_model = None
output_dir=Path("../Name Entity Recognition")
n_iter=100

# Load the Model
if ner_model is not None:
    nlp = spacy.load(ner_model)  
    print("Loaded model '%s'" % ner_model)
else:
    nlp = spacy.blank('en')  
    print("Created blank 'en' model")

# Set up the pipeline
if 'ner' not in nlp.pipe_names:
    ner = nlp.create_pipe('ner')
    nlp.add_pipe('ner', last=True)
else:
    ner = nlp.get_pipe('ner')

Created blank 'en' model


### Train the Recognizer

In [4]:
for _, annotations in train_data:
    for ent in annotations.get('entities'):
        ner.add_label(ent[2])

disable_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

with nlp.disable_pipes(*disable_pipes):
    optimizer = nlp.begin_training()
    
    for i in range(n_iter):
        random.shuffle(train_data)
        losses = {}
        
        batches = minibatch(train_data , size = compounding(1.0,16.0,1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            
            example = []
            # Update the model with iterating each text
            for i in range(len(texts)):
                doc = nlp.make_doc(texts[i])
                example.append(Example.from_dict(doc, annotations[i]))
            
                # Update the model
                nlp.update(
                           example,
                           drop=0.6,
                           sgd = optimizer,
                           losses=losses
                           )
            print(losses)

{'ner': 5.000000178813934}
{'ner': 11.422746777534485}
{'ner': 17.123470902442932}
{'ner': 23.685417652130127}
{'ner': 29.300854563713074}
{'ner': 33.94172149896622}
{'ner': 39.43307274580002}
{'ner': 47.61340397596359}
{'ner': 51.35083854198456}
{'ner': 58.406691789627075}
{'ner': 62.6309477686882}
{'ner': 68.94206351041794}
{'ner': 74.34121978282928}
{'ner': 79.40822386741638}
{'ner': 85.13292062282562}
{'ner': 89.60194730758667}
{'ner': 93.46016067266464}
{'ner': 96.83398649096489}
{'ner': 99.90848504006863}
{'ner': 102.84806104004383}
{'ner': 106.26602354645729}
{'ner': 108.63189896941185}
{'ner': 112.68100121617317}
{'ner': 117.36149376630783}
{'ner': 120.50684788823128}
{'ner': 123.81522011756897}
{'ner': 127.61775341629982}
{'ner': 130.12355814129114}
{'ner': 132.6813660506159}
{'ner': 134.0834596287459}
{'ner': 138.50192989222705}
{'ner': 141.25504927150905}
{'ner': 143.36184560880065}
{'ner': 145.844644729048}
{'ner': 148.03613171953475}
{'ner': 150.00621259905165}
{'ner': 151



{'ner': 468.5766703991435}
{'ner': 473.0289335224993}
{'ner': 475.4509671583987}
{'ner': 478.18989521751473}
{'ner': 482.67944392210165}
{'ner': 484.54204720745156}
{'ner': 489.1447257415607}
{'ner': 490.8931729811925}
{'ner': 495.06385113634724}
{'ner': 499.5011426746901}
{'ner': 503.56692170221913}
{'ner': 507.2546556687853}
{'ner': 508.8229751359145}
{'ner': 510.257614135317}
{'ner': 511.88534138792284}
{'ner': 515.8863567004269}
{'ner': 518.7930989733254}
{'ner': 520.7648777144318}
{'ner': 522.3478638520869}
{'ner': 525.5101556033301}
{'ner': 526.9237844103595}
{'ner': 528.9217919786283}
{'ner': 530.7610051076449}
{'ner': 534.9804230793202}
{'ner': 536.7562117273742}
{'ner': 540.4552083560088}
{'ner': 543.875526649656}
{'ner': 546.8159952496758}
{'ner': 548.400058018819}
{'ner': 550.5273608442699}
{'ner': 552.7575828816812}
{'ner': 556.0544398572933}
{'ner': 558.2686522309491}
{'ner': 559.7807671609157}
{'ner': 561.4660968284411}
{'ner': 564.0657321316944}
{'ner': 567.6836456071874

### Save the Model

In [5]:
if output_dir is not None:
    output_dir = Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir()
    nlp.to_disk(output_dir)
    print("Saved model to", output_dir)   

Saved model to ..\Name Entity Recognition
