In [None]:
## Load the Packages

from __future__ import unicode_literals, print_function
import random
from pathlib import Path
import spacy
from tqdm import tqdm 
# Load the pretrained 'en_core_web_lg' pipeline into `nlp1`.
# NOTE(review): `nlp1` is not referenced anywhere else in the visible script
# (training below uses a separate `nlp` object) — confirm whether this load is
# still needed before keeping or removing it.
nlp1 = spacy.load('en_core_web_lg')


## Train Data

# Each training example is a (text, annotations) pair. The annotations dict
# holds an 'entities' list of (start_char, end_char, label) tuples, where the
# offsets slice the entity out of the text (end offset is exclusive), e.g.
# 'Who is Nishanth?'[7:15] == 'Nishanth'.
TRAIN_DATA = [
    ('Who is Nishanth?', {'entities': [(7, 15, 'PERSON')]}),
    ('Who is Kamal Khumar?', {'entities': [(7, 19, 'PERSON')]}),
    (
        'I like London and Berlin.',
        {'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')]},
    ),
]


## Define the variables

# Number of passes over TRAIN_DATA made by the training loop.
n_iter = 100

# Directory the trained pipeline is written to; None skips the save step.
output_dir = Path("C:\\Users\\nithi\\Documents\\ner")

# Name/path of an existing model to load; None means start from a blank
# English pipeline.
model = None


## Load the model

# Start from a blank English pipeline unless an existing model was requested
# through `model`, in which case that model is loaded instead.
if model is None:
    nlp = spacy.blank('en')
    print("Created blank 'en' model")
else:
    nlp = spacy.load(model)
    print("Loaded model '%s'" % model)
    
    
## Set-up the pipeline    
    
# Reuse the pipeline's entity recognizer when it already has one; otherwise
# create a new 'ner' component and append it as the last pipeline step.
if 'ner' in nlp.pipe_names:
    ner = nlp.get_pipe('ner')
else:
    ner = nlp.create_pipe('ner')
    nlp.add_pipe(ner, last=True)


## Train the recognizer
    
# Register every entity label that occurs in the training data with the
# recognizer before training starts.
for _, annotations in TRAIN_DATA:
    # Default to an empty list so an example without an 'entities' key is
    # skipped instead of raising TypeError when iterating over None.
    for ent in annotations.get('entities', []):
        ner.add_label(ent[2])  # ent is (start_char, end_char, label)

# Disable every pipeline component except 'ner' for the duration of training
# so that only the entity recognizer is updated.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.disable_pipes(*other_pipes):  # only train NER
    # NOTE(review): if `model` is ever set to an existing pipeline, check
    # whether begin_training() re-initialises its weights in the installed
    # spaCy version before relying on this for fine-tuning.
    optimizer = nlp.begin_training()
    for itn in range(n_iter):
        # Present the examples in a different order on every pass.
        random.shuffle(TRAIN_DATA)
        losses = {}  # per-component loss accumulated over this pass
        for text, annotations in tqdm(TRAIN_DATA):
            nlp.update(
                [text],         # batch of one raw text
                [annotations],  # matching batch of one annotations dict
                drop=0.5,       # dropout, so the tiny data set is harder to memorise
                sgd=optimizer,  # explicitly use the optimizer created above
                losses=losses)
        print(losses)
    

## Test the trained model

# Run the trained pipeline over the training texts and print the
# (entity text, label) pairs it predicts for each one.
for text, _ in TRAIN_DATA:
    doc = nlp(text)
    predicted = []
    for span in doc.ents:
        predicted.append((span.text, span.label_))
    print('Entities', predicted)
    # For token-level debugging, print (t.text, t.ent_type_, t.ent_iob)
    # for each token t in doc.

    
## Save the model  
    
# Persist the trained pipeline to `output_dir`; a value of None skips saving.
if output_dir is not None:
    output_dir = Path(output_dir)
    # Create the target directory together with any missing parents, and do
    # not fail if it already exists (a bare mkdir() raises FileNotFoundError
    # when a parent directory is absent).
    output_dir.mkdir(parents=True, exist_ok=True)
    nlp.to_disk(output_dir)
    print("Saved model to", output_dir)

Created blank 'en' model


  **kwargs
100%|██████████| 3/3 [00:00<00:00,  8.29it/s]


{'ner': 13.213457107543945}


100%|██████████| 3/3 [00:00<00:00, 10.90it/s]


{'ner': 9.716748863458633}


100%|██████████| 3/3 [00:00<00:00, 11.03it/s]


{'ner': 5.797899894416332}


100%|██████████| 3/3 [00:00<00:00, 10.27it/s]


{'ner': 4.15419275930617}


100%|██████████| 3/3 [00:00<00:00, 10.87it/s]


{'ner': 6.649135896819644}


100%|██████████| 3/3 [00:00<00:00, 10.37it/s]


{'ner': 2.8057605393987615}


100%|██████████| 3/3 [00:00<00:00, 10.62it/s]


{'ner': 1.7193298725769637}


100%|██████████| 3/3 [00:00<00:00, 12.70it/s]


{'ner': 1.009528205096899}


100%|██████████| 3/3 [00:00<00:00, 16.05it/s]


{'ner': 2.492289668871365}


100%|██████████| 3/3 [00:00<00:00, 16.55it/s]


{'ner': 2.317964863003837}


100%|██████████| 3/3 [00:00<00:00, 17.96it/s]


{'ner': 1.040526380293187}


100%|██████████| 3/3 [00:00<00:00, 13.05it/s]


{'ner': 0.8920971786410803}


100%|██████████| 3/3 [00:00<00:00, 16.99it/s]


{'ner': 0.3068724398390472}


100%|██████████| 3/3 [00:00<00:00, 15.59it/s]


{'ner': 0.10153618201917247}


100%|██████████| 3/3 [00:00<00:00, 16.72it/s]


{'ner': 0.09808417651348195}


100%|██████████| 3/3 [00:00<00:00, 17.14it/s]


{'ner': 0.0001118649521893722}


100%|██████████| 3/3 [00:00<00:00, 16.50it/s]


{'ner': 6.215947620727413e-05}


100%|██████████| 3/3 [00:00<00:00, 17.02it/s]


{'ner': 3.517436201430635e-05}


100%|██████████| 3/3 [00:00<00:00, 16.85it/s]


{'ner': 3.97070184853833e-06}


100%|██████████| 3/3 [00:00<00:00, 15.61it/s]


{'ner': 2.3033321780358863e-06}


100%|██████████| 3/3 [00:00<00:00, 16.92it/s]


{'ner': 4.051046412420489e-07}


100%|██████████| 3/3 [00:00<00:00, 15.35it/s]


{'ner': 3.521974446893275e-07}


100%|██████████| 3/3 [00:00<00:00, 12.08it/s]


{'ner': 2.616484007816102e-07}


100%|██████████| 3/3 [00:00<00:00, 10.43it/s]


{'ner': 2.0825229114136983e-07}


100%|██████████| 3/3 [00:00<00:00, 10.67it/s]


{'ner': 1.753109543433401e-07}


100%|██████████| 3/3 [00:00<00:00, 10.53it/s]


{'ner': 1.5860163938481064e-07}


100%|██████████| 3/3 [00:00<00:00, 11.04it/s]


{'ner': 1.0983013787387091e-07}


100%|██████████| 3/3 [00:00<00:00, 10.17it/s]


{'ner': 9.464996151473534e-08}


100%|██████████| 3/3 [00:00<00:00, 10.82it/s]


{'ner': 8.015639279898573e-08}


100%|██████████| 3/3 [00:00<00:00, 10.59it/s]


{'ner': 5.927444070791714e-08}


100%|██████████| 3/3 [00:00<00:00, 10.41it/s]


{'ner': 4.605768711295926e-08}


100%|██████████| 3/3 [00:00<00:00, 10.19it/s]


{'ner': 4.064611742181434e-08}


100%|██████████| 3/3 [00:00<00:00, 10.93it/s]


{'ner': 3.5905725768477695e-08}


100%|██████████| 3/3 [00:00<00:00, 10.54it/s]


{'ner': 2.7665644257619205e-08}


100%|██████████| 3/3 [00:00<00:00, 10.63it/s]


{'ner': 2.51874966869299e-08}


100%|██████████| 3/3 [00:00<00:00, 10.18it/s]


{'ner': 2.1939111878751632e-08}


100%|██████████| 3/3 [00:00<00:00, 10.62it/s]


{'ner': 2.0089463080660857e-08}


100%|██████████| 3/3 [00:00<00:00, 10.96it/s]


{'ner': 1.677596632760254e-08}


100%|██████████| 3/3 [00:00<00:00, 10.47it/s]


{'ner': 1.5609676252085748e-08}


100%|██████████| 3/3 [00:00<00:00, 10.01it/s]


{'ner': 1.4176087505117065e-08}


100%|██████████| 3/3 [00:00<00:00, 11.16it/s]


{'ner': 1.3016321426447143e-08}


100%|██████████| 3/3 [00:00<00:00, 10.54it/s]


{'ner': 1.1730458171355169e-08}


100%|██████████| 3/3 [00:00<00:00, 10.46it/s]


{'ner': 1.1110669584580043e-08}


100%|██████████| 3/3 [00:00<00:00, 10.64it/s]


{'ner': 1.0351248293419106e-08}


100%|██████████| 3/3 [00:00<00:00, 10.31it/s]


{'ner': 9.500709549260636e-09}


100%|██████████| 3/3 [00:00<00:00, 10.27it/s]


{'ner': 8.914730376862637e-09}


100%|██████████| 3/3 [00:00<00:00, 10.57it/s]


{'ner': 8.513315882333174e-09}


100%|██████████| 3/3 [00:00<00:00, 10.77it/s]


{'ner': 8.129666502352173e-09}


100%|██████████| 3/3 [00:00<00:00, 11.37it/s]


{'ner': 7.458057805942672e-09}


100%|██████████| 3/3 [00:00<00:00, 11.38it/s]


{'ner': 7.1376074785584195e-09}


100%|██████████| 3/3 [00:00<00:00, 10.19it/s]


{'ner': 6.714201408832765e-09}


100%|██████████| 3/3 [00:00<00:00, 10.92it/s]


{'ner': 6.521705478838868e-09}


100%|██████████| 3/3 [00:00<00:00, 10.59it/s]


{'ner': 6.126757622460165e-09}


100%|██████████| 3/3 [00:00<00:00, 11.00it/s]


{'ner': 5.778815000417513e-09}


100%|██████████| 3/3 [00:00<00:00, 11.43it/s]


{'ner': 5.527392575235784e-09}


100%|██████████| 3/3 [00:00<00:00, 10.78it/s]


{'ner': 5.276158483709061e-09}


100%|██████████| 3/3 [00:00<00:00, 10.59it/s]


{'ner': 5.156758728065496e-09}


100%|██████████| 3/3 [00:00<00:00, 12.37it/s]


{'ner': 4.836732488090228e-09}


100%|██████████| 3/3 [00:00<00:00, 17.11it/s]


{'ner': 4.660935065312587e-09}


100%|██████████| 3/3 [00:00<00:00, 17.31it/s]


{'ner': 4.466882266226325e-09}


100%|██████████| 3/3 [00:00<00:00, 16.84it/s]


{'ner': 4.324575258273899e-09}


100%|██████████| 3/3 [00:00<00:00, 12.41it/s]


{'ner': 4.142347094702174e-09}


100%|██████████| 3/3 [00:00<00:00, 11.38it/s]


{'ner': 3.940215783736246e-09}


100%|██████████| 3/3 [00:00<00:00, 10.44it/s]


{'ner': 3.803273204948286e-09}


100%|██████████| 3/3 [00:00<00:00, 10.11it/s]


{'ner': 3.6479590728650208e-09}


100%|██████████| 3/3 [00:00<00:00,  9.90it/s]


{'ner': 3.521003463522799e-09}


100%|██████████| 3/3 [00:00<00:00, 10.63it/s]


{'ner': 3.4468784644678152e-09}


100%|██████████| 3/3 [00:00<00:00, 10.27it/s]


{'ner': 3.2833281136600845e-09}


100%|██████████| 3/3 [00:00<00:00, 12.80it/s]


{'ner': 3.158647156135117e-09}


100%|██████████| 3/3 [00:00<00:00, 16.92it/s]


{'ner': 3.064979354057798e-09}


100%|██████████| 3/3 [00:00<00:00, 16.69it/s]


{'ner': 2.9610237314506163e-09}


100%|██████████| 3/3 [00:00<00:00, 16.61it/s]


{'ner': 2.8543559309194087e-09}


100%|██████████| 3/3 [00:00<00:00, 12.73it/s]


{'ner': 2.7717800884309967e-09}


100%|██████████| 3/3 [00:00<00:00, 13.95it/s]


{'ner': 2.703266464960717e-09}


100%|██████████| 3/3 [00:00<00:00, 16.49it/s]


{'ner': 2.6032268833857347e-09}


100%|██████████| 3/3 [00:00<00:00, 15.69it/s]


{'ner': 2.5167421566379242e-09}


100%|██████████| 3/3 [00:00<00:00, 15.52it/s]


{'ner': 2.4179398028586233e-09}


100%|██████████| 3/3 [00:00<00:00, 12.48it/s]


{'ner': 2.354010962525048e-09}


100%|██████████| 3/3 [00:00<00:00, 16.57it/s]


{'ner': 2.295175760150165e-09}


100%|██████████| 3/3 [00:00<00:00, 17.17it/s]


{'ner': 2.2096274264070147e-09}


100%|██████████| 3/3 [00:00<00:00, 16.46it/s]


{'ner': 2.14399598897193e-09}


100%|██████████| 3/3 [00:00<00:00, 16.07it/s]


{'ner': 2.0846492363989074e-09}


100%|██████████| 3/3 [00:00<00:00, 14.11it/s]


{'ner': 2.021407505416919e-09}


100%|██████████| 3/3 [00:00<00:00, 11.48it/s]


{'ner': 1.9596405925430266e-09}


100%|██████████| 3/3 [00:00<00:00, 11.11it/s]


{'ner': 1.90815065961511e-09}


100%|██████████| 3/3 [00:00<00:00, 10.09it/s]


{'ner': 1.8572744324553738e-09}


100%|██████████| 3/3 [00:00<00:00, 10.98it/s]


{'ner': 1.7994725239152028e-09}


100%|██████████| 3/3 [00:00<00:00, 10.85it/s]


{'ner': 1.750443381083132e-09}


100%|██████████| 3/3 [00:00<00:00, 10.51it/s]


{'ner': 1.7081358110165682e-09}


100%|██████████| 3/3 [00:00<00:00, 10.62it/s]


{'ner': 1.6561012394869398e-09}


100%|██████████| 3/3 [00:00<00:00, 10.48it/s]


{'ner': 1.6227160106700107e-09}


100%|██████████| 3/3 [00:00<00:00, 10.81it/s]


{'ner': 1.5759505793357192e-09}


100%|██████████| 3/3 [00:00<00:00, 11.24it/s]


{'ner': 1.5354389113468032e-09}


100%|██████████| 3/3 [00:00<00:00, 10.18it/s]


{'ner': 1.4835600149753115e-09}


100%|██████████| 3/3 [00:00<00:00, 10.88it/s]


{'ner': 1.4456245884138942e-09}


100%|██████████| 3/3 [00:00<00:00, 10.31it/s]


{'ner': 1.4081051832356386e-09}


100%|██████████| 3/3 [00:00<00:00, 10.57it/s]


{'ner': 1.3740537650661016e-09}


 33%|███▎      | 1/3 [00:00<00:00,  9.57it/s]