In [1]:
import pickle
import en_core_web_sm
import spacy
import random
from spacy import displacy

In [2]:
filename = '../training_data'
infile = open(filename,'rb')
TRAIN_DATA = pickle.load(infile)
infile.close()

In [3]:
nlp=spacy.load('en_core_web_sm')

In [4]:
def train_model(train_data):
    if 'ner' not in nlp.pipe_names:
        ner=nlp.create_pipe('ner')
        nlp.add_pipe(ner,last=True)
    else:
        ner = nlp.get_pipe("ner")
    for _,annotation in train_data:
        for ent in annotation['entities']:
            ner.add_label(ent[2])
    


    other_pipes = [pipe for pipe in nlp.pipe_names if pipe !='ner']
    # only train NER
    with nlp.disable_pipes(*other_pipes):
        optimizer=nlp.entity.create_optimizer()
        for itn in range(10):
            random.shuffle(train_data)
            losses = {}
            index=0
            for text,annotations in train_data:
                try:
                    nlp.update(
                        [text],  # batch of texts
                        [annotations],  # batch of annotations
                        drop=0.2,
                        sgd=optimizer,  # dropout - make it harder to memorise data
                        losses=losses)
                except Exception as e:
                    pass
            print(losses)
        
    

In [5]:
train_model(TRAIN_DATA)

{'ner': 856.1193560325812}
{'ner': 691.8476652781289}
{'ner': 740.4969384513392}
{'ner': 744.4245401407711}
{'ner': 667.0599308201317}
{'ner': 710.6064292415322}
{'ner': 739.5950823261201}
{'ner': 651.5569385116476}
{'ner': 704.2692743114118}
{'ner': 754.2017030112061}


In [15]:
nlp.to_disk('nlp_model')

In [16]:
nlp = spacy.load('nlp_model')
doc = nlp("Va Tech Wabag is quoting ex-split today. The company approved a proposal to sub-divide each ordinary equity share of face value of Rs 5/- each into face value of Rs 2 each fully paid up on May 26, 2011. The record date has been fixed at August 17.")
for ent in doc.ents:
    print(ent.text,ent.label_)

ex-split CA_TYPE
face value of Rs 5/- each into face value of Rs 2 each fully paid up on May 26, 2011. PURPOSE


In [14]:
displacy.serve(doc,style='ent')


Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.
