In [69]:
import spacy

In [70]:
nlp=spacy.load('en_core_web_sm')
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer']

In [71]:
doc = nlp("Australia wants to force Facebook and Google to pay media companies for news")

In [72]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label)

Australia 0 9 384


In [73]:
doc = nlp("I do not have money to pay my credit card account")

In [74]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label)

In [75]:
doc = nlp("what is the process to open new savings account")

In [76]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label)

In [77]:
train = [("Money transfer from my checking account is not working", {"entities": [(6, 13, "ACTIVITY"), (23, 39, 'PRODUCT')]}),
         ("I want to check balance in my savings account", {"entities": [(16, 23, "ACTIVITY"), (30, 45, "PRODUCT")]}),
         ("I suspect a fraud in my credit card account", {"entities": [(12, 17, "ACTIVITY"), (24, 35, 'PRODUCT')]}),
         ("I am here for opening a new savings account", {"entities": [(14, 21, "ACTIVITY"), (28, 43, 'PRODUCT')]}),
         ("Your mortgage is in delinquent status", {"entities": [(20, 30, "ACTIVITY"), (5, 13, "PRODUCT")]}),
         ("Your credit card is in past due status", {"entities": [(23, 31, "ACTIVITY"), (5, 16, 'PRODUCT')]}),
         ("My loan account is still not approved and funded", {"entities": [(25, 37, "ACTIVITY"), (3, 15, 'PRODUCT')]}),
         ("How do I open a new loan account", {"entities": [(9, 13, "ACTIVITY"), (28, 32,"PRODUCT")]}),
         ("What are the charges on Investment account", {"entities": [(13, 20, "ACTIVITY"), (24, 42, 'PRODUCT')]}),
         ("Can you explain late charges on my credit card", {"entities": [(21, 28, "ACTIVITY"), (35, 46, 'PRODUCT')]}),
         ("I want to open a new loan account", { "entities": [(18, 14, "ACTIVITY"), (21, 33,"PRODUCT")]}),
         ("Can you help updating payment on my credit card", {"entities": [(22, 29, "ACTIVITY"), (36, 47, 'PRODUCT')]}),
         ("When is the payment due date on my card", {"entities": [(12, 19, "ACTIVITY"), (35, 39, 'PRODUCT')]})
        ]

In [78]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer']

In [79]:
ner=nlp.get_pipe("ner")

In [80]:
for _, annotations in train:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

In [81]:
disable_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

In [82]:
import random
from spacy.util import minibatch, compounding
from pathlib import Path
from spacy.training.example import Example

with nlp.disable_pipes(*disable_pipes):
  optimizer = nlp.resume_training()

  for iteration in range(100):

    random.shuffle(train)
    losses = {}

    batches = minibatch(train, size=compounding(1.0, 4.0, 1.001))
    for batch in batches:
        text, annotation = zip(*batch)
        example = Example.from_dict(doc, annotations)
        nlp.update([example],drop=0.5, losses=losses ,sgd=optimizer)

        print("Losses", losses)

Losses {'ner': 1.9846815278940217}
Losses {'ner': 4.006581341967075}
Losses {'ner': 6.003139808993642}
Losses {'ner': 9.0171839470111}
Losses {'ner': 10.945664068079976}
Losses {'ner': 12.984719544612851}
Losses {'ner': 14.975121161462992}
Losses {'ner': 16.95229332874732}
Losses {'ner': 18.802462298059876}
Losses {'ner': 20.76818131143771}
Losses {'ner': 22.728522877303252}
Losses {'ner': 24.041844713521474}
Losses {'ner': 25.21523963149213}
Losses {'ner': 1.5908660628711875}
Losses {'ner': 2.8640876549421836}
Losses {'ner': 4.401456690943293}
Losses {'ner': 9.442876334284508}
Losses {'ner': 10.644217328756556}
Losses {'ner': 11.991696417624512}
Losses {'ner': 13.61900867348937}
Losses {'ner': 15.105296509284196}
Losses {'ner': 16.434326303796418}
Losses {'ner': 17.47202634950481}
Losses {'ner': 18.58885197317405}
Losses {'ner': 19.873993408840725}
Losses {'ner': 21.686065193918516}
Losses {'ner': 1.3084678577765985}
Losses {'ner': 2.663652733688373}
Losses {'ner': 3.5076470240634947}

In [83]:
for text, _ in train:
    doc = nlp(text)
    print('Entities',[(ent.text, ent.label_) for ent in doc.ents])

Entities []
Entities []
Entities []
Entities [('payment', 'ACTIVITY')]
Entities []
Entities []
Entities []
Entities []
Entities []
Entities []
Entities []
Entities []
Entities []


In [84]:
from spacy import displacy

doc = nlp("what is the process to open a new savings account")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
displacy.render(nlp(doc.text),style='ent', jupyter=True)    

process 12 19 ACTIVITY


In [85]:
doc = nlp("My credit card payment will be delayed")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [86]:
doc = nlp("what are the charges on credit card late payment in Bank of America")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
displacy.render(nlp(doc.text),style='ent', jupyter=True)

Bank of America 52 67 ORG


In [87]:
doc = nlp("I lost my investment account password and cannot open my account now")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
displacy.render(nlp(doc.text),style='ent', jupyter=True)

In [88]:
doc = nlp("what is the status of my loan account")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

status 12 18 ACTIVITY


In [89]:
doc = nlp("Australia wants to force Facebook and Google to pay media companies for news")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

Google 38 44 ACTIVITY
