In [1]:
from __future__ import unicode_literals, print_function
import plac
import random
from pathlib import Path
import spacy
import json
import io
from spacy.util import minibatch, compounding

In [2]:
# Entity labels registered on the NER component before training.
LABELS = [
    'DATE OF TRANSACTION',
    'TIME OF TRANSACTION',
    'ACCOUNT NO',
    'TOTAL BALANCE',
    'CARD NO',
    'LEDGER BALANCE',
    'COMBINED BALANCE',
    'CHEQUE NO',
    'CLEARING BALANCE',
    'VENDOR LIST',
    'ACCOUNT BALANCE',
    'CLOSING BALANCE',
]

# Read the annotated training examples (UTF-8 JSON) into memory.
# main() unpacks every item as a (text, annotations) pair.
with io.open('C:/Users/Simrandeep/Desktop/train_balance.json', encoding='utf8') as train_file:
    TRAIN_DATA = json.loads(train_file.read())

In [3]:
@plac.annotations(
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    new_model_name=("New model name for model meta.", "option", "nm", str),
    output_dir=("Optional output directory", "option", "o", Path),
    n_iter=("Number of training iterations", "option", "n", int))
def main(model=None, new_model_name='model', output_dir='C:\\Users\\Simrandeep\\Desktop\\Think\\ner output\\balance', n_iter=50):
    """Set up the pipeline and entity recognizer, and train the new entities.

    Trains the 'ner' pipe on the module-level TRAIN_DATA using the labels in
    LABELS, prints the entities found in a fixed test sentence, and
    optionally saves the model and reloads it to check the saved copy.

    Args:
        model: Name or path of an existing spaCy model to load. When None a
            blank 'en' pipeline is created instead.
        new_model_name: Value written to ``nlp.meta['name']`` before saving.
        output_dir: Directory the trained model is saved to. Pass None to
            skip saving (and the reload check).
        n_iter: Number of passes over the training data.
    """
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank('en')  # create blank Language class
        print("Created blank 'en' model")

    # Add entity recognizer to model if it's not in the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    ner_is_new = 'ner' not in nlp.pipe_names
    if ner_is_new:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner)
    # otherwise, get it, so we can add labels to it
    else:
        ner = nlp.get_pipe('ner')

    for label in LABELS:
        ner.add_label(label)   # add new entity label to entity recognizer

    # Shuffle a private copy so the module-level TRAIN_DATA keeps its order;
    # otherwise re-running this function starts from a different ordering and
    # the fixed seed below no longer makes runs repeatable.
    train_data = list(TRAIN_DATA)

    # get names of other pipes to disable them during training
    random.seed(7)
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):  # only train NER
        if ner_is_new:
            # A freshly created component has no weights yet.
            optimizer = nlp.begin_training()
        else:
            # begin_training() re-initialises the model, which would throw
            # away the entity types the loaded model already knows.
            optimizer = ner.create_optimizer()
        for itn in range(n_iter):
            random.shuffle(train_data)
            losses = {}
            # Batch size starts at 4 and compounds by 1.001 per batch up to 32.
            batches = minibatch(train_data, size=compounding(4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=0.35,
                           losses=losses)
            print(losses)

    # test the trained model
    test_text = 'THE ACCOUNT NUMBER XXXXXXXX254103 HAS LEDGER BALANCE OF RS 8754.34 AND AVAILABLE BALANCE OF RS 8754.34 AS ON 14/02/2018 AT 03:02.'
    doc = nlp(test_text)
    print("Entities in '%s'" % test_text)
    for ent in doc.ents:
        print(ent.label_, ent.text)

    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            # parents=True: the target is nested several levels deep, and a
            # bare mkdir() raises if any intermediate directory is missing.
            output_dir.mkdir(parents=True)
        nlp.meta['name'] = new_model_name  # rename model
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)

        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        doc2 = nlp2(test_text)
        for ent in doc2.ents:
            print(ent.label_, ent.text)

# Run training with the default arguments declared in main()'s signature.
main()

Created blank 'en' model
{'ner': 20.982623775754291}
{'ner': 10.92291052039711}
{'ner': 6.5293833429170807}
{'ner': 11.384915164251188}
{'ner': 8.2800714179368171}
{'ner': 5.3146514327072714}
{'ner': 3.1955476586673623}
{'ner': 2.792840809548589}
{'ner': 2.6939472047092967}
{'ner': 1.7445516466266544}
{'ner': 1.9075971461758068}
{'ner': 2.3603735493395899}
{'ner': 1.4855957111028517}
{'ner': 1.1551367467776428}
{'ner': 1.2383106059704536}
{'ner': 0.54290494729446481}
{'ner': 0.69476572483958898}
{'ner': 0.60802599298728399}
{'ner': 0.79266027195367517}
{'ner': 0.69748499422048305}
{'ner': 0.61325245945279883}
{'ner': 0.21908239016520095}
{'ner': 1.2461364555256613}
{'ner': 0.59805043473811614}
{'ner': 0.59201137202061049}
{'ner': 0.6992538253028695}
{'ner': 0.43932338624593659}
{'ner': 0.28292104391356515}
{'ner': 0.70801233584301182}
{'ner': 0.53083137645532763}
{'ner': 0.19857657631750125}
{'ner': 0.00069613698139184776}
{'ner': 0.027209845276029123}
{'ner': 0.11654657021306629}
{'ne

In [14]:
import spacy

# Sentences to run through the saved model.
test_text=['YOUR BALANCE FOR ACCOUNT NO. XXX554 IS RS. 930.66. HAVE A NICE DAY!.']
output_dir= 'C:\\Users\\Simrandeep\\Desktop\\Think\\ner output\\balance'

# test the saved model
print("Loading from", output_dir)
# Load the model once, outside the loop: reading it from disk for every
# sentence is loop-invariant work.
nlp2 = spacy.load(output_dir)
for text in test_text:
    print("")
    print("Entities in '%s'" % text)
    doc2 = nlp2(text)
    for ent in doc2.ents:
        print(ent.label_, ent.text)

Loading from C:\Users\Simrandeep\Desktop\Think\ner output\balance

Entities in 'YOUR BALANCE FOR ACCOUNT NO. XXX554 IS RS. 930.66. HAVE A NICE DAY!.'
ACCOUNT NO XXX554
LEDGER BALANCE RS.
TIME OF TRANSACTION 930.66
