In [35]:
import spacy
from spacy.training.example import Example
from spacy.training import offsets_to_biluo_tags
from spacy.tokens import DocBin
from spacy.scorer import Scorer
import random

In [36]:
def create_training_data():
    TRAIN_DATA = [
        ("Trump had previously too raised questions on the need to provide monetary support to India to increase voter turnout during elections.",
         {"entities": [(0, 5, "PERSON"), (82, 87, "GPE")]}),
        ("Why are we giving $21 million to India? They got a lot more money.",
         {"entities": [(26, 31, "GPE")]}),
        ("Trump said at his Mar-a-Lago residence in Florida on February 18.",
         {"entities": [(0, 5, "PERSON"), (30, 37, "GPE"), (41, 49, "DATE")]})
    ]
    return TRAIN_DATA


In [37]:
def train_ner_model():
    nlp = spacy.blank("en")
    if "ner" not in nlp.pipe_names:
        ner = nlp.add_pipe("ner")
    else:
        ner = nlp.get_pipe("ner")

    TRAIN_DATA = create_training_data()
    for _, annotations in TRAIN_DATA:
        for ent in annotations["entities"]:
            ner.add_label(ent[2])

    optimizer = nlp.begin_training()
    for i in range(20):
        random.shuffle(TRAIN_DATA)
        losses = {}
        for text, annotations in TRAIN_DATA:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], losses=losses)
        print(f"Iteration {i+1}, Loss: {losses}")

    nlp.to_disk("custom_ner_model")
    print("Model saved to 'custom_ner_model'")



In [38]:
def evaluate_model():
    nlp = spacy.load("custom_ner_model")
    scorer = Scorer()
    TEST_DATA = [
        ("Trump visited India last year.", {"entities": [(0, 5, "PERSON"), (13, 18, "GPE")]}),
        ("Mar-a-Lago is located in Florida.", {"entities": [(24, 31, "GPE")]})
    ]
    examples = []
    for text, annotations in TEST_DATA:
        doc = nlp.make_doc(text)
        example = Example.from_dict(doc, annotations)
        example.predicted = nlp(text)
        examples.append(example)
    scores = scorer.score(examples)
    print("Evaluation Scores:", scores)

if __name__ == "__main__":
    train_ner_model()
    evaluate_model()


Iteration 1, Loss: {'ner': np.float32(44.85943)}
Iteration 2, Loss: {'ner': np.float32(37.543976)}
Iteration 3, Loss: {'ner': np.float32(19.422052)}
Iteration 4, Loss: {'ner': np.float32(3.2424)}
Iteration 5, Loss: {'ner': np.float32(1.8486869)}
Iteration 6, Loss: {'ner': np.float32(1.3118305)}
Iteration 7, Loss: {'ner': np.float32(0.5765054)}
Iteration 8, Loss: {'ner': np.float32(0.11403555)}
Iteration 9, Loss: {'ner': np.float32(0.0009413567)}
Iteration 10, Loss: {'ner': np.float32(6.392342e-05)}
Iteration 11, Loss: {'ner': np.float32(3.108914e-07)}
Iteration 12, Loss: {'ner': np.float32(4.5747566e-07)}
Iteration 13, Loss: {'ner': np.float32(1.5079807e-09)}
Iteration 14, Loss: {'ner': np.float32(2.9215624e-08)}
Iteration 15, Loss: {'ner': np.float32(5.4800706e-09)}
Iteration 16, Loss: {'ner': np.float32(1.5945917e-09)}
Iteration 17, Loss: {'ner': np.float32(7.0654123e-09)}
Iteration 18, Loss: {'ner': np.float32(1.897099e-09)}
Iteration 19, Loss: {'ner': np.float32(1.128743e-09)}
Iter