In [7]:
import spacy
from spacy.training.example import Example
import random
import json

# Load small model
nlp = spacy.load("en_core_web_sm")

# Load data
with open("train_data_spacy.json", "r") as f:
    TRAIN_DATA = json.load(f)

# Get the NER pipe
ner = nlp.get_pipe("ner")

# Add new labels
for _, annotations in TRAIN_DATA:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

# Disable other pipes during training
pipe_exceptions = ["ner"]
unaffected_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]

# Training
with nlp.disable_pipes(*unaffected_pipes):
    optimizer = nlp.resume_training()
    for itn in range(20):  # number of iterations
        random.shuffle(TRAIN_DATA)
        losses = {}
        for text, annotations in TRAIN_DATA:
            doc = nlp.make_doc(text)
            example = Example.from_dict(doc, annotations)
            nlp.update([example], drop=0.3, losses=losses)
        print(f"Iteration {itn+1} - Loss: {losses}")

# Save the model
nlp.to_disk("ner_model")


Iteration 1 - Loss: {'ner': np.float32(0.000119165765)}
Iteration 2 - Loss: {'ner': np.float32(2.9391562e-05)}
Iteration 3 - Loss: {'ner': np.float32(1.1079147e-08)}
Iteration 4 - Loss: {'ner': np.float32(9.178846e-10)}
Iteration 5 - Loss: {'ner': np.float32(6.629118e-10)}
Iteration 6 - Loss: {'ner': np.float32(1.2201489e-11)}
Iteration 7 - Loss: {'ner': np.float32(8.052201e-09)}
Iteration 8 - Loss: {'ner': np.float32(2.4308002e-13)}
Iteration 9 - Loss: {'ner': np.float32(1.8959153e-13)}
Iteration 10 - Loss: {'ner': np.float32(1.8166048e-12)}
Iteration 11 - Loss: {'ner': np.float32(1.2427228e-13)}
Iteration 12 - Loss: {'ner': np.float32(1.5585518e-14)}
Iteration 13 - Loss: {'ner': np.float32(6.1087354e-16)}
Iteration 14 - Loss: {'ner': np.float32(4.3761846e-16)}
Iteration 15 - Loss: {'ner': np.float32(2.3060576e-13)}
Iteration 16 - Loss: {'ner': np.float32(1.4642634e-11)}
Iteration 17 - Loss: {'ner': np.float32(7.748246e-15)}
Iteration 18 - Loss: {'ner': np.float32(9.072163e-14)}
Itera

In [6]:
from seqeval.metrics import classification_report

print(classification_report(y_true, y_pred, zero_division=0))


                 precision    recall  f1-score   support

Confidentiality       1.00      1.00      1.00         1
    Termination       0.00      0.00      0.00         1

      micro avg       1.00      0.50      0.67         2
      macro avg       0.50      0.50      0.50         2
   weighted avg       0.50      0.50      0.50         2



In [2]:
import json
from pathlib import Path

# Minimal dummy training data – update with real data later
train_data = [
    ("This agreement includes a confidentiality clause.", {"entities": [(29, 44, "Confidentiality")]}),
    ("The contract may be terminated at any time by either party.", {"entities": [(23, 33, "Termination")]}),
]

# Save to JSON
with open("train_data_spacy.json", "w") as f:
    json.dump(train_data, f)

print("✓ Saved: train_data_spacy.json")


✓ Saved: train_data_spacy.json
