# Evaluating the spaCy NER model

In [None]:
import spacy
from spacy.tokens import DocBin
from spacy.scorer import Scorer
from spacy.training import Example

In [None]:
# Load the trained pipeline and report its language and component names.
nlp = spacy.load("./output/model-best")
print(f"nlp.lang: {nlp.lang}")
print(f"nlp.pipe_names: {nlp.pipe_names}")

# Load the annotated test documents, deserialised against the model's vocab.
test_data = DocBin().from_disk("./data/test.spacy")
test_docs = list(test_data.get_docs(nlp.vocab))

# Score with the loaded pipeline itself. Passing only `default_lang` and
# `default_pipeline` would make the Scorer assemble a second, untrained
# pipeline by re-adding every component by name, which is wasted work and
# breaks when a component name is not also a registered factory name.
scorer = Scorer(nlp)

# Predict entities from the raw text of each test document. `nlp.pipe`
# processes the texts in batches and yields the results in input order, so
# each prediction can be paired with its reference document via `zip`.
predicted_docs = nlp.pipe(doc.text for doc in test_docs)
predictions = [
    Example(predicted, reference)
    for predicted, reference in zip(predicted_docs, test_docs)
]

# Score the predictions against the annotations in the test dataset.
scores = scorer.score(predictions)

In [None]:
# Print the overall entity-level metrics computed by the scorer.
print(f"F1-score: {scores['ents_f']}")
print(f"Precision: {scores['ents_p']}")
print(f"Recall: {scores['ents_r']}")

# Print the same metrics broken down by entity label.
print("By PII type:")
# NOTE(review): spaCy's NER scorer appears to report `None` (not an empty
# dict) for "ents_per_type" when there was nothing to score; fall back to an
# empty mapping so this cell prints no rows instead of raising AttributeError.
per_type_scores = scores["ents_per_type"] or {}
for label, score in per_type_scores.items():
    print(f"\t'{label}': {score['p']} precision, {score['r']} recall, {score['f']} F1")