# Evaluating the spaCy NER model

In [1]:
import spacy
from spacy.tokens import DocBin
from spacy.scorer import Scorer
from spacy.training import Example

In [2]:
# Load the trained model
nlp = spacy.load("./output/model-best")

# Load the test dataset
test_data = DocBin().from_disk("./data/test.spacy")
test_docs = list(test_data.get_docs(nlp.vocab))

# Initialize the scorer
scorer = Scorer(default_lang=nlp.lang, default_pipeline=nlp.pipe_names)

# Use the model to predict the document entities
predictions = [Example(nlp(doc.text), doc) for doc in test_docs]

# Score the predictions against the annotations in the test dataset
scores = scorer.score(predictions)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Print the desired metrics
print("F1\t| Precision\t| Recall")
print(
    f"{round(scores['ents_f'], 5)}\t| {round(scores['ents_p'], 5)}\t| {round(scores['ents_r'], 5)}"
)
print("--------------------")
for label, score in scores["ents_per_type"].items():
    tabs = "\t" * (3 - len(label) // 5)
    print(
        f"{label}:{tabs}{round(score['p'], 5)}\t{round(score['r'], 5)}\t{round(score['f'], 5)}"
    )

F1	| Precision	| Recall
0.75	| 0.82317	| 0.68878
--------------------
NAME_STUDENT:	0.80405	0.70414	0.75079
URL_PERSONAL:	1.0	0.66667	0.8
ID_NUM:		1.0	0.5	0.66667
EMAIL:		1.0	1.0	1.0
