In [12]:
%load_ext autoreload
%autoreload 2

import sys
# Add the parent directory to the module search path; presumably this is what
# makes the local `hc_nlp` package importable below — TODO confirm.
sys.path.append("..")

import spacy
from spacy.gold import GoldParse
from spacy.scorer import Scorer

import pprint
# Shared pretty-printer (2-space indent) used to print the nested score dictionaries.
pp = pprint.PrettyPrinter(indent=2)

from hc_nlp.io import load_text_and_annotations_from_labelstudio
from hc_nlp.spacy_helpers import display_manual_annotations, display_ner_annotations

# to download a model:
# !python -m spacy download en_core_web_sm OR !python -m spacy download en_core_web_lg
# english language models here: https://spacy.io/models/en

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Load both English pipelines so their NER scores can be compared later on.
nlp_sm = spacy.load("en_core_web_sm")  # small model
nlp_lg = spacy.load("en_core_web_lg")  # large model


In [5]:
# Load the annotated examples. The first argument presumably identifies a
# Label Studio export (it looks like a timestamp) — TODO confirm against hc_nlp.io.
data = load_text_and_annotations_from_labelstudio("2020-11-25-11-43-02", nlp_lg)
# Unpack the item at index 11 into a single (text, annotations) pair.
text, annotations = data[11]

In [14]:
def evaluate(ner_model, examples):
    """Score a pipeline's entity predictions against reference annotations.

    Args:
        ner_model: Pipeline object that provides ``make_doc(text)`` and can be
            called directly on raw text to produce a prediction.
        examples: Iterable of ``(text, entities)`` pairs. ``entities`` is
            forwarded unchanged as ``GoldParse(..., entities=entities)``.

    Returns:
        dict: The ``'ents_p'``, ``'ents_r'``, ``'ents_f'`` and
        ``'ents_per_type'`` entries taken from ``Scorer.scores`` after every
        example has been scored.
    """
    ner_scorer = Scorer()

    for raw_text, gold_entities in examples:
        # The reference parse is built on a tokenised-only doc of the same text.
        gold_parse = GoldParse(ner_model.make_doc(raw_text), entities=gold_entities)
        predicted_doc = ner_model(raw_text)
        ner_scorer.score(predicted_doc, gold_parse)

    # Keep only the entity-level measures from the accumulated scores.
    accumulated = ner_scorer.scores
    return {
        'ents_p': accumulated['ents_p'],
        'ents_r': accumulated['ents_r'],
        'ents_f': accumulated['ents_f'],
        'ents_per_type': accumulated['ents_per_type'],
    }

examples = [(text, annotations)]
# NOTE(review): `examples` is not passed to `evaluate` below; both runs score `data`.

# Print the entity scores of each pipeline, with a divider line between the reports.
for run_index, (heading, pipeline) in enumerate(
    (("SMALL MODEL: ", nlp_sm), ("LARGE MODEL: ", nlp_lg))
):
    if run_index > 0:
        print("---")
    print(heading)
    pp.pprint(evaluate(pipeline, data))

SMALL MODEL: 
{ 'ents_f': 43.263288009888754,
  'ents_p': 36.64921465968586,
  'ents_per_type': { 'CARDINAL': {'f': 0.0, 'p': 0.0, 'r': 0.0},
                     'DATE': { 'f': 75.07331378299119,
                               'p': 79.01234567901234,
                               'r': 71.50837988826815},
                     'EVENT': {'f': 0.0, 'p': 0.0, 'r': 0.0},
                     'FAC': {'f': 0.0, 'p': 0.0, 'r': 0.0},
                     'GPE': { 'f': 66.66666666666666,
                              'p': 65.07936507936508,
                              'r': 68.33333333333333},
                     'LANGUAGE': {'f': 0.0, 'p': 0.0, 'r': 0.0},
                     'LOC': {'f': 0.0, 'p': 0.0, 'r': 0.0},
                     'MONEY': {'f': 0.0, 'p': 0.0, 'r': 0.0},
                     'NORP': {'f': 0.0, 'p': 0.0, 'r': 0.0},
                     'ORDINAL': {'f': 0.0, 'p': 0.0, 'r': 0.0},
                     'ORG': { 'f': 44.32432432432432,
                              'p': 40.394

In [16]:
# Walk through the items in `data[0:20]` and call both display helpers on each
# one, presumably showing the model's entities next to the manual ones.
#
# The loop targets have their own names on purpose: a `for` loop leaves its
# targets bound after it finishes, so reusing `text` / `annotations` would
# overwrite the pair unpacked from `data[11]` in the data-loading cell and
# change what any re-run cell reading those names sees.
for sample_text, sample_annotations in data[0:20]:
    display_ner_annotations(sample_text, nlp_lg)
    print("--")
    display_manual_annotations(sample_text, sample_annotations)
    print("-----")

--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----


--


-----
