# Evaluating the results of Training

In [1]:
from __future__ import unicode_literals, print_function
import spacy
from spacy.lang.es import Spanish 
from spacy import displacy
from spacy.tokens import Doc
from collections import defaultdict, Counter
from spacy.attrs import ORTH
from spacy.scorer import Scorer
from spacy.language import GoldParse
from spacy.util import minibatch, compounding

import pandas as pd
import numpy as np
import json
import plac
import random
from sklearn.model_selection import train_test_split
from pathlib import Path

#nlp = Spanish().from_disk("Documents/Research/NPL/SevillianPaintersNPL/EM Spanish Model/Trained_Model")

In [2]:
# Read Tagged Data from JSON file
with open('TaggedData_SF.json', 'r', encoding='utf-8') as fp2:
    TAGGED_DATA = json.load(fp2)

In [3]:
# Load Spacy Model

nlp = spacy.load('es_core_news_ml_EMS2')

In [4]:
def evaluate(ner_model, examples):
    scorer = Scorer()
    for sents, ents in examples:
        doc_gold = ner_model.make_doc(sents)
        gold = GoldParse(doc_gold, entities=ents['entities'])
        pred_value = ner_model(sents)
        scorer.score(pred_value, gold)
    return scorer.scores

In [5]:
# Testing how much the evaluation depends on texts included in testing data

#Generate empty dictionary for storing evaluation results of different trials
d = {}

#Loop 10 times
for x in range(0,101):
    
    #Batching the Tagged Data into training and evaluation data (80-20)

    random.shuffle(TAGGED_DATA)
    train_data = TAGGED_DATA[:400]
    test_data = TAGGED_DATA[400:]

    #Testing NER results of existing model on test data

    results = evaluate(nlp,test_data)
    d[x] = pd.DataFrame(results)

In [14]:
results

{'uas': 0.0,
 'las': 0.0,
 'ents_p': 87.87878787878788,
 'ents_r': 74.35897435897436,
 'ents_f': 80.55555555555556,
 'ents_per_type': {'LOC': {'p': 100.0,
   'r': 91.66666666666666,
   'f': 95.65217391304348},
  'MON': {'p': 100.0, 'r': 75.0, 'f': 85.71428571428571},
  'PER': {'p': 80.0, 'r': 80.0, 'f': 80.00000000000001},
  'OBJ': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'ORG': {'p': 66.66666666666666,
   'r': 66.66666666666666,
   'f': 66.66666666666666},
  'DATE': {'p': 100.0, 'r': 100.0, 'f': 100.0}},
 'tags_acc': 0.0,
 'token_acc': 100.0,
 'textcat_score': 0.0,
 'textcats_per_cat': {}}

In [6]:
print(d[3])

     uas  las  ents_p     ents_r  ents_f  \
LOC  0.0  0.0   100.0  92.307692    96.0   
MON  0.0  0.0   100.0  92.307692    96.0   
OBJ  0.0  0.0   100.0  92.307692    96.0   
ORG  0.0  0.0   100.0  92.307692    96.0   
PER  0.0  0.0   100.0  92.307692    96.0   

                                       ents_per_type  tags_acc  token_acc  \
LOC  {'p': 100.0, 'r': 90.0, 'f': 94.73684210526316}       0.0      100.0   
MON             {'p': 100.0, 'r': 100.0, 'f': 100.0}       0.0      100.0   
OBJ                   {'p': 0.0, 'r': 0.0, 'f': 0.0}       0.0      100.0   
ORG             {'p': 100.0, 'r': 100.0, 'f': 100.0}       0.0      100.0   
PER             {'p': 100.0, 'r': 100.0, 'f': 100.0}       0.0      100.0   

     textcat_score  textcats_per_cat  
LOC            0.0               NaN  
MON            0.0               NaN  
OBJ            0.0               NaN  
ORG            0.0               NaN  
PER            0.0               NaN  


In [7]:
columns=['ents_p', 'ents_r', 'ents_f', 'label','trial']
eval_data = pd.DataFrame(columns=columns)
eval_data = eval_data.fillna(0)

In [8]:
print(eval_data)

Empty DataFrame
Columns: [ents_p, ents_r, ents_f, label, trial]
Index: []


In [13]:
#Extract information from dictionary (f, p and r scores for each label within each trial) and save it into a dataframe
for x in d:
   # ev_date= d[x].loc['DATE','ents_per_type']
    ev_loc= d[x].loc['LOC','ents_per_type']
    ev_mon= d[x].loc['MON','ents_per_type']
    ev_org= d[x].loc['ORG','ents_per_type']
    ev_per= d[x].loc['PER','ents_per_type']
    #newrow1={'ents_p':ev_date['p'],'ents_r':ev_date['r'],'ents_f':ev_date['f'],'label':'DATE','trial':x}
    newrow2={'ents_p':ev_loc['p'],'ents_r':ev_loc['r'],'ents_f':ev_loc['f'],'label':'LOC','trial':x}
    newrow4={'ents_p':ev_mon['p'],'ents_r':ev_mon['r'],'ents_f':ev_mon['f'],'label':'MON','trial':x}
    newrow6={'ents_p':ev_org['p'],'ents_r':ev_org['r'],'ents_f':ev_org['f'],'label':'ORG','trial':x}
    newrow7={'ents_p':ev_per['p'],'ents_r':ev_per['r'],'ents_f':ev_per['f'],'label':'PER','trial':x}
    #eval_data=eval_data.append(newrow1,ignore_index=True)
    eval_data=eval_data.append(newrow2,ignore_index=True)
    eval_data=eval_data.append(newrow4,ignore_index=True)
    eval_data=eval_data.append(newrow6,ignore_index=True)
    eval_data=eval_data.append(newrow7,ignore_index=True)  

KeyError: 'ORG'

In [105]:
print(eval_data)

        ents_p     ents_r     ents_f label trial
0     0.000000   0.000000   0.000000  DATE     0
1    39.408867  48.192771  43.360434   LOC     0
2     0.000000   0.000000   0.000000  MISC     0
3     0.000000   0.000000   0.000000   MON     0
4     0.000000   0.000000   0.000000   OBJ     0
5     9.090909   1.369863   2.380952   ORG     0
6    45.682451  73.542601  56.357388   PER     0
7     0.000000   0.000000   0.000000  DATE     1
8    46.153846  50.704225  48.322148   LOC     1
9     0.000000   0.000000   0.000000  MISC     1
10    0.000000   0.000000   0.000000   MON     1
11    0.000000   0.000000   0.000000   OBJ     1
12   15.384615   4.545455   7.017544   ORG     1
13   62.737643  81.280788  70.815451   PER     1
14    0.000000   0.000000   0.000000  DATE     2
15   36.416185  41.447368  38.769231   LOC     2
16    0.000000   0.000000   0.000000  MISC     2
17    0.000000   0.000000   0.000000   MON     2
18    0.000000   0.000000   0.000000   OBJ     2
19   10.000000   1.6

In [106]:
#Measure mean and standard deviation of f, p and r scores for each label 
eval_data.groupby('label').agg({'ents_f':['mean','std'],'ents_p':['mean','std'],'ents_r':['mean','std']})

Unnamed: 0_level_0,ents_f,ents_f,ents_p,ents_p,ents_r,ents_r
Unnamed: 0_level_1,mean,std,mean,std,mean,std
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
DATE,0.0,0.0,0.0,0.0,0.0,0.0
LOC,44.520979,4.40579,42.088666,5.309654,47.555381,4.680152
MISC,0.0,0.0,0.0,0.0,0.0,0.0
MON,0.0,0.0,0.0,0.0,0.0,0.0
OBJ,0.0,0.0,0.0,0.0,0.0,0.0
ORG,2.744431,2.505632,8.923374,8.278483,1.668712,1.578833
PER,62.982444,4.325562,54.139124,4.884477,75.441766,3.493916
