In [1]:
import pandas as pd

#### Create empty results file

In [2]:
# Empty frame that only fixes the column layout of the results file.
df = pd.DataFrame(
    columns=[
        'model', 'dataset', 'mode', 'filtered',  # which run the row describes
        'precision', 'recall', 'f1', 'acc',      # scores recorded for that run
    ]
)

```
df.to_pickle("results.pkl")
```

### EUPEG Results

In [23]:
# Read the EUPEG geoparsing scores from the spreadsheet next to this notebook
# and preview the first rows.
eupeg_results = pd.read_excel(io='geoparsing-results-EUPEG.xlsx')
eupeg_results.head(n=5)

Unnamed: 0,dataset,geoparser,precision,recall,f-score,accuracy
0,LGL,Edinburgh,0.723,0.383,0.501,0.383
1,LGL,TopoCluster,0.763,0.577,0.657,0.577
2,LGL,CLAVIN,0.808,0.444,0.573,0.444
3,LGL,CamCoder,0.811,0.548,0.654,0.548
4,LGL,StanfordNER,0.744,0.622,0.677,0.622


In [27]:
# Keep only the EUPEG rows for the three corpora and three geoparsers listed below.
eupeg_subset = eupeg_results[(eupeg_results['dataset'].isin(['LGL', 'TR-News', 'GeoWebNews'])) &
                             (eupeg_results['geoparser'].isin(['StanfordNER', 'DM_NLP+Pop', 'UniMelb+Pop']))]

# Average every metric per geoparser. `numeric_only=True` excludes the string
# 'dataset' column explicitly: pandas >= 2.0 no longer drops non-numeric columns
# silently and raises TypeError when asked to average them.
eupeg_subset.groupby('geoparser').mean(numeric_only=True).round(3)

Unnamed: 0_level_0,precision,recall,f-score,accuracy
geoparser,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DM_NLP+Pop,0.781,0.62,0.69,
StanfordNER,0.84,0.663,0.74,0.663
UniMelb+Pop,0.796,0.634,0.703,


### My Results

In [75]:
# Load the stored evaluation results and preview the first rows.
results = pd.read_pickle(filepath_or_buffer='results.pkl')
results.head(n=5)

Unnamed: 0,model,dataset,mode,filtered,precision,recall,f1,acc
0,flair/ner-english,DutchPolicyDocs,strict,False,0.549559,0.28018,0.371142,0.180047
1,flair/ner-english,DutchPolicyDocs,forgiving,False,0.648872,0.328595,0.436263,0.212701
2,flair/ner-english,TR-News,strict,False,0.886099,0.768274,0.82299,0.749052
3,flair/ner-english,TR-News,forgiving,False,0.913004,0.789147,0.84657,0.771797
4,flair/ner-english,LGL,strict,False,0.797386,0.685944,0.737478,0.671384


#### Metric results on English corpora

In [30]:
# Every result row except those for the 'DutchPolicyDocs' dataset.
results_english_corpora = results.loc[results['dataset'].ne('DutchPolicyDocs')]

In [37]:
# Split the English-corpora rows by evaluation mode: strict (_s) and forgiving (_f).
results_english_corpora_s = results_english_corpora.loc[results_english_corpora['mode'].eq('strict')]
results_english_corpora_f = results_english_corpora.loc[results_english_corpora['mode'].eq('forgiving')]

In [41]:
# Per-model mean of the metrics over the English corpora, rounded to 3 decimals.
# Columns get an `_s` (strict) or `_f` (forgiving) suffix so both frames can be
# placed side by side afterwards.
# `numeric_only=True` is required because 'dataset' and 'mode' are string
# columns: pandas >= 2.0 raises TypeError instead of silently dropping them.
model_metrics_avg_s = results_english_corpora_s.groupby('model').mean(numeric_only=True).round(3).rename(columns={'precision': 'precision_s','recall': 'recall_s','f1': 'f1_s', 'acc': 'acc_s'})

model_metrics_avg_f = results_english_corpora_f.groupby('model').mean(numeric_only=True).round(3).rename(columns={'precision': 'precision_f','recall': 'recall_f','f1': 'f1_f', 'acc': 'acc_f'})

In [46]:
# Put the strict (*_s) and forgiving (*_f) averages next to each other, one row per model.
tmp = pd.concat([model_metrics_avg_s, model_metrics_avg_f], axis=1)

# Render every metric as the text "strict (forgiving)".
for metric in ('precision', 'recall', 'f1', 'acc'):
    tmp['avg_' + metric] = tmp[metric + '_s'].astype(str) + ' (' + tmp[metric + '_f'].astype(str) + ')'

# Keep only the formatted columns for display.
tmp = tmp.loc[:, ['avg_precision', 'avg_recall', 'avg_f1', 'avg_acc']]
tmp

# tmp.to_excel('models-avg-metrics.xlsx')

Unnamed: 0_level_0,avg_precision,avg_recall,avg_f1,avg_acc
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LaBSE,0.725 (0.772),0.696 (0.742),0.709 (0.755),0.679 (0.725)
XLM-R-ner,0.8 (0.9),0.539 (0.602),0.643 (0.721),0.496 (0.558)
en_core_web_lg,0.729 (0.924),0.514 (0.65),0.602 (0.762),0.492 (0.621)
en_core_web_trf,0.759 (0.961),0.552 (0.697),0.639 (0.807),0.526 (0.664)
flair/ner-dutch-large,0.871 (0.91),0.663 (0.692),0.752 (0.785),0.639 (0.669)
flair/ner-english,0.866 (0.901),0.704 (0.731),0.775 (0.806),0.686 (0.714)
flair/ner-english-large,0.886 (0.923),0.726 (0.756),0.797 (0.83),0.704 (0.735)
flair/ner-multi,0.843 (0.883),0.692 (0.724),0.758 (0.794),0.677 (0.71)
mBERT,0.734 (0.775),0.679 (0.717),0.704 (0.744),0.658 (0.697)
nl_core_news_lg,0.649 (0.753),0.331 (0.383),0.437 (0.506),0.292 (0.338)


#### Metric results on the DutchPolicyDocs (DPD) corpus

In [76]:
# Only the result rows for the 'DutchPolicyDocs' dataset.
results_DPD = results.loc[results['dataset'].eq('DutchPolicyDocs')]

In [77]:
# Split the DutchPolicyDocs rows by evaluation mode: strict (_s) and forgiving (_f).
results_DPD_s = results_DPD.loc[results_DPD['mode'].eq('strict')]
results_DPD_f = results_DPD.loc[results_DPD['mode'].eq('forgiving')]

In [78]:
# Per-model mean of the metrics over the DutchPolicyDocs rows, rounded to 3 decimals.
# Columns get an `_s` (strict) or `_f` (forgiving) suffix so both frames can be
# placed side by side afterwards.
# `numeric_only=True` is required because 'dataset' and 'mode' are string
# columns: pandas >= 2.0 raises TypeError instead of silently dropping them.
model_metrics_avg_s = results_DPD_s.groupby('model').mean(numeric_only=True).round(3).rename(columns={'precision': 'precision_s','recall': 'recall_s','f1': 'f1_s', 'acc': 'acc_s'})

model_metrics_avg_f = results_DPD_f.groupby('model').mean(numeric_only=True).round(3).rename(columns={'precision': 'precision_f','recall': 'recall_f','f1': 'f1_f', 'acc': 'acc_f'})

In [79]:
# Put the strict (*_s) and forgiving (*_f) averages next to each other, one row per model.
tmp = pd.concat([model_metrics_avg_s, model_metrics_avg_f], axis=1)

# Render every metric as the text "strict (forgiving)".
for metric in ('precision', 'recall', 'f1', 'acc'):
    tmp['avg_' + metric] = tmp[metric + '_s'].astype(str) + ' (' + tmp[metric + '_f'].astype(str) + ')'

# Keep only the formatted columns for display.
tmp = tmp.loc[:, ['avg_precision', 'avg_recall', 'avg_f1', 'avg_acc']]
tmp

# tmp.to_excel('models-avg-metrics.xlsx')

Unnamed: 0_level_0,avg_precision,avg_recall,avg_f1,avg_acc
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LaBSE,0.544 (0.589),0.669 (0.728),0.6 (0.651),0.604 (0.654)
XLM-R-ner,0.575 (0.663),0.5 (0.582),0.535 (0.62),0.394 (0.455)
en_core_web_lg,0.438 (0.592),0.161 (0.221),0.236 (0.321),0.084 (0.114)
en_core_web_trf,0.583 (0.682),0.372 (0.441),0.454 (0.536),0.25 (0.293)
flair/ner-dutch-large,0.65 (0.703),0.718 (0.782),0.683 (0.74),0.642 (0.694)
flair/ner-english,0.55 (0.649),0.28 (0.329),0.371 (0.436),0.18 (0.213)
flair/ner-english-large,0.651 (0.702),0.729 (0.791),0.688 (0.744),0.654 (0.705)
flair/ner-multi,0.624 (0.689),0.636 (0.709),0.63 (0.699),0.553 (0.611)
mBERT,0.526 (0.57),0.636 (0.695),0.576 (0.626),0.572 (0.621)
nl_core_news_lg,0.587 (0.692),0.48 (0.572),0.528 (0.626),0.396 (0.467)
