In [1]:
import json
import os
import re
import pandas as pd
import numpy as np

from collections import defaultdict
from itertools import islice

In [2]:
def get_test_results(subdir:str='./test_results/', task:str='ontonotes/', score_idx:int=3, n_metrics:int=3):
    files = list(map(lambda file: subdir+task+file, os.listdir(subdir+task)))
    results = defaultdict(list)
    batchsize = str(32)
    year = 'yyyy'
    for i, file in enumerate(files):
        if not file.endswith('.ipynb_checkpoints'):
            with open(file, 'r', encoding='utf-8') as f:
                f = f.readlines()
                for j in range(n_metrics):
                    scores = []
                    for k, line in enumerate(islice(f, 1+j, None, n_metrics)):
                        line = line.split()
                        scores.append(round(float(line[score_idx]), 4))
                    if task=='ontonotes/':
                        attention = line[0][line[0].index('sent')+len('sent'):line[0].index(batchsize)].strip('-').strip()
                    elif task=='conll2003/':
                        attention = line[0][line[0].index('2003')+len(year):line[0].index(batchsize)].strip('-').strip()
                    elif task=='semeval2010/':
                         attention = line[0][line[0].index('2010')+len(year):line[0].index(batchsize)].strip('-').strip()
                    elif task=='semeval2007/':
                         attention = line[0][line[0].index('2007')+len(year):line[0].index(batchsize)].strip('-').strip()
                    attn = 'baseline' if attention == 'bl' else attention
                    results[attn].append(np.mean(scores))
    df =  pd.DataFrame.from_dict(results, orient='index', columns=['Precision', 'Recall', 'F1'])
    return df

In [3]:
test_results_semeval2007 = get_test_results(task='semeval2007/')

In [4]:
test_results_semeval2007

Unnamed: 0,Precision,Recall,F1
15d_RF_NR_Max_alpha,0.44158,0.2271,0.26562
15d_RF_NR_Max_beta,0.44066,0.7085,0.4796
15d_RF_NR_Max_theta,0.4568,0.54746,0.4264
15d_RF_TSR_Max_alpha,0.4357,0.45254,0.38184
15d_RF_TSR_Max_beta,0.43902,0.32544,0.30132
15d_RF_TSR_Max_theta,0.4224,0.23558,0.18292
baseline,0.43124,0.41016,0.3755
BNCfreqinv,0.5152,0.26778,0.21802
MeanFixCont,0.42562,0.53052,0.4215


In [5]:
test_results_semeval2010 = get_test_results(task='semeval2010/')

In [6]:
test_results_semeval2010

Unnamed: 0,Precision,Recall,F1
15d_RF_NR_Max_alpha,0.7966,0.6393,0.7091
15d_RF_NR_Max_beta,0.7858,0.6443,0.70792
15d_RF_NR_Max_theta,0.79906,0.60856,0.69036
15d_RF_TSR_Max_alpha,0.79024,0.64572,0.7104
15d_RF_TSR_Max_beta,0.79148,0.63786,0.70602
15d_RF_TSR_Max_theta,0.79138,0.64428,0.7101
30d_RF_TSR_Max_alpha,0.7947,0.62,0.69632
30d_RF_TSR_Max_theta,0.79964,0.645,0.71344
baseline,0.80028,0.63214,0.70566
BNCfreqinv,0.78302,0.58002,0.66522


In [7]:
print(test_results_semeval2010.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  Precision &   Recall &       F1 \\
\midrule
15d\_RF\_NR\_Max\_alpha  &    0.79660 &  0.63930 &  0.70910 \\
15d\_RF\_NR\_Max\_beta   &    0.78580 &  0.64430 &  0.70792 \\
15d\_RF\_NR\_Max\_theta  &    0.79906 &  0.60856 &  0.69036 \\
15d\_RF\_TSR\_Max\_alpha &    0.79024 &  0.64572 &  0.71040 \\
15d\_RF\_TSR\_Max\_beta  &    0.79148 &  0.63786 &  0.70602 \\
15d\_RF\_TSR\_Max\_theta &    0.79138 &  0.64428 &  0.71010 \\
30d\_RF\_TSR\_Max\_alpha &    0.79470 &  0.62000 &  0.69632 \\
30d\_RF\_TSR\_Max\_theta &    0.79964 &  0.64500 &  0.71344 \\
baseline             &    0.80028 &  0.63214 &  0.70566 \\
BNCfreqinv           &    0.78302 &  0.58002 &  0.66522 \\
MeanFixCont          &    0.79590 &  0.60358 &  0.68618 \\
\bottomrule
\end{tabular}



In [8]:
test_results_conll = get_test_results(task='conll2003/')

In [9]:
test_results_conll 

Unnamed: 0,Precision,Recall,F1
15d_RF_NR_Max_alpha,0.93236,0.96276,0.94726
15d_RF_NR_Max_beta,0.91374,0.9637,0.938
15d_RF_NR_Max_theta,0.9235,0.96908,0.94574
15d_RF_TSR_Max_alpha,0.9091,0.96834,0.93778
15d_RF_TSR_Max_beta,0.91044,0.9631,0.936
15d_RF_TSR_Max_theta,0.90308,0.968,0.93438
baseline,0.94494,0.9685,0.95656
BNCfreqinv,0.95152,0.9781,0.96458
MeanFixCont,0.94332,0.96928,0.95608


In [10]:
print(test_results_conll.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  Precision &   Recall &       F1 \\
\midrule
15d\_RF\_NR\_Max\_alpha  &    0.93236 &  0.96276 &  0.94726 \\
15d\_RF\_NR\_Max\_beta   &    0.91374 &  0.96370 &  0.93800 \\
15d\_RF\_NR\_Max\_theta  &    0.92350 &  0.96908 &  0.94574 \\
15d\_RF\_TSR\_Max\_alpha &    0.90910 &  0.96834 &  0.93778 \\
15d\_RF\_TSR\_Max\_beta  &    0.91044 &  0.96310 &  0.93600 \\
15d\_RF\_TSR\_Max\_theta &    0.90308 &  0.96800 &  0.93438 \\
baseline             &    0.94494 &  0.96850 &  0.95656 \\
BNCfreqinv           &    0.95152 &  0.97810 &  0.96458 \\
MeanFixCont          &    0.94332 &  0.96928 &  0.95608 \\
\bottomrule
\end{tabular}



In [11]:
test_results_onto = get_test_results(task='ontonotes/')

In [12]:
bl = {'bl': {'Precision': 0.8890, 'Recall': 0.6446, 'F1': 0.7472}}

In [13]:
bl = pd.DataFrame.from_dict(bl, orient='index')

In [14]:
test_results_onto = test_results_onto.append(bl)

In [15]:
test_results_onto

Unnamed: 0,Precision,Recall,F1
15d_RF_NR_Max_alpha,0.91252,0.67356,0.77504
15d_RF_NR_Max_beta,0.9185,0.67002,0.77468
15d_RF_NR_Max_theta,0.91662,0.67104,0.77472
15d_RF_TSR_Max_alpha,0.90972,0.67172,0.77278
15d_RF_TSR_Max_beta,0.9096,0.67,0.77158
15d_RF_TSR_Max_theta,0.9057,0.67296,0.77202
30d_RF_NR_Max_alpha,0.91042,0.67676,0.77634
30d_RF_NR_Max_theta,0.92252,0.66684,0.774
BNCfreqinv,0.91556,0.67378,0.7761
MeanFixCont,0.9221,0.66588,0.77326


In [16]:
print(test_results_onto.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  Precision &   Recall &       F1 \\
\midrule
15d\_RF\_NR\_Max\_alpha  &    0.91252 &  0.67356 &  0.77504 \\
15d\_RF\_NR\_Max\_beta   &    0.91850 &  0.67002 &  0.77468 \\
15d\_RF\_NR\_Max\_theta  &    0.91662 &  0.67104 &  0.77472 \\
15d\_RF\_TSR\_Max\_alpha &    0.90972 &  0.67172 &  0.77278 \\
15d\_RF\_TSR\_Max\_beta  &    0.90960 &  0.67000 &  0.77158 \\
15d\_RF\_TSR\_Max\_theta &    0.90570 &  0.67296 &  0.77202 \\
30d\_RF\_NR\_Max\_alpha  &    0.91042 &  0.67676 &  0.77634 \\
30d\_RF\_NR\_Max\_theta  &    0.92252 &  0.66684 &  0.77400 \\
BNCfreqinv           &    0.91556 &  0.67378 &  0.77610 \\
MeanFixCont          &    0.92210 &  0.66588 &  0.77326 \\
bl                   &    0.88900 &  0.64460 &  0.74720 \\
\bottomrule
\end{tabular}



In [17]:
def extract_best_scores(task:str, subdir:str='./results_attention'):
    files = [subdir + task + f for f in os.listdir(os.path.join(subdir + task)) if f.endswith('.txt')]
    all_results = defaultdict(dict)
    for file in files:
        with open(file, 'r', encoding='utf-8') as f:
            file = file.lstrip(subdir + task + 'summary.tuneall.').rstrip('.txt')
            results = [line.strip().split() for line in islice(f, 1, None)]
            f_scores = filter(lambda el:el[2] == 'sent_f', results)
            f_scores = sorted(f_scores, key = lambda el:el[3], reverse=True)
            best_hypers = f_scores[0][0]
            best_scores = list(filter(lambda el:el[0] == best_hypers, results))
            if task == '/CoNLL2003/':
                best_hypers = best_hypers.lstrip('./exp/dev-conll2003-').rstrip('/output.txt')
            elif task == '/Ontonotes/':
                best_hypers = best_hypers.lstrip('./exp/dev-ontonotes_sent-').rstrip('/output.txt')
            elif task == '/SemEval2010/':
                best_hypers = best_hypers.lstrip('./exp/dev-semeval2010_sent-').rstrip('/output.txt')
            elif task == '/SemEval2007/':
                best_hypers = best_hypers.lstrip('./exp/dev-semeval2007_sent-').rstrip('/output.txt')
            elif task == '/Wikipedia/':
                best_hypers = best_hypers.lstrip('./exp/dev-wiki-').rstrip('/output.txt')  
            all_results[file][best_hypers] = {score[2]:score[3] for score in best_scores}
    with open(subdir + task + 'results.json', 'w') as res:
        json.dump(all_results, res)
    return all_results

In [18]:
extract_best_scores(task='/SemEval2007/')

defaultdict(dict,
            {'15d_RF_NR_Max_alpha': {'15d_RF_NR_Max_alpha-32-0.01-0.4-1.0-42-1': {'sent_p': '0.5748031496062992',
               'sent_r': '0.9605263157894737',
               'sent_f': '0.7192118226600984'}},
             '15d_RF_NR_Max_beta': {'15d_RF_NR_Max_beta-32-0.01-1.0-1.0-42-0.5': {'sent_p': '0.573076923076923',
               'sent_r': '0.9802631578947368',
               'sent_f': '0.7233009708737863'}},
             '15d_RF_NR_Max_theta': {'15d_RF_NR_Max_theta-32-0.1-0.2-1.0-42-decreasing': {'sent_p': '0.5588235294117647',
               'sent_r': '1.0',
               'sent_f': '0.7169811320754718'}},
             '15d_RF_TSR_Max_alpha': {'15d_RF_TSR_Max_alpha-32-0.1-0.2-1.0-42-1': {'sent_p': '0.5676691729323309',
               'sent_r': '0.993421052631579',
               'sent_f': '0.722488038277512'}},
             '15d_RF_TSR_Max_beta': {'15d_RF_TSR_Max_beta-32-0.01-0.8-1.0-42-decreasing': {'sent_p': '0.5681818181818182',
               'sent_r': '0.

In [19]:
extract_best_scores(task='/CoNLL2003/')

defaultdict(dict,
            {'15d_RF_NR_Max_alpha': {'15d_RF_NR_Max_alpha-32-0.1-0.2-1.0-42-1': {'sent_p': '0.9561933534743202',
               'sent_r': '0.9723502304147466',
               'sent_f': '0.9642041127189642'}},
             '15d_RF_NR_Max_beta': {'15d_RF_NR_Max_beta-32-0.1-0.4-1.0-42-1': {'sent_p': '0.9597570235383447',
               'sent_r': '0.9708141321044547',
               'sent_f': '0.965253913707522'}},
             '15d_RF_NR_Max_theta': {'15d_RF_NR_Max_theta-32-0.1-0.4-1.0-42-1': {'sent_p': '0.9571320182094082',
               'sent_r': '0.9688940092165899',
               'sent_f': '0.9629770992366411'}},
             '15d_RF_TSR_Max_alpha': {'15d_RF_TSR_Max_alpha-32-0.1-1.0-1.0-42-decreasing': {'sent_p': '0.9431226765799257',
               'sent_r': '0.9742703533026114',
               'sent_f': '0.9584435209671326'}},
             '15d_RF_TSR_Max_beta': {'15d_RF_TSR_Max_beta-32-0.1-0.6-1.0-42-0.25': {'sent_p': '0.9459157030958597',
               'sent_r

In [20]:
extract_best_scores(task='/Ontonotes/')

defaultdict(dict,
            {'15d_RF_NR_Max_alpha': {'15d_RF_NR_Max_alpha-32-0.1-0.6-1.0-42-decreasing': {'sent_p': '0.8934934159566228',
               'sent_r': '0.6783299029697147',
               'sent_f': '0.7711850242353334'}},
             '15d_RF_NR_Max_theta': {'15d_RF_NR_Max_theta-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.8939393939393939',
               'sent_r': '0.6765657159658924',
               'sent_f': '0.7702092050209205'}},
             '15d_RF_TSR_Max_alpha': {'15d_RF_TSR_Max_alpha-32-0.1-0.2-1.0-42-0.125': {'sent_p': '0.9024096385542169',
               'sent_r': '0.6606880329314907',
               'sent_f': '0.7628585978611442'}},
             '15d_RF_TSR_Max_theta': {'15d_RF_TSR_Max_theta-32-0.1-0.2-1.0-42-0.125': {'sent_p': '0.8943025540275049',
               'sent_r': '0.669214936783299',
               'sent_f': '0.765556676757484'}},
             'bl': {'bl-32-0.1-0.2-1.0-42-0.5': {'sent_p': '0.896578843885175',
               'sent_r': '0.6703910614

In [21]:
extract_best_scores(task='/SemEval2010/')

defaultdict(dict,
            {'5d_RF_NR_Max_alpha': {'5d_RF_NR_Max_alpha-32-0.1-0.4-1.0-42-decreasing': {'sent_p': '0.7363636363636363',
               'sent_r': '0.6',
               'sent_f': '0.6612244897959183'}},
             '5d_RF_NR_Max_beta': {'5d_RF_NR_Max_beta-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.7772277227722773',
               'sent_r': '0.5814814814814815',
               'sent_f': '0.6652542372881356'}},
             '5d_RF_NR_Max_theta': {'5d_RF_NR_Max_theta-32-0.1-0.6-1.0-42-0.125': {'sent_p': '0.7083333333333334',
               'sent_r': '0.6296296296296297',
               'sent_f': '0.6666666666666667'}},
             '5d_RF_TSR_Max_alpha': {'5d_RF_TSR_Max_alpha-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.7581395348837209',
               'sent_r': '0.6037037037037037',
               'sent_f': '0.6721649484536082'}},
             '5d_RF_TSR_Max_beta': {'5d_RF_TSR_Max_beta-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.7379912663755459',
               'se

In [22]:
extract_best_scores(task='/Wikipedia/')

defaultdict(dict,
            {'15d_RF_NR_Max_alpha': {'15d_RF_NR_Max_alpha-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.7391304347826086',
               'sent_r': '0.5483870967741935',
               'sent_f': '0.6296296296296297'}},
             '15d_RF_NR_Max_beta': {'15d_RF_NR_Max_beta-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.8125',
               'sent_r': '0.41935483870967744',
               'sent_f': '0.5531914893617021'}},
             '15d_RF_NR_Max_theta': {'15d_RF_NR_Max_theta-32-0.1-1.0-1.0-42-decreasing': {'sent_p': '0.7142857142857143',
               'sent_r': '0.4838709677419355',
               'sent_f': '0.5769230769230769'}},
             '15d_RF_TSR_Max_alpha': {'15d_RF_TSR_Max_alpha-32-0.1-0.2-1.0-42-decreasing': {'sent_p': '0.7272727272727273',
               'sent_r': '0.5161290322580645',
               'sent_f': '0.6037735849056604'}},
             '15d_RF_TSR_Max_beta': {'15d_RF_TSR_Max_beta-32-0.1-0.6-1.0-42-decreasing': {'sent_p': '0.9090909090909091',