In [1]:
import json
import os
import re
import pandas as pd
import numpy as np

from collections import defaultdict
from itertools import islice

In [15]:
def get_test_results(subdir:str='./test_results/', task:str='ontonotes/', score_idx:int=3, n_metrics:int=3):
    files = list(map(lambda file: subdir+task+file, os.listdir(subdir+task)))
    results = defaultdict(list)
    batchsize = str(32)
    year = 'yyyy'
    for i, file in enumerate(files):
        if not file.endswith('.ipynb_checkpoints'):
            with open(file, 'r', encoding='utf-8') as f:
                f = f.readlines()
                for j in range(n_metrics):
                    scores = []
                    for k, line in enumerate(islice(f, 1+j, None, n_metrics)):
                        line = line.split()
                        scores.append(round(float(line[score_idx]), 3))
                    if task=='ontonotes/':
                        attention = line[0][line[0].index('sent')+len('sent'):line[0].index(batchsize)].strip('-').strip()
                    elif task=='conll2003/':
                        attention = line[0][line[0].index('2003')+len(year):line[0].index(batchsize)].strip('-').strip()
                    elif task=='semeval2010/':
                         attention = line[0][line[0].index('2010')+len(year):line[0].index(batchsize)].strip('-').strip()
                    elif task=='semeval2007/':
                         attention = line[0][line[0].index('2007')+len(year):line[0].index(batchsize)].strip('-').strip()
                    attn = 'baseline' if attention == 'bl' else attention
                    results[attn].append(np.mean(scores))
    df =  pd.DataFrame.from_dict(results, orient='index', columns=['Precision', 'Recall', 'F1'])
    return df

In [16]:
test_results_semeval2007 = get_test_results(task='semeval2007/')

In [17]:
test_results_semeval2010 = get_test_results(task='semeval2010/')

In [18]:
print(test_results_semeval2010.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  Precision &  Recall &      F1 \\
\midrule
15d\_RF\_NR\_Max\_alpha  &     0.7966 &  0.6392 &  0.7092 \\
15d\_RF\_NR\_Max\_beta   &     0.7860 &  0.6444 &  0.7078 \\
15d\_RF\_NR\_Max\_theta  &     0.7990 &  0.6082 &  0.6904 \\
15d\_RF\_TSR\_Max\_alpha &     0.7904 &  0.6456 &  0.7104 \\
15d\_RF\_TSR\_Max\_beta  &     0.7916 &  0.6378 &  0.7062 \\
15d\_RF\_TSR\_Max\_theta &     0.7914 &  0.6444 &  0.7100 \\
baseline             &     0.8002 &  0.6322 &  0.7056 \\
BNCfreqinv           &     0.7830 &  0.5802 &  0.6652 \\
MeanFixCont          &     0.7958 &  0.6036 &  0.6862 \\
\bottomrule
\end{tabular}



In [19]:
test_results_conll = get_test_results(task='conll2003/')

In [20]:
print(test_results_conll.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  Precision &  Recall &      F1 \\
\midrule
15d\_RF\_NR\_Max\_alpha  &     0.9322 &  0.9628 &  0.9472 \\
15d\_RF\_NR\_Max\_beta   &     0.9138 &  0.9636 &  0.9382 \\
15d\_RF\_NR\_Max\_theta  &     0.9236 &  0.9692 &  0.9458 \\
15d\_RF\_TSR\_Max\_alpha &     0.9092 &  0.9684 &  0.9378 \\
15d\_RF\_TSR\_Max\_beta  &     0.9104 &  0.9632 &  0.9358 \\
15d\_RF\_TSR\_Max\_theta &     0.9030 &  0.9678 &  0.9344 \\
baseline             &     0.9448 &  0.9684 &  0.9566 \\
BNCfreqinv           &     0.9514 &  0.9782 &  0.9646 \\
MeanFixCont          &     0.9432 &  0.9694 &  0.9562 \\
\bottomrule
\end{tabular}



In [33]:
test_results_onto = get_test_results(task='ontonotes/')

In [34]:
bl = {'bl': {'Precision': 0.8890, 'Recall': 0.6446, 'F1': 0.7472}}

In [35]:
bl = pd.DataFrame.from_dict(bl, orient='index')

In [42]:
test_results_onto = test_results_onto.append(bl)

In [44]:
test_results_onto

Unnamed: 0,Precision,Recall,F1
15d_RF_NR_Max_alpha,0.9124,0.6736,0.7752
15d_RF_NR_Max_beta,0.9184,0.67,0.7746
15d_RF_NR_Max_theta,0.9166,0.6708,0.7748
15d_RF_TSR_Max_alpha,0.9096,0.672,0.773
15d_RF_TSR_Max_beta,0.9096,0.6698,0.7716
15d_RF_TSR_Max_theta,0.9056,0.673,0.772
30d_RF_NR_Max_alpha,0.9104,0.677,0.7764
30d_RF_NR_Max_theta,0.9226,0.667,0.7738
BNCfreqinv,0.9156,0.674,0.7762
MeanFixCont,0.922,0.666,0.7734


In [43]:
print(test_results_onto.to_latex())

\begin{tabular}{lrrr}
\toprule
{} &  Precision &  Recall &      F1 \\
\midrule
15d\_RF\_NR\_Max\_alpha  &     0.9124 &  0.6736 &  0.7752 \\
15d\_RF\_NR\_Max\_beta   &     0.9184 &  0.6700 &  0.7746 \\
15d\_RF\_NR\_Max\_theta  &     0.9166 &  0.6708 &  0.7748 \\
15d\_RF\_TSR\_Max\_alpha &     0.9096 &  0.6720 &  0.7730 \\
15d\_RF\_TSR\_Max\_beta  &     0.9096 &  0.6698 &  0.7716 \\
15d\_RF\_TSR\_Max\_theta &     0.9056 &  0.6730 &  0.7720 \\
30d\_RF\_NR\_Max\_alpha  &     0.9104 &  0.6770 &  0.7764 \\
30d\_RF\_NR\_Max\_theta  &     0.9226 &  0.6670 &  0.7738 \\
BNCfreqinv           &     0.9156 &  0.6740 &  0.7762 \\
MeanFixCont          &     0.9220 &  0.6660 &  0.7734 \\
bl                   &     0.8890 &  0.6446 &  0.7472 \\
\bottomrule
\end{tabular}



In [15]:
def extract_best_scores(task:str, subdir:str='./results_attention'):
    files = [subdir + task + f for f in os.listdir(os.path.join(subdir + task)) if f.endswith('.txt')]
    all_results = defaultdict(dict)
    for file in files:
        with open(file, 'r', encoding='utf-8') as f:
            file = file.lstrip(subdir + task + 'summary.tuneall.').rstrip('.txt')
            results = [line.strip().split() for line in islice(f, 1, None)]
            f_scores = filter(lambda el:el[2] == 'sent_f', results)
            f_scores = sorted(f_scores, key = lambda el:el[3], reverse=True)
            best_hypers = f_scores[0][0]
            best_scores = list(filter(lambda el:el[0] == best_hypers, results))
            if task == '/CoNLL2003/':
                best_hypers = best_hypers.lstrip('./exp/dev-conll2003-').rstrip('/output.txt')
            elif task == '/Ontonotes/':
                best_hypers = best_hypers.lstrip('./exp/dev-ontonotes_sent-').rstrip('/output.txt')
            elif task == '/SemEval2010/':
                best_hypers = best_hypers.lstrip('./exp/dev-semeval2010_sent-').rstrip('/output.txt')
            elif task == '/SemEval2007/':
                best_hypers = best_hypers.lstrip('./exp/dev-semeval2007_sent-').rstrip('/output.txt')
            all_results[file][best_hypers] = {score[2]:score[3] for score in best_scores}
    with open(subdir + task + 'results.json', 'w') as res:
        json.dump(all_results, res)
    return all_results

In [16]:
extract_best_scores(task='/SemEval2007/')

defaultdict(dict,
            {'15d_RF_NR_Max_alpha': {'15d_RF_NR_Max_alpha-32-0.01-0.4-1.0-42-1': {'sent_p': '0.5748031496062992',
               'sent_r': '0.9605263157894737',
               'sent_f': '0.7192118226600984'}},
             '15d_RF_NR_Max_beta': {'15d_RF_NR_Max_beta-32-0.01-1.0-1.0-42-0.5': {'sent_p': '0.573076923076923',
               'sent_r': '0.9802631578947368',
               'sent_f': '0.7233009708737863'}},
             '15d_RF_NR_Max_theta': {'15d_RF_NR_Max_theta-32-0.1-0.2-1.0-42-decreasing': {'sent_p': '0.5588235294117647',
               'sent_r': '1.0',
               'sent_f': '0.7169811320754718'}},
             '15d_RF_TSR_Max_alpha': {'15d_RF_TSR_Max_alpha-32-0.1-0.2-1.0-42-1': {'sent_p': '0.5676691729323309',
               'sent_r': '0.993421052631579',
               'sent_f': '0.722488038277512'}},
             '15d_RF_TSR_Max_beta': {'15d_RF_TSR_Max_beta-32-0.01-0.8-1.0-42-decreasing': {'sent_p': '0.5681818181818182',
               'sent_r': '0.

In [12]:
extract_best_scores(task='/CoNLL2003/')

defaultdict(dict,
            {'15d_RF_NR_Max_alpha': {'15d_RF_NR_Max_alpha-32-0.1-0.2-1.0-42-1': {'sent_p': '0.9561933534743202',
               'sent_r': '0.9723502304147466',
               'sent_f': '0.9642041127189642'}},
             '15d_RF_NR_Max_beta': {'15d_RF_NR_Max_beta-32-0.1-0.4-1.0-42-1': {'sent_p': '0.9597570235383447',
               'sent_r': '0.9708141321044547',
               'sent_f': '0.965253913707522'}},
             '15d_RF_NR_Max_theta': {'15d_RF_NR_Max_theta-32-0.1-0.4-1.0-42-1': {'sent_p': '0.9571320182094082',
               'sent_r': '0.9688940092165899',
               'sent_f': '0.9629770992366411'}},
             '15d_RF_TSR_Max_alpha': {'15d_RF_TSR_Max_alpha-32-0.1-1.0-1.0-42-decreasing': {'sent_p': '0.9431226765799257',
               'sent_r': '0.9742703533026114',
               'sent_f': '0.9584435209671326'}},
             '15d_RF_TSR_Max_beta': {'15d_RF_TSR_Max_beta-32-0.1-0.6-1.0-42-0.25': {'sent_p': '0.9459157030958597',
               'sent_r

In [13]:
extract_best_scores(task='/Ontonotes/')

defaultdict(dict,
            {'15d_RF_NR_Max_alpha': {'15d_RF_NR_Max_alpha-32-0.1-0.6-1.0-42-decreasing': {'sent_p': '0.8934934159566228',
               'sent_r': '0.6783299029697147',
               'sent_f': '0.7711850242353334'}},
             '15d_RF_NR_Max_theta': {'15d_RF_NR_Max_theta-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.8939393939393939',
               'sent_r': '0.6765657159658924',
               'sent_f': '0.7702092050209205'}},
             '15d_RF_TSR_Max_alpha': {'15d_RF_TSR_Max_alpha-32-0.1-0.2-1.0-42-0.125': {'sent_p': '0.9024096385542169',
               'sent_r': '0.6606880329314907',
               'sent_f': '0.7628585978611442'}},
             '15d_RF_TSR_Max_theta': {'15d_RF_TSR_Max_theta-32-0.1-0.2-1.0-42-0.125': {'sent_p': '0.8943025540275049',
               'sent_r': '0.669214936783299',
               'sent_f': '0.765556676757484'}},
             'bl': {'bl-32-0.1-0.2-1.0-42-0.5': {'sent_p': '0.896578843885175',
               'sent_r': '0.6703910614

In [14]:
extract_best_scores(task='/SemEval2010/')

defaultdict(dict,
            {'5d_RF_NR_Max_alpha': {'5d_RF_NR_Max_alpha-32-0.1-0.4-1.0-42-decreasing': {'sent_p': '0.7363636363636363',
               'sent_r': '0.6',
               'sent_f': '0.6612244897959183'}},
             '5d_RF_NR_Max_beta': {'5d_RF_NR_Max_beta-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.7772277227722773',
               'sent_r': '0.5814814814814815',
               'sent_f': '0.6652542372881356'}},
             '5d_RF_NR_Max_theta': {'5d_RF_NR_Max_theta-32-0.1-0.6-1.0-42-0.125': {'sent_p': '0.7083333333333334',
               'sent_r': '0.6296296296296297',
               'sent_f': '0.6666666666666667'}},
             '5d_RF_TSR_Max_alpha': {'5d_RF_TSR_Max_alpha-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.7581395348837209',
               'sent_r': '0.6037037037037037',
               'sent_f': '0.6721649484536082'}},
             '5d_RF_TSR_Max_beta': {'5d_RF_TSR_Max_beta-32-0.1-0.8-1.0-42-decreasing': {'sent_p': '0.7379912663755459',
               'se