In [1]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections

In [2]:
def collect_losses(root_dir, depth=2):
    """Collect test metrics from the ``test_results.json`` files under ``root_dir``.

    Walks ``root_dir`` and reads every ``test_results.json`` that sits fewer than
    ``depth`` directory levels below it (``root_dir`` itself is level 0). The name
    of the directory holding the file is split on ``"_"``: the first token selects
    the model (``"roberta"``, ``"camem"`` or ``"xlm"``) and the remaining tokens,
    joined with single spaces, form the dataset name. Directories whose first
    token is anything else are skipped without reading their results file.

    Args:
        root_dir: Directory to search. A trailing path separator does not change
            which directory levels are inspected.
        depth: Exclusive upper bound on the inspected directory level. The default
            of 2 covers ``root_dir`` and its immediate sub-directories.

    Returns:
        Tuple ``(dict_res_roberta, dict_res_camem, dict_res_xlm)``; each element
        maps a dataset name to ``{"loss": test_loss, "accuracy": test_accuracy}``.

    Raises:
        KeyError: If a results file that is read lacks ``"test_loss"`` or
            ``"test_accuracy"``.
    """
    results_by_model = {"roberta": dict(), "camem": dict(), "xlm": dict()}

    for subdir, dirs, files in os.walk(root_dir):
        # Measure the level relative to root_dir rather than counting separators in
        # a string slice, so that "root" and "root/" are treated identically.
        rel_path = os.path.relpath(subdir, root_dir)
        level = 0 if rel_path == os.curdir else rel_path.count(os.sep) + 1

        if level + 1 >= depth:
            # Children would be too deep to be used: stop os.walk from descending.
            dirs[:] = []
        if level >= depth or "test_results.json" not in files:
            continue

        model_type, *name_parts = os.path.basename(subdir).split("_")
        if model_type not in results_by_model:
            continue

        with open(os.path.join(subdir, "test_results.json")) as f:
            d = json.load(f)

        dataset_name = " ".join(name_parts)
        results_by_model[model_type][dataset_name] = {
            "loss": d["test_loss"],
            "accuracy": d["test_accuracy"],
        }

    return (
        results_by_model["roberta"],
        results_by_model["camem"],
        results_by_model["xlm"],
    )

def _average_dicts(dicts):
    avg_dict = collections.defaultdict(lambda: 0)
    
    for i, d in enumerate(dicts):
        for k, v in d.items():
            avg_dict[k] += v
        
    for k, v in avg_dict.items():
        avg_dict[k] /= (i+1)
    
    return avg_dict

def average_over_users(res):
    """Average metrics over users, separately for English and Italian entries.

    Keys of ``res`` are matched by plain substring: any key containing ``"nf"``
    is ignored; otherwise a key containing ``"en"`` is treated as English, and
    failing that a key containing ``"it"`` is treated as Italian. Entries are
    grouped by the first two space-separated tokens of their key and each group
    is averaged with ``_average_dicts``.

    Args:
        res: Mapping from dataset name to a dictionary of metrics.

    Returns:
        Tuple ``(result_en, result_it)``. Keys are ``"avg en <group>"`` and
        ``"avg it <group>"`` respectively; values are the averaged metrics.
    """
    grouped = {
        "en": collections.defaultdict(list),
        "it": collections.defaultdict(list),
    }

    for name, metrics in res.items():
        if "nf" in name:
            continue
        if "en" in name:
            language = "en"
        elif "it" in name:
            language = "it"
        else:
            continue
        group = " ".join(name.split(" ")[0:2])
        grouped[language][group].append(metrics)

    result_en = {
        "avg en " + group: _average_dicts(runs)
        for group, runs in grouped["en"].items()
    }
    result_it = {
        "avg it " + group: _average_dicts(runs)
        for group, runs in grouped["it"].items()
    }

    return result_en, result_it


def process_to_present(res, column_width="1.1cm", plot_transpose=True):
    """Print ``res`` as a LaTeX ``tabular`` built through a pandas Styler.

    Args:
        res: Mapping passed to ``pd.DataFrame.from_dict``; with the default
            ``plot_transpose`` its keys become the table rows.
        column_width: Width used for every ``p{...}`` column after the leading
            ``l`` column.
        plot_transpose: When true the frame built from ``res`` is transposed
            once; when false it is transposed back again.

    Returns:
        None. The LaTeX source is written to standard output; missing values are
        rendered as ``MISS`` and numbers use a precision of 2.
    """
    table = pd.DataFrame.from_dict(res).transpose()
    if not plot_transpose:
        table = table.transpose()

    # One left-aligned label column followed by a fixed-width column per field.
    column_format = 'l' + f'p{{{column_width}}}' * len(table.columns)

    styler = table.style.format(na_rep='MISS', precision=2)
    print(styler.to_latex(column_format=column_format))

In [3]:
# Collect the test metrics of every run, one dictionary per model type.
(
    dict_res_roberta,
    dict_res_camem,
    dict_res_xlm,
) = collect_losses("../output/complexity_binary")

## ROBERTA

In [4]:
# average_over_users returns (English averages, Italian averages); only the
# English ones are appended to the RoBERTa results.
avg, _ = average_over_users(dict_res_roberta)
roberta_results = dict(dict_res_roberta)
roberta_results.update(avg)

In [5]:
# One row per dataset, one column per metric.
pd.DataFrame(roberta_results).transpose()

Unnamed: 0,loss,accuracy
np f en6,0.823219,0.831325
p f en98,0.716083,0.883534
p f en6,0.666473,0.871486
np nf,0.662255,0.863454
p f en83,0.643695,0.855422
np f en57,0.793873,0.84739
p f en49,0.841761,0.84739
p nf,0.811713,0.859438
p f en57,0.729181,0.843373
np f en83,0.782948,0.831325


In [6]:
# Print the RoBERTa results as a LaTeX table.
process_to_present(res=roberta_results)

\begin{tabular}{lp{1.1cm}p{1.1cm}}
 & loss & accuracy \\
np f en6 & 0.82 & 0.83 \\
p f en98 & 0.72 & 0.88 \\
p f en6 & 0.67 & 0.87 \\
np nf & 0.66 & 0.86 \\
p f en83 & 0.64 & 0.86 \\
np f en57 & 0.79 & 0.85 \\
p f en49 & 0.84 & 0.85 \\
p nf & 0.81 & 0.86 \\
p f en57 & 0.73 & 0.84 \\
np f en83 & 0.78 & 0.83 \\
np f en49 & 0.70 & 0.84 \\
np f en98 & 1.08 & 0.83 \\
avg en np f & 0.83 & 0.84 \\
avg en p f & 0.72 & 0.86 \\
\end{tabular}



## CAMEM

In [7]:
# average_over_users returns (English averages, Italian averages); only the
# Italian ones are appended to the CAMEM results.
_, avg = average_over_users(dict_res_camem)
camem_results = dict(dict_res_camem)
camem_results.update(avg)

In [8]:
# One row per dataset, one column per metric.
pd.DataFrame(camem_results).transpose()

Unnamed: 0,loss,accuracy
np f it1,1.051054,0.791304
p nf,0.790968,0.852174
np f it26,1.026793,0.773913
np f it44,1.166016,0.756522
p f it38,0.565015,0.86087
np nf,1.06734,0.795652
p f it43,0.699386,0.873913
p f it44,0.663063,0.86087
p f it26,0.653166,0.856522
np f it38,1.107211,0.786957


In [12]:
# Print the CAMEM results as a LaTeX table.
process_to_present(res=camem_results)

\begin{tabular}{lp{1.1cm}p{1.1cm}}
 & loss & accuracy \\
np f it1 & 1.05 & 0.79 \\
p nf & 0.79 & 0.85 \\
np f it26 & 1.03 & 0.77 \\
np f it44 & 1.17 & 0.76 \\
p f it38 & 0.57 & 0.86 \\
np nf & 1.07 & 0.80 \\
p f it43 & 0.70 & 0.87 \\
p f it44 & 0.66 & 0.86 \\
p f it26 & 0.65 & 0.86 \\
np f it38 & 1.11 & 0.79 \\
p f it1 & 0.54 & 0.89 \\
avg it np f & 1.09 & 0.78 \\
avg it p f & 0.62 & 0.87 \\
\end{tabular}



## XLM

In [9]:
# Both the English and the Italian per-user averages are appended to the XLM results.
avg_en, avg_it = average_over_users(dict_res_xlm)
xlm_results = dict(dict_res_xlm)
xlm_results.update(avg_en)
xlm_results.update(avg_it)

In [10]:
# One row per dataset, one column per metric.
pd.DataFrame(xlm_results).transpose()

Unnamed: 0,loss,accuracy
np f en49,0.782614,0.863454
np f it1,0.978134,0.826087
p f en83,0.491552,0.763052
p f en49,0.520308,0.843373
np f en83,0.696667,0.855422
en p nf,0.423588,0.86747
p f en98,0.749637,0.835341
it p nf,0.552221,0.826087
np f en98,0.792539,0.855422
np f en6,0.808436,0.859438


In [11]:
# Print the XLM results as a LaTeX table.
process_to_present(res=xlm_results)

\begin{tabular}{lp{1.1cm}p{1.1cm}}
 & loss & accuracy \\
np f en49 & 0.78 & 0.86 \\
np f it1 & 0.98 & 0.83 \\
p f en83 & 0.49 & 0.76 \\
p f en49 & 0.52 & 0.84 \\
np f en83 & 0.70 & 0.86 \\
en p nf & 0.42 & 0.87 \\
p f en98 & 0.75 & 0.84 \\
it p nf & 0.55 & 0.83 \\
np f en98 & 0.79 & 0.86 \\
np f en6 & 0.81 & 0.86 \\
p f it38 & 0.58 & 0.84 \\
np f it38 & 0.93 & 0.81 \\
np f it26 & 1.04 & 0.80 \\
np f it44 & 0.92 & 0.80 \\
np f it43 & 0.82 & 0.82 \\
p f it1 & 0.49 & 0.88 \\
p f it26 & 0.67 & 0.86 \\
p f it44 & 0.64 & 0.83 \\
p f it43 & 0.58 & 0.85 \\
en np nf & 0.68 & 0.86 \\
p f en57 & 0.68 & 0.85 \\
p f en6 & 0.42 & 0.87 \\
np f en57 & 0.72 & 0.85 \\
it np nf & 1.04 & 0.80 \\
avg en np f & 0.76 & 0.86 \\
avg en p f & 0.57 & 0.83 \\
avg it np f & 0.94 & 0.81 \\
avg it p f & 0.59 & 0.85 \\
\end{tabular}

