In [35]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections
from collections import Counter


In [57]:
def collect_metrics(root_dir, depth=2):
    """Collect regression XAI metrics from ``xai_metrics.json`` files.

    Walks ``root_dir`` and reads every ``xai_metrics.json`` found in a
    directory whose path, relative to ``root_dir``, contains fewer than
    ``depth`` path separators. The name of the containing directory is split
    on ``_``: the first token selects the model ("roberta", "camem" or
    "xlm") and the remaining tokens, joined with spaces, form the dataset
    name. Directories whose first token is none of those three are ignored.

    Args:
        root_dir: Directory to search.
        depth: Exclusive upper bound on the number of path separators in a
            directory's path relative to ``root_dir``. Defaults to 2.

    Returns:
        A tuple ``(dict_res_roberta, dict_res_camem, dict_res_xlm)``. Each
        dict maps a dataset name to a dict with the keys "reg log odd",
        "reg anti log odd", "reg comp" and "reg suff".

    Raises:
        KeyError: If a metrics file lacks one of the four expected keys.
    """
    metrics_filename = "xai_metrics.json"
    # JSON key -> label used in the result dicts (insertion order is the
    # column order of the tables built from them).
    metric_labels = {
        "reg_log_odd": "reg log odd",
        "reg_anti_log_odd": "reg anti log odd",
        "reg_comp": "reg comp",
        "reg_suff": "reg suff",
    }
    results = {"roberta": dict(), "camem": dict(), "xlm": dict()}

    for subdir, dirs, files in os.walk(root_dir):
        level = subdir[len(root_dir):].count(os.sep)
        if level >= depth - 1:
            # Every child would fail the depth test below, so stop os.walk
            # from descending any further along this branch.
            dirs[:] = []
        if level >= depth or metrics_filename not in files:
            continue

        with open(os.path.join(subdir, metrics_filename)) as f:
            raw = json.load(f)
        metrics = {label: raw[key] for key, label in metric_labels.items()}

        model_type, *dataset_tokens = subdir.split(os.sep)[-1].split("_")
        if model_type in results:
            results[model_type][" ".join(dataset_tokens)] = metrics

    return results["roberta"], results["camem"], results["xlm"]

def _average_dicts(dicts):
    avg_dict = collections.defaultdict(lambda: 0)
    
    for i, d in enumerate(dicts):
        for k, v in d.items():
            avg_dict[k] += v
        
    for k, v in avg_dict.items():
        avg_dict[k] /= (i+1)
    
    return avg_dict

def average_over_users(res):
    """Average per-dataset metric dicts within each language and group.

    Keys of ``res`` containing the substring "nf" are skipped. Of the rest, a
    key containing "en" is treated as English; otherwise a key containing
    "it" is treated as Italian. Within a language, entries are grouped by the
    first two space-separated tokens of the key and each group is averaged
    with ``_average_dicts``.

    Args:
        res: Dict mapping a dataset name to a dict of metric values.

    Returns:
        A tuple ``(result_en, result_it)``. Each dict maps
        ``"avg en <group>"`` / ``"avg it <group>"`` to the averaged metrics.
    """
    grouped = {
        "en": collections.defaultdict(list),
        "it": collections.defaultdict(list),
    }

    for name, metrics in res.items():
        if "nf" in name:
            continue
        # "en" is tested first, so a name containing both substrings counts
        # as English.
        if "en" in name:
            language = "en"
        elif "it" in name:
            language = "it"
        else:
            continue
        group = " ".join(name.split(" ")[:2])
        grouped[language][group].append(metrics)

    result_en = {
        "avg en " + group: _average_dicts(members)
        for group, members in grouped["en"].items()
    }
    result_it = {
        "avg it " + group: _average_dicts(members)
        for group, members in grouped["it"].items()
    }

    return result_en, result_it

def process_to_present(res, column_width="1.1cm", plot_transpose=True):
    df_res = pd.DataFrame.from_dict(res).T
    # df_res = df_res.rename(columns=mapping_columns_names)
    if not plot_transpose:
        df_res = df_res.T
    s = df_res.style
    s.format(na_rep='MISS', precision=2)
    print(s.to_latex(column_format='l'+('p{'+f'{column_width}'+'}')*len(df_res.columns)))

In [55]:
# Load the per-dataset metrics of each model type (roberta, camem, xlm) from the output directory.
dict_res_roberta, dict_res_camem, dict_res_xlm = collect_metrics("../output/complexity")

## ROBERTA

In [58]:
# Keep only the English averages; the Italian averages (second return value) are discarded.
avg, _ = average_over_users(dict_res_roberta)
# Per-dataset rows first, averaged rows appended after them.
roberta_results = {**dict_res_roberta, ** avg}

In [59]:
# Transpose so that each dataset is a row and each metric a column.
pd.DataFrame(roberta_results).T

Unnamed: 0,reg log odd,reg anti log odd,reg comp,reg suff
np f en6,0.281655,0.459576,0.53387,1.205314
p f en98,0.197513,0.50781,0.266848,1.476693
p f en6,0.187661,0.555138,0.287304,0.680507
p f en83,0.181412,0.571277,0.228058,1.260887
np f en57,0.071694,1.314689,0.541307,1.379453
p f en49,0.129023,0.524129,0.265924,0.946761
p f en57,0.166801,0.418728,0.211223,0.790394
np f en83,0.239904,0.644688,0.950595,2.032111
np f en49,0.067476,0.913189,0.576723,0.969609
np f en98,0.086991,0.929699,0.545342,1.393393


In [60]:
# Print the RoBERTa results, including the averaged rows, as a LaTeX tabular.
process_to_present(roberta_results)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & reg log odd & reg anti log odd & reg comp & reg suff \\
np f en6 & 0.28 & 0.46 & 0.53 & 1.21 \\
p f en98 & 0.20 & 0.51 & 0.27 & 1.48 \\
p f en6 & 0.19 & 0.56 & 0.29 & 0.68 \\
p f en83 & 0.18 & 0.57 & 0.23 & 1.26 \\
np f en57 & 0.07 & 1.31 & 0.54 & 1.38 \\
p f en49 & 0.13 & 0.52 & 0.27 & 0.95 \\
p f en57 & 0.17 & 0.42 & 0.21 & 0.79 \\
np f en83 & 0.24 & 0.64 & 0.95 & 2.03 \\
np f en49 & 0.07 & 0.91 & 0.58 & 0.97 \\
np f en98 & 0.09 & 0.93 & 0.55 & 1.39 \\
np nf en & 0.10 & 1.33 & 0.69 & 1.34 \\
p nf en & 0.12 & 0.46 & 0.20 & 0.97 \\
avg en np f & 0.15 & 0.85 & 0.63 & 1.40 \\
avg en p f & 0.17 & 0.52 & 0.25 & 1.03 \\
\end{tabular}



## CAMEM

In [66]:
# Keep only the Italian averages; the English averages (first return value) are discarded.
_, avg = average_over_users(dict_res_camem)
# Per-dataset rows first, averaged rows appended after them.
camem_results = {**dict_res_camem, ** avg}

In [67]:
# Transpose so that each dataset is a row and each metric a column.
pd.DataFrame(camem_results).T

Unnamed: 0,reg log odd,reg anti log odd,reg comp,reg suff
np nf it,0.396254,0.437657,0.807162,0.92877
np f it1,0.495323,1.177663,0.737699,2.347744
np f it26,0.555848,1.330912,0.873797,2.16077
np f it44,0.72208,0.44736,0.574201,1.043763
np f it43,0.322706,1.034166,0.762247,1.343881
p f it38,0.133412,0.632523,0.356879,1.045997
p f it43,0.219497,0.584296,0.363282,1.014981
p f it44,0.301492,0.746183,0.425991,0.890883
p f it26,0.266799,0.527151,0.477854,1.415736
np f it38,0.249208,1.105554,0.820003,0.726716


In [69]:
# Print the camem results, including the averaged rows, as a LaTeX tabular.
process_to_present(camem_results)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & reg log odd & reg anti log odd & reg comp & reg suff \\
np nf it & 0.40 & 0.44 & 0.81 & 0.93 \\
np f it1 & 0.50 & 1.18 & 0.74 & 2.35 \\
np f it26 & 0.56 & 1.33 & 0.87 & 2.16 \\
np f it44 & 0.72 & 0.45 & 0.57 & 1.04 \\
np f it43 & 0.32 & 1.03 & 0.76 & 1.34 \\
p f it38 & 0.13 & 0.63 & 0.36 & 1.05 \\
p f it43 & 0.22 & 0.58 & 0.36 & 1.01 \\
p f it44 & 0.30 & 0.75 & 0.43 & 0.89 \\
p f it26 & 0.27 & 0.53 & 0.48 & 1.42 \\
np f it38 & 0.25 & 1.11 & 0.82 & 0.73 \\
p f it1 & 0.29 & 0.74 & 0.43 & 0.88 \\
p nf it & 0.17 & 0.56 & 0.32 & 0.97 \\
avg it np f & 0.47 & 1.02 & 0.75 & 1.52 \\
avg it p f & 0.24 & 0.65 & 0.41 & 1.05 \\
\end{tabular}



## XLM

In [72]:
# The xlm results use both return values: English and Italian averages.
avg_en, avg_it = average_over_users(dict_res_xlm)
# Per-dataset rows first, then the English averages, then the Italian averages.
xlm_results = {**dict_res_xlm, **avg_en, **avg_it}

In [73]:
# Transpose so that each dataset is a row and each metric a column.
pd.DataFrame(xlm_results).T

Unnamed: 0,reg log odd,reg anti log odd,reg comp,reg suff
p nf en,0.245842,0.533315,0.29889,1.269129
np f en49,0.156246,0.554095,0.612012,1.042012
np f it1,0.251412,0.92997,0.697696,1.39103
p f en83,0.220898,0.449277,0.307622,0.953547
p f en49,0.255282,1.067526,0.26271,2.103609
np f en83,0.161423,0.959112,0.553996,1.045644
p f en98,0.192654,0.334844,0.299893,0.90425
np f en98,0.274397,0.384863,0.690804,1.597231
np f en6,0.201601,0.812181,0.741617,1.671646
p f it38,0.249547,0.413623,0.256269,1.624376


In [20]:
# Present xlm_results (per-dataset rows plus the English and Italian averages)
# rather than the raw dict_res_xlm, so this table includes averaged rows just
# like the RoBERTa and camem tables do.
process_to_present(xlm_results)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & reg log odd & reg anti log odd & reg comp & reg suff \\
p nf en & 0.25 & 0.53 & 0.30 & 1.27 \\
np f en49 & 0.16 & 0.55 & 0.61 & 1.04 \\
np f it1 & 0.25 & 0.93 & 0.70 & 1.39 \\
p f en83 & 0.22 & 0.45 & 0.31 & 0.95 \\
p f en49 & 0.26 & 1.07 & 0.26 & 2.10 \\
np f en83 & 0.16 & 0.96 & 0.55 & 1.05 \\
p f en98 & 0.19 & 0.33 & 0.30 & 0.90 \\
np f en98 & 0.27 & 0.38 & 0.69 & 1.60 \\
np f en6 & 0.20 & 0.81 & 0.74 & 1.67 \\
p f it38 & 0.25 & 0.41 & 0.26 & 1.62 \\
np f it38 & 0.32 & 0.42 & 0.64 & 1.19 \\
np f it26 & 0.33 & 0.82 & 0.58 & 2.24 \\
np f it44 & 0.27 & 1.24 & 0.65 & 2.12 \\
np f it43 & 0.21 & 2.67 & 0.73 & 2.73 \\
p f it1 & 0.36 & 0.46 & 0.44 & 2.50 \\
p f it26 & 0.38 & 0.39 & 0.32 & 1.22 \\
p f it44 & 0.20 & 0.31 & 0.27 & 1.24 \\
p f it43 & 0.16 & 0.20 & 0.19 & 0.98 \\
p nf it & 0.18 & 0.28 & 0.22 & 1.18 \\
np nf en & 0.07 & 0.96 & 0.75 & 0.82 \\
p f en57 & 0.22 & 0.48 & 0.24 & 1.10 \\
p f en6 & 0.26 & 0.35 & 0.27 & 0.95 \\
np f en