In [1]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections
from collections import Counter


In [2]:
def collect_metrics(root_dir):
    """Collect XAI metrics from ``xai_metrics.json`` files found under ``root_dir``.

    ``root_dir`` is walked with ``os.walk``; only directories fewer than two
    path separators below ``root_dir`` are inspected. For every such directory
    that contains an ``xai_metrics.json`` file, the ``log_odd``,
    ``anti_log_odd``, ``comp`` and ``suff`` entries are read from it.

    The directory name is split on ``"_"``: the first token selects the model
    bucket (``"roberta"``, ``"camem"`` or ``"xlm"``) and the remaining tokens,
    joined with spaces, form the dataset name. Directories whose first token is
    none of these three are ignored.

    Args:
        root_dir: Path (as a string) of the directory to scan.

    Returns:
        A tuple ``(dict_res_roberta, dict_res_camem, dict_res_xlm)``. Each dict
        maps a dataset name to
        ``{"log odd": ..., "anti log odd": ..., "comp": ..., "suff": ...}``.

    Raises:
        KeyError: If a metrics file lacks one of the four expected entries.
        json.JSONDecodeError: If a metrics file is not valid JSON.
    """
    depth = 2
    metrics_file = "xai_metrics.json"

    # (label used in the returned dicts, key used in the JSON file)
    metric_keys = (
        ("log odd", "log_odd"),
        ("anti log odd", "anti_log_odd"),
        ("comp", "comp"),
        ("suff", "suff"),
    )

    # One result dict per recognised directory-name prefix.
    results_by_model = {"roberta": dict(), "camem": dict(), "xlm": dict()}

    for subdir, dirs, files in os.walk(root_dir):
        # Skip directories nested too deeply below root_dir.
        if subdir[len(root_dir):].count(os.sep) >= depth:
            continue
        if metrics_file not in files:
            continue

        # os.path.join keeps the path portable; JSON files are UTF-8 by spec,
        # so do not rely on the platform's default encoding.
        with open(os.path.join(subdir, metrics_file), encoding="utf-8") as f:
            raw = json.load(f)
        metrics = {label: raw[key] for label, key in metric_keys}

        model_type, *name_parts = subdir.split(os.sep)[-1].split("_")
        if model_type in results_by_model:
            results_by_model[model_type][" ".join(name_parts)] = metrics

    return (
        results_by_model["roberta"],
        results_by_model["camem"],
        results_by_model["xlm"],
    )

def _average_dicts(dicts):
    avg_dict = collections.defaultdict(lambda: 0)
    
    for i, d in enumerate(dicts):
        for k, v in d.items():
            avg_dict[k] += v
        
    for k, v in avg_dict.items():
        avg_dict[k] /= (i+1)
    
    return avg_dict

def average_over_users(res):
    """Average the metric dicts in ``res`` per group, separately for "en" and "it".

    Entries whose name contains ``"nf"`` are skipped. A remaining entry goes to
    the English bucket when its name contains ``"en"``, otherwise to the
    Italian bucket when its name contains ``"it"``. Within a bucket, entries
    are grouped by the first two space-separated tokens of their name and each
    group is averaged with ``_average_dicts``.

    Args:
        res: Dict mapping an entry name to a dict of metric values.

    Returns:
        A tuple ``(result_en, result_it)``. Keys are ``"avg en <group>"`` and
        ``"avg it <group>"`` respectively; values are the averaged metrics.
    """
    grouped_en = collections.defaultdict(list)
    grouped_it = collections.defaultdict(list)

    for name, metrics in res.items():
        if "nf" in name:
            continue

        if "en" in name:
            target = grouped_en
        elif "it" in name:
            target = grouped_it
        else:
            continue

        group = " ".join(name.split(" ")[0:2])
        target[group].append(metrics)

    result_en = {
        "avg en " + group: _average_dicts(members)
        for group, members in grouped_en.items()
    }
    result_it = {
        "avg it " + group: _average_dicts(members)
        for group, members in grouped_it.items()
    }

    return result_en, result_it

def process_to_present(res, column_width="1.1cm", plot_transpose=True):
    """Print ``res`` as a LaTeX table.

    Args:
        res: Dict of dicts; with the default ``plot_transpose=True`` each outer
            key becomes a table row and each inner key a column.
        column_width: Width used for every ``p{...}`` data column.
        plot_transpose: When false, the table is transposed once more so outer
            keys become columns.

    Returns:
        None. The LaTeX source is written to standard output; values are
        formatted with two decimals and missing values are shown as ``MISS``.
    """
    table = pd.DataFrame.from_dict(res).T
    if not plot_transpose:
        table = table.T

    styler = table.style
    styler.format(na_rep='MISS', precision=2)

    # One left-aligned index column followed by a fixed-width column per metric.
    data_column = 'p{' + format(column_width) + '}'
    layout = 'l' + data_column * len(table.columns)

    print(styler.to_latex(column_format=layout))

In [3]:
# Load the per-dataset metrics of each model bucket (roberta, camem, xlm)
# from the experiment output directory.
dict_res_roberta, dict_res_camem, dict_res_xlm = collect_metrics("../output/complexity_binary")

## ROBERTA

In [4]:
# Keep only the "en" averages (first return value); the "it" averages are discarded.
avg, _ = average_over_users(dict_res_roberta)
# Per-dataset entries first, followed by the averaged entries.
roberta_results = {**dict_res_roberta, ** avg}

In [5]:
# One row per result entry, one column per metric.
pd.DataFrame(roberta_results).T

Unnamed: 0,log odd,anti log odd,comp,suff
np f en6,-0.459284,-0.424233,0.080493,0.217943
p f en98,-0.244415,-0.419591,0.081334,0.116991
p f en6,-0.274485,-0.364303,0.0628,0.111566
p f en83,-0.160587,-0.579961,0.046547,0.133571
np f en57,-0.744432,-1.360248,0.1452,0.233538
p f en49,-0.222478,-0.62331,0.062881,0.115126
p f en57,-0.197198,-0.546337,0.052358,0.171366
np f en83,-0.74157,-1.274452,0.113591,0.12658
np f en49,-0.381276,-2.198366,0.093562,0.116776
np f en98,-0.839435,-0.670189,0.106823,0.1624


In [6]:
# Print the same table as LaTeX, with values formatted to two decimals.
process_to_present(roberta_results)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & log odd & anti log odd & comp & suff \\
np f en6 & -0.46 & -0.42 & 0.08 & 0.22 \\
p f en98 & -0.24 & -0.42 & 0.08 & 0.12 \\
p f en6 & -0.27 & -0.36 & 0.06 & 0.11 \\
p f en83 & -0.16 & -0.58 & 0.05 & 0.13 \\
np f en57 & -0.74 & -1.36 & 0.15 & 0.23 \\
p f en49 & -0.22 & -0.62 & 0.06 & 0.12 \\
p f en57 & -0.20 & -0.55 & 0.05 & 0.17 \\
np f en83 & -0.74 & -1.27 & 0.11 & 0.13 \\
np f en49 & -0.38 & -2.20 & 0.09 & 0.12 \\
np f en98 & -0.84 & -0.67 & 0.11 & 0.16 \\
np nf en & -0.65 & -2.70 & 0.13 & 0.09 \\
p nf en & -0.33 & -0.38 & 0.09 & 0.14 \\
avg en np f & -0.63 & -1.19 & 0.11 & 0.17 \\
avg en p f & -0.22 & -0.51 & 0.06 & 0.13 \\
\end{tabular}



## CAMEM

In [7]:
# Keep only the "it" averages (second return value); the "en" averages are discarded.
_, avg = average_over_users(dict_res_camem)
# Per-dataset entries first, followed by the averaged entries.
camem_results = {**dict_res_camem, ** avg}

In [8]:
# One row per result entry, one column per metric.
pd.DataFrame(camem_results).T

Unnamed: 0,log odd,anti log odd,comp,suff
np nf it,-1.483671,-0.515434,0.231554,0.172583
np f it1,-0.590192,-0.748598,0.168331,0.267598
np f it26,-0.851958,-0.86467,0.252818,0.144515
np f it44,-1.023154,-0.597771,0.235795,0.184699
np f it43,-0.750333,-0.563934,0.245073,0.085545
p f it38,-0.299521,-0.702669,0.098625,0.153825
p f it43,-0.28057,-1.538591,0.141701,0.257867
p f it44,-0.719143,-1.199128,0.15331,0.266002
p f it26,-0.536262,-1.038725,0.165318,0.25421
np f it38,-0.783741,-1.546216,0.092246,0.383972


In [9]:
# Print the same table as LaTeX, with values formatted to two decimals.
process_to_present(camem_results)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & log odd & anti log odd & comp & suff \\
np nf it & -1.48 & -0.52 & 0.23 & 0.17 \\
np f it1 & -0.59 & -0.75 & 0.17 & 0.27 \\
np f it26 & -0.85 & -0.86 & 0.25 & 0.14 \\
np f it44 & -1.02 & -0.60 & 0.24 & 0.18 \\
np f it43 & -0.75 & -0.56 & 0.25 & 0.09 \\
p f it38 & -0.30 & -0.70 & 0.10 & 0.15 \\
p f it43 & -0.28 & -1.54 & 0.14 & 0.26 \\
p f it44 & -0.72 & -1.20 & 0.15 & 0.27 \\
p f it26 & -0.54 & -1.04 & 0.17 & 0.25 \\
np f it38 & -0.78 & -1.55 & 0.09 & 0.38 \\
p f it1 & -0.71 & -1.20 & 0.15 & 0.27 \\
p nf it & -0.30 & -1.16 & 0.12 & 0.23 \\
avg it np f & -0.80 & -0.86 & 0.20 & 0.21 \\
avg it p f & -0.51 & -1.14 & 0.14 & 0.24 \\
\end{tabular}



## XLM

In [10]:
# Both the "en" and the "it" averages are kept for this model bucket.
avg_en, avg_it = average_over_users(dict_res_xlm)
# Per-dataset entries first, then the "en" averages, then the "it" averages.
xlm_results = {**dict_res_xlm, **avg_en, **avg_it}

In [11]:
# One row per result entry, one column per metric.
pd.DataFrame(xlm_results).T

Unnamed: 0,log odd,anti log odd,comp,suff
p nf en,-0.048318,-0.003703,0.005469,0.142524
np f en49,-0.540323,-1.035784,0.107287,0.120163
np f it1,-0.461627,-0.474267,0.242873,0.414376
p f en83,-0.000595,-0.001608,0.000145,-0.011704
np nf it,-0.413272,-2.320906,0.296755,0.202384
p f en49,-0.354764,-0.538649,0.114954,0.247338
np f en83,-0.640894,-1.359719,0.127983,0.167032
p f en98,-0.271788,-0.04683,0.005794,0.169145
np f en98,-0.794521,-2.520903,0.137983,0.162096
np f en6,-0.635122,-0.425384,0.084604,0.202044


In [12]:
# Print the same table as LaTeX, with values formatted to two decimals.
process_to_present(xlm_results)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & log odd & anti log odd & comp & suff \\
p nf en & -0.05 & -0.00 & 0.01 & 0.14 \\
np f en49 & -0.54 & -1.04 & 0.11 & 0.12 \\
np f it1 & -0.46 & -0.47 & 0.24 & 0.41 \\
p f en83 & -0.00 & -0.00 & 0.00 & -0.01 \\
np nf it & -0.41 & -2.32 & 0.30 & 0.20 \\
p f en49 & -0.35 & -0.54 & 0.11 & 0.25 \\
np f en83 & -0.64 & -1.36 & 0.13 & 0.17 \\
p f en98 & -0.27 & -0.05 & 0.01 & 0.17 \\
np f en98 & -0.79 & -2.52 & 0.14 & 0.16 \\
np f en6 & -0.64 & -0.43 & 0.08 & 0.20 \\
p f it38 & -0.04 & -0.03 & 0.01 & 0.00 \\
np f it38 & -0.99 & -0.97 & 0.33 & 0.08 \\
np f it26 & -1.04 & -0.56 & 0.25 & 0.07 \\
np f it44 & -0.68 & -0.80 & 0.18 & 0.16 \\
np f it43 & -0.67 & -0.36 & 0.26 & 0.21 \\
p f it1 & -0.40 & -0.57 & 0.15 & 0.36 \\
p f it26 & -0.18 & -0.08 & 0.01 & 0.12 \\
p f it44 & -0.05 & -0.05 & 0.01 & 0.09 \\
p f it43 & -0.04 & -0.02 & 0.00 & 0.05 \\
p nf it & -0.01 & -0.02 & -0.00 & 0.16 \\
np nf en & -0.19 & -3.19 & 0.14 & 0.24 \\
p f en57 & -0.06 &