In [1]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections

In [2]:
def collect_losses(root_dir):

    depth = 2

    dict_res_roberta = dict()
    dict_res_camem = dict()
    dict_res_xlm = dict()
    
    for subdir, dirs, files in os.walk(root_dir):
        if subdir[len(root_dir):].count(os.sep) < depth:
            for file_ in files:
                if file_ == "finetuning_results.json":
                    with open(subdir + "/" + file_) as f:
                        d = json.load(f)
                        losses_ts = d["losses_ts"]
                    model_type = subdir.split(os.sep)[-1].split("_")[-1]
                    if model_type == "roberta":
                        dataset_name = "_".join(subdir.split(os.sep)[-1].split("_")[2:-1])
                        dict_res_roberta[dataset_name] = losses_ts
                    elif model_type == "xlm":
                        dataset_name = "_".join(subdir.split(os.sep)[-1].split("_")[2:-1])
                        dict_res_xlm[dataset_name] = losses_ts
                    elif model_type == "camem":
                        dataset_name = "_".join(subdir.split(os.sep)[-1].split("_")[2:-1])
                        dict_res_camem[dataset_name] = losses_ts

    return dict_res_roberta, dict_res_camem, dict_res_xlm


def process_to_present(res, column_width="1.1cm", plot_transpose=True):
    df_res = pd.DataFrame.from_dict(res).T
    df_res.iloc[:, 1:10]  = 100 - df_res.iloc[:, 1:10]
    # df_res = df_res.rename(columns=mapping_columns_names)
    if not plot_transpose:
        df_res = df_res.T
    s = df_res.style
    s.format(na_rep='MISS', precision=1)
    print(s.to_latex(column_format='l'+('p{'+f'{column_width}'+'}')*len(df_res.columns)))
    
    
def divide_mse_mae(res):
    res_mae = {}
    res_mse = {}
    
    for dataset, results in res.items():
        res_mae[dataset] = {}
        res_mse[dataset] = {}
        for k, v in res[dataset].items():
            if "mae" in k:
                res_mae[dataset][k] = 100 - v
            else:
                res_mse[dataset][k] = v
    
    return res_mse, res_mae

In [3]:
dict_res_roberta, dict_res_camem, dict_res_xlm = collect_losses("../output")

# divide datasets in mse and mae results
res_mse_roberta, res_mae_roberta = divide_mse_mae(dict_res_roberta)
res_mse_camem, res_mae_camem = divide_mse_mae(dict_res_camem)
res_mse_xlm, res_mae_xlm = divide_mse_mae(dict_res_xlm)
res_mse_roberta, res_mae_roberta = divide_mse_mae(dict_res_roberta)
res_mse_camem, res_mae_camem = divide_mse_mae(dict_res_camem)
res_mse_xlm, res_mae_xlm = divide_mse_mae(dict_res_xlm)

## ROBERTA Results

In [4]:
pd.DataFrame(res_mse_roberta).T

Unnamed: 0,mse_skip,mse_firstfix_dur,mse_firstrun_dur,mse_dur,mse_firstrun_nfix,mse_nfix,mse_refix,mse_reread
en_57_p,1634.24801,190.328665,56.741089,42.822611,53.009018,35.124177,799.516632,1211.283344
en_98_p,681.125654,79.933669,64.387167,78.433762,182.754953,81.822781,671.604527,1120.801659
en_49_p,1641.783783,230.240421,153.144654,77.712659,167.494364,157.115408,661.051993,803.368231
en_6_p,929.996343,56.646202,35.533469,79.285073,147.722311,112.070343,1023.547217,1392.260237
en_98_np,752.298468,99.906328,80.489239,103.608047,215.310588,111.283185,772.76748,1276.766205
en_49_np,1551.128967,238.496854,158.618608,80.3925,175.372326,160.801877,652.709409,785.995842
en_83_p,1237.130617,107.014177,33.310039,56.731773,53.22836,86.831578,298.311242,440.043237
en_6_np,1016.409552,74.978531,49.176037,97.580716,178.130702,137.261862,1154.596466,1480.479456
en_57_np,1528.210168,198.535983,60.85631,46.366292,57.768185,38.84697,832.256726,1225.563739
en_83_np,1369.336523,130.61959,43.456786,69.730474,69.03563,109.379874,309.437395,482.862338


In [5]:
pd.DataFrame(res_mae_roberta).T

Unnamed: 0,mae_skip,mae_firstfix_dur,mae_firstrun_dur,mae_dur,mae_firstrun_nfix,mae_nfix,mae_refix,mae_reread
en_57_p,80.936015,91.600892,96.278488,97.051629,96.37781,97.366644,89.873296,85.505472
en_98_p,91.535548,95.222071,96.248376,96.041697,92.544908,95.915789,90.409641,85.3563
en_49_p,80.434763,90.985132,93.054857,95.769197,92.065542,92.929263,91.572475,90.191367
en_6_p,88.803383,96.24219,97.334923,96.032963,93.638166,95.185924,86.66373,82.745772
en_98_np,89.758473,94.262731,95.413756,95.187826,91.347372,94.812212,89.681785,83.625769
en_49_np,79.856446,90.787055,92.904513,95.653962,91.874859,92.842367,91.393085,89.56624
en_83_p,84.59199,94.465822,97.297675,96.626028,96.201922,95.538469,95.393009,94.022581
en_6_np,86.108719,95.348361,96.709341,95.390422,92.422509,94.417915,85.217033,80.871325
en_57_np,80.152442,91.520455,96.074147,96.881589,96.163839,97.159642,89.655005,85.196898
en_83_np,82.042829,93.428023,96.553565,95.811393,95.216019,94.424506,95.656746,93.785998


In [6]:
process_to_present(res_mse_roberta)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & mse_skip & mse_firstfix_dur & mse_firstrun_dur & mse_dur & mse_firstrun_nfix & mse_nfix & mse_refix & mse_reread \\
en_57_p & 1634.2 & -90.3 & 43.3 & 57.2 & 47.0 & 64.9 & -699.5 & -1111.3 \\
en_98_p & 681.1 & 20.1 & 35.6 & 21.6 & -82.8 & 18.2 & -571.6 & -1020.8 \\
en_49_p & 1641.8 & -130.2 & -53.1 & 22.3 & -67.5 & -57.1 & -561.1 & -703.4 \\
en_6_p & 930.0 & 43.4 & 64.5 & 20.7 & -47.7 & -12.1 & -923.5 & -1292.3 \\
en_98_np & 752.3 & 0.1 & 19.5 & -3.6 & -115.3 & -11.3 & -672.8 & -1176.8 \\
en_49_np & 1551.1 & -138.5 & -58.6 & 19.6 & -75.4 & -60.8 & -552.7 & -686.0 \\
en_83_p & 1237.1 & -7.0 & 66.7 & 43.3 & 46.8 & 13.2 & -198.3 & -340.0 \\
en_6_np & 1016.4 & 25.0 & 50.8 & 2.4 & -78.1 & -37.3 & -1054.6 & -1380.5 \\
en_57_np & 1528.2 & -98.5 & 39.1 & 53.6 & 42.2 & 61.2 & -732.3 & -1125.6 \\
en_83_np & 1369.3 & -30.6 & 56.5 & 30.3 & 31.0 & -9.4 & -209.4 & -382.9 \\
\end{tabular}



In [7]:
process_to_present(res_mae_roberta)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & mae_skip & mae_firstfix_dur & mae_firstrun_dur & mae_dur & mae_firstrun_nfix & mae_nfix & mae_refix & mae_reread \\
en_57_p & 80.9 & 8.4 & 3.7 & 2.9 & 3.6 & 2.6 & 10.1 & 14.5 \\
en_98_p & 91.5 & 4.8 & 3.8 & 4.0 & 7.5 & 4.1 & 9.6 & 14.6 \\
en_49_p & 80.4 & 9.0 & 6.9 & 4.2 & 7.9 & 7.1 & 8.4 & 9.8 \\
en_6_p & 88.8 & 3.8 & 2.7 & 4.0 & 6.4 & 4.8 & 13.3 & 17.3 \\
en_98_np & 89.8 & 5.7 & 4.6 & 4.8 & 8.7 & 5.2 & 10.3 & 16.4 \\
en_49_np & 79.9 & 9.2 & 7.1 & 4.3 & 8.1 & 7.2 & 8.6 & 10.4 \\
en_83_p & 84.6 & 5.5 & 2.7 & 3.4 & 3.8 & 4.5 & 4.6 & 6.0 \\
en_6_np & 86.1 & 4.7 & 3.3 & 4.6 & 7.6 & 5.6 & 14.8 & 19.1 \\
en_57_np & 80.2 & 8.5 & 3.9 & 3.1 & 3.8 & 2.8 & 10.3 & 14.8 \\
en_83_np & 82.0 & 6.6 & 3.4 & 4.2 & 4.8 & 5.6 & 4.3 & 6.2 \\
\end{tabular}



## CAMEM Results

In [8]:
pd.DataFrame(res_mse_camem).T

Unnamed: 0,mse_skip,mse_firstfix_dur,mse_firstrun_dur,mse_dur,mse_firstrun_nfix,mse_nfix,mse_refix,mse_reread
it_43_np,1206.30036,173.004082,78.098402,125.115767,172.095854,123.651561,458.565247,704.113882
it_44_np,1018.162354,200.414616,119.260728,87.617688,136.780164,97.163,803.331787,1316.00351
it_43_p,1330.664691,205.895341,86.881858,136.759622,199.259041,134.072346,454.895932,710.045641
it_38_p,817.610117,202.879382,122.070423,139.977772,126.30136,196.531737,892.887756,2016.68385
it_38_np,728.090054,183.034164,114.021289,132.696457,119.029887,184.730254,873.258008,1829.686084
it_1_p,872.407385,80.217889,43.266185,24.359126,34.548071,30.602466,1055.370343,1172.471674
it_26_p,1040.795255,84.046247,10.801943,16.457597,9.04986,15.785651,736.008937,806.800584
it_44_p,1120.577582,219.616629,125.633448,89.315589,143.462997,100.022496,817.348456,1413.829694
it_26_np,847.579816,80.379245,12.460171,18.572121,11.45368,17.01199,763.118585,769.429575
it_1_np,804.015637,74.763204,44.976555,25.108034,36.397618,31.467024,1021.171082,1131.100989


In [9]:
pd.DataFrame(res_mae_camem).T

Unnamed: 0,mae_skip,mae_firstfix_dur,mae_firstrun_dur,mae_dur,mae_firstrun_nfix,mae_nfix,mae_refix,mae_reread
it_43_np,84.482537,92.927086,95.771733,94.733109,92.879915,94.579235,94.081189,91.349829
it_44_np,86.660609,92.268757,94.419554,96.250387,93.682702,95.835658,89.771833,83.482413
it_43_p,84.640594,92.313666,95.512973,94.467232,92.403717,94.346042,94.142672,91.641531
it_38_p,90.362701,92.147791,94.287865,94.58603,93.795489,93.357287,89.321315,76.889207
it_38_np,89.716982,92.59961,94.538732,94.779149,94.019513,93.697343,88.815717,77.041099
it_1_p,89.908264,95.445111,97.200837,98.348715,97.417191,98.31812,87.505358,85.758344
it_26_p,88.288848,95.277229,98.673864,98.352032,98.91822,98.358112,91.027534,90.241812
it_44_p,87.166425,91.935749,94.280787,96.208126,93.531048,95.741839,89.9281,83.722901
it_26_np,88.854681,95.547444,98.63147,98.351057,98.862819,98.367582,90.477017,90.164736
it_1_np,89.395407,95.670433,97.205954,98.25528,97.422914,98.237387,87.439514,85.555516


In [10]:
process_to_present(res_mse_roberta)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & mse_skip & mse_firstfix_dur & mse_firstrun_dur & mse_dur & mse_firstrun_nfix & mse_nfix & mse_refix & mse_reread \\
en_57_p & 1634.2 & -90.3 & 43.3 & 57.2 & 47.0 & 64.9 & -699.5 & -1111.3 \\
en_98_p & 681.1 & 20.1 & 35.6 & 21.6 & -82.8 & 18.2 & -571.6 & -1020.8 \\
en_49_p & 1641.8 & -130.2 & -53.1 & 22.3 & -67.5 & -57.1 & -561.1 & -703.4 \\
en_6_p & 930.0 & 43.4 & 64.5 & 20.7 & -47.7 & -12.1 & -923.5 & -1292.3 \\
en_98_np & 752.3 & 0.1 & 19.5 & -3.6 & -115.3 & -11.3 & -672.8 & -1176.8 \\
en_49_np & 1551.1 & -138.5 & -58.6 & 19.6 & -75.4 & -60.8 & -552.7 & -686.0 \\
en_83_p & 1237.1 & -7.0 & 66.7 & 43.3 & 46.8 & 13.2 & -198.3 & -340.0 \\
en_6_np & 1016.4 & 25.0 & 50.8 & 2.4 & -78.1 & -37.3 & -1054.6 & -1380.5 \\
en_57_np & 1528.2 & -98.5 & 39.1 & 53.6 & 42.2 & 61.2 & -732.3 & -1125.6 \\
en_83_np & 1369.3 & -30.6 & 56.5 & 30.3 & 31.0 & -9.4 & -209.4 & -382.9 \\
\end{tabular}



In [11]:
process_to_present(res_mae_roberta)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & mae_skip & mae_firstfix_dur & mae_firstrun_dur & mae_dur & mae_firstrun_nfix & mae_nfix & mae_refix & mae_reread \\
en_57_p & 80.9 & 8.4 & 3.7 & 2.9 & 3.6 & 2.6 & 10.1 & 14.5 \\
en_98_p & 91.5 & 4.8 & 3.8 & 4.0 & 7.5 & 4.1 & 9.6 & 14.6 \\
en_49_p & 80.4 & 9.0 & 6.9 & 4.2 & 7.9 & 7.1 & 8.4 & 9.8 \\
en_6_p & 88.8 & 3.8 & 2.7 & 4.0 & 6.4 & 4.8 & 13.3 & 17.3 \\
en_98_np & 89.8 & 5.7 & 4.6 & 4.8 & 8.7 & 5.2 & 10.3 & 16.4 \\
en_49_np & 79.9 & 9.2 & 7.1 & 4.3 & 8.1 & 7.2 & 8.6 & 10.4 \\
en_83_p & 84.6 & 5.5 & 2.7 & 3.4 & 3.8 & 4.5 & 4.6 & 6.0 \\
en_6_np & 86.1 & 4.7 & 3.3 & 4.6 & 7.6 & 5.6 & 14.8 & 19.1 \\
en_57_np & 80.2 & 8.5 & 3.9 & 3.1 & 3.8 & 2.8 & 10.3 & 14.8 \\
en_83_np & 82.0 & 6.6 & 3.4 & 4.2 & 4.8 & 5.6 & 4.3 & 6.2 \\
\end{tabular}



## XLM Results

In [12]:
pd.DataFrame(res_mse_xlm).T

Unnamed: 0,mse_skip,mse_firstfix_dur,mse_firstrun_dur,mse_dur,mse_firstrun_nfix,mse_nfix,mse_refix,mse_reread
en_57_np,1270.764838,167.737971,50.778973,38.109819,48.09559,31.998088,703.770883,1025.670178
en_49_np,1309.767798,205.65639,133.99713,68.094334,148.003922,136.567917,555.26832,663.641434
it_26_p,767.759458,61.206044,7.577573,11.213895,7.152116,10.180172,608.796318,555.939288
en_6_np,862.288953,63.599807,40.213116,80.702182,150.806509,113.195927,956.231122,1229.431873
en_57_p,1337.511035,158.279008,47.147339,35.792365,43.867191,29.636941,683.472897,1016.57442
it_38_np,677.688998,171.844495,103.448839,119.294678,111.591663,166.879243,799.21922,1695.120044
it_26_np,786.655841,75.333921,12.44749,17.735952,12.036681,16.576627,714.494339,722.975598
it_43_p,1097.647626,146.86917,60.97436,98.131592,141.42112,95.740025,385.269154,590.5429
en_98_np,625.520605,85.01949,66.749213,85.90805,183.159279,92.367079,646.508899,1061.566541
it_44_p,997.764673,178.749805,99.863415,76.85969,115.175173,84.539333,750.892014,1284.001126


In [13]:
pd.DataFrame(res_mae_xlm).T

Unnamed: 0,mae_skip,mae_firstfix_dur,mae_firstrun_dur,mae_dur,mae_firstrun_nfix,mae_nfix,mae_refix,mae_reread
en_57_np,83.399261,92.837086,96.714246,97.402672,96.772402,97.636231,91.252589,87.567564
en_49_np,83.173832,92.235203,94.058708,96.355363,93.207483,93.958837,92.66467,91.227933
it_26_p,90.711237,96.345626,99.007848,98.808439,99.141455,98.806734,92.151036,92.426487
en_6_np,88.543914,96.096418,97.273891,96.21085,93.63892,95.402297,87.73969,84.241499
en_57_p,84.068332,93.05534,96.909855,97.542775,97.0092,97.791785,91.408952,87.969998
it_38_np,90.338556,93.053664,94.981744,95.228799,94.427576,94.208967,89.724246,78.681989
it_26_np,89.618568,95.868273,98.71704,98.450852,98.898129,98.453988,91.109473,90.859968
it_43_p,86.652954,93.905961,96.562402,95.684322,93.986192,95.604116,94.234101,92.182936
en_98_np,91.568076,95.171344,96.190196,96.00109,92.711591,95.697368,91.481723,86.431977
it_44_p,87.762856,92.920328,95.071892,96.633848,94.394463,96.291843,89.881325,84.358306


In [14]:
process_to_present(res_mse_xlm)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & mse_skip & mse_firstfix_dur & mse_firstrun_dur & mse_dur & mse_firstrun_nfix & mse_nfix & mse_refix & mse_reread \\
en_57_np & 1270.8 & -67.7 & 49.2 & 61.9 & 51.9 & 68.0 & -603.8 & -925.7 \\
en_49_np & 1309.8 & -105.7 & -34.0 & 31.9 & -48.0 & -36.6 & -455.3 & -563.6 \\
it_26_p & 767.8 & 38.8 & 92.4 & 88.8 & 92.8 & 89.8 & -508.8 & -455.9 \\
en_6_np & 862.3 & 36.4 & 59.8 & 19.3 & -50.8 & -13.2 & -856.2 & -1129.4 \\
en_57_p & 1337.5 & -58.3 & 52.9 & 64.2 & 56.1 & 70.4 & -583.5 & -916.6 \\
it_38_np & 677.7 & -71.8 & -3.4 & -19.3 & -11.6 & -66.9 & -699.2 & -1595.1 \\
it_26_np & 786.7 & 24.7 & 87.6 & 82.3 & 88.0 & 83.4 & -614.5 & -623.0 \\
it_43_p & 1097.6 & -46.9 & 39.0 & 1.9 & -41.4 & 4.3 & -285.3 & -490.5 \\
en_98_np & 625.5 & 15.0 & 33.3 & 14.1 & -83.2 & 7.6 & -546.5 & -961.6 \\
it_44_p & 997.8 & -78.7 & 0.1 & 23.1 & -15.2 & 15.5 & -650.9 & -1184.0 \\
en_49_p & 1354.3 & -102.0 & -30.5 & 36.5 & -44.9 & -

In [15]:
process_to_present(res_mae_xlm)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & mae_skip & mae_firstfix_dur & mae_firstrun_dur & mae_dur & mae_firstrun_nfix & mae_nfix & mae_refix & mae_reread \\
en_57_np & 83.4 & 7.2 & 3.3 & 2.6 & 3.2 & 2.4 & 8.7 & 12.4 \\
en_49_np & 83.2 & 7.8 & 5.9 & 3.6 & 6.8 & 6.0 & 7.3 & 8.8 \\
it_26_p & 90.7 & 3.7 & 1.0 & 1.2 & 0.9 & 1.2 & 7.8 & 7.6 \\
en_6_np & 88.5 & 3.9 & 2.7 & 3.8 & 6.4 & 4.6 & 12.3 & 15.8 \\
en_57_p & 84.1 & 6.9 & 3.1 & 2.5 & 3.0 & 2.2 & 8.6 & 12.0 \\
it_38_np & 90.3 & 6.9 & 5.0 & 4.8 & 5.6 & 5.8 & 10.3 & 21.3 \\
it_26_np & 89.6 & 4.1 & 1.3 & 1.5 & 1.1 & 1.5 & 8.9 & 9.1 \\
it_43_p & 86.7 & 6.1 & 3.4 & 4.3 & 6.0 & 4.4 & 5.8 & 7.8 \\
en_98_np & 91.6 & 4.8 & 3.8 & 4.0 & 7.3 & 4.3 & 8.5 & 13.6 \\
it_44_p & 87.8 & 7.1 & 4.9 & 3.4 & 5.6 & 3.7 & 10.1 & 15.6 \\
en_49_p & 83.5 & 7.7 & 5.9 & 3.5 & 6.7 & 5.9 & 7.4 & 8.8 \\
it_1_p & 90.6 & 3.6 & 2.2 & 1.3 & 1.9 & 1.3 & 11.1 & 12.6 \\
it_1_np & 90.1 & 4.1 & 2.6 & 1.6 & 2.4 & 1.6 & 11.7 & 13.4 \\
i