In [1]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections

In [2]:
def collect_losses(root_dir):
    """Collect the ``losses_ts`` entries of fine-tuning runs found under ``root_dir``.

    Every directory that holds a ``finetuning_results.json`` file and is shallow
    enough (see ``depth`` below) is inspected. The directory's own name is split
    on ``"_"``: the last token selects the model group (``"roberta"``,
    ``"camem"`` or ``"xlm"``) and the tokens between the first two and the last
    one, re-joined with ``"_"``, form the dataset name. Directories whose last
    token is none of the three model names are ignored (their JSON file is
    still read first).

    Parameters
    ----------
    root_dir : str
        Directory to start walking from.

    Returns
    -------
    tuple of dict
        ``(dict_res_roberta, dict_res_camem, dict_res_xlm)``; each maps a
        dataset name to the value stored under ``"losses_ts"`` in that run's
        JSON file.

    Raises
    ------
    KeyError
        If a results file has no ``"losses_ts"`` key.
    """
    # A directory is inspected only if its path, with the ``root_dir`` prefix
    # stripped, contains fewer than ``depth`` path separators.
    depth = 2
    results_filename = "finetuning_results.json"

    dict_res_roberta = dict()
    dict_res_camem = dict()
    dict_res_xlm = dict()

    # Model suffix of the run directory -> dict that receives its losses.
    results_by_model = {
        "roberta": dict_res_roberta,
        "xlm": dict_res_xlm,
        "camem": dict_res_camem,
    }

    for subdir, dirs, files in os.walk(root_dir):
        if subdir[len(root_dir):].count(os.sep) >= depth:
            # Too deep to be inspected, and so is everything below it:
            # emptying ``dirs`` in place stops os.walk from descending further.
            dirs[:] = []
            continue
        if results_filename not in files:
            continue

        with open(os.path.join(subdir, results_filename)) as f:
            losses_ts = json.load(f)["losses_ts"]

        name_parts = subdir.split(os.sep)[-1].split("_")
        target = results_by_model.get(name_parts[-1])
        if target is not None:
            # The first two tokens and the model suffix are not part of the
            # dataset name.
            target["_".join(name_parts[2:-1])] = losses_ts

    return dict_res_roberta, dict_res_camem, dict_res_xlm


def process_to_present(res, column_width="1.1cm", plot_transpose=True):
    df_res = pd.DataFrame.from_dict(res).T
    # df_res = df_res.rename(columns=mapping_columns_names)
    if not plot_transpose:
        df_res = df_res.T
    s = df_res.style
    s.format(na_rep='MISS', precision=1)
    print(s.to_latex(column_format='l'+('p{'+f'{column_width}'+'}')*len(df_res.columns)))
    
    
def divide_mse_mae(res):
    """Split per-dataset metrics into an MSE table and an "acc" table.

    Underscores are replaced by spaces in both dataset names and metric names.
    A metric whose name contains ``"mae"`` is stored in the second result with
    ``"mae"`` renamed to ``"acc"`` and its value replaced by ``100 - value``
    (presumably the MAE is on a 0-100 scale -- confirm against the training
    code). Every other metric is copied unchanged into the first result.

    Parameters
    ----------
    res : dict
        Mapping ``dataset name -> {metric name -> value}``.

    Returns
    -------
    tuple of dict
        ``(res_mse, res_mae)``, both keyed by the space-separated dataset name.
    """
    res_mse, res_mae = {}, {}

    for dataset, metrics in res.items():
        label = dataset.replace("_", " ")
        mse_row = res_mse[label] = {}
        mae_row = res_mae[label] = {}

        for raw_name, value in metrics.items():
            name = raw_name.replace("_", " ")
            if "mae" not in name:
                mse_row[name] = value
                continue
            mae_row[name.replace("mae", "acc")] = 100 - value

    return res_mse, res_mae

In [3]:
# Load the test losses of every fine-tuning run found under ../output,
# grouped by model (roberta / camem / xlm).
dict_res_roberta, dict_res_camem, dict_res_xlm = collect_losses("../output")

# Split each model's results into the MSE metrics and the MAE-derived "acc"
# metrics (100 - MAE). Each split is computed exactly once.
res_mse_roberta, res_mae_roberta = divide_mse_mae(dict_res_roberta)
res_mse_camem, res_mae_camem = divide_mse_mae(dict_res_camem)
res_mse_xlm, res_mae_xlm = divide_mse_mae(dict_res_xlm)

## ROBERTA Results

In [4]:
# Unrounded MSE metrics of the roberta runs, one row per dataset.
pd.DataFrame(res_mse_roberta).T

Unnamed: 0,mse skip,mse firstfix dur,mse firstrun dur,mse dur,mse firstrun nfix,mse nfix,mse refix,mse reread
en 57 p,1634.24801,190.328665,56.741089,42.822611,53.009018,35.124177,799.516632,1211.283344
en 98 p,681.125654,79.933669,64.387167,78.433762,182.754953,81.822781,671.604527,1120.801659
en 49 p,1641.783783,230.240421,153.144654,77.712659,167.494364,157.115408,661.051993,803.368231
en 6 p,929.996343,56.646202,35.533469,79.285073,147.722311,112.070343,1023.547217,1392.260237
en 98 np,752.298468,99.906328,80.489239,103.608047,215.310588,111.283185,772.76748,1276.766205
en 49 np,1551.128967,238.496854,158.618608,80.3925,175.372326,160.801877,652.709409,785.995842
en 83 p,1237.130617,107.014177,33.310039,56.731773,53.22836,86.831578,298.311242,440.043237
en 6 np,1016.409552,74.978531,49.176037,97.580716,178.130702,137.261862,1154.596466,1480.479456
en 57 np,1528.210168,198.535983,60.85631,46.366292,57.768185,38.84697,832.256726,1225.563739
en 83 np,1369.336523,130.61959,43.456786,69.730474,69.03563,109.379874,309.437395,482.862338


In [5]:
# Unrounded "acc" metrics (100 - MAE) of the roberta runs, one row per dataset.
pd.DataFrame(res_mae_roberta).T

Unnamed: 0,acc skip,acc firstfix dur,acc firstrun dur,acc dur,acc firstrun nfix,acc nfix,acc refix,acc reread
en 57 p,80.936015,91.600892,96.278488,97.051629,96.37781,97.366644,89.873296,85.505472
en 98 p,91.535548,95.222071,96.248376,96.041697,92.544908,95.915789,90.409641,85.3563
en 49 p,80.434763,90.985132,93.054857,95.769197,92.065542,92.929263,91.572475,90.191367
en 6 p,88.803383,96.24219,97.334923,96.032963,93.638166,95.185924,86.66373,82.745772
en 98 np,89.758473,94.262731,95.413756,95.187826,91.347372,94.812212,89.681785,83.625769
en 49 np,79.856446,90.787055,92.904513,95.653962,91.874859,92.842367,91.393085,89.56624
en 83 p,84.59199,94.465822,97.297675,96.626028,96.201922,95.538469,95.393009,94.022581
en 6 np,86.108719,95.348361,96.709341,95.390422,92.422509,94.417915,85.217033,80.871325
en 57 np,80.152442,91.520455,96.074147,96.881589,96.163839,97.159642,89.655005,85.196898
en 83 np,82.042829,93.428023,96.553565,95.811393,95.216019,94.424506,95.656746,93.785998


In [6]:
# Print the roberta MSE table as a LaTeX tabular (one decimal place).
process_to_present(res_mse_roberta)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & mse skip & mse firstfix dur & mse firstrun dur & mse dur & mse firstrun nfix & mse nfix & mse refix & mse reread \\
en 57 p & 1634.2 & 190.3 & 56.7 & 42.8 & 53.0 & 35.1 & 799.5 & 1211.3 \\
en 98 p & 681.1 & 79.9 & 64.4 & 78.4 & 182.8 & 81.8 & 671.6 & 1120.8 \\
en 49 p & 1641.8 & 230.2 & 153.1 & 77.7 & 167.5 & 157.1 & 661.1 & 803.4 \\
en 6 p & 930.0 & 56.6 & 35.5 & 79.3 & 147.7 & 112.1 & 1023.5 & 1392.3 \\
en 98 np & 752.3 & 99.9 & 80.5 & 103.6 & 215.3 & 111.3 & 772.8 & 1276.8 \\
en 49 np & 1551.1 & 238.5 & 158.6 & 80.4 & 175.4 & 160.8 & 652.7 & 786.0 \\
en 83 p & 1237.1 & 107.0 & 33.3 & 56.7 & 53.2 & 86.8 & 298.3 & 440.0 \\
en 6 np & 1016.4 & 75.0 & 49.2 & 97.6 & 178.1 & 137.3 & 1154.6 & 1480.5 \\
en 57 np & 1528.2 & 198.5 & 60.9 & 46.4 & 57.8 & 38.8 & 832.3 & 1225.6 \\
en 83 np & 1369.3 & 130.6 & 43.5 & 69.7 & 69.0 & 109.4 & 309.4 & 482.9 \\
\end{tabular}



In [7]:
# Print the roberta "acc" (100 - MAE) table as a LaTeX tabular (one decimal place).
process_to_present(res_mae_roberta)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & acc skip & acc firstfix dur & acc firstrun dur & acc dur & acc firstrun nfix & acc nfix & acc refix & acc reread \\
en 57 p & 80.9 & 91.6 & 96.3 & 97.1 & 96.4 & 97.4 & 89.9 & 85.5 \\
en 98 p & 91.5 & 95.2 & 96.2 & 96.0 & 92.5 & 95.9 & 90.4 & 85.4 \\
en 49 p & 80.4 & 91.0 & 93.1 & 95.8 & 92.1 & 92.9 & 91.6 & 90.2 \\
en 6 p & 88.8 & 96.2 & 97.3 & 96.0 & 93.6 & 95.2 & 86.7 & 82.7 \\
en 98 np & 89.8 & 94.3 & 95.4 & 95.2 & 91.3 & 94.8 & 89.7 & 83.6 \\
en 49 np & 79.9 & 90.8 & 92.9 & 95.7 & 91.9 & 92.8 & 91.4 & 89.6 \\
en 83 p & 84.6 & 94.5 & 97.3 & 96.6 & 96.2 & 95.5 & 95.4 & 94.0 \\
en 6 np & 86.1 & 95.3 & 96.7 & 95.4 & 92.4 & 94.4 & 85.2 & 80.9 \\
en 57 np & 80.2 & 91.5 & 96.1 & 96.9 & 96.2 & 97.2 & 89.7 & 85.2 \\
en 83 np & 82.0 & 93.4 & 96.6 & 95.8 & 95.2 & 94.4 & 95.7 & 93.8 \\
\end{tabular}



## CAMEM Results

In [8]:
# Unrounded MSE metrics of the camem runs, one row per dataset.
pd.DataFrame(res_mse_camem).T

Unnamed: 0,mse skip,mse firstfix dur,mse firstrun dur,mse dur,mse firstrun nfix,mse nfix,mse refix,mse reread
it 43 np,1206.30036,173.004082,78.098402,125.115767,172.095854,123.651561,458.565247,704.113882
it 44 np,1018.162354,200.414616,119.260728,87.617688,136.780164,97.163,803.331787,1316.00351
it 43 p,1330.664691,205.895341,86.881858,136.759622,199.259041,134.072346,454.895932,710.045641
it 38 p,817.610117,202.879382,122.070423,139.977772,126.30136,196.531737,892.887756,2016.68385
it 38 np,728.090054,183.034164,114.021289,132.696457,119.029887,184.730254,873.258008,1829.686084
it 1 p,872.407385,80.217889,43.266185,24.359126,34.548071,30.602466,1055.370343,1172.471674
it 26 p,1040.795255,84.046247,10.801943,16.457597,9.04986,15.785651,736.008937,806.800584
it 44 p,1120.577582,219.616629,125.633448,89.315589,143.462997,100.022496,817.348456,1413.829694
it 26 np,847.579816,80.379245,12.460171,18.572121,11.45368,17.01199,763.118585,769.429575
it 1 np,804.015637,74.763204,44.976555,25.108034,36.397618,31.467024,1021.171082,1131.100989


In [9]:
# Unrounded "acc" metrics (100 - MAE) of the camem runs, one row per dataset.
pd.DataFrame(res_mae_camem).T

Unnamed: 0,acc skip,acc firstfix dur,acc firstrun dur,acc dur,acc firstrun nfix,acc nfix,acc refix,acc reread
it 43 np,84.482537,92.927086,95.771733,94.733109,92.879915,94.579235,94.081189,91.349829
it 44 np,86.660609,92.268757,94.419554,96.250387,93.682702,95.835658,89.771833,83.482413
it 43 p,84.640594,92.313666,95.512973,94.467232,92.403717,94.346042,94.142672,91.641531
it 38 p,90.362701,92.147791,94.287865,94.58603,93.795489,93.357287,89.321315,76.889207
it 38 np,89.716982,92.59961,94.538732,94.779149,94.019513,93.697343,88.815717,77.041099
it 1 p,89.908264,95.445111,97.200837,98.348715,97.417191,98.31812,87.505358,85.758344
it 26 p,88.288848,95.277229,98.673864,98.352032,98.91822,98.358112,91.027534,90.241812
it 44 p,87.166425,91.935749,94.280787,96.208126,93.531048,95.741839,89.9281,83.722901
it 26 np,88.854681,95.547444,98.63147,98.351057,98.862819,98.367582,90.477017,90.164736
it 1 np,89.395407,95.670433,97.205954,98.25528,97.422914,98.237387,87.439514,85.555516


In [16]:
# Print the camem MSE table as a LaTeX tabular (one decimal place).
process_to_present(res_mse_camem)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & mse skip & mse firstfix dur & mse firstrun dur & mse dur & mse firstrun nfix & mse nfix & mse refix & mse reread \\
it 43 np & 1206.3 & 173.0 & 78.1 & 125.1 & 172.1 & 123.7 & 458.6 & 704.1 \\
it 44 np & 1018.2 & 200.4 & 119.3 & 87.6 & 136.8 & 97.2 & 803.3 & 1316.0 \\
it 43 p & 1330.7 & 205.9 & 86.9 & 136.8 & 199.3 & 134.1 & 454.9 & 710.0 \\
it 38 p & 817.6 & 202.9 & 122.1 & 140.0 & 126.3 & 196.5 & 892.9 & 2016.7 \\
it 38 np & 728.1 & 183.0 & 114.0 & 132.7 & 119.0 & 184.7 & 873.3 & 1829.7 \\
it 1 p & 872.4 & 80.2 & 43.3 & 24.4 & 34.5 & 30.6 & 1055.4 & 1172.5 \\
it 26 p & 1040.8 & 84.0 & 10.8 & 16.5 & 9.0 & 15.8 & 736.0 & 806.8 \\
it 44 p & 1120.6 & 219.6 & 125.6 & 89.3 & 143.5 & 100.0 & 817.3 & 1413.8 \\
it 26 np & 847.6 & 80.4 & 12.5 & 18.6 & 11.5 & 17.0 & 763.1 & 769.4 \\
it 1 np & 804.0 & 74.8 & 45.0 & 25.1 & 36.4 & 31.5 & 1021.2 & 1131.1 \\
\end{tabular}



In [17]:
# Print the camem "acc" (100 - MAE) table as a LaTeX tabular (one decimal place).
process_to_present(res_mae_camem)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & acc skip & acc firstfix dur & acc firstrun dur & acc dur & acc firstrun nfix & acc nfix & acc refix & acc reread \\
it 43 np & 84.5 & 92.9 & 95.8 & 94.7 & 92.9 & 94.6 & 94.1 & 91.3 \\
it 44 np & 86.7 & 92.3 & 94.4 & 96.3 & 93.7 & 95.8 & 89.8 & 83.5 \\
it 43 p & 84.6 & 92.3 & 95.5 & 94.5 & 92.4 & 94.3 & 94.1 & 91.6 \\
it 38 p & 90.4 & 92.1 & 94.3 & 94.6 & 93.8 & 93.4 & 89.3 & 76.9 \\
it 38 np & 89.7 & 92.6 & 94.5 & 94.8 & 94.0 & 93.7 & 88.8 & 77.0 \\
it 1 p & 89.9 & 95.4 & 97.2 & 98.3 & 97.4 & 98.3 & 87.5 & 85.8 \\
it 26 p & 88.3 & 95.3 & 98.7 & 98.4 & 98.9 & 98.4 & 91.0 & 90.2 \\
it 44 p & 87.2 & 91.9 & 94.3 & 96.2 & 93.5 & 95.7 & 89.9 & 83.7 \\
it 26 np & 88.9 & 95.5 & 98.6 & 98.4 & 98.9 & 98.4 & 90.5 & 90.2 \\
it 1 np & 89.4 & 95.7 & 97.2 & 98.3 & 97.4 & 98.2 & 87.4 & 85.6 \\
\end{tabular}



## XLM Results

In [12]:
# Unrounded MSE metrics of the xlm runs, one row per dataset.
pd.DataFrame(res_mse_xlm).T

Unnamed: 0,mse skip,mse firstfix dur,mse firstrun dur,mse dur,mse firstrun nfix,mse nfix,mse refix,mse reread
en 57 np,1270.764838,167.737971,50.778973,38.109819,48.09559,31.998088,703.770883,1025.670178
en 49 np,1309.767798,205.65639,133.99713,68.094334,148.003922,136.567917,555.26832,663.641434
it 26 p,767.759458,61.206044,7.577573,11.213895,7.152116,10.180172,608.796318,555.939288
en 6 np,862.288953,63.599807,40.213116,80.702182,150.806509,113.195927,956.231122,1229.431873
en 57 p,1337.511035,158.279008,47.147339,35.792365,43.867191,29.636941,683.472897,1016.57442
it 38 np,677.688998,171.844495,103.448839,119.294678,111.591663,166.879243,799.21922,1695.120044
it 26 np,786.655841,75.333921,12.44749,17.735952,12.036681,16.576627,714.494339,722.975598
it 43 p,1097.647626,146.86917,60.97436,98.131592,141.42112,95.740025,385.269154,590.5429
en 98 np,625.520605,85.01949,66.749213,85.90805,183.159279,92.367079,646.508899,1061.566541
it 44 p,997.764673,178.749805,99.863415,76.85969,115.175173,84.539333,750.892014,1284.001126


In [13]:
# Unrounded "acc" metrics (100 - MAE) of the xlm runs, one row per dataset.
pd.DataFrame(res_mae_xlm).T

Unnamed: 0,acc skip,acc firstfix dur,acc firstrun dur,acc dur,acc firstrun nfix,acc nfix,acc refix,acc reread
en 57 np,83.399261,92.837086,96.714246,97.402672,96.772402,97.636231,91.252589,87.567564
en 49 np,83.173832,92.235203,94.058708,96.355363,93.207483,93.958837,92.66467,91.227933
it 26 p,90.711237,96.345626,99.007848,98.808439,99.141455,98.806734,92.151036,92.426487
en 6 np,88.543914,96.096418,97.273891,96.21085,93.63892,95.402297,87.73969,84.241499
en 57 p,84.068332,93.05534,96.909855,97.542775,97.0092,97.791785,91.408952,87.969998
it 38 np,90.338556,93.053664,94.981744,95.228799,94.427576,94.208967,89.724246,78.681989
it 26 np,89.618568,95.868273,98.71704,98.450852,98.898129,98.453988,91.109473,90.859968
it 43 p,86.652954,93.905961,96.562402,95.684322,93.986192,95.604116,94.234101,92.182936
en 98 np,91.568076,95.171344,96.190196,96.00109,92.711591,95.697368,91.481723,86.431977
it 44 p,87.762856,92.920328,95.071892,96.633848,94.394463,96.291843,89.881325,84.358306


In [14]:
# Print the xlm MSE table as a LaTeX tabular (one decimal place).
process_to_present(res_mse_xlm)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & mse skip & mse firstfix dur & mse firstrun dur & mse dur & mse firstrun nfix & mse nfix & mse refix & mse reread \\
en 57 np & 1270.8 & 167.7 & 50.8 & 38.1 & 48.1 & 32.0 & 703.8 & 1025.7 \\
en 49 np & 1309.8 & 205.7 & 134.0 & 68.1 & 148.0 & 136.6 & 555.3 & 663.6 \\
it 26 p & 767.8 & 61.2 & 7.6 & 11.2 & 7.2 & 10.2 & 608.8 & 555.9 \\
en 6 np & 862.3 & 63.6 & 40.2 & 80.7 & 150.8 & 113.2 & 956.2 & 1229.4 \\
en 57 p & 1337.5 & 158.3 & 47.1 & 35.8 & 43.9 & 29.6 & 683.5 & 1016.6 \\
it 38 np & 677.7 & 171.8 & 103.4 & 119.3 & 111.6 & 166.9 & 799.2 & 1695.1 \\
it 26 np & 786.7 & 75.3 & 12.4 & 17.7 & 12.0 & 16.6 & 714.5 & 723.0 \\
it 43 p & 1097.6 & 146.9 & 61.0 & 98.1 & 141.4 & 95.7 & 385.3 & 590.5 \\
en 98 np & 625.5 & 85.0 & 66.7 & 85.9 & 183.2 & 92.4 & 646.5 & 1061.6 \\
it 44 p & 997.8 & 178.7 & 99.9 & 76.9 & 115.2 & 84.5 & 750.9 & 1284.0 \\
en 49 p & 1354.3 & 202.0 & 130.5 & 63.5 & 144.9 & 131.7 & 552.6 & 6

In [15]:
# Print the xlm "acc" (100 - MAE) table as a LaTeX tabular (one decimal place).
process_to_present(res_mae_xlm)

\begin{tabular}{lp{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}p{1.1cm}}
 & acc skip & acc firstfix dur & acc firstrun dur & acc dur & acc firstrun nfix & acc nfix & acc refix & acc reread \\
en 57 np & 83.4 & 92.8 & 96.7 & 97.4 & 96.8 & 97.6 & 91.3 & 87.6 \\
en 49 np & 83.2 & 92.2 & 94.1 & 96.4 & 93.2 & 94.0 & 92.7 & 91.2 \\
it 26 p & 90.7 & 96.3 & 99.0 & 98.8 & 99.1 & 98.8 & 92.2 & 92.4 \\
en 6 np & 88.5 & 96.1 & 97.3 & 96.2 & 93.6 & 95.4 & 87.7 & 84.2 \\
en 57 p & 84.1 & 93.1 & 96.9 & 97.5 & 97.0 & 97.8 & 91.4 & 88.0 \\
it 38 np & 90.3 & 93.1 & 95.0 & 95.2 & 94.4 & 94.2 & 89.7 & 78.7 \\
it 26 np & 89.6 & 95.9 & 98.7 & 98.5 & 98.9 & 98.5 & 91.1 & 90.9 \\
it 43 p & 86.7 & 93.9 & 96.6 & 95.7 & 94.0 & 95.6 & 94.2 & 92.2 \\
en 98 np & 91.6 & 95.2 & 96.2 & 96.0 & 92.7 & 95.7 & 91.5 & 86.4 \\
it 44 p & 87.8 & 92.9 & 95.1 & 96.6 & 94.4 & 96.3 & 89.9 & 84.4 \\
en 49 p & 83.5 & 92.3 & 94.1 & 96.5 & 93.3 & 94.1 & 92.6 & 91.2 \\
it 1 p & 90.6 & 96.4 & 97.8 & 98.7 & 98.1 & 98.7 & 88.9 