In [1]:
import pandas as pd
import json
import os
from collections import defaultdict
import configparser
import qgrid

In [19]:
def pp(df):
    return qgrid.show_grid(df, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 60})

In [2]:
torch_results_dir = os.path.join(os.getcwd(), os.pardir, "saved_models", "torch")
torch_results_files = [os.path.join(torch_results_dir, f, "results.json") for f in os.listdir(torch_results_dir)]

new_torch_results_dir = os.path.join(os.getcwd(), os.pardir, "saved_models", "new_torch")
new_torch_results_files = [os.path.join(new_torch_results_dir, f, "results.json") for f in os.listdir(new_torch_results_dir)]

keras_results_dir = os.path.join(os.getcwd(), os.pardir, "saved_models", "keras")
keras_results_files = [os.path.join(keras_results_dir, f, "results.json") for f in os.listdir(keras_results_dir)]

keras_albert_results_dir = os.path.join(os.getcwd(), os.pardir, "saved_models", "new_keras_albert")
keras_albert_results_files = [os.path.join(keras_albert_results_dir, f, "results.json") for f in os.listdir(keras_albert_results_dir)]

In [3]:
# torch_files = []

# for f in torch_results_files:
#     if "scibert" in f:
#         sub_folder = f.split("results.json")[0]
#         sub_files = os.listdir(sub_folder)
#         print(sub_folder)
#         sub_files = [os.path.join(f, sf, "results.json") for sf in sub_files]
#         for sf in sub_files:
#             torch_files.append(sf)
#     else:
#         torch_files.append(f)

# torch_results_files = torch_files

In [4]:
def result_to_df(files, _type):
    
    type_mapping = {"torch": ["model_version", ["lemmatize", "balance_dataset", "shuffle_data", "epochs", "batch_size", "learning_rate", "max_len", "bfloat16"]], 
                "keras": ["embedding_type", ["lemmatize", "balance_dataset", "number_of_epochs", "batch_size", "learning_rate", "max_len"]]}
    model_type, params = type_mapping[_type]
    
    results = []
    for file in list(set(files)):

        if not os.path.isfile(file):
            continue
        with open(file, "r") as f:
            res = json.load(f)
            
            keys = [x.split("=")[0] for x in file.split("&")]
            
            if _type == "torch":
                res["model"] = [x for x in file.split("&") if model_type in x][0].split("=")[-1]
            elif _type == "keras":
#                 keys = [x.split("=")[0] for x in file.split("&")]
                res["model"] = [x for x in file.split("&") if model_type in x][0].split("=")[-1] if model_type in keys else ""

            config = configparser.ConfigParser()
            config.read(file.replace("results.json", "config.txt"))

            for param in params:
                res[param] = config["param"][param]
#                 print(param)

            results.append(res)
#             print(file)
#             print(res)
            # break
    
    headers = ["precision", "recall", "f1-score", "0_precision", "0_recall", "0_f1-score", "1_precision", "1_recall", "1_f1-score", "2_precision", "2_recall", "2_f1-score"]
    mapping = {"precision": "p", "recall": "r", "f1-score": "f1", "0-precision": "M-p", "0-recall": "M-r", "0-f1-score": "M-f1", "1-precision": "B-p", "1-recall": "B-r", "1-f1-score": "B-f1", "2-precision": "R-p", "2-recall": "R-r", "2-f1-score": "R-f1", "model": "model", "lemmatize": "l", "balance_dataset": "b", "shuffle_data": "s", "epochs": "epoch", "use_attention": "a", "number_of_epochs": "epoch", "batch_size": "batch", "learning_rate": "lr", "max_len": "len", "bfloat16": "bf16"}
    
    result_table = defaultdict(list)

    for result in results:
        result_table[mapping["model"]].append(result["model"])
        result_table[mapping["lemmatize"]].append(result["lemmatize"][0])
        result_table[mapping["balance_dataset"]].append(result["balance_dataset"][0])
        if _type == "torch":
            result_table[mapping["shuffle_data"]].append(result["shuffle_data"][0])
            result_table[mapping["bfloat16"]].append(result["bfloat16"][0])
            result_table[mapping["epochs"]].append(result["epochs"])
        elif _type == "keras":
            result_table[mapping["number_of_epochs"]].append(result["number_of_epochs"])
            result_table[mapping["use_attention"]].append(result["use_attention"])
        result_table[mapping["batch_size"]].append(result["batch_size"])
        result_table[mapping["learning_rate"]].append(result["learning_rate"])
        result_table[mapping["max_len"]].append(result["max_len"])
        for header in headers:
            if "_" in header:
                _class,_metric = header.split("_")
                if _type == "keras":
#                     _class = str(int(_class)+2)
                    score = result[str(int(_class)+2)][_metric]
                else:
                    score = result[_class][_metric]
                result_table[mapping[_class+"-"+_metric]].append(round(score, 3))
            else:
                _metric = header
                score = result["macro avg"][_metric]
                result_table[mapping[_metric]].append(round(score, 3))
                
    return pd.DataFrame(result_table)

In [5]:
new_torch_df = result_to_df(new_torch_results_files, "torch")
new_torch_df["model"] = new_torch_df["model"].str.replace("allenai\+", "").str.replace("_", "-")
new_torch_df = new_torch_df.sort_values(by=['model', 'len', 'l', 'b'])
new_torch_df

Unnamed: 0,model,l,b,s,bf16,epoch,batch,lr,len,p,...,f1,M-p,M-r,M-f1,B-p,B-r,B-f1,R-p,R-r,R-f1
9,albert-base-v2,F,T,F,T,5,8,1e-05,100,0.734,...,0.742,0.684,0.889,0.774,0.954,0.61,0.744,0.564,0.954,0.709
0,albert-base-v2,T,F,F,T,5,8,5e-06,100,0.82,...,0.826,0.828,0.793,0.81,0.858,0.861,0.859,0.776,0.842,0.807
4,albert-base-v2,T,T,F,T,5,8,1e-05,100,0.709,...,0.705,0.695,0.876,0.775,0.95,0.557,0.702,0.481,0.954,0.639
3,albert-base-v2,T,F,F,T,5,8,1e-05,50,0.8,...,0.81,0.837,0.787,0.811,0.853,0.845,0.849,0.711,0.838,0.77
5,albert-base-v2,T,F,T,T,5,8,1e-05,50,0.825,...,0.812,0.814,0.818,0.816,0.839,0.865,0.852,0.823,0.718,0.767
2,bert-base-cased,F,F,F,T,5,8,5e-06,100,0.817,...,0.836,0.822,0.845,0.833,0.905,0.826,0.864,0.726,0.919,0.811
6,bert-base-cased,F,T,F,T,5,8,1e-05,100,0.753,...,0.77,0.759,0.868,0.81,0.939,0.691,0.796,0.561,0.942,0.703
8,bert-base-cased,T,F,F,T,5,8,5e-06,100,0.811,...,0.828,0.84,0.831,0.836,0.893,0.835,0.863,0.7,0.892,0.784
14,bert-base-cased,T,T,F,T,5,8,1e-05,100,0.72,...,0.715,0.801,0.792,0.796,0.925,0.63,0.749,0.433,0.977,0.6
16,bert-base-cased,F,T,F,T,5,8,1e-05,50,0.745,...,0.757,0.83,0.78,0.804,0.897,0.726,0.803,0.509,0.954,0.664


### 50 vs 100

In [6]:
df_50_nl_nb = new_torch_df[(new_torch_df["l"]=="F") & (new_torch_df["b"]=="F") & (new_torch_df["len"]=="50")]
df_100_nl_nb = new_torch_df[(new_torch_df["l"]=="F") & (new_torch_df["b"]=="F") & (new_torch_df["len"]=="100")]
df_50_100_nl_nb = df_50_nl_nb[["model", "l", "b", "f1"]].merge(df_100_nl_nb[["model", "l", "b", "f1"]], suffixes=('_50', '_100'), left_on="model", right_on="model")

df_50_l_nb = new_torch_df[(new_torch_df["l"]=="T") & (new_torch_df["b"]=="F") & (new_torch_df["len"]=="50")]
df_100_l_nb = new_torch_df[(new_torch_df["l"]=="T") & (new_torch_df["b"]=="F") & (new_torch_df["len"]=="100")]
df_50_100_l_nb = df_50_l_nb[["model", "l", "b", "f1"]].merge(df_100_l_nb[["model", "l", "b", "f1"]], suffixes=('_50', '_100'), left_on="model", right_on="model")

df_50_nl_b = new_torch_df[(new_torch_df["l"]=="F") & (new_torch_df["b"]=="T") & (new_torch_df["len"]=="50")]
df_100_nl_b = new_torch_df[(new_torch_df["l"]=="F") & (new_torch_df["b"]=="T") & (new_torch_df["len"]=="100")]
df_50_100_nl_b = df_50_nl_b[["model", "l", "b", "f1"]].merge(df_100_nl_b[["model", "l", "b", "f1"]], suffixes=('_50', '_100'), left_on="model", right_on="model")

df_50_l_b = new_torch_df[(new_torch_df["l"]=="T") & (new_torch_df["b"]=="T") & (new_torch_df["len"]=="50")]
df_100_l_b = new_torch_df[(new_torch_df["l"]=="T") & (new_torch_df["b"]=="T") & (new_torch_df["len"]=="100")]
df_50_100_l_b = df_50_l_b[["model", "l", "b", "f1"]].merge(df_100_l_b[["model", "l", "b", "f1"]], suffixes=('_50', '_100'), left_on="model", right_on="model")

In [7]:
pd.concat([df_50_100_l_nb, df_50_100_nl_b, df_50_100_l_b])

Unnamed: 0,model,l_50,b_50,f1_50,l_100,b_100,f1_100
0,albert-base-v2,T,F,0.81,T,F,0.826
1,albert-base-v2,T,F,0.812,T,F,0.826
0,bert-base-cased,F,T,0.757,F,T,0.77
1,bert-base-uncased,F,T,0.732,F,T,0.785
0,bert-base-cased,T,T,0.734,T,T,0.715
1,bert-base-uncased,T,T,0.77,T,T,0.773


### Lemma vs no lemma (100) 

### Balanced vs unbalanced (50, 100)

In [8]:
df_50_l_nb

Unnamed: 0,model,l,b,s,bf16,epoch,batch,lr,len,p,...,f1,M-p,M-r,M-f1,B-p,B-r,B-f1,R-p,R-r,R-f1
3,albert-base-v2,T,F,F,T,5,8,1e-05,50,0.8,...,0.81,0.837,0.787,0.811,0.853,0.845,0.849,0.711,0.838,0.77
5,albert-base-v2,T,F,T,T,5,8,1e-05,50,0.825,...,0.812,0.814,0.818,0.816,0.839,0.865,0.852,0.823,0.718,0.767


In [9]:
df_b_nb_50_nl = df_50_nl_nb[["model", "l", "b", "f1"]].merge(df_50_nl_b[["model", "l", "b", "f1"]], suffixes=('', '_b'), left_on="model", right_on="model")

df_b_nb_50_l = df_50_l_nb[["model", "l", "b", "f1"]].merge(df_50_l_b[["model", "l", "b", "f1"]], suffixes=('', '_b'), left_on="model", right_on="model")

df_b_nb_100_nl = df_100_nl_nb[["model", "l", "b", "f1"]].merge(df_100_nl_b[["model", "l", "b", "f1"]], suffixes=('', '_b'), left_on="model", right_on="model")

df_b_nb_100_l = df_100_l_nb[["model", "l", "b", "f1"]].merge(df_100_l_b[["model", "l", "b", "f1"]], suffixes=('', '_b'), left_on="model", right_on="model")

In [10]:
pd.concat([df_b_nb_50_l, df_b_nb_100_nl, df_b_nb_100_l])

Unnamed: 0,model,l,b,f1,l_b,b_b,f1_b
0,bert-base-cased,F,F,0.836,F,T,0.77
1,bert-base-uncased,F,F,0.827,F,T,0.785
0,albert-base-v2,T,F,0.826,T,T,0.705
1,bert-base-cased,T,F,0.828,T,T,0.715
2,bert-base-uncased,T,F,0.81,T,T,0.773


In [11]:
torch_df = result_to_df(torch_results_files, "torch")
torch_df["model"] = torch_df["model"].str.replace("allenai\+", "").str.replace("_", "-")
torch_df = torch_df.sort_values(by=['model', 'len', 'l', 'b'])

In [20]:
pp(torch_df)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…

In [337]:
lemmatized_df = torch_df[(torch_df["l"]=="T") & (torch_df["b"]=="T")]
non_lemmatized_df = torch_df[(torch_df["l"]=="F") & (torch_df["b"]=="T")]

lemmatized_df[["model", "f1", "M-p", "M-r", "M-f1", "B-p", "B-r", "B-f1", "R-p", "R-r", "R-f1"]].merge(non_lemmatized_df[["model", "f1", "M-p", "M-r", "M-f1", "B-p", "B-r", "B-f1", "R-p", "R-r", "R-f1"]], left_on="model", right_on="model")

Unnamed: 0,model,f1_x,M-p_x,M-r_x,M-f1_x,B-p_x,B-r_x,B-f1_x,R-p_x,R-r_x,...,f1_y,M-p_y,M-r_y,M-f1_y,B-p_y,B-r_y,B-f1_y,R-p_y,R-r_y,R-f1_y
0,albert-base-v2,0.705,0.695,0.876,0.775,0.95,0.557,0.702,0.481,0.954,...,0.742,0.684,0.889,0.774,0.954,0.61,0.744,0.564,0.954,0.709
1,bert-base-cased,0.756,0.828,0.818,0.823,0.923,0.694,0.792,0.491,0.973,...,0.757,0.877,0.767,0.818,0.904,0.732,0.809,0.48,0.973,0.643
2,bert-base-cased,0.756,0.828,0.818,0.823,0.923,0.694,0.792,0.491,0.973,...,0.759,0.824,0.816,0.82,0.902,0.71,0.794,0.512,0.942,0.663
3,bert-base-cased,0.737,0.839,0.759,0.797,0.879,0.712,0.787,0.474,0.927,...,0.757,0.877,0.767,0.818,0.904,0.732,0.809,0.48,0.973,0.643
4,bert-base-cased,0.737,0.839,0.759,0.797,0.879,0.712,0.787,0.474,0.927,...,0.759,0.824,0.816,0.82,0.902,0.71,0.794,0.512,0.942,0.663
5,bert-base-uncased,0.769,0.775,0.843,0.808,0.922,0.704,0.799,0.557,0.95,...,0.76,0.781,0.864,0.821,0.945,0.667,0.782,0.518,0.977,0.677
6,bert-base-uncased,0.77,0.782,0.835,0.807,0.888,0.747,0.812,0.582,0.846,...,0.76,0.781,0.864,0.821,0.945,0.667,0.782,0.518,0.977,0.677


In [None]:
qgr

In [210]:
torch_df[["model", "l", "f1", "M-p", "M-r", "M-f1", "B-p", "B-r", "B-f1", "R-p", "R-r", "R-f1"]]

Unnamed: 0,model,l,f1,M-p,M-r,M-f1,B-p,B-r,B-f1,R-p,R-r,R-f1
7,albert-base-v2,F,0.742,0.684,0.889,0.774,0.954,0.61,0.744,0.564,0.954,0.709
17,albert-base-v2,T,0.826,0.828,0.793,0.81,0.858,0.861,0.859,0.776,0.842,0.807
11,albert-base-v2,T,0.705,0.695,0.876,0.775,0.95,0.557,0.702,0.481,0.954,0.639
0,albert-base-v2,T,0.81,0.837,0.787,0.811,0.853,0.845,0.849,0.711,0.838,0.77
20,albert-base-v2,T,0.812,0.814,0.818,0.816,0.839,0.865,0.852,0.823,0.718,0.767
6,bert-base-cased,F,0.843,0.865,0.815,0.839,0.887,0.858,0.872,0.733,0.923,0.817
3,bert-base-cased,F,0.757,0.877,0.767,0.818,0.904,0.732,0.809,0.48,0.973,0.643
4,bert-base-cased,T,0.805,0.826,0.825,0.825,0.897,0.794,0.843,0.631,0.911,0.746
14,bert-base-cased,T,0.756,0.828,0.818,0.823,0.923,0.694,0.792,0.491,0.973,0.653
12,bert-base-cased,F,0.759,0.824,0.816,0.82,0.902,0.71,0.794,0.512,0.942,0.663


In [215]:
unbalanced_df = torch_df[torch_df["b"] == "F"]
print(unbalanced_df[["model", "l", "p", "r", "f1", "M-p", "M-r", "M-f1", "B-p", "B-r", "B-f1", "R-p", "R-r", "R-f1"]].to_latex(index=False))

\begin{tabular}{llrrrrrrrrrrrr}
\toprule
                    model &  l &      p &      r &     f1 &    M-p &    M-r &   M-f1 &    B-p &    B-r &   B-f1 &    R-p &    R-r &   R-f1 \\
\midrule
           albert-base-v2 &  T &  0.820 &  0.832 &  0.826 &  0.828 &  0.793 &  0.810 &  0.858 &  0.861 &  0.859 &  0.776 &  0.842 &  0.807 \\
           albert-base-v2 &  T &  0.800 &  0.823 &  0.810 &  0.837 &  0.787 &  0.811 &  0.853 &  0.845 &  0.849 &  0.711 &  0.838 &  0.770 \\
           albert-base-v2 &  T &  0.825 &  0.800 &  0.812 &  0.814 &  0.818 &  0.816 &  0.839 &  0.865 &  0.852 &  0.823 &  0.718 &  0.767 \\
          bert-base-cased &  F &  0.828 &  0.865 &  0.843 &  0.865 &  0.815 &  0.839 &  0.887 &  0.858 &  0.872 &  0.733 &  0.923 &  0.817 \\
          bert-base-cased &  T &  0.785 &  0.843 &  0.805 &  0.826 &  0.825 &  0.825 &  0.897 &  0.794 &  0.843 &  0.631 &  0.911 &  0.746 \\
        bert-base-uncased &  T &  0.805 &  0.842 &  0.819 &  0.856 &  0.797 &  0.825 &  0.874 &  0

In [225]:
keras_df = result_to_df(keras_results_files, "keras")
keras_df

Unnamed: 0,model,l,b,epoch,batch,lr,len,p,r,f1,M-p,M-r,M-f1,B-p,B-r,B-f1,R-p,R-r,R-f1
0,lstm/results.json,T,F,5,64,0.99,100,0.739,0.76,0.748,0.855,0.772,0.811,0.725,0.81,0.765,0.637,0.699,0.667
1,albert/results.json,F,F,5,64,0.99,50,0.108,0.333,0.164,0.0,0.0,0.0,0.325,1.0,0.491,0.0,0.0,0.0
2,lstm/results.json,F,F,5,64,0.99,100,0.782,0.756,0.767,0.803,0.852,0.827,0.803,0.764,0.783,0.738,0.653,0.693
3,lstm/results.json,F,F,5,64,0.99,100,0.712,0.723,0.714,0.783,0.808,0.795,0.789,0.673,0.726,0.565,0.687,0.62
4,lstm/results.json,F,F,5,64,0.99,100,0.739,0.748,0.742,0.798,0.814,0.806,0.786,0.716,0.749,0.634,0.714,0.672
5,lstm/results.json,F,F,20,64,0.99,100,0.676,0.697,0.684,0.767,0.747,0.757,0.727,0.688,0.707,0.535,0.656,0.589
6,lstm/results.json,T,F,20,64,0.99,100,0.711,0.741,0.723,0.815,0.754,0.783,0.742,0.759,0.75,0.577,0.71,0.637
7,lstm/results.json,F,F,20,64,0.99,100,0.704,0.728,0.712,0.791,0.772,0.782,0.785,0.714,0.748,0.536,0.699,0.606
8,lstm/results.json,T,F,5,64,0.99,100,0.769,0.725,0.743,0.782,0.867,0.822,0.796,0.721,0.756,0.731,0.587,0.651
9,lstm/results.json,F,F,20,64,0.99,100,0.727,0.673,0.693,0.745,0.854,0.795,0.765,0.663,0.71,0.67,0.502,0.574


In [None]:
print(df.to_latex(columns=['model', 'p', 'r',
       'f1', '0-p', '0-r', '0-f1', '1-p', '1-r', '1-f1', '2-p', '2-r', '2-f1'], index=False))

In [255]:
keras_albert_df = result_to_df(keras_albert_results_files, "keras")
keras_albert_df

Unnamed: 0,model,l,b,epoch,batch,lr,len,p,r,f1,M-p,M-r,M-f1,B-p,B-r,B-f1,R-p,R-r,R-f1
0,,F,F,5,64,0.99,50,0.715,0.688,0.699,0.755,0.807,0.78,0.703,0.664,0.683,0.686,0.591,0.635
1,,F,F,5,64,0.99,100,0.806,0.765,0.781,0.782,0.895,0.834,0.846,0.674,0.751,0.79,0.726,0.757
2,,F,F,5,32,0.99,100,0.812,0.775,0.79,0.8,0.887,0.841,0.834,0.724,0.775,0.801,0.714,0.755
3,,F,F,5,32,0.99,50,0.735,0.649,0.676,0.708,0.886,0.787,0.768,0.547,0.639,0.731,0.514,0.603
4,,F,F,5,64,0.99,50,0.696,0.676,0.685,0.748,0.81,0.778,0.72,0.643,0.679,0.618,0.575,0.596
5,,F,F,5,32,0.99,50,0.681,0.698,0.687,0.787,0.694,0.738,0.649,0.762,0.701,0.607,0.637,0.621


In [206]:
from statistics import mean

In [207]:
# albert_4.conf

p0 = [68, 90, 81, 70, 83]
p1 = [95, 87, 89, 94, 91]
p2 = [56, 50, 59, 52, 51]
r0 = [89, 70, 82, 88, 83]
r1 = [61, 78, 75, 58, 71]
r2 = [95, 97, 94, 97, 95]
f0 = [77, 79, 82, 78, 83]
f1 = [74, 82, 81, 72, 79]
f2 = [71, 66, 73, 67, 66]
p = [73, 76, 76, 72, 75]
r = [82, 81, 84, 81, 83]
f = [74, 76, 78, 72, 76]

mean(p0), mean(p1), mean(p2), mean(r0), mean(r1), mean(r2), mean(f0), mean(f1), mean(f2), mean(p), mean(r), mean(f)

(78.4, 91.2, 53.6, 82.4, 68.6, 95.6, 79.8, 77.6, 68.6, 74.4, 82.2, 75.2)

In [209]:
###### albert_4_l.conf

p0 = [78, 68, 82, 71, 69]
p1 = [91, 95, 90, 94, 95]
p2 = [48, 47, 44, 46, 48]
r0 = [80, 88, 77, 84, 88]
r1 = [67, 54, 66, 58, 56]
r2 = [95, 95, 95, 96, 95]
f0 = [79, 77, 79, 77, 77]
f1 = [77, 69, 76, 72, 70]
f2 = [64, 63, 60, 62, 64]
p = [73, 70, 72, 71, 71]
r = [81, 79, 79, 79, 80]
f = [73, 70, 72, 71, 71]

mean(p0), mean(p1), mean(p2), mean(r0), mean(r1), mean(r2), mean(f0), mean(f1), mean(f2), mean(p), mean(r), mean(f)

(73.6, 93, 46.6, 83.4, 60.2, 95.2, 77.8, 72.8, 62.6, 71.4, 79.6, 71.4)