In [40]:
import pandas as pd
import json
import os
from collections import defaultdict
import configparser

In [10]:
# Runs are looked up under ../saved_models/torch relative to the current working
# directory; every entry there is treated as a run directory (presumably one per
# training run -- confirm against the training script).
results_dir = os.path.join(os.getcwd(), os.pardir, "saved_models", "torch")
# os.listdir() returns entries in arbitrary order, so sort them to keep the row
# order of the tables built from this list stable across machines and re-runs.
results_files = [os.path.join(results_dir, f, "results.json") for f in sorted(os.listdir(results_dir))]

In [83]:
# Collect one record per run: the metrics stored in results.json plus the model
# name taken from the path and the hyperparameters stored in the sibling
# config.txt.
results = []
for file in results_files:

    # Entries without a results.json are skipped.
    if not os.path.isfile(file):
        continue

    # Read the metrics and release the file handle right away instead of
    # keeping it open while the config is parsed.
    with open(file, "r") as f:
        res = json.load(f)

    # The model name is carried in the path as an "&"-separated token that
    # contains "model_version"; its value is whatever follows the last "=".
    version_tokens = [x for x in file.split("&") if "model_version" in x]
    if not version_tokens:
        raise ValueError("no 'model_version' token found in path: " + file)
    res["model"] = version_tokens[0].split("=")[-1]

    # config.read() silently ignores a missing file, which would only surface
    # later as an opaque KeyError: 'param'. Opening the file explicitly and
    # using read_file() fails immediately and names the missing path.
    # The config path is built from the directory of results.json rather than
    # with str.replace(), which would also rewrite a matching directory name.
    config_path = os.path.join(os.path.dirname(file), "config.txt")
    config = configparser.ConfigParser()
    with open(config_path, "r") as config_file:
        config.read_file(config_file)

    # Values are kept as the raw strings stored in the [param] section.
    for param in ["lemmatize", "balance_dataset", "shuffle_data", "epochs", "batch_size", "learning_rate", "max_len", "bfloat16"]:
        res[param] = config["param"][param]

    results.append(res)

In [121]:
# Metric columns to extract from each result: the three overall metrics first,
# then the same three metrics for each of the classes "0", "1" and "2".
# Per-class headers are spelled "<class>_<metric>".
headers = ["precision", "recall", "f1-score"] + [
    label + "_" + metric
    for label in ("0", "1", "2")
    for metric in ("precision", "recall", "f1-score")
]

# Long name -> abbreviated column name used in the result table.
# Per-class keys are spelled "<class>-<metric>" (dash, not underscore).
mapping = dict(
    [("precision", "p"), ("recall", "r"), ("f1-score", "f1")]
    + [
        (label + "-" + metric, label + "-" + short)
        for label in ("0", "1", "2")
        for metric, short in (("precision", "p"), ("recall", "r"), ("f1-score", "f1"))
    ]
    + [
        ("model", "model"),
        ("lemmatize", "l"),
        ("balance_dataset", "b"),
        ("shuffle_data", "s"),
        ("epochs", "epoch"),
        ("batch_size", "batch"),
        ("learning_rate", "lr"),
        ("max_len", "len"),
        ("bfloat16", "bf16"),
    ]
)

In [128]:
# Column-oriented accumulator: abbreviated column name -> one value per result.
result_table = defaultdict(list)

for result in results:
    result_table[mapping["model"]].append(result["model"])

    # Hyperparameters, in table order. Flagged entries keep only the first
    # character of the stored value; the others are stored unchanged.
    for key, first_char_only in (
        ("lemmatize", True),
        ("balance_dataset", True),
        ("shuffle_data", True),
        ("epochs", False),
        ("batch_size", False),
        ("learning_rate", False),
        ("max_len", False),
        ("bfloat16", True),
    ):
        value = result[key]
        result_table[mapping[key]].append(value[0] if first_char_only else value)

    # Metrics: a header without "_" is read from the "macro avg" entry, a
    # "<class>_<metric>" header from that class's entry. Scores are rounded to
    # three decimals.
    for header in headers:
        parts = header.split("_")
        if len(parts) == 1:
            column = parts[0]
            score = result["macro avg"][column]
        else:
            label, metric = parts
            score = result[label][metric]
            column = label + "-" + metric
        result_table[mapping[column]].append(round(score, 3))

In [129]:
# Show the accumulated columns (abbreviated column name -> list of values).
result_table

defaultdict(list,
            {'model': ['albert-base-v2',
              'albert-base-v2',
              'albert-base-v2',
              'bert-base-uncased',
              'albert-base-v2'],
             'l': ['F', 'F', 'F', 'F', 'F'],
             'b': ['F', 'F', 'T', 'T', 'F'],
             's': ['T', 'F', 'F', 'F', 'F'],
             'epoch': ['5', '5', '5', '5', '5'],
             'batch': ['8', '8', '8', '8', '8'],
             'lr': ['1e-5', '1e-5', '1e-5', '1e-5', '5e-6'],
             'len': ['50', '50', '100', '50', '100'],
             'bf16': ['T', 'T', 'T', 'T', 'T'],
             'p': [0.82, 0.814, 0.829, 0.818, 0.791],
             'r': [0.822, 0.827, 0.82, 0.79, 0.835],
             'f1': [0.817, 0.82, 0.817, 0.789, 0.807],
             '0-p': [0.89, 0.83, 0.865, 0.934, 0.842],
             '0-r': [0.735, 0.816, 0.815, 0.71, 0.795],
             '0-f1': [0.805, 0.823, 0.839, 0.807, 0.818],
             '1-p': [0.833, 0.862, 0.864, 0.832, 0.878],
             '1-r': [0.89

In [130]:
# Build the DataFrame from the column lists; each key of result_table becomes
# a column.
df = pd.DataFrame(result_table)

In [131]:
# Display the full table of settings and metrics.
df

Unnamed: 0,model,l,b,s,epoch,batch,lr,len,bf16,p,...,f1,0-p,0-r,0-f1,1-p,1-r,1-f1,2-p,2-r,2-f1
0,albert-base-v2,F,F,T,5,8,1e-05,50,T,0.82,...,0.817,0.89,0.735,0.805,0.833,0.89,0.86,0.736,0.842,0.786
1,albert-base-v2,F,F,F,5,8,1e-05,50,T,0.814,...,0.82,0.83,0.816,0.823,0.862,0.851,0.857,0.751,0.815,0.781
2,albert-base-v2,F,T,F,5,8,1e-05,100,T,0.829,...,0.817,0.865,0.815,0.839,0.864,0.687,0.766,0.758,0.958,0.846
3,bert-base-uncased,F,T,F,5,8,1e-05,50,T,0.818,...,0.789,0.934,0.71,0.807,0.832,0.687,0.753,0.689,0.973,0.806
4,albert-base-v2,F,F,F,5,8,5e-06,100,T,0.791,...,0.807,0.842,0.795,0.818,0.878,0.827,0.852,0.654,0.884,0.752


In [135]:
# Only the model name and the metric columns are exported to LaTeX; the
# hyperparameter columns and the index are left out.
latex_columns = [
    'model',
    'p', 'r', 'f1',
    '0-p', '0-r', '0-f1',
    '1-p', '1-r', '1-f1',
    '2-p', '2-r', '2-f1',
]
latex_source = df.to_latex(columns=latex_columns, index=False)
print(latex_source)

\begin{tabular}{lrrrrrrrrrrrr}
\toprule
             model &      p &      r &     f1 &    0-p &    0-r &   0-f1 &    1-p &    1-r &   1-f1 &    2-p &    2-r &   2-f1 \\
\midrule
    albert-base-v2 &  0.820 &  0.822 &  0.817 &  0.890 &  0.735 &  0.805 &  0.833 &  0.890 &  0.860 &  0.736 &  0.842 &  0.786 \\
    albert-base-v2 &  0.814 &  0.827 &  0.820 &  0.830 &  0.816 &  0.823 &  0.862 &  0.851 &  0.857 &  0.751 &  0.815 &  0.781 \\
    albert-base-v2 &  0.829 &  0.820 &  0.817 &  0.865 &  0.815 &  0.839 &  0.864 &  0.687 &  0.766 &  0.758 &  0.958 &  0.846 \\
 bert-base-uncased &  0.818 &  0.790 &  0.789 &  0.934 &  0.710 &  0.807 &  0.832 &  0.687 &  0.753 &  0.689 &  0.973 &  0.806 \\
    albert-base-v2 &  0.791 &  0.835 &  0.807 &  0.842 &  0.795 &  0.818 &  0.878 &  0.827 &  0.852 &  0.654 &  0.884 &  0.752 \\
\bottomrule
\end{tabular}



In [133]:
# List the column names of df.
df.columns

Index(['model', 'l', 'b', 's', 'epoch', 'batch', 'lr', 'len', 'bf16', 'p', 'r',
       'f1', '0-p', '0-r', '0-f1', '1-p', '1-r', '1-f1', '2-p', '2-r', '2-f1'],
      dtype='object')