In [1]:
import csv
import re
import pathlib

epoch_pattern = re.compile(r"^starting epoch:\s*(\d+)")
starting_pattern = re.compile(r"^Starting training.*")
train_metrics_pattern = re.compile(r"Train -\s+loss:\s*([\d.]+)\s+top-1:\s*([\d.]+)\s+top-5:\s*([\d.]+)\s+top-10:\s*([\d.]+)")
eval_metrics_pattern = re.compile(r"Eval -\s+loss:\s*([\d.]+)\s+top-1:\s*([\d.]+)\s+top-5:\s*([\d.]+)\s+top-10:\s*([\d.]+)")

name_var_pattern = re.compile(r"-?((?P<varname>[a-zA-Z\-_]+)=(?P<value>\-?\d+\.?\d*(e\-?\d+)?))")
name_pattern = re.compile(r"^(?P<exp_group>[\d]+)?\-?(?P<Layer>.+)(?P<Position>Pre|Post|Both)(?P<DyTfn>.*tanh)?(?P<Vars>(([a-zA-Z\-_]+)=([\d\.e\-]+))*)$")


def parse_name(name):
    out = {"exp_group":1, 'lr':1e-4, 'dim_hidden':256}
    search = name_pattern.search(name)
    if search is None:
        out["Layer"] = "Identity"
        out["Position"] = "Both"
        return out
    out["Layer"] = search.group("Layer")
    if out["Layer"]=="DyT":
        out["Alpha"] = 0.5
        out["DyTfn"] = "Tanh"
    out["Position"] = search.group("Position")
    if search.group("DyTfn") is not None:
        out["DyTfn"] = search.group("DyTfn")
    if search.group("exp_group") is not None:
        out["exp_group"] = int(search.group("exp_group"))
    out.update({e[1]:float(e[2]) for e in name_var_pattern.findall(search.group("Vars"))})
    return out

def read_log(folder):
    data_rows = []
    current_epoch = None
    train_metrics = {}
    eval_metrics = {}
    english = None
    french = None
    current_model = folder.name
    smallest_epoch = None
    
    with open(folder/"training_logs.log", "r", encoding="utf-8") as f:
        current_model_data = []
        for line in f.readlines():
            line = line.strip()
            
            if match := starting_pattern.match(line):
                continue

            if match := epoch_pattern.match(line):
                if smallest_epoch is None:
                    smallest_epoch = int(match.group(1))
                current_epoch = int(match.group(1))-smallest_epoch
                continue

            if match := train_metrics_pattern.match(line):
                train_metrics = {
                    "Train Loss": float(match.group(1)),
                    "Train Top-1": float(match.group(2)),
                    "Train Top-5": float(match.group(3)),
                    "Train Top-10": float(match.group(4)),
                }
                continue

            if match := eval_metrics_pattern.match(line):
                eval_metrics = {
                    "Eval Loss": float(match.group(1)),
                    "Eval Top-1": float(match.group(2)),
                    "Eval Top-5": float(match.group(3)),
                    "Eval Top-10": float(match.group(4)),
                }
                continue

            if english is None:
                english = line
                continue
            elif french is None:
                french = line
                row = {
                    **parse_name(current_model),
                    "Exp_full_name": current_model,
                    "Epoch": current_epoch,
                    "Ground Truth": english,
                    "Prediction": french,
                    **train_metrics,
                    **eval_metrics,
                }
                current_model_data.append(row)
                english = None
                french = None
                continue

    data_rows.extend(current_model_data)
    current_model_data = []
    return data_rows



In [2]:
runs = [e for e in pathlib.Path("logs").glob("*") if e.is_dir()]

In [3]:
logs = []
for e in runs: 
    logs += read_log(e)

In [4]:
data = {k:[] for e in logs for k in e.keys()}

for e in logs:
    for k in data.keys():
        data[k].append(e.get(k))


In [5]:
import pandas as pd

In [6]:
cols = ["Layer","Position","Epoch","Train Loss","Eval Loss","Alpha","DyTfn","exp_group","lr","dim_hidden","Exp_full_name","Ground Truth","Prediction","Train Top-1","Train Top-5","Train Top-10","Eval Top-1","Eval Top-5","Eval Top-10"]

df = pd.DataFrame(data, columns=cols)
df.to_csv("out.csv", index=False)

In [7]:
df.describe()

Unnamed: 0,Epoch,Train Loss,Eval Loss,Alpha,exp_group,lr,dim_hidden,Train Top-1,Train Top-5,Train Top-10,Eval Top-1,Eval Top-5,Eval Top-10
count,280.0,280.0,280.0,190.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0
mean,2.0,3.165,3.44925,0.472368,1.732143,0.00012,269.714286,0.459036,0.645607,0.701429,0.451393,0.625536,0.67825
std,1.416746,1.416747,2.252221,0.295177,0.835826,8.6e-05,57.746732,0.190482,0.192012,0.177365,0.206866,0.226862,0.217559
min,0.0,1.16,1.29,0.05,1.0,2.5e-05,256.0,0.09,0.26,0.33,0.03,0.07,0.09
25%,1.0,2.0075,1.93,0.5,1.0,0.0001,256.0,0.34,0.52,0.59,0.3175,0.475,0.545
50%,2.0,2.78,2.68,0.5,1.0,0.0001,256.0,0.515,0.7,0.75,0.52,0.71,0.76
75%,3.0,4.1325,4.4525,0.5,2.25,0.0001,256.0,0.61,0.8,0.85,0.62,0.81,0.85
max,4.0,5.89,14.88,1.0,3.0,0.0004,512.0,0.73,0.91,0.94,0.71,0.89,0.92


In [8]:
import numpy as np

In [None]:
df.loc[df.query("Epoch==4 & exp_group==3 & Layer=='DyT' & Position=='Pre'").groupby(["DyTfn"])['Eval Loss'].idxmin()]

In [None]:
df.loc[df.query("Epoch==4 & exp_group==2 & Layer=='DyT'").groupby(["DyTfn"])['Eval Loss'].idxmin()]


Unnamed: 0,Layer,Position,Epoch,Train Loss,Eval Loss,Alpha,DyTfn,exp_group,lr,dim_hidden,Exp_full_name,Ground Truth,Prediction,Train Top-1,Train Top-5,Train Top-10,Eval Top-1,Eval Top-5,Eval Top-10
254,DyT,Pre,4,1.19,1.34,0.5,Hardtanh,3,0.0001,256.0,3-DyTPreHardtanhAlpha-dim_hidden=512,It's more trouble than it's worth.,C'est plus grand que ça en vaut la peine.,0.73,0.9,0.93,0.7,0.88,0.91
259,DyT,Pre,4,1.19,1.34,0.5,LeakyHardtanh,3,0.0001,256.0,3-DyTPreLeakyHardtanhAlpha-dim_hidden=512,It's more trouble than it's worth.,C'est plus grand que ça en vaut la peine.,0.73,0.9,0.93,0.7,0.88,0.91
234,DyT,Pre,4,1.18,1.31,0.5,Tanh,3,0.0004,256.0,3-DyTPre-lr=0.0004,We're not really sure.,Nous ne sommes pas vraiment sûr.,0.72,0.9,0.93,0.7,0.89,0.92


In [10]:
a = list(df.query("Epoch==4").groupby(["Layer"]).agg({"Eval Loss": np.min}).values.flatten())

  a = list(df.query("Epoch==4").groupby(["Layer"]).agg({"Eval Loss": np.min}).values.flatten())


In [11]:
df[df["Eval Loss"].apply(lambda x : x in a)]

Unnamed: 0,Layer,Position,Epoch,Train Loss,Eval Loss,Alpha,DyTfn,exp_group,lr,dim_hidden,Exp_full_name,Ground Truth,Prediction,Train Top-1,Train Top-5,Train Top-10,Eval Top-1,Eval Top-5,Eval Top-10
14,BatchNorm,Pre,4,1.7,1.69,,,1,0.0001,256.0,BatchNormPre,We're not really sure.,Nous ne sommes pas vraiment sûre.,0.65,0.84,0.88,0.65,0.84,0.88
109,Identity,Both,4,1.74,1.71,,,1,0.0001,256.0,Identity,We're not really sure.,Nous ne sommes pas vraiment sûre.,0.64,0.83,0.87,0.65,0.84,0.87
124,Layernorm,Pre,4,1.71,1.7,,,1,0.0001,256.0,LayernormPre,We're not really sure.,Nous ne sommes pas vraiment sûres.,0.65,0.84,0.88,0.65,0.84,0.88
139,RMSNorm,Pre,4,1.71,1.7,,,1,0.0001,256.0,RMSNormPre,We're not really sure.,Nous ne sommes pas vraiment sûres.,0.65,0.84,0.88,0.65,0.84,0.88
162,Layernorm,Post,2,1.77,1.71,,,2,0.0002,256.0,2-LayernormPost-lr=0.0002,I think this isn't correct.,Je ne pense pas que ce soit correct.,0.64,0.83,0.87,0.65,0.84,0.87
227,DyT,Pre,2,1.74,1.71,0.5,Tanh,3,0.0002,256.0,3-DyTPre-lr=0.0002,I think this isn't correct.,Je ne pense pas que ce soit correct.,0.64,0.83,0.87,0.64,0.84,0.88
234,DyT,Pre,4,1.18,1.31,0.5,Tanh,3,0.0004,256.0,3-DyTPre-lr=0.0004,We're not really sure.,Nous ne sommes pas vraiment sûr.,0.72,0.9,0.93,0.7,0.89,0.92
269,Layernorm,Pre,4,1.16,1.29,,,3,0.0004,256.0,3-LayernormPre-lr=0.0004,We're not really sure.,Nous ne sommes pas vraiment sûrs.,0.73,0.91,0.94,0.71,0.89,0.92


In [12]:
df.query("Epoch==4 & exp_group==1").groupby(["Layer", "Position"]).agg({"Eval Loss": np.min})

  df.query("Epoch==4 & exp_group==1").groupby(["Layer", "Position"]).agg({"Eval Loss": np.min})


Unnamed: 0_level_0,Unnamed: 1_level_0,Eval Loss
Layer,Position,Unnamed: 2_level_1
BatchNorm,Both,1.73
BatchNorm,Post,1.74
BatchNorm,Pre,1.69
DyT,Both,3.23
DyT,Post,3.12
DyT,Pre,1.78
Identity,Both,1.71
Layernorm,Both,1.77
Layernorm,Post,1.77
Layernorm,Pre,1.7
