In [1]:
import sys
import json
from copy import deepcopy
from os.path import join
from os.path import isfile
from os.path import abspath
from os.path import basename
from icecream import ic
from glob import glob
from tabulate import tabulate

sys.path.insert(0, "../scs")

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats
from scipy.signal import savgol_filter
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import ParameterGrid

In [2]:
def generate_param_grid():
    """Build the hyperparameter sweep as a scikit-learn ``ParameterGrid``.

    Every hyperparameter is first given a single fixed value; three of them
    (``encoder_blocks``, ``feed_forward_units`` and
    ``feed_forward_activation``) are then replaced by the lists of values to
    sweep over, giving 8 * 3 * 8 = 192 combinations.

    Returns
    -------
    ParameterGrid
        Grid over the dictionary mapping each hyperparameter name to the list
        of values it may take.
    """
    # Values shared by every run. The three swept keys get a placeholder here
    # so that they keep their position in the dictionary.
    fixed = dict(
        phase_range_start=-20,
        phase_range_end=50,
        ptp_range_start=0.1,
        ptp_range_end=100,
        wvl_range_start=4500,
        wvl_range_end=7000,
        train_frac=0.50,
        noise_scale=0.1,
        spike_scale=1.0,
        max_spikes=3,
        num_quantiles=9,
        low_outlier=0.01,
        high_outlier=0.99,
        PE_period=10_000,
        lr0=1e-6,
        encoder_blocks=1,
        encoder_heads=8,
        encoder_key_dim=8,
        encoder_proj_dim=2048,
        encoder_dropout_attention=0,
        encoder_dropout_projection=0,
        feed_forward_dropout=0,
        feed_forward_units=[128],
        feed_forward_activation="relu",
    )

    # All regularization strengths are zero: kernel/bias/activity (k/b/a)
    # regularizers, L1 and L2, for each encoder sub-layer and the feed-forward
    # head.
    reg_kinds = ("kreg", "breg", "areg")
    norms = ("l1", "l2")
    for site in ("att", "proj1", "proj2"):
        for kind in reg_kinds:
            for norm in norms:
                fixed[f"encoder_{kind}_{norm}_{site}"] = 0
    for kind in reg_kinds:
        for norm in norms:
            fixed[f"feed_forward_{kind}_{norm}"] = 0

    fixed["epochs"] = 10_000
    fixed["batch_size"] = 8

    # ParameterGrid expects a list of candidate values per key.
    grid = {name: [value] for name, value in fixed.items()}

    # Hyperparameters that are actually varied in this sweep.
    grid.update(
        encoder_blocks=[1, 2, 3, 4, 5, 6, 7, 8],
        feed_forward_units=[[128], [1024, 1024, 1024], [1024, 256]],
        feed_forward_activation=[
            "relu",
            "sigmoid",
            "tanh",
            "elu",
            "leaky_relu",
            "relu6",
            "linear",
            "silu",
        ],
    )

    return ParameterGrid(grid)

In [3]:
# Build the full hyperparameter sweep and display how many combinations it contains.
PG = generate_param_grid()
len(PG)

192

In [4]:
# Directory whose entries are scanned for trained-model results. Kept as a
# named constant so the notebook can be pointed at another batch in one place.
MODELS_DIR = "/lustre/lrspec/users/2649/models/batch09"

# glob() returns matches in arbitrary order; sort for a reproducible listing.
model_dirs = sorted(glob(join(MODELS_DIR, "*")))

In [5]:
# Load metrics, hyperparameters and training history for every finished run
# found in `model_dirs`.
#
# Rows of `df_metrics` and `df_hp` are indexed by the integer taken from the
# second "_"-separated field of the run directory name -- presumably the run's
# position in the ParameterGrid; TODO confirm against the training script.
REQUIRED_FILES = ("metrics.json", "hyperparameters.json", "history.json")

metrics_list = []
hp_list = []
num_epochs_trained_list = []
final_lr_list = []

for model_dir in model_dirs:
    # Skip anything that is not a completely written run. Checking this before
    # parsing the directory name means stray entries in the batch directory
    # are ignored instead of crashing the cell, and requiring all three files
    # keeps the four result lists in lockstep.
    if not all(isfile(join(model_dir, fname)) for fname in REQUIRED_FILES):
        continue

    PG_index = int(basename(model_dir).split("_")[1])

    # Read everything first so a malformed file cannot leave the lists
    # partially updated for this run.
    with open(join(model_dir, "metrics.json"), "r") as f:
        metrics = json.load(f)
    with open(join(model_dir, "hyperparameters.json"), "r") as f:
        hp = json.load(f)
    with open(join(model_dir, "history.json"), "r") as f:
        history = json.load(f)

    # Flatten the nested train/test metrics into one row.
    metrics = {
        "trn_ls": metrics["trn_set_metrics"]["loss"],
        "trn_ca": metrics["trn_set_metrics"]["ca"],
        "trn_f1": metrics["trn_set_metrics"]["f1"],
        "tst_ls": metrics["tst_set_metrics"]["loss"],
        "tst_ca": metrics["tst_set_metrics"]["ca"],
        "tst_f1": metrics["tst_set_metrics"]["f1"],
    }
    metrics = pd.DataFrame(metrics, index=[PG_index])
    metrics_list.append(metrics)

    # Hyperparameter values are stringified so that list-valued settings
    # (e.g. feed_forward_units) fit in a single cell and can be compared.
    hp = {str(key): str(val) for key, val in hp.items()}
    hp = pd.DataFrame(hp, index=[PG_index])
    hp_list.append(hp)

    num_epochs_trained = len(history["epoch"])
    num_epochs_trained_list.append(num_epochs_trained)
    final_lr = history["lr"][-1]
    final_lr_list.append(final_lr)

# pd.concat([]) would fail with an opaque "No objects to concatenate".
if not metrics_list:
    raise ValueError(
        "No finished runs found in model_dirs: each run directory must contain "
        + ", ".join(REQUIRED_FILES)
    )

df_metrics = pd.concat(metrics_list)
df_hp = pd.concat(hp_list)
# The lists were filled in the same order as the rows, so positional
# assignment lines up with the concatenated frame.
df_metrics["num_epochs_trained"] = num_epochs_trained_list
df_metrics["final_lr"] = final_lr_list

In [6]:
def avg_stuff(df_metrics, df_hp, col):
    """Print mean ± std of every metric, grouped by one hyperparameter.

    Parameters
    ----------
    df_metrics : pandas.DataFrame
        One row per run with the columns ``trn_f1``, ``tst_f1``, ``trn_ca``,
        ``tst_ca``, ``trn_ls``, ``tst_ls``, ``num_epochs_trained`` and
        ``final_lr``.
    df_hp : pandas.DataFrame
        Hyperparameters of the same runs, row-aligned with ``df_metrics``.
    col : str
        Column of ``df_hp`` to group by.

    Returns
    -------
    None
        The table is printed. Nothing is printed when ``col`` takes exactly
        one distinct value, since there is nothing to compare.
    """
    values, n_runs = np.unique(df_hp[col], return_counts=True)
    if len(values) == 1:
        return

    def summarize(series, spec):
        # np.std defaults to ddof=0, i.e. the population standard deviation.
        return f"{np.mean(series):{spec}} ± {np.std(series):{spec}}"

    # (metric column, table header, float format) in display order.
    layout = [
        ("trn_f1", "Trn F1", ".2f"),
        ("tst_f1", "Tst F1", ".2f"),
        ("trn_ca", "Trn Acc", ".2f"),
        ("tst_ca", "Tst Acc", ".2f"),
        ("trn_ls", "Trn Loss", ".2f"),
        ("tst_ls", "Tst Loss", ".2f"),
        ("num_epochs_trained", "Num Epochs Trained", ".2f"),
        ("final_lr", "Final lr", ".2e"),
    ]
    headers = ["Param", "Count"] + [title for _, title, _ in layout]

    rows = []
    for value, n in zip(values, n_runs):
        # Runs that used this particular setting of the hyperparameter.
        selected = df_metrics[df_hp[col] == value]
        cells = [summarize(selected[metric], spec) for metric, _, spec in layout]
        rows.append([value, n] + cells)

    print(col)
    print(tabulate(rows, headers=headers))
    print()


def avg_all(df_metrics, df_hp):
    """Run ``avg_stuff`` once for every hyperparameter column of ``df_hp``.

    Parameters
    ----------
    df_metrics : pandas.DataFrame
        Per-run metrics, passed through unchanged to ``avg_stuff``.
    df_hp : pandas.DataFrame
        Per-run hyperparameters; each column is summarized in turn.
    """
    hp_names = df_hp.columns
    for hp_name in hp_names:
        avg_stuff(df_metrics, df_hp, hp_name)
        
# Print one summary table per hyperparameter that takes more than one distinct
# value across the loaded runs (single-valued hyperparameters are skipped).
avg_all(df_metrics, df_hp)

encoder_blocks
  Param    Count  Trn F1       Tst F1       Trn Acc      Tst Acc      Trn Loss     Tst Loss     Num Epochs Trained    Final lr
-------  -------  -----------  -----------  -----------  -----------  -----------  -----------  --------------------  -------------------
      1       21  0.98 ± 0.01  0.44 ± 0.02  0.98 ± 0.01  0.76 ± 0.01  0.11 ± 0.03  0.86 ± 0.04  106.95 ± 46.17        7.50e-07 ± 0.00e+00
      2        6  0.99 ± 0.01  0.44 ± 0.02  0.99 ± 0.01  0.77 ± 0.01  0.09 ± 0.02  0.85 ± 0.03  77.00 ± 10.36         7.50e-07 ± 0.00e+00

feed_forward_activation
Param         Count  Trn F1       Tst F1       Trn Acc      Tst Acc      Trn Loss     Tst Loss     Num Epochs Trained    Final lr
----------  -------  -----------  -----------  -----------  -----------  -----------  -----------  --------------------  -------------------
elu               5  0.98 ± 0.01  0.43 ± 0.01  0.98 ± 0.01  0.76 ± 0.01  0.13 ± 0.02  0.87 ± 0.02  86.80 ± 34.52         7.50e-07 ± 0.00e+00
leaky_r