In [77]:
import os
from typing import Optional, Literal

import optuna
import json
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from deepdiff import DeepDiff
import itertools
import torch
from typing_extensions import LiteralString

from settings.config import *
from src.commons.exp_config import ExpConfig

# Make "plotly" the default template for every figure created below.
pio.templates.default = "plotly"

# Shorthand for pandas' IndexSlice helper (label-based MultiIndex slicing with .loc).
idx = pd.IndexSlice

In [78]:
# Folder and name of the test experiment analysed in this notebook; the
# per-trial metrics are read from here and the summary CSVs are written back here.
# NOTE(review): EXPERIMENTS_PATH presumably comes from the star-import of settings.config - confirm.
exp_dir = os.path.join(EXPERIMENTS_PATH, "basic")
exp_name = "resnets_htuning_sel{10k}_test"

# Number of trials to process: drives both the trial_<i> folders that are
# loaded and the number of rows taken from the old experiment's best_trials.csv.
TRIAL_TO_TEST = 3

#### Loading metrics from tests

In [79]:
# Load the metrics logged by each test trial from its local save [general code]

TRIAL_NAME_PREFIX = "trial_"

trials_metrics_array = []
for trial_id in range(TRIAL_TO_TEST):
    trial_dir = os.path.join(exp_dir, exp_name, f"{TRIAL_NAME_PREFIX}{trial_id}")
    raw_metrics = pd.read_csv(os.path.join(trial_dir, "metrics.csv"))

    # Keep only the metric columns (validation first, then training) plus the
    # two bookkeeping columns; every other column of the CSV is discarded.
    val_columns = [name for name in raw_metrics.columns if "val" in name]
    train_columns = [name for name in raw_metrics.columns if "train" in name]
    metrics = raw_metrics[val_columns + train_columns + ["epoch", "step"]]

    non_step_columns = [name for name in metrics.columns if name != "step"]
    metrics = (
        metrics
        .dropna(how="all", axis=0, subset=non_step_columns)  # rows where every non-step column is NaN
        .ffill()  # fill gaps from the previous row ...
        .bfill()  # ... and leading gaps from the next one
        .groupby("epoch")
        .last()  # one row per epoch: the last logged values
    )
    metrics.index = metrics.index.astype(int)

    trials_metrics_array.append(metrics.drop(columns=["step"]))

In [80]:
# Stack the per-trial frames into one frame indexed by (trial, epoch) and persist it.
metrics_by_trial = dict(enumerate(trials_metrics_array))
full_metrics = pd.concat(metrics_by_trial, axis=0).rename_axis(index=["trial", "epoch"])
full_metrics.to_csv(os.path.join(exp_dir, exp_name, "full_metrics.csv"))

#### Loading trials configuration from old experiment

In [81]:
old_exp_dir = os.path.join(EXPERIMENTS_PATH, "basic")
old_exp_name = "resnets_htuning_sel{10k}"

# First TRIAL_TO_TEST rows of the old experiment's best-trials table.
best_trials_path = str(os.path.join(old_exp_dir, old_exp_name, "best_trials.csv"))
best_trials = pd.read_csv(best_trials_path, index_col=0).iloc[:TRIAL_TO_TEST]

# Strip metric/bookkeeping columns so only the configuration remains, keep the
# original trial number as "old_trial", and re-index by the new trial number.
metric_columns = [col for col in best_trials.columns if ("val" in col or "train" in col)]
trial_info = (
    best_trials
    .drop(columns=metric_columns + ["state", "best_epoch"])
    .reset_index()
    .rename(columns={"trial": "old_trial"})
)
trial_info.index = pd.Index(range(TRIAL_TO_TEST), name="trial")
trial_info.to_csv(os.path.join(exp_dir, exp_name, "trials_configuration.csv"))

In [82]:
# Per-label validation F1, one column per integer label id.
# The regex is anchored to the exact "val_f1_label_<id>" pattern so that any
# other column containing "val_f1" (e.g. an aggregate score) cannot reach the
# int() conversion below and crash it.
F1_LABEL_PREFIX = "val_f1_label_"
full_f1 = full_metrics.filter(regex=rf"^{F1_LABEL_PREFIX}\d+$")
full_f1 = full_f1.rename_axis(index=["trial", "epoch"])
full_f1.columns = [int(col[len(F1_LABEL_PREFIX):]) for col in full_f1.columns]
full_f1.columns.name = "label"
# Order columns by numeric label id (the CSV order would be lexicographic at best).
full_f1 = full_f1.sort_index(axis=1)
full_f1.to_csv(os.path.join(exp_dir, exp_name, "full_f1.csv"))

In [83]:
# Drop the epoch level so rows are indexed by trial only.
# NOTE(review): this presumably relies on each test trial having a single epoch - confirm.
f1_metrics = full_f1.droplevel("epoch")
f1_metrics

label,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,40
trial,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.325088,0.179375,0.332085,0.18619,0.33612,0.419199,0.251239,0.35423,0.608634,0.438059,...,0.791941,0.325427,0.536077,0.148383,0.525571,0.625991,0.490473,0.541051,0.107938,0.0
1,0.331735,0.146993,0.317841,0.178105,0.374182,0.390232,0.242819,0.326107,0.630382,0.44061,...,0.793038,0.316114,0.51366,0.223907,0.519542,0.616464,0.405092,0.502221,0.114103,0.0
2,0.329939,0.156473,0.331798,0.243272,0.383305,0.40635,0.21701,0.377183,0.625041,0.45484,...,0.771776,0.314986,0.494906,0.12617,0.511393,0.627837,0.405176,0.487109,0.087578,0.0


In [84]:
# Display the configuration of the re-tested trials (new trial index vs. old trial number).
trial_info

Unnamed: 0_level_0,old_trial,lr,lr_scheduler,optimizer,model_type,image_augmentation,weight_decay,weighted_loss
trial,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,64,0.000292,ConstantStartReduceOnPlateau,adam,resnet18Pre,False,0.000297,False
1,66,0.000126,ConstantStartReduceOnPlateau,adam,resnet18Pre,False,0.000821,False
2,68,0.00014,ConstantStartReduceOnPlateau,adam,resnet18Pre,True,0.000257,False


#### Loading label encoder from old experiment for translating labels

In [85]:
# Rebuild the label encoder from the configuration stored with the old tuning experiment.
htuner_config_path = str(os.path.join(old_exp_dir, old_exp_name, HTUNER_CONFIG_FILE))
configs = ExpConfig.load_from_file(htuner_config_path).label_encoder
# The config's 'type' entry is the object whose load_from_config builds the encoder.
label_encoder = configs['type'].load_from_config(configs)
label_encoder

<src.data_processing.labels_encoders.MultiLabelBinarizerRobust at 0x10db7a13ee0>

In [86]:
# Replace the integer label ids in the column header with their decoded names.
# decode_labels is called with a one-element list and its [0][0] entry is taken.
label_names = [label_encoder.decode_labels([label_id])[0][0] for label_id in full_f1.columns]
f1_metrics.columns = label_names
f1_metrics

Unnamed: 0_level_0,avocado,basil,beans,beef,bread,butter,cabbage,carrot,cheese,chicken,...,salt,shrimp,soy,strawberries,sugar,tomato,turmeric,vanilla,yogurt,<UNK>
trial,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.325088,0.179375,0.332085,0.18619,0.33612,0.419199,0.251239,0.35423,0.608634,0.438059,...,0.791941,0.325427,0.536077,0.148383,0.525571,0.625991,0.490473,0.541051,0.107938,0.0
1,0.331735,0.146993,0.317841,0.178105,0.374182,0.390232,0.242819,0.326107,0.630382,0.44061,...,0.793038,0.316114,0.51366,0.223907,0.519542,0.616464,0.405092,0.502221,0.114103,0.0
2,0.329939,0.156473,0.331798,0.243272,0.383305,0.40635,0.21701,0.377183,0.625041,0.45484,...,0.771776,0.314986,0.494906,0.12617,0.511393,0.627837,0.405176,0.487109,0.087578,0.0


In [87]:
# One bar chart per label (F1 per trial), laid out on a grid of n_columns columns.
n_columns = 5
# The last column is left out of the plot (it is the "<UNK>" label in the table above).
plotted_labels = list(f1_metrics.columns[:-1])
# Ceiling division: a label count that is not a multiple of n_columns still
# needs one extra (partially filled) row, otherwise add_trace targets a
# row that does not exist.
n_rows = max(1, (len(plotted_labels) + n_columns - 1) // n_columns)
fig = make_subplots(rows=n_rows, cols=n_columns, subplot_titles=plotted_labels, shared_yaxes=True, shared_xaxes=True)

for i, label in enumerate(plotted_labels):
    row = i // n_columns + 1
    col = i % n_columns + 1
    fig.add_trace(go.Bar(x=f1_metrics.index, y=f1_metrics[label], name=label), row=row, col=col)

fig.update_layout(height=1000, width=1000, title_text="F1 score for each label", xaxis_title="Trial", yaxis_title="F1 score",
                  coloraxis_showscale=False)
fig.update_traces(showlegend=False)
# fix y-axis range to [0, 1] so every subplot is directly comparable
fig.update_yaxes(range=[0, 1])
fig.show()

In [88]:
# NOTE(review): leftover scratch cell - this loads plotly express' sample "tips"
# dataset, which nothing else in the visible notebook uses; consider deleting it.
px.data.tips()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


#### Compare with old results

In [95]:
# Per-label statistics saved by the old test experiment (two-level row index).
old_exp_test_dir = os.path.join(EXPERIMENTS_PATH, "basic")
old_exp_test_name = "resnets_training_BM_F1_INGS"
labels_stats_path = str(os.path.join(old_exp_test_dir, old_exp_test_name, "full_labels_stats.csv"))
labels_stats = pd.read_csv(labels_stats_path, index_col=[0, 1])

In [107]:
# Load the label-encoder configuration stored in each old trial's best checkpoint.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects -
# only use it on checkpoints from a trusted source.
label_encoder_configs = []
for trial_id in range(3):
    ckpt_path = str(os.path.join(old_exp_test_dir, old_exp_test_name, f"{TRIAL_NAME_PREFIX}{trial_id}", "best_model.ckpt"))
    checkpoint_data = torch.load(ckpt_path, weights_only=False)
    trial_config = ExpConfig.load_from_ckpt_data(checkpoint_data)
    label_encoder_configs.append(trial_config.label_encoder)

# Every trial must share the same encoder configuration (an empty DeepDiff means
# "no difference"); the first one is then used to build the encoder.
# Note: this rebinds `label_encoder`, replacing the one built from the tuner config.
reference_config = label_encoder_configs[0]
assert all([DeepDiff(reference_config, other_config).to_dict() == {} for other_config in label_encoder_configs[1:]])
label_encoder = reference_config['type'].load_from_config(reference_config)
label_encoder

<src.data_processing.labels_encoders.MultiLabelBinarizerRobust at 0x10db7a3b7c0>

In [124]:
# Old experiment: take the "mean" rows (second index level), keep the first
# three of them and average column-wise.
# NOTE(review): the first three rows are presumably the three old trials - confirm.
old_mean_rows = labels_stats.xs("mean", level=1).iloc[:3, :]
old_avg = old_mean_rows.mean().rename("old")
# Translate the label ids in the index into label names.
old_avg.index = [label_encoder.decode_labels([int(label_id)])[0][0] for label_id in old_avg.index.values]

# New experiment: average F1 per label across trials.
f1_metrics_avg = f1_metrics.mean(axis=0).rename("new")

# Align both series on the label name and keep only labels that have a value in both.
cmp_df = pd.concat([f1_metrics_avg, old_avg], axis=1).dropna()

In [125]:
# Display the per-label comparison table ("new" vs "old" average F1).
cmp_df

Unnamed: 0,new,old
avocado,0.328921,0.12953
basil,0.160947,0.024045
beans,0.327242,0.08516
beef,0.202522,0.035822
bread,0.364536,0.100843
butter,0.40526,0.152381
cabbage,0.237023,0.062406
carrot,0.352507,0.108821
cheese,0.621352,0.414873
chicken,0.444503,0.15014


In [130]:
# Grouped bars: one group per label, one bar per experiment ("new" vs "old").
# Explicit axis/legend titles replace plotly's automatic labels so the figure
# can be read on its own.
px.bar(
    cmp_df,
    x=cmp_df.index,
    y=cmp_df.columns,
    barmode="group",
    title="F1 score comparison between old and new experiment",
).update_layout(xaxis_title="Label", yaxis_title="Mean F1 score", legend_title_text="Experiment")