In [None]:
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from resources.ml_mlp import parse_tensorboard
from IPython.display import display

In [None]:
# Show the current working directory, then move to the TensorBoard log root so
# that `folder` below can be given relative to it.
# NOTE: the `%cd` target is relative to the current directory, so re-running
# this cell without restarting the kernel resolves it from the new location.
%pwd
%cd ../../../ml/Tensorboard/

# Hyper-parameter test run to analyse (relative to the directory entered above).
folder = r'MLP/hparam_test_4/'

# Alternative runs; uncomment one to analyse it instead.
# folder = r'MLP_WIN/hparam_test_2/D_0.1'
# folder = r'MLP_WIN/hparam_test'

In [None]:
# Read the hyper-parameter summary table stored under the selected run folder.
# `results` is the table that the remaining cells filter, extend and plot.
path = os.path.join(folder, r'Results/hparams_table.csv')
results = pd.read_csv(path)
# results.sort_values(by=['epochs','no_nodes']).reset_index(drop=True)

In [None]:
# Columns of the results table that hold scores rather than settings.
metrics = [
    'CV MAE (µm)',
    'CV MSE (µm²)',
    'CV R²',
    'CV Std MAE (± µm)',
    'CV Std MSE (± µm²)',
    'CV Std R² (±)',
    'Val MAE (µm)',
    'Val MSE (µm²)',
    'Val R²',
]

# A hyper-parameter of interest is any non-metric column that does not hold
# exactly one distinct value, i.e. one that actually varies between trials.
hparams = []
for column in results.keys():
    if column in metrics:
        continue
    if results[column].nunique() == 1:
        continue
    hparams.append(column)
hparams

Load the logged scalar data for each trial into pandas

In [None]:
# Scalar tags to extract from each trial's log.
scalars = ['cv_iter/mse', 'cv_iter/mae', 'cv_iter/r2']

# Keep only the entries of `folder` whose name contains one of these
# model-type markers, in sorted order.
a = ['MLP_Win-', 'MLP-', 'LSTM-']
dirs = sorted(
    entry
    for entry in os.listdir(folder)
    if any(marker in entry for marker in a)
)

In [None]:
# Parse the requested scalars of every trial directory, keyed by directory name.
trial_frames = {
    trial_dir: parse_tensorboard(os.path.join(folder, trial_dir), scalars)
    for trial_dir in dirs
}
# Stack the per-trial frames; the directory name becomes the outer index level.
df = pd.concat(list(trial_frames.values()), keys=list(trial_frames.keys()))

# NOTE(review): this assigns the sorted trial ids positionally, so it assumes
# the rows of `results` are in the same order as the sorted directories — confirm.
trial_ids = df.index.levels[0]
results['trial_id'] = trial_ids

# Multiplier per column, matched by substring in this order; anything else is
# left unscaled (presumably converts to the µm² / µm units of the metric
# columns — TODO confirm the units parse_tensorboard returns).
unit_scale = {'mse': 1e6, 'mae': 1e3}
for col in df.columns.values:
    scale = next((factor for tag, factor in unit_scale.items() if tag in col), 1)
    # One array of scaled values per trial, stored as a single cell of `results`.
    results[col] = [df.loc[trial_id][col].values * scale for trial_id in trial_ids]
results

In [None]:
def cv_iter_boxplot(results, hparam, scalars):
    """Box-plot the pooled cross-validation scores, grouped by one hyper-parameter.

    One panel is drawn per entry of ``scalars``, in the given order. Within a
    panel there is one box per distinct value of ``hparam``.

    Parameters
    ----------
    results : pandas.DataFrame
        Trial table containing the column ``hparam`` and one column per entry
        of ``scalars``. Each cell of a scalar column must be an array-like of
        scores; the cells of a group are pooled with ``np.hstack``.
    hparam : str
        Column to group by.
    scalars : sequence of str
        Score columns to plot. Any number of columns is accepted.

    Returns
    -------
    None
        The figure is created through pyplot and left open for the notebook
        backend to render.
    """
    # Panel titles are chosen by substring so they follow the scalar, not its
    # position in the list. Unknown scalars fall back to their own name.
    # NOTE(review): the titles say "Val" although the data are CV scores — confirm.
    titles = {'mse': 'Val MSE (µm²)', 'mae': 'Val MAE (µm)', 'r2': 'Val R²'}

    # Pool every trial's scores within each hparam group into one flat array.
    pooled = pd.concat(
        [results.groupby(hparam)[scalar].apply(np.hstack) for scalar in scalars],
        axis=1,
    )

    # One panel per requested scalar; squeeze=False keeps `axes` two-dimensional
    # even when only a single scalar is requested.
    fig, axes = plt.subplots(1, len(scalars), squeeze=False)
    for i, (ax, scalar) in enumerate(zip(axes[0], scalars)):
        ax.boxplot(x=pooled.iloc[:, i], labels=pooled.index.values,
                   showfliers=False, medianprops={'color': 'r'})
        ax.set_xlabel(hparam)
        ax.tick_params('x', labelrotation=90)
        ax.set_title(next((text for key, text in titles.items() if key in str(scalar)),
                          str(scalar)))

    fig.suptitle(f'CV scores by {hparam}')
    plt.tight_layout()


# One figure per hyper-parameter that varies between trials.
for hparam in hparams:
    cv_iter_boxplot(results, hparam, scalars)


    

In [None]:
def plt_boxplot(result_df, hparam):
    """Box-plot the three validation scores of ``result_df`` grouped by ``hparam``.

    Draws a 1x3 grid (MSE, MAE, R²) with independent y-axes through
    ``DataFrame.boxplot`` and sets the figure title to name the
    hyper-parameter. As a side effect the global matplotlib font weight is set
    to 'normal'.
    """
    score_columns = ['Val MSE (µm²)', 'Val MAE (µm)', 'Val R²']
    line_colours = {'boxes': 'k', 'whiskers': 'k', 'medians': 'r', 'caps': 'k'}

    plt.rc('font', weight='normal')
    result_df.boxplot(
        column=score_columns,
        by=hparam,
        layout=(1, 3),
        rot=90,
        sharey=False,
        showfliers=True,
        grid=False,
        color=line_colours,
    )
    # Replace the title that pandas generates for grouped box plots.
    plt.suptitle(f'Validation scores by {hparam}')
    plt.tight_layout()


# One figure per hyper-parameter that varies between trials.
for hparam in hparams:
    plt_boxplot(results, hparam)

In [None]:
def mean_hparam(result_df, hparam):
    """Display the mean of the numeric columns of ``result_df`` per value of ``hparam``.

    The other hyper-parameter columns (taken from the module-level ``hparams``
    list) are removed from the table, as is every column that holds exactly
    one distinct value across the groups.

    Parameters
    ----------
    result_df : pandas.DataFrame
        Trial table containing the column ``hparam``.
    hparam : str
        Column to group by.

    Returns
    -------
    None
        The table is rendered with ``IPython.display.display``.
    """
    # Compare names by value: `is not` only excluded `hparam` when the caller
    # passed the very same string object that is stored in `hparams`.
    other_hp = [hp for hp in hparams if hp != hparam]
    group_means = (
        result_df
        .groupby([hparam])
        .mean(numeric_only=True)
        .drop(columns=other_hp, errors='ignore')
    )
    # Columns that are the same for every group carry no information here.
    display(group_means.loc[:, group_means.nunique() != 1])


# One summary table per hyper-parameter that varies between trials.
for hparam in hparams:
    mean_hparam(results, hparam)