In [None]:
import json
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Collect one record per trial sub-folder of the tuning run into `results_df`.
# Each sub-folder's `result.json` is loaded and its nested `config` dict
# (presumably the trial's hyperparameters — confirm) is flattened into the
# top level of the record.
results_folder = '../../hyperparameter_tuning/tune_results/2d_tuning_run_4'
results = []
skipped_result_files = []  # result.json files that exist but could not be parsed
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of results_df identical across runs and machines.
for subfolder in sorted(os.listdir(results_folder)):
    if os.path.isdir(os.path.join(results_folder, subfolder)):
        result_file = os.path.join(results_folder, subfolder, 'result.json')
        if not os.path.exists(result_file):
            continue
        with open(result_file, 'r') as file:
            try:
                result = json.load(file)
            except json.JSONDecodeError:
                # Still best-effort: skip the trial, but remember the file so
                # the dropped data is visible instead of silently lost.
                skipped_result_files.append(result_file)
                continue
        # Lift the config entries to the top level, then drop the nested dict.
        config = result['config']
        result.update(config)
        result.pop('config')
        results.append(result)
if skipped_result_files:
    print(f'Skipped {len(skipped_result_files)} unparsable result file(s): {skipped_result_files}')
results_df = pd.DataFrame(results)
results_df.head()

In [None]:
# Floor both summary metrics at 0.5: every value below 0.5 is replaced by 0.5
# (NOTE(review): presumably 0.5 is treated as chance level — confirm).
for metric in ('mean_5_fold_ranking_score', 'balanced_accuracy'):
    results_df[metric] = results_df[metric].clip(lower=0.5)

In [None]:
# Scatter every listed hyperparameter against both summary metrics:
# top row = mean 5-fold ranking score, bottom row = balanced accuracy,
# points coloured by model_key.
hyperparameters = ['learning_rate', 'final_learning_rate', 'momentum', 'weight_decay', 'batch_size', 'label_smoothing', 'x_y_resolution', 'model_key']
# Hyperparameters whose x axis is drawn on a log scale.
log_scaled = ('learning_rate', 'weight_decay', 'final_learning_rate', 'label_smoothing')

fig, ax = plt.subplots(2, len(hyperparameters), figsize=(40, 8))
for column, name in enumerate(hyperparameters):
    top_axis, bottom_axis = ax[0][column], ax[1][column]
    sns.scatterplot(data=results_df, x=name, y='mean_5_fold_ranking_score', hue='model_key', ax=top_axis)
    sns.scatterplot(data=results_df, x=name, y='balanced_accuracy', hue='model_key', ax=bottom_axis)
    if name in log_scaled:
        for axis in (top_axis, bottom_axis):
            axis.set_xscale('log')
    # Only the bottom-left panel keeps a legend; every other panel drops it.
    top_axis.legend().remove()
    if column == 0:
        bottom_axis.legend(loc='lower left')
    else:
        bottom_axis.legend().remove()
plt.tight_layout()
plt.show()

In [None]:
# A trial is selected when both metrics exceed 0.58 and its model is not swin3d_t.
passes_accuracy = results_df['balanced_accuracy'].gt(0.58)
passes_ranking = results_df['mean_5_fold_ranking_score'].gt(0.58)
allowed_model = results_df['model_key'].ne('swin3d_t')
results_df['selected_trials'] = passes_accuracy & passes_ranking & allowed_model

In [None]:
# The two summary metrics against each other, coloured by the selection flag.
sns.scatterplot(
    data=results_df,
    x='mean_5_fold_ranking_score',
    y='balanced_accuracy',
    hue='selected_trials',
)
plt.show()

In [None]:
# The two summary metrics against each other, coloured by model.
sns.scatterplot(
    data=results_df,
    x='mean_5_fold_ranking_score',
    y='balanced_accuracy',
    hue='model_key',
)
plt.show()

In [None]:
# Display only the rows flagged as selected.
results_df.loc[results_df['selected_trials']]

In [None]:
# Ids of the selected trials, as a plain Python list.
trial_ids = results_df.loc[results_df['selected_trials'], 'trial_id'].to_list()

#selected_trial_ids_path = '../../hyperparameter_tuning/selected_trial_ids_2d_tuning_run_4_test.json'
#with open(selected_trial_ids_path, 'w') as file:
#    json.dump(trial_ids, file, indent=4)

# Analyse the spread of metrics across the 5 folds of each run

In [None]:
# Gather the `loss_log_detailed.csv` of every trial sub-folder into one long
# DataFrame, tagging each row with the name of the folder it came from.
trials_folder = '../../hyperparameter_tuning/tune_trials/2d_tuning_run_4'
curve_dfs = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of curves_df reproducible across runs and machines.
for subfolder in sorted(os.listdir(trials_folder)):
    if os.path.isdir(os.path.join(trials_folder, subfolder)):
        loss_file = os.path.join(trials_folder, subfolder, 'loss_log_detailed.csv')
        if os.path.exists(loss_file):
            with open(loss_file, 'r') as file:
                curve_df = pd.read_csv(file)
            curve_df['trial'] = subfolder
            curve_dfs.append(curve_df)
# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with the same exception type but say which folder was empty.
if not curve_dfs:
    raise ValueError(f'No loss_log_detailed.csv files found under {trials_folder!r}')
# ignore_index gives a fresh 0..n-1 index instead of repeating each file's own.
curves_df = pd.concat(curve_dfs, ignore_index=True)

In [None]:
# Folder names are split on '_': pieces 1 and 2 joined by '_' form the trial
# id, piece 4 is the fold (kept as a string).
name_parts = curves_df['trial'].str.split('_')
curves_df['trial_id'] = name_parts.str[1] + '_' + name_parts.str[2]
curves_df['fold'] = name_parts.str[4]
# Attach the per-trial summary row. Columns present in both frames keep their
# name for the curve values and get an "_averaged" suffix for the summary ones.
curves_df = curves_df.merge(
    results_df,
    how='inner',
    on='trial_id',
    suffixes=("", "_averaged"),
)
curves_df

In [None]:
# For every trial folder keep the single row with the highest ranking_score.
best_row_labels = curves_df.groupby('trial')['ranking_score'].idxmax()
best_df = curves_df.loc[best_row_labels]
# Flag the rows whose trial id is in trial_ids, and keep a filtered view of them.
in_selected = best_df['trial_id'].isin(trial_ids)
best_df['selected_trials'] = in_selected
best_df_best_trials = best_df[in_selected]

In [None]:
# Best row of each selected trial, drawn twice: coloured by trial id (left)
# and by fold (right).
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
for axis, hue_column in zip(ax, ('trial_id', 'fold')):
    sns.scatterplot(data=best_df_best_trials, x='ranking_score', y='balanced_accuracy', hue=hue_column, ax=axis)
plt.tight_layout()
plt.show()

In [None]:
# Best row of every trial folder (selected or not), coloured by fold.
sns.scatterplot(
    data=best_df,
    x='ranking_score',
    y='balanced_accuracy',
    hue='fold',
)
plt.tight_layout()
plt.show()

In [None]:
# One panel per fold (1..5) with shared axis limits, coloured by the
# selected_trials flag.
fig, ax = plt.subplots(1, 5, figsize=(20, 4))
for fold_number, axis in enumerate(ax, start=1):
    # The fold column holds strings, hence the str() on the fold number.
    fold_rows = best_df[best_df['fold'] == str(fold_number)]
    sns.scatterplot(data=fold_rows, x='ranking_score', y='balanced_accuracy', hue='selected_trials', ax=axis)
    axis.set_xlim(0.41, 0.77)
    axis.set_ylim(0.45, 0.68)
    axis.set_title(f'Fold {fold_number}')
plt.tight_layout()
plt.show()

In [None]:
# Per fold: shortlist the 20 rows with the highest balanced_accuracy, then keep
# the 10 of those with the highest ranking_score; collect their folder names.
selected_trials_per_fold = []
for fold_number in range(1, 6):
    fold_rows = best_df[best_df['fold'] == str(fold_number)]
    shortlist = fold_rows.sort_values(by='balanced_accuracy', ascending=False).head(20)
    top_ranked = shortlist.sort_values(by='ranking_score', ascending=False).head(10)
    selected_trials_per_fold.extend(top_ranked['trial'].to_list())
selected_trials_per_fold.sort()
print(len(selected_trials_per_fold), selected_trials_per_fold)

# Persist the sorted selection as a JSON list (overwrites any existing file).
selected_trial_ids_path = '../../hyperparameter_tuning/selected_trial_ids_by_fold_2d_tuning_run_4.json'
with open(selected_trial_ids_path, 'w') as out_file:
    json.dump(selected_trials_per_fold, out_file, indent=4)


In [None]:
# Flag the rows whose trial folder was picked by the per-fold selection.
picked_in_fold = best_df['trial'].isin(selected_trials_per_fold)
best_df['selected_trials_by_fold'] = picked_in_fold

In [None]:
# One panel per fold (1..5) with shared axis limits, coloured by the
# selected_trials_by_fold flag.
fig, ax = plt.subplots(1, 5, figsize=(20, 4))
for fold_number, axis in enumerate(ax, start=1):
    # The fold column holds strings, hence the str() on the fold number.
    fold_rows = best_df[best_df['fold'] == str(fold_number)]
    sns.scatterplot(data=fold_rows, x='ranking_score', y='balanced_accuracy', hue='selected_trials_by_fold', ax=axis)
    axis.set_xlim(0.41, 0.77)
    axis.set_ylim(0.45, 0.68)
    axis.set_title(f'Fold {fold_number}')
plt.tight_layout()
plt.show()