In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import os
import sys
sys.path.append('../')
from lib.metrics import utils
from scipy.optimize import minimize
import json
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from copy import copy
from sklearn.metrics import f1_score

# Directory that is searched recursively for `done` marker files when the results
# dataframe is (re)built. Set the MULTIMODAL_RESULTS_DIR environment variable to point
# the notebook at a different location; when it is unset the original cluster path is used.
root_dir = Path(os.environ.get(
    'MULTIMODAL_RESULTS_DIR',
    '/data/healthy-ml/scratch/haoran/results/MultimodalDiscordance/results/multimodal_knn_caption/',
))

def condense_dict(k, d):
    """Flatten a nested dict into a single-level dict.

    Keys of nested dicts are joined to their parent key with '_', and every
    resulting key is prefixed with `k`.

    Args:
        k: String prefix prepended to every key of the returned dict.
        d: Dict with string keys whose values may themselves be dicts.

    Returns:
        A new flat dict; `d` is not modified.
    """
    flat = {}
    for key, value in d.items():
        if not isinstance(value, dict):
            flat[k + key] = value
            continue
        # Flatten the sub-dict under its own key first, then add this level's prefix.
        for sub_key, sub_value in condense_dict(key + '_', value).items():
            flat[k + sub_key] = sub_value
    return flat

# When True, the results dataframe is rebuilt from the run directories under `root_dir`
# even if the cached './all_res_df.pkl' already exists; when False the cache is used if present.
RELOAD_DF = False

In [None]:
# Build (or load from cache) `df`: one row per run, combining the run's args.json
# with its flattened `agg_results`.
results, dfs = [], {}
if RELOAD_DF or not Path('./all_res_df.pkl').is_file():
    # Every `done` file found under root_dir identifies a run directory to collect.
    for done_marker in tqdm(root_dir.glob('**/done')):
        run_dir = done_marker.parent
        # Context managers close each file right away instead of leaving one
        # open handle per run for the garbage collector.
        with (run_dir/'args.json').open('r') as args_file:
            args = json.load(args_file)
        # NOTE: unpickling can execute arbitrary code; only point root_dir at trusted results.
        with (run_dir/'res.pkl').open('rb') as res_file:
            final_res = pickle.load(res_file)
        # Per-run dataframes are currently not collected into `dfs`:
        # dfs[args['output_dir']] = final_res['df']
        selected_row = condense_dict('', final_res['agg_results'])
        row = {**args, **selected_row}
        results.append(row)
    if not results:
        # Fail with a clear message instead of a KeyError on 'output_dir' below.
        raise FileNotFoundError(f"No `done` markers found under {root_dir}; nothing to aggregate.")
    df = pd.DataFrame(results)
    df['output_dir_small'] = df['output_dir'].apply(lambda x: Path(x).name)
    df.to_pickle('./all_res_df.pkl')
else:
    print("Loading saved dataframe...")
    df = pd.read_pickle('./all_res_df.pkl')

In [None]:
# Keep all rows with ablation 'none', and rows with ablation 'multimodal_baseline'
# only when knn_k is 1; every other row is discarded.
is_unablated = df['ablation'].eq('none')
is_baseline_k1 = df['ablation'].eq('multimodal_baseline') & df['knn_k'].eq(1)
df = df[is_unablated | is_baseline_k1]

In [None]:
# Column used to rank hyperparameter configurations.
SELECTION_METRIC = 'know_val_labels_val_F1_optimal'

# The selection method is the leading part of the metric name: the first three
# '_'-separated tokens for 'know...' metrics, otherwise just the first token.
_metric_tokens = SELECTION_METRIC.split('_')
if SELECTION_METRIC.startswith('know'):
    SELECTION_METHOD = '_'.join(_metric_tokens[:3])
else:
    SELECTION_METHOD = _metric_tokens[0]

In [None]:
# Hyperparameters that are tuned per experimental setting.
hparams = ['knn_k', 'dist_type']
# Columns that identify one experimental setting.
setting_cols = ['dataset', 'noise_type', 'noise_level', 'ablation']

# Mean of the selection metric for every (setting, hyperparameter) combination.
# dropna=False keeps combinations whose key columns contain NaN.
avg_perfs = (df.groupby(setting_cols + hparams, dropna = False)
             .agg(performance = (SELECTION_METRIC, 'mean'))
             .reset_index())

# get configs with best perfs
# The group keys must be turned back into columns before merging: straight after
# .agg() they live in the index, so a bare .merge(avg_perfs) would join on
# `performance` alone and could pair one setting's maximum with a non-best config
# of another setting that happens to have the same mean.
# drop_duplicates keeps the first config when several tie for the maximum.
best_models = (avg_perfs.groupby(setting_cols, dropna = False)
               .agg(performance = ('performance', 'max'))
               .reset_index()
               .merge(avg_perfs, on = setting_cols + ['performance'])
               .drop_duplicates(subset = setting_cols))

# take subset of df with best perfs
# Columns that are entirely NaN are dropped first so they are not used as join keys;
# the merge then joins on all remaining shared columns (setting + hyperparameters).
selected_configs = (
    best_models.drop(columns = ['performance'])
    .dropna(axis=1, how='all').merge(df)
)

In [None]:
# Metrics reported for every selection method; the F1 variant matching the
# selection method (if any) is appended.
examine_metrics = ['AUROC', 'AUPRC']

f1_metric_for_method = {
    'know_val_labels': 'F1_optimal',
    'know_val_prev': 'F1_prev',
    'heuristic': 'F1_heuristic',
}
if SELECTION_METHOD in f1_metric_for_method:
    examine_metrics.append(f1_metric_for_method[SELECTION_METHOD])

# Maps from the method-prefixed column names in `selected_configs` to short names.
param_names = ('beta', 'gamma', 'tau_1_n', 'tau_2_n', 'tau_1_m', 'tau_2_m')
metric_renames = {SELECTION_METHOD + '_test_' + name: name for name in examine_metrics}
param_renames = {SELECTION_METHOD + '_' + name: name for name in param_names}
method_labels = {
    'multimodal_baseline': 'CLIP Base',
    'none': 'Ours'
}

report_cols = (
    ['dataset', 'noise_type', 'noise_level', 'ablation']
    + list(metric_renames)
    + list(param_renames)
    + ['knn_k', 'dist_type', 'output_dir_small']
)

temp = (
    selected_configs[report_cols]
    .rename(columns = metric_renames)
    .rename(columns = param_renames)
    .rename(columns = {'ablation': 'method'})
    .assign(method = lambda frame: frame['method'].map(method_labels))
)

group_cols = examine_metrics

# mean/std for each metric and for knn_k; dist_type is reported as the first value in each group.
# The lambda is kept anonymous on purpose: its name becomes the output column label.
summary_spec = {col: ['mean', 'std'] for col in group_cols + ['knn_k']}
summary_spec['dist_type'] = lambda x: x.iloc[0]

# NOTE(review): this groupby uses the default dropna=True, unlike the selection cell,
# so rows with NaN in a grouping column are left out of the table — confirm intended.
temp.groupby(['dataset', 'noise_type', 'noise_level', 'method']).agg(summary_spec).sort_index()