In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import os
import sys
sys.path.append('../')
from lib.metrics import utils
from scipy.optimize import minimize
import json
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from copy import copy
from sklearn.metrics import f1_score

In [None]:
# captioning best
# Select, for every (dataset, noise_type, noise_level, ablation) setting, the
# hyperparameter combination with the highest mean validation F1, then keep the
# rows of the results table that belong to that combination.
# NOTE(review): read_pickle can execute arbitrary code -- only load trusted files.
df = pd.read_pickle('./all_res_df.pkl')
df = df[(df.ablation == 'none')]

hparams = ['knn_k', 'dist_type']
setting_cols = ['dataset', 'noise_type', 'noise_level', 'ablation']

SELECTION_METRIC = 'know_val_labels_val_F1_optimal'

# mean selection metric for every (setting, hyperparameter) combination
avg_perfs = (df.groupby(setting_cols + hparams, dropna = False)
             .agg(performance = (SELECTION_METRIC, 'mean'))
             .reset_index())

# get configs with best perfs
# reset_index() puts the setting keys back into columns before the merge, so the
# join is on (setting keys + performance). Without it the keys live in the index
# and merge() falls back to joining on 'performance' alone, which can pair a
# setting's best score with an unrelated setting's configuration that happens to
# have the same score.
best_models = (avg_perfs.groupby(setting_cols, dropna = False)
               .agg(performance = ('performance', 'max'))
               .reset_index()
               .merge(avg_perfs)
               # ties inside a setting: keep a single configuration
               .drop_duplicates(subset = setting_cols))

# take subset of df with best perfs
# all-NaN columns are dropped first so they are not used as merge keys
selected_configs = (
    best_models.drop(columns = ['performance'])
    .dropna(axis=1, how='all').merge(df)
)

configs = selected_configs[~selected_configs.dataset.isin(['cifar100', 'cifar10'])]

In [None]:
## choose fixed
# Runs without ablation that use the fixed hyperparameters (knn_k = 30, cosine
# distance), indexed by their output directory.
fixed_mask = (df['ablation'] == 'none') & (df['knn_k'] == 30) & (df['dist_type'] == 'cosine')
selected_models_df = df.loc[fixed_mask].set_index('output_dir')

In [None]:
# For every selected run, keep its identifying fields together with the 'df'
# entry of the res.pkl stored in the run's output directory (the row label).
# NOTE(review): read_pickle can execute arbitrary code -- only load trusted files.
selected_model_dfs = []
for out_dir, row in tqdm(selected_models_df.iterrows(), total = len(selected_models_df)):
    record = {key: row[key] for key in ('dataset', 'noise_type', 'noise_level', 'data_seed')}
    record['df'] = pd.read_pickle(Path(out_dir) / 'res.pkl')['df']
    selected_model_dfs.append(record)

In [None]:
# Fixed hyperparameter values handed to utils.calc_scores_given_hparams_vectorized.
hparam_dict = dict(
    beta = 5,
    gamma = 5,
    tau_1_n = 0.1,
    tau_2_n = 5,
    tau_1_m = 0.1,
    tau_2_m = 5,
)

In [None]:
# Score every selected run with the fixed hyperparameters, choose the decision
# threshold on the validation split, and report metrics on the test split.
res = []
for run in tqdm(selected_model_dfs):
    run_df = run['df']
    # writes a 'score' column into the stored frame in place
    run_df['score'] = utils.calc_scores_given_hparams_vectorized(run_df, hparam_dict)

    # threshold from the validation split; the predicted prevalence is set to
    # the observed fraction of mislabeled validation samples
    val_part = run_df.query('sset == "val"')
    val_prevalence = val_part['is_mislabel'].sum()/len(val_part)
    val_f1, thres = utils.f1_with_pred_prev_constraint(val_part['is_mislabel'], val_part['score'],
                                                       pred_prev = val_prevalence, return_thres = True)

    # test metrics; F1 uses the threshold found on the validation split
    test_part = run_df.query('sset == "test"')
    mets = utils.prob_metrics(test_part['is_mislabel'], test_part['score'])
    mets['F1'] = f1_score(test_part['is_mislabel'], test_part['score'] >= thres)

    # everything except the per-sample frame identifies the run
    run_meta = {key: value for key, value in run.items() if key != 'df'}
    res.append({**run_meta, **mets})
res_df = pd.DataFrame(res)

In [None]:
# Mean and standard deviation of each metric in res_df per
# (dataset, noise_type, noise_level) group.
res_a = (
    res_df
    .groupby(['dataset', 'noise_type', 'noise_level'])
    .agg({metric: ['mean', 'std'] for metric in ['AUROC', 'AUPRC', 'F1']})
    .sort_index()
)

In [None]:
# Same per-group mean/std summary for `configs`, after renaming its test-metric
# columns to the short metric names.
test_metric_names = {
    'know_val_labels_test_AUROC': 'AUROC',
    'know_val_labels_test_AUPRC': 'AUPRC',
    'know_val_labels_test_F1_optimal': 'F1',
}
res_b = (
    configs
    .rename(columns = test_metric_names)
    .groupby(['dataset', 'noise_type', 'noise_level'])
    .agg({metric: ['mean', 'std'] for metric in ['AUROC', 'AUPRC', 'F1']})
    .sort_index()
)

In [None]:
# Display table: one "mean (std)" string (in percent, one decimal) per metric,
# first for res_a ("fixed"), then for res_b ("optimal").
new_df = pd.DataFrame()

for label, summary in (('fixed', res_a), ('optimal', res_b)):
    for metric in ['AUROC', 'AUPRC', 'F1']:
        new_df[f'{metric}_{label}'] = summary.apply(
            lambda row, metric = metric: f'{row[metric]["mean"]*100:.1f} ({row[metric]["std"]*100:.1f})',
            axis = 1,
        )

# (dataset, noise_type, noise_level, data_seed) keys present in both result sets
seed_keys = ['dataset', 'noise_type', 'noise_level', 'data_seed']
idx1 = res_df.set_index(seed_keys).index.drop_duplicates()
idx2 = configs.set_index(seed_keys).index.drop_duplicates()
keys_fixed, keys_optimal = set(idx1), set(idx2)
idx_common = list(keys_fixed.intersection(keys_optimal))

In [None]:
# Per-seed difference (res_df minus configs) for each metric on the keys both
# share, then mean/std of that difference per (dataset, noise_type, noise_level).
seed_keys = ['dataset', 'noise_type', 'noise_level', 'data_seed']
setting_keys = ['dataset', 'noise_type', 'noise_level']

# the renamed / re-indexed frames do not depend on the metric, so build them once
optimal_by_seed = configs.rename(
    columns = {
        'know_val_labels_test_AUROC': 'AUROC',
        'know_val_labels_test_AUPRC': 'AUPRC',
        'know_val_labels_test_F1_optimal': 'F1'
    }
).set_index(seed_keys)
fixed_by_seed = res_df.set_index(seed_keys)

for metric in ['AUROC', 'AUPRC', 'F1']:
    optimal_vals = optimal_by_seed.loc[idx_common, metric]
    # keep a single value per key when configs has repeated rows
    optimal_vals = optimal_vals[~optimal_vals.index.duplicated(keep='first')]

    fixed_vals = fixed_by_seed.loc[idx_common, metric]

    gap_by_setting = (fixed_vals - optimal_vals).sort_index().groupby(setting_keys)
    new_df[f'{metric}_Gap_mean'] = gap_by_setting.mean()
    new_df[f'{metric}_Gap_std'] = gap_by_setting.std()
    new_df[f'{metric}_Gap'] = new_df.apply(
        lambda row: f'{row[metric + "_Gap_mean"]*100:.1f} ({row[metric + "_Gap_std"]*100:.1f})',
        axis = 1,
    )
    # the numeric *_Gap_mean / *_Gap_std columns are kept: later cells read *_Gap_mean

In [None]:
# Final table: for each metric show the optimal, fixed and gap columns side by side.
display_cols = [
    f'{metric}_{kind}'
    for metric in ['AUROC', 'AUPRC', 'F1']
    for kind in ('optimal', 'fixed', 'Gap')
]
new_df[display_cols]

In [None]:
# Mean and standard deviation, across rows of new_df, of the mean AUROC gap.
auroc_gap_means = new_df['AUROC_Gap_mean']
auroc_gap_means.mean(), auroc_gap_means.std()

In [None]:
# Mean and standard deviation, across rows of new_df, of the mean AUPRC gap.
auprc_gap_means = new_df['AUPRC_Gap_mean']
auprc_gap_means.mean(), auprc_gap_means.std()

In [None]:
# Smallest mean AUROC gap over all rows of new_df.
new_df.loc[:, 'AUROC_Gap_mean'].min()