In [1]:
from fastcore.xtras import load_pickle
import pandas as pd 
import os 
from glob import glob
from pathlib import Path
from scipy.stats import sem
import matplotlib.pyplot as plt
import pycm 

# Directory (relative to the working directory) that is scanned for result pickles (*.pkl).
FOLDER = 'out'

In [2]:
# Fail fast when the results directory is missing. FileNotFoundError is the
# appropriate exception here (FileExistsError means the opposite), and isdir
# also rejects the case where FOLDER exists but is a regular file.
if not os.path.isdir(FOLDER):
    raise FileNotFoundError('Cannot find folder {}'.format(FOLDER))

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the load order (and therefore downstream row indices) reproducible.
all_res = sorted(glob(f'{FOLDER}/*.pkl'))
print('Pickles found in {}: {}'.format(FOLDER, len(all_res)))

Pickles found in out: 548


In [3]:
# Optional extra metric, currently disabled:
#from sklearn.metrics import matthews_corrcoef

# Two parallel lists, one entry per result pickle:
#   compiled_res_list -> flat summary record (later turned into a DataFrame)
#   conf_mats         -> true/predicted labels for that run
compiled_res_list, conf_mats = [], []

for results in all_res:
    # NOTE(review): load_pickle presumably unpickles the file, which can execute
    # arbitrary code -- only point FOLDER at trusted result files.
    res_full = load_pickle(results)
    res = res_full['results']
    data_summary = res_full['data_summary']
    tune_settings = res_full['config']['tune_settings']
    y_true, y_pred = res['all_y_true'], res['all_y_pred']

    confusion_matrix = {'all_y_true': y_true, 'all_y_pred': y_pred}

    # Key order is preserved and determines the DataFrame column order.
    # Disabled metrics: res['f1_macro'], res['f1_micro'],
    # and 'MCC' via matthews_corrcoef(y_true, y_pred).
    summary = dict(
        size=res_full['train_size'],
        target=data_summary['target'],
        representation=data_summary['representation'],
        accuracy=res['accuracy'],
        kappa=res['kappa'],
        num_epochs=tune_settings['num_train_epochs'],
        lr=tune_settings['learning_rate'],
        # Number of distinct labels that occur in the true labels of this run.
        bins=len(set(y_true)),
    )

    compiled_res_list.append(summary)
    conf_mats.append(confusion_matrix)

In [7]:
# Assemble one row per result pickle and show which representations are present.
compiled_res = pd.DataFrame(compiled_res_list)
print(compiled_res['representation'].unique())

# Keep only runs with two distinct true labels that were trained for 25 epochs.
# Further optional filters (disabled): target == 'D_CH4_binary', size == 500.
has_two_bins = compiled_res['bins'].eq(2)
has_25_epochs = compiled_res['num_epochs'].eq(25)
compiled_res = compiled_res.loc[has_two_bins & has_25_epochs]

# Mean and standard error of every remaining column for each experimental setting.
group_keys = ['target', 'bins', 'num_epochs', 'representation', 'size']
grouped_res = compiled_res.groupby(group_keys).agg(['mean', 'sem'])


['mofkey' 'mofid']


In [8]:
# Show the filtered runs ordered by representation, then by training-set size.
compiled_res.sort_values(by=['representation', 'size'])

Unnamed: 0,size,target,representation,accuracy,kappa,num_epochs,lr,bins
19,50,D_CH4_binary,mofid,0.50,0.00,25,0.0003,2
20,50,U_H2_binary,mofid,0.66,0.32,25,0.0003,2
51,50,D_He_binary,mofid,0.56,0.12,25,0.0003,2
53,50,D_H2_binary,mofid,0.54,0.08,25,0.0003,2
72,50,D_N2_binary,mofid,0.58,0.16,25,0.0003,2
...,...,...,...,...,...,...,...,...
420,500,U_CH4_binary,mofkey,0.74,0.48,25,0.0003,2
441,500,D_He_binary,mofkey,0.56,0.12,25,0.0003,2
452,500,D_CH4_binary,mofkey,0.62,0.24,25,0.0003,2
504,500,U_N2_binary,mofkey,0.70,0.40,25,0.0003,2


In [9]:
# Display the aggregated table: mean and standard error (sem) per
# (target, bins, num_epochs, representation, size) group.
grouped_res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,accuracy,accuracy,kappa,kappa,lr,lr
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,sem,mean,sem,mean,sem
target,bins,num_epochs,representation,size,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
D_CH4_binary,2,25,mofid,50,0.553333,0.029059,0.106667,0.058119,0.0003,0.0
D_CH4_binary,2,25,mofid,100,0.626667,0.033333,0.253333,0.066667,0.0003,0.0
D_CH4_binary,2,25,mofid,250,0.653333,0.048074,0.306667,0.096148,0.0003,0.0
D_CH4_binary,2,25,mofid,500,0.713333,0.029059,0.426667,0.058119,0.0003,0.0
D_CH4_binary,2,25,mofkey,50,0.520000,0.020000,0.040000,0.040000,0.0003,0.0
...,...,...,...,...,...,...,...,...,...,...
U_N2_binary,2,25,mofid,500,0.740000,0.040000,0.480000,0.080000,0.0003,0.0
U_N2_binary,2,25,mofkey,50,0.513333,0.006667,0.026667,0.013333,0.0003,0.0
U_N2_binary,2,25,mofkey,100,0.573333,0.026667,0.146667,0.053333,0.0003,0.0
U_N2_binary,2,25,mofkey,250,0.666667,0.013333,0.333333,0.026667,0.0003,0.0
