In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import pickle
import warnings

from scipy import stats

# Install a global "ignore" filter so that no warning of any category is shown
# from this point on (presumably to keep the cell outputs uncluttered).
# NOTE(review): this also hides deprecation and numerical warnings raised by
# numpy / pandas / scipy that could point at real problems in the analysis.
warnings.filterwarnings('ignore')

In [3]:
# Switch that decides which pickled results file is read:
#   True  -> 'results_rob.pkl'
#   False -> 'results_rob_paper.pkl'
full_analysis = False
results_file = 'results_rob.pkl' if full_analysis else 'results_rob_paper.pkl'

# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open(results_file, 'rb') as handle:
    results = pickle.load(handle)

In [17]:
# Build the lookup from internal setup keys (e.g. 'pce_std') to the labels used
# in the result tables (e.g. 'PFCER Std'). Keys and labels are paired by position.
base_keys = ['ce', 'cce', 'pce', 'pcce']
base_names = ['FCER', 'CCER', 'PFCER', 'PCCER']
norm_keys = ['', '_dist', '_std', '_abs', '_var']
norm_names = ['', ' Dist', ' Std', ' Abs', ' Var']

# Outer loop over base explainers, inner loop over normalisation suffixes, so the
# insertion order is: all variants of 'ce', then 'cce', 'pce', 'pcce'.
setup_names = {
    base_key + norm_key: base_label + norm_label
    for base_key, base_label in zip(base_keys, base_names)
    for norm_key, norm_label in zip(norm_keys, norm_names)
}
setup_names

{'ce': 'FCER',
 'ce_dist': 'FCER Dist',
 'ce_std': 'FCER Std',
 'ce_abs': 'FCER Abs',
 'ce_var': 'FCER Var',
 'cce': 'CCER',
 'cce_dist': 'CCER Dist',
 'cce_std': 'CCER Std',
 'cce_abs': 'CCER Abs',
 'cce_var': 'CCER Var',
 'pce': 'PFCER',
 'pce_dist': 'PFCER Dist',
 'pce_std': 'PFCER Std',
 'pce_abs': 'PFCER Abs',
 'pce_var': 'PFCER Var',
 'pcce': 'PCCER',
 'pcce_dist': 'PCCER Dist',
 'pcce_std': 'PCCER Std',
 'pcce_abs': 'PCCER Abs',
 'pcce_var': 'PCCER Var'}

In [18]:
# Timing table for the random-forest model on the housing data: one row per
# setup, one column for the stability runs and one for the robustness runs.
rf_housing = results['housing']['RF']
stab_timer = rf_housing['stab_timer']
rob_timer = rf_housing['rob_timer']

stab = 'Stability'
rob = 'Robustness'

# Each recorded timing is divided by the number of instances (10) so the table
# reports the mean time per instance.
n_instances = 10

# Rows follow the key order of stab_timer; rob_timer is looked up with the same keys.
timer = pd.DataFrame({
    stab: {setup_names[key]: np.mean(stab_timer[key]) / n_instances for key in stab_timer.keys()},
    rob: {setup_names[key]: np.mean(rob_timer[key]) / n_instances for key in stab_timer.keys()},
})
timer

Unnamed: 0,Stability,Robustness
FCER,0.2573499,0.2662939
CCER,0.3986868,0.3818672
FCER Var,0.7955051,0.8160611
CCER Var,1.156076,1.137427
PFCER,0.624667,0.6406583
PCCER,0.9211112,0.8948543
PFCER Var,1.185845,1.204821
PCCER Var,1.71807,1.656689


In [19]:
stability = results['housing']['RF']['stability']
robustness = results['housing']['RF']['robustness']

stab = 'Stability'
rob = 'Robustness'


def _top_feature_variance(runs):
    """Mean over instances of the across-run variance of the dominant feature weight.

    ``runs`` is indexed as ``runs[i][j]['predict']``: ``j`` runs over the
    instances and ``i`` over the outer entries (presumably repeated runs --
    TODO confirm). For every instance the dominant feature is the one whose
    absolute weight is largest most often across ``i`` (the mode of the
    per-run argmax). The weight of that feature is then collected over all
    ``i`` and its variance computed; the function returns the mean of these
    variances over the instances.
    """
    n_runs = len(runs)
    n_instances = len(runs[0])

    # top_idx[j][i]: index of the largest-magnitude weight for instance j in run i.
    top_idx = [
        [np.argmax(np.abs(runs[i][j]['predict'])) for i in range(n_runs)]
        for j in range(n_instances)
    ]
    # Depending on the SciPy version, `.mode` has shape (n_instances,) or
    # (n_instances, 1); flattening makes dominant[j] a scalar in both cases.
    dominant = np.ravel(stats.mode(top_idx, axis=1).mode)

    return np.mean([
        np.var([runs[i][j]['predict'][int(dominant[j])] for i in range(n_runs)])
        for j in range(n_instances)
    ])


# Variance of the model predictions themselves: variance across the outer index
# of robustness['predict'] for each instance, averaged over the instances.
predictions = robustness['predict']
performance = {
    stab: {},
    rob: {},
    'Prediction Variance': np.mean([
        np.var([run[j] for run in predictions])
        for j in range(len(predictions[0]))
    ]),
}

for setup in stability.keys():
    label = setup_names[setup]
    performance[stab][label] = _top_feature_variance(stability[setup])
    # Robustness uses its own run/instance counts rather than those of `stability`.
    performance[rob][label] = _top_feature_variance(robustness[setup])

performance = pd.DataFrame(performance)
# NOTE: this is a global pandas display option; every DataFrame rendered after
# this cell (including re-runs of earlier cells) is shown in scientific notation.
pd.options.display.float_format = "{:e}".format
performance

Unnamed: 0,Stability,Robustness,Prediction Variance
FCER,1.047706e-32,0.008010681,9.068915e-05
CCER,1.8488930000000003e-33,0.002053705,9.068915e-05
FCER Var,1.294225e-32,0.007278605,9.068915e-05
CCER Var,1.040002e-33,0.001811707,9.068915e-05
PFCER,0.002543178,0.03600766,9.068915e-05
PCCER,0.002387989,0.0137577,9.068915e-05
PFCER Var,0.001821867,0.03044382,9.068915e-05
PCCER Var,0.002901287,0.009606847,9.068915e-05
