# This notebook evaluates the cross-validation results obtained with the train_models_crossval notebook

## First, predictive performance is assessed using the AUC metric

In [186]:
## import standard modules for data handling and visualization
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

## import model specific modules
import cplex as cp
import slim_python as slim
import shap
from interpret.glassbox import ExplainableBoostingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

pd.set_option('display.max_columns', None)

In [15]:
cd ..

C:\Users\danie\Documents\StageDaniel


In [16]:
cd research

C:\Users\danie\Documents\StageDaniel\research


In [17]:
def load_models(name):
    """Unpickle and return the object stored in ``results/models/<name>.pkl``.

    The path is relative to the current working directory.

    NOTE: ``pickle.load`` can execute arbitrary code; only load files that
    were produced by a trusted source.
    """
    path = 'results/models/' + name + '.pkl'
    with open(path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

In [19]:
from sklearn import metrics

def extract_results(results, X):
    """Unpack a stored model collection and precompute per-model outputs on X.

    ``results`` is indexed positionally: ``results[0][1]['rho']`` holds the
    SLIM coefficient vector, ``results[1]`` the EBM, ``results[2]`` the
    XGBoost model and ``results[3]`` the logistic regression.

    Returns a tuple
    ``(rho, slim_predictions, ebm, explainer, shap_values, logit)`` where
    ``explainer`` is a SHAP tree explainer built on the XGBoost model and
    ``shap_values`` are its SHAP values for ``X``.
    """
    # SLIM: coefficient vector and the hard predictions it yields on X.
    rho = results[0][1]['rho']
    slim_predictions = pred_slim(X, rho)

    ebm = results[1]

    # XGBoost is explained through SHAP's tree explainer.
    xgb_model = results[2]
    explainer = shap.TreeExplainer(xgb_model)
    shap_values = explainer.shap_values(X)

    logit = results[3]

    return (rho, slim_predictions, ebm, explainer, shap_values, logit)

## Simple function for getting predictions for a SLIM scoring system
def pred_slim(X, rho):
    """Return 0/1 predictions of a SLIM scoring system.

    ``rho[0]`` is the intercept and ``rho[1:]`` the per-feature points; an
    observation is predicted 1 when its total score is non-negative.
    """
    scores = X.dot(rho[1:]) + rho[0]
    is_positive = scores >= 0
    return is_positive * 1

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), applied elementwise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def scale_sigmoid(x):
    """Rescale x so its largest absolute value becomes 5, then apply a sigmoid.

    Dividing by a positive constant keeps the ordering of the scores while
    keeping them inside [-5, 5], so the logistic function does not saturate
    to exactly 0 or 1.

    If every score is zero there is nothing to rescale; the scores are passed
    through unchanged (all outputs are 0.5) instead of computing 0/0 = NaN.
    """
    x_max = np.maximum(x.max(), np.abs(x.min()))
    # Guard only the scalar case; array-valued maxima behave as before.
    if np.ndim(x_max) == 0 and x_max == 0:
        return 1/(1 + np.exp(-x))
    x = x/(x_max/5)
    return 1/(1 + np.exp(-x))

def auc(y, y_pred):
    """Return the area under the ROC curve of scores ``y_pred`` against labels ``y``."""
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(y, y_pred)
    return metrics.auc(false_pos_rate, true_pos_rate)

In [20]:
def performance_measures(data, models):
    """Compute train and test AUC for the four models of one CV fold.

    Parameters
    ----------
    data : mapping with keys 'X_train', 'X_test', 'y_train', 'y_test'.
    models : indexable collection; ``models[0][1]['rho']`` is the SLIM
        coefficient vector (intercept first), and ``models[1]``,
        ``models[2]``, ``models[3]`` are the EBM, XGBoost and logistic
        regression classifiers, each exposing ``predict_proba``.

    Returns
    -------
    pandas.DataFrame with rows 'train' and 'test' and columns
    'slim', 'ebm', 'shap', 'logit'. The 'shap' column holds the AUC of the
    XGBoost model (the model later explained with SHAP).
    """
    X_train = data['X_train']
    X_test = data['X_test']
    y_train = data['y_train']
    y_test = data['y_test']

    # SLIM only yields an integer score. Both splits are squashed with
    # scale_sigmoid: the plain sigmoid previously used for the test split can
    # saturate to exactly 1.0 for large scores, creating artificial ties.
    rho = models[0][1]['rho']
    train_aucs = [auc(y_train, scale_sigmoid(X_train.dot(rho[1:]) + rho[0]))]
    test_aucs = [auc(y_test, scale_sigmoid(X_test.dot(rho[1:]) + rho[0]))]

    # EBM, XGBoost and logit share the predict_proba interface; column 1 is
    # the probability of the positive class.
    for position in (1, 2, 3):
        classifier = models[position]
        train_aucs.append(auc(y_train, classifier.predict_proba(X_train)[:, 1]))
        test_aucs.append(auc(y_test, classifier.predict_proba(X_test)[:, 1]))

    return pd.DataFrame([train_aucs, test_aucs], ['train', 'test'], ['slim', 'ebm', 'shap', 'logit'])

In [21]:
names = [
    'bankruptcy', 'breastcancer', 'haberman', 'heart',
    'mammo', 'mushroom', 'spambase', 'adult',
]
performances = []
sds = []

# For every dataset: score each of the 5 CV folds, then summarise the folds
# by their mean and standard deviation per split (train / test).
for dataname in names:
    fold_performances = []
    for fold in range(5):
        models = load_models(f'{dataname}_models_600_cv{fold}')
        data = load_models(f'{dataname}_data_cv{fold}')
        performance = performance_measures(data, models)
        fold_performances.append(performance)
    folds_df = pd.concat(fold_performances).rename_axis('validation')
    per_split = folds_df.groupby('validation')
    performances.append(per_split.mean())
    sds.append(per_split.std())
performances[-1]

Unnamed: 0_level_0,slim,ebm,shap,logit
validation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
test,0.847226,0.890406,0.89339,0.8906
train,0.846707,0.891326,0.906507,0.891406


In [22]:
# Display the most recently appended entry of `sds` (fold standard deviations).
sds[-1]

Unnamed: 0_level_0,slim,ebm,shap,logit
validation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
test,0.011512,0.004509,0.005023,0.004478
train,0.006373,0.001113,0.002918,0.001153


In [24]:
# Stack the per-dataset standard deviations into one frame indexed by
# (dataset, validation split) and display it with a flat index.
sds_df = pd.concat(sds, keys= names)
sds_df.index.names = ['dataset', 'validation']
sds_df.reset_index()

Unnamed: 0,dataset,validation,slim,ebm,shap,logit
0,bankruptcy,test,0.006518,0.000761,0.0,0.0
1,bankruptcy,train,0.002853,0.0,0.0,0.0
2,breastcancer,test,0.005987,0.00732,0.008019168,0.00457028
3,breastcancer,train,0.003115,0.000734,4.987587e-05,0.001350922
4,haberman,test,0.107018,0.080087,0.1312392,0.1101864
5,haberman,train,0.02886,0.025989,0.01839336,0.0175306
6,heart,test,0.023146,0.031294,0.03807233,0.02890056
7,heart,train,0.009499,0.020579,0.0002329821,0.008636592
8,mammo,test,0.039978,0.02584,0.01743885,0.02419397
9,mammo,train,0.017947,0.00606,0.004052506,0.005525324


In [25]:
# Stack the per-dataset mean AUCs into one frame indexed by
# (dataset, validation split) and display it with a flat index.
performance_df = pd.concat(performances, keys= names)
performance_df.index.names = ['dataset', 'validation']
performance_df.reset_index()

Unnamed: 0,dataset,validation,slim,ebm,shap,logit
0,bankruptcy,test,0.992971,0.99966,1.0,1.0
1,bankruptcy,train,0.996773,1.0,1.0,1.0
2,breastcancer,test,0.985562,0.99255,0.992071,0.995222
3,breastcancer,train,0.991098,0.998785,0.999912,0.996353
4,haberman,test,0.730686,0.629926,0.631642,0.673922
5,haberman,train,0.688255,0.823047,0.929665,0.704827
6,heart,test,0.886813,0.908186,0.900541,0.898936
7,heart,train,0.89427,0.954389,0.999027,0.936305
8,mammo,test,0.798674,0.850296,0.844936,0.855403
9,mammo,train,0.810708,0.859919,0.874816,0.860082


In [52]:
# Combine mean AUC and its standard deviation under an outer 'metric' level
# with labels 'AUC' and 'sdev'.
total_df = pd.concat([performance_df, sds_df], keys = ['AUC','sdev'])
total_df.index.names = ['metric','dataset', 'validation']

In [53]:
# Select the rows whose 'validation' level (index level 2) equals 'test'.
total_df.xs('test', level=2)

Unnamed: 0_level_0,Unnamed: 1_level_0,slim,ebm,shap,logit
metric,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,bankruptcy,0.992971,0.99966,1.0,1.0
AUC,breastcancer,0.985562,0.99255,0.992071,0.995222
AUC,haberman,0.730686,0.629926,0.631642,0.673922
AUC,heart,0.886813,0.908186,0.900541,0.898936
AUC,mammo,0.798674,0.850296,0.844936,0.855403
AUC,mushroom,0.929572,0.970629,0.997667,0.999942
AUC,spambase,0.897987,0.692424,0.974185,0.95789
AUC,adult,0.847226,0.890406,0.89339,0.8906
sdev,bankruptcy,0.006518,0.000761,0.0,0.0
sdev,breastcancer,0.005987,0.00732,0.008019,0.00457


In [66]:
# Take the 'train' rows, move the model columns into the index, then pivot to
# one row per dataset with (model, metric) columns and print it as LaTeX.
stack = total_df.xs('train', level=2).stack()
stack.index.names = ['metric','dataset',  'model']
pivot = pd.pivot_table(pd.DataFrame(stack), index='dataset', columns = ['model','metric'])
print(pivot.round(3).to_latex())

\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{8}{l}{0} \\
model & \multicolumn{2}{l}{ebm} & \multicolumn{2}{l}{logit} & \multicolumn{2}{l}{shap} & \multicolumn{2}{l}{slim} \\
metric &    AUC &   sdev &    AUC &   sdev &    AUC &   sdev &    AUC &   sdev \\
dataset      &        &        &        &        &        &        &        &        \\
\midrule
bankruptcy   &  1.000 &  0.000 &  1.000 &  0.000 &  1.000 &  0.000 &  0.997 &  0.003 \\
breastcancer &  0.999 &  0.001 &  0.996 &  0.001 &  1.000 &  0.000 &  0.991 &  0.003 \\
haberman     &  0.823 &  0.026 &  0.705 &  0.018 &  0.930 &  0.018 &  0.688 &  0.029 \\
heart        &  0.954 &  0.021 &  0.936 &  0.009 &  0.999 &  0.000 &  0.894 &  0.009 \\
mammo        &  0.860 &  0.006 &  0.860 &  0.006 &  0.875 &  0.004 &  0.811 &  0.018 \\
mushroom     &  1.000 &  0.000 &  1.000 &  0.000 &  1.000 &  0.000 &  0.990 &  0.006 \\
spambase     &  0.699 &  0.268 &  0.978 &  0.005 &  0.999 &  0.000 &  0.914 &  0.024 \\
adult        &  0.891 

## The rest of the notebook computes correlation and cosine similarity measures between the models' explanations

Code for retrieving explanations

In [123]:
def _build_explanation(constant, contributions, prediction):
    """Return one explanation row: 'Constant', the per-feature values, 'prediction'."""
    return pd.concat([
        pd.Series(constant, index=['Constant']),
        contributions,
        pd.Series(prediction, index=['prediction']),
    ])


def get_explanations(results, X, y, pred_idx=0, printing=False):
    """Collect the per-feature contributions of every model for one observation.

    Parameters
    ----------
    results : tuple ``(rho, slim_predictions, ebm, explainer, shap_values, logit)``
        as returned by ``extract_results``.
    X, y : feature frame and label series; row ``pred_idx`` is explained.
    pred_idx : positional index of the observation.
    printing : when true, display the table and print each model's prediction.

    Returns
    -------
    pandas.DataFrame with rows 'X', 'slim', 'logit', 'ebm', 'shap' and columns
    'Constant', one per feature, 'prediction' and 'sum' (the row total over
    all preceding columns). Row 'X' holds the raw feature values with the true
    label in 'prediction'.

    Feature labels are taken from ``X.columns`` so the function does not rely
    on a notebook-level ``X_names`` variable. Rows are assembled with
    ``pd.concat`` because ``Series.append`` no longer exists in pandas 2.x.
    """
    rho, slim_predictions, ebm, explainer, shap_values, logit = results

    row = X.iloc[pred_idx]
    feature_names = list(X.columns)

    observation = _build_explanation(1, row, y.iloc[pred_idx])

    # SLIM: points per feature, with the intercept as constant.
    slim_prediction = slim_predictions.iloc[pred_idx]
    slim_explanation = _build_explanation(rho[0], row * rho[1:], slim_prediction)

    # Logistic regression: coefficient * value; positive logit -> class 1.
    logit_contributions = row * logit.coef_.ravel()
    # intercept_ is read as its first element so int() below gets a scalar.
    logit_intercept = np.ravel(logit.intercept_)[0]
    logit_prediction = int(sum(logit_contributions) + logit_intercept > 0)
    logit_explanation = _build_explanation(logit_intercept, logit_contributions, logit_prediction)

    # EBM: local explanation scores per feature plus the reported extra score.
    ebm_local = ebm.explain_local(X.iloc[pred_idx:pred_idx+1], y.iloc[pred_idx:pred_idx+1], name='EBM')
    ebm_contributions = pd.Series(ebm_local.data(0)['scores'], index=feature_names)
    ebm_mean = ebm_local.data(0)['extra']['scores'][0]
    ebm_prediction = int(sum(ebm_contributions) + ebm_mean > 0)
    ebm_explanation = _build_explanation(ebm_mean, ebm_contributions, ebm_prediction)

    # SHAP: per-feature SHAP values plus the explainer's expected value.
    shap_contributions = pd.Series(shap_values[pred_idx, :], index=feature_names)
    shap_mean = explainer.expected_value
    shap_prediction = int(sum(shap_contributions) + shap_mean > 0)
    shap_explanation = _build_explanation(shap_mean, shap_contributions, shap_prediction)

    explanations = pd.DataFrame(
        [observation, slim_explanation, logit_explanation, ebm_explanation, shap_explanation],
        index=['X', 'slim', 'logit', 'ebm', 'shap'],
    )
    explanations['sum'] = explanations.sum(axis=1)
    if printing:
        display(explanations)
        print("prediction slim: ", slim_prediction)
        print("prediction logit: ", logit_prediction)
        print("prediction ebm: ", ebm_prediction)
        print("prediction shap: ", shap_prediction)
        print("true y: ", y.iloc[pred_idx])

    return explanations

def clean_prediction(df):
    """Return ``df`` when all its 'prediction' values agree, otherwise ``None``.

    Agreement is detected through the column mean being exactly 0 or 1.
    """
    mean_prediction = df['prediction'].mean()
    unanimous = mean_prediction == 0 or mean_prediction == 1
    if not unanimous:
        return None
    return df
    
def zero_filter(df):
    """Return ``df`` unless at least one of its rows sums to zero (then ``None``)."""
    row_totals = df.sum(axis=1)
    if row_totals.isin([0]).any():
        return None
    return df

def preprocess_results(expl, only_correct=True, no_zeros=True, printing=True):
    """Stack per-observation explanation tables and keep only the contributions.

    Parameters
    ----------
    expl : sequence of DataFrames, one per observation, as produced by
        ``get_explanations`` (rows 'X' plus one per model; columns include
        'Constant', 'prediction' and 'sum').
    only_correct : keep only observations for which ``clean_prediction``
        returns the group, i.e. all 'prediction' values (true label included)
        agree.
    no_zeros : drop observations for which ``zero_filter`` returns None, i.e.
        some model's contributions sum to zero.
    printing : print the number of explanations before and after cleaning.

    Returns
    -------
    DataFrame indexed by ('obs', 'model') holding only feature contributions.

    The number of observations is taken from ``len(expl)`` rather than from a
    notebook-level ``N`` variable.
    """
    expl = list(expl)
    n_explanations = len(expl)

    # add observation index
    complete_expl = pd.concat(expl, keys=["{:02d}".format(x) for x in range(n_explanations)])
    complete_expl.index.names = ['obs', 'model']

    if only_correct:
        kept = complete_expl.groupby(level='obs').apply(clean_prediction)
        # Some pandas versions prepend the group key as an extra index level.
        if len(kept.index.names) > 2:
            kept = kept.droplevel(0)
    else:
        kept = complete_expl

    # clean to only include the contributions
    contrib = kept.drop(['X'], level=1).drop(['Constant', 'sum', 'prediction'], axis=1)
    contrib.index = contrib.index.remove_unused_levels()

    if no_zeros:
        contrib = contrib.groupby(level='obs').apply(zero_filter)
        if len(contrib.index.names) > 2:
            contrib = contrib.droplevel(0)

    if printing:
        print("Original number of explanations:  ", n_explanations)
        # Four model rows remain per observation once 'X' is dropped.
        print("Cleaned explanation set size:     ", int(contrib.shape[0]/4))
    return contrib

from scipy.stats import pearsonr
def pearson(df):
    """Return the Pearson correlation between the first two rows of ``df``."""
    first_row = df.iloc[0]
    second_row = df.iloc[1]
    coefficient = pearsonr(first_row, second_row)[0]
    return coefficient
def get_pearson_matrix(df, func=pearson):
    """Build a symmetric model-by-model matrix of mean pairwise correlations.

    For every pair of models (second index level of ``df``) all other models
    are dropped, ``func`` is applied to each observation's remaining rows and
    the results are averaged over observations and rounded to 3 decimals.
    The diagonal stays 0. An 'Average correlation' column holds each model's
    mean over the other ``p - 1`` models.
    """
    models = list(df.index.levels[1])
    p = len(models)
    distance_matrix = np.zeros((p, p))

    for i in range(p):
        for j in range(i + 1, p):
            # Keep only models i and j.
            others = [model for k, model in enumerate(models) if k != i and k != j]
            per_observation = df.drop(others, level=1).groupby(level='obs').apply(func)
            value = round(per_observation.mean(), 3)
            distance_matrix[i, j] = value
            distance_matrix[j, i] = value

    distance_df = pd.DataFrame(distance_matrix, models, models)
    distance_df['Average correlation'] = distance_df.sum() / (p - 1)
    return distance_df

from scipy import spatial

def cosine(df):
    """Return the cosine similarity between the first two rows of ``df``."""
    first_row = df.iloc[0]
    second_row = df.iloc[1]
    distance = spatial.distance.cosine(first_row, second_row)
    return 1 - distance
def get_cosine_matrix(df, func=cosine):
    """Build a symmetric model-by-model matrix of mean pairwise cosine similarities.

    For every pair of models (second index level of ``df``) all other models
    are dropped, ``func`` is applied to each observation's remaining rows and
    the results are averaged over observations and rounded to 3 decimals.
    The diagonal stays 0. An 'Average cosine similarity' column holds each
    model's mean over the other ``p - 1`` models.
    """
    models = list(df.index.levels[1])
    p = len(models)
    distance_matrix = np.zeros((p, p))

    for i in range(p):
        for j in range(i + 1, p):
            # Keep only models i and j.
            others = [model for k, model in enumerate(models) if k != i and k != j]
            per_observation = df.drop(others, level=1).groupby(level='obs').apply(func)
            value = round(per_observation.mean(), 3)
            distance_matrix[i, j] = value
            distance_matrix[j, i] = value

    distance_df = pd.DataFrame(distance_matrix, models, models)
    distance_df['Average cosine similarity'] = distance_df.sum() / (p - 1)
    return distance_df

In [193]:
names = [
    'bankruptcy', 'breastcancer', 'haberman', 'heart',
    'mammo', 'mushroom', 'spambase', 'adult',
]
# names = ['mammo']
correlations = []
cosines = []
folds = 5

# For every dataset and CV fold: explain each test observation with all
# models, clean the explanations, and compute the pairwise similarity
# matrices; the fold matrices are then averaged per dataset.
for dataname in names:
    dataset_corrs = []
    dataset_cosines = []
    for fold in range(folds):
        models = load_models(f'{dataname}_models_600_cv{fold}')
        data = load_models(f'{dataname}_data_cv{fold}')

        # X_names and N are notebook-level names read by the helper functions.
        X_names = ['(Intercept)'] + list(data['X_test'].columns.values)
        N = len(data['X_test'])

        results = extract_results(models, data['X_test'])
        expl = [
            get_explanations(results, data['X_test'], data['y_test'], idx, printing=False)
            for idx in range(N)
        ]

        print("Dataset: ", dataname, " fold: ", str(fold))
        contrib = preprocess_results(expl, only_correct=True, no_zeros=True, printing=True)

        corr_distance = get_pearson_matrix(contrib)
        dataset_corrs.append(corr_distance)

        cosine_distance = get_cosine_matrix(contrib)
        dataset_cosines.append(cosine_distance)

    dataset_corrs_df = pd.concat(dataset_corrs, keys=range(folds)).rename_axis(['fold', 'model'])
    correlations.append(dataset_corrs_df.groupby('model').mean())

    dataset_cosines_df = pd.concat(dataset_cosines, keys=range(folds)).rename_axis(['fold', 'model'])
    cosines.append(dataset_cosines_df.groupby('model').mean())
    
    

Dataset:  bankruptcy  fold:  0
Original number of explanations:   51
Cleaned explanation set size:      34
Dataset:  bankruptcy  fold:  1
Original number of explanations:   51
Cleaned explanation set size:      48
Dataset:  bankruptcy  fold:  2
Original number of explanations:   50
Cleaned explanation set size:      40
Dataset:  bankruptcy  fold:  3
Original number of explanations:   49
Cleaned explanation set size:      43
Dataset:  bankruptcy  fold:  4
Original number of explanations:   49
Cleaned explanation set size:      33
Dataset:  breastcancer  fold:  0
Original number of explanations:   137
Cleaned explanation set size:      117
Dataset:  breastcancer  fold:  1
Original number of explanations:   137
Cleaned explanation set size:      128
Dataset:  breastcancer  fold:  2
Original number of explanations:   137
Cleaned explanation set size:      130
Dataset:  breastcancer  fold:  3
Original number of explanations:   137
Cleaned explanation set size:      131
Dataset:  breastcance

In [188]:
# Print the correlation and cosine-similarity matrices of one dataset
# (selected by position in `correlations` / `cosines`) as LaTeX, with rows and
# columns ordered slim, logit, ebm, shap and the average column removed.
dataset = 0
print(correlations[dataset].drop(columns=['Average correlation'])[['slim','logit','ebm', 'shap']].round(3).reindex(['slim', 'logit', 'ebm', 'shap']).to_latex())
print(cosines[dataset].drop(columns=['Average cosine similarity'])[['slim','logit','ebm', 'shap']].round(3).reindex(['slim', 'logit', 'ebm', 'shap']).to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &   slim &  logit &    ebm &   shap \\
model &        &        &        &        \\
\midrule
slim  &  0.000 &  0.932 &  0.804 &  0.921 \\
logit &  0.932 &  0.000 &  0.817 &  0.804 \\
ebm   &  0.804 &  0.817 &  0.000 &  0.813 \\
shap  &  0.921 &  0.804 &  0.813 &  0.000 \\
\bottomrule
\end{tabular}

\begin{tabular}{lrrrr}
\toprule
{} &   slim &  logit &    ebm &   shap \\
model &        &        &        &        \\
\midrule
slim  &  0.000 &  0.935 &  0.826 &  0.929 \\
logit &  0.935 &  0.000 &  0.841 &  0.834 \\
ebm   &  0.826 &  0.841 &  0.000 &  0.861 \\
shap  &  0.929 &  0.834 &  0.861 &  0.000 \\
\bottomrule
\end{tabular}



In [189]:
# Stack the per-dataset correlation matrices under a (dataset, model) index.
correlations_df = pd.concat(correlations, keys = names)
correlations_df.index.names = ['dataset','model']

In [190]:
# Mean correlation matrix over datasets, printed as LaTeX.
cor_avgs = correlations_df.groupby('model').mean()
print(cor_avgs.drop(columns=['Average correlation'])[['slim','logit','ebm', 'shap']].round(3).reindex(['slim', 'logit', 'ebm', 'shap']).to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &   slim &  logit &    ebm &   shap \\
model &        &        &        &        \\
\midrule
slim  &  0.000 &  0.697 &  0.383 &  0.505 \\
logit &  0.697 &  0.000 &  0.450 &  0.511 \\
ebm   &  0.383 &  0.450 &  0.000 &  0.622 \\
shap  &  0.505 &  0.511 &  0.622 &  0.000 \\
\bottomrule
\end{tabular}



In [191]:
# Standard deviation of the correlation matrices over datasets, printed as LaTeX.
cor_stds = correlations_df.groupby('model').std()
print(cor_stds.drop(columns=['Average correlation'])[['slim','logit','ebm', 'shap']].round(3).reindex(['slim', 'logit', 'ebm', 'shap']).to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &   slim &  logit &    ebm &   shap \\
model &        &        &        &        \\
\midrule
slim  &  0.000 &  0.250 &  0.300 &  0.294 \\
logit &  0.250 &  0.000 &  0.273 &  0.198 \\
ebm   &  0.300 &  0.273 &  0.000 &  0.289 \\
shap  &  0.294 &  0.198 &  0.289 &  0.000 \\
\bottomrule
\end{tabular}



In [192]:
# Stack the per-dataset cosine-similarity matrices under a (dataset, model) index.
cosines_df = pd.concat(cosines, keys = names)
cosines_df.index.names = ['dataset','model']

In [176]:
# Mean cosine-similarity matrix over datasets, printed as LaTeX.
cos_avgs = cosines_df.groupby('model').mean()
print(cos_avgs.drop(columns=['Average cosine similarity'])[['slim','logit','ebm', 'shap']].round(3).reindex(['slim', 'logit', 'ebm', 'shap']).to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &   slim &  logit &    ebm &   shap \\
model &        &        &        &        \\
\midrule
slim  &  0.000 &  0.707 &  0.326 &  0.410 \\
logit &  0.707 &  0.000 &  0.416 &  0.492 \\
ebm   &  0.326 &  0.416 &  0.000 &  0.657 \\
shap  &  0.410 &  0.492 &  0.657 &  0.000 \\
\bottomrule
\end{tabular}



In [177]:
# Standard deviation of the cosine-similarity matrices over datasets, printed as LaTeX.
cos_stds = cosines_df.groupby('model').std()
print(cos_stds.drop(columns=['Average cosine similarity'])[['slim','logit','ebm', 'shap']].round(3).reindex(['slim', 'logit', 'ebm', 'shap']).to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &   slim &  logit &    ebm &   shap \\
model &        &        &        &        \\
\midrule
slim  &  0.000 &  0.239 &  0.325 &  0.375 \\
logit &  0.239 &  0.000 &  0.361 &  0.369 \\
ebm   &  0.325 &  0.361 &  0.000 &  0.308 \\
shap  &  0.375 &  0.369 &  0.308 &  0.000 \\
\bottomrule
\end{tabular}

