In [1]:
import sys
import os

#Use if working on Colab
#from google.colab import drive
#drive.mount('/content/drive')
#PATH = '/content/drive/My Drive/PPM_Stability/'

#If working locally
# Use the directory the notebook was started from as the project root; the
# pipeline, sample, training-data and results paths in later cells are built from PATH.
PATH = os.getcwd()
# Put the project root on the module search path, presumably so the project-local
# modules imported in the next cell can be resolved - TODO confirm.
sys.path.append(PATH)

In [2]:
#!pip install lime==0.2.0.1
#!pip install shap==0.37.0
#!pip install xgboost==1.0.0
#!pip install anchor-exp==0.0.2.0

In [3]:
import EncoderFactory
#from DatasetManager_for_colab import DatasetManager
from DatasetManager import DatasetManager
import BucketFactory
import stability as st #Nogueira, Sechidis, Brown.

import pandas as pd
import numpy as np
from scipy import stats

from sklearn.metrics import roc_auc_score
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import StandardScaler

import time
import os
import sys
from sys import argv
import pickle
from collections import defaultdict
import random
import joblib

from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

import lime
import lime.lime_tabular
from lime import submodular_pick

from anchor import anchor_tabular
#from alibi.utils.data import gen_category_map

import shap

import warnings
warnings.filterwarnings('ignore')

In [4]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

def imp_df(column_names, importances):
    """Build a feature-importance table ranked from most to least important.

    Parameters
    ----------
    column_names : sequence
        Feature names, one per importance value.
    importances : sequence
        Importance score for each entry of ``column_names``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``feature`` and ``feature_importance``, sorted by
        ``feature_importance`` in descending order and re-indexed from 0.
    """
    table = pd.DataFrame({'feature': column_names,
                          'feature_importance': importances})
    ranked = table.sort_values('feature_importance', ascending=False)
    # Drop the pre-sort index so row labels follow the ranking
    return ranked.reset_index(drop=True)

# plotting a feature importance dataframe (horizontal barchart)
def var_imp_plot(imp_df, title, num_feat):
    """Draw a horizontal bar chart of the top ``num_feat`` feature importances.

    Parameters
    ----------
    imp_df : pandas.DataFrame
        Two-column frame: feature name first, importance second. The columns
        are relabelled to ``feature`` / ``feature_importance`` for plotting.
        The first ``num_feat`` rows are plotted, so the frame should already
        be sorted by importance.
    title : str
        Title shown above the chart.
    num_feat : int
        Number of leading rows to plot.

    Returns
    -------
    None
        The chart is drawn on the current matplotlib axes.
    """
    # Relabel a copy of the plotted rows so the caller's frame keeps its own column names
    top_rows = imp_df.head(num_feat).copy()
    top_rows.columns = ['feature', 'feature_importance']
    ax = sns.barplot(x = 'feature_importance', y = 'feature', data = top_rows, orient = 'h', palette="Blues_r")
    # The title argument was previously accepted but never applied
    ax.set_title(title)

In [5]:
from lime import submodular_pick

def generate_lime_explanations(explainer,test_xi, cls, submod=False, test_all_data=None, max_feat = 10, scaler=None):
    """Explain one instance with LIME and optionally plot a submodular pick.

    Parameters
    ----------
    explainer
        Object exposing ``explain_instance(row, predict_fn, num_features=..., labels=...)``
        (a LIME tabular explainer in this notebook).
    test_xi
        The instance to explain, passed straight to ``explain_instance``.
    cls
        Classifier exposing ``predict_proba``.
    submod : bool, default False
        When true and ``test_all_data`` is supplied, additionally run LIME's
        ``SubmodularPick`` over ``test_all_data`` and draw one pyplot figure
        per picked explanation.
    test_all_data : optional
        Data set handed to ``SubmodularPick``. If it is ``None`` the
        submodular pick is skipped even when ``submod`` is true.
    max_feat : int, default 10
        Number of features requested from LIME.
    scaler : optional
        Object exposing ``transform``. When given, every batch LIME sends to
        the classifier is passed through ``scaler.transform`` first.

    Returns
    -------
    The explanation object returned by ``explainer.explain_instance`` for
    labels 0 and 1.
    """

    def predict_fn(X):
        # Scale the perturbed samples first when the classifier expects scaled input
        if scaler is not None:
            X = scaler.transform(X)
        return cls.predict_proba(X)

    exp = explainer.explain_instance(test_xi,
                                     predict_fn, num_features=max_feat, labels=[0,1])

    # This branch used to sit after the return statement (unreachable) and
    # referenced an undefined name `num_features`; it now runs before returning.
    if submod and test_all_data is not None:
        sp_obj = submodular_pick.SubmodularPick(explainer, test_all_data, predict_fn,
                                                sample_size=20, num_features=max_feat, num_exps_desired=4)
        for picked in sp_obj.sp_explanations:
            picked.as_pyplot_figure(label=picked.available_labels()[0])

    return exp

In [6]:
def dispersal(weights, features, z_threshold=3):
    """Relative variance of each feature's weight across repeated explanations.

    Parameters
    ----------
    weights : sequence of sequences
        One entry per explanation run; each entry holds one weight per
        feature, in the same order as ``features``.
    features : sequence
        Feature names; only its length is used.
    z_threshold : float, default 3
        Values whose absolute z-score is not strictly below this threshold
        are treated as outliers for the second result. The z-scores use the
        population standard deviation, for which |z| can never exceed
        ``sqrt(n - 1)`` with ``n`` runs. With ten or fewer runs the default
        of 3 therefore removes essentially nothing; pass a smaller value for
        short runs.

    Returns
    -------
    (list, list)
        ``variance / |mean|`` per feature, first over all runs and then with
        outliers removed. A feature whose mean weight is exactly 0 gets 0 in
        both lists. If outlier removal leaves no values (for example when all
        values are identical and the z-scores are NaN), the unfiltered value
        is used for the second list as well.
    """
    feat_len = len(features)

    # Weights arrive grouped by run; regroup them by feature
    weights_by_feat = [[iteration[i] for iteration in weights] for i in range(feat_len)]

    rel_variances = []
    rel_variances_no_outlier = []

    for values in weights_by_feat:
        mean = np.mean(values)

        # A zero mean makes the relative variance undefined; report 0 instead
        if mean == 0:
            rel_variances.append(0)
            rel_variances_no_outlier.append(0)
            continue

        rel_var = np.var(values) / abs(mean)
        rel_variances.append(rel_var)

        # Keep only values whose z-score is strictly inside the threshold
        z_scores = stats.zscore(values)
        kept = [value for value, z in zip(values, z_scores) if abs(z) < z_threshold]

        if not kept:
            rel_variances_no_outlier.append(rel_var)
            continue

        kept_mean = np.mean(kept)
        if kept_mean == 0:
            rel_variances_no_outlier.append(0)
        else:
            rel_variances_no_outlier.append(np.var(kept) / abs(kept_mean))

    return rel_variances, rel_variances_no_outlier

In [7]:
def create_samples(shap_explainer, iterations, row, features, top = None, scaler = None):
    """Explain one row repeatedly with SHAP and rank the features each time.

    Parameters
    ----------
    shap_explainer : callable
        Called as ``shap_explainer(row)``; the result must expose ``.values``
        whose first row holds one SHAP value per feature.
    iterations : int
        Number of times the row is explained.
    row
        The instance to explain (2-D, one row, as indexed by ``values[0]``).
    features : sequence of str
        Feature names, aligned with the SHAP values.
    top : int, optional
        When given, the "relevant" features are the ``top`` highest-ranked
        ones with a non-zero SHAP value. When omitted, they are the features
        whose absolute SHAP value falls in the highest of four equal-width
        bins.
    scaler : optional
        Object exposing ``transform``; applied to ``row`` once before
        explaining. Pass it only for rows that have not been scaled yet.

    Returns
    -------
    (list, list)
        ``exp``: per iteration, every feature as ``(name, shap, |shap|)``
        sorted by ``|shap|`` descending. ``rel_exp``: per iteration, the
        subset considered relevant, in the same tuple format.
    """
    length = len(features)

    exp = []
    rel_exp = []

    if scaler is not None:
        row = scaler.transform(row)

    for _ in range(iterations):
        # Re-run the explainer each time: variation between runs is what the
        # caller measures, so this call must not be hoisted out of the loop.
        shap_values = shap_explainer(row).values

        # SHAP values are attributions in the model's output units, not feature
        # values, so they are deliberately NOT passed through
        # scaler.inverse_transform (doing so would add feature means to them).
        instance_values = shap_values[0]

        #Map SHAP values to feature names
        importances = [(features[i], instance_values[i], abs(instance_values[i]))
                       for i in range(length)]
        abs_values = [entry[2] for entry in importances]

        #Sort features by influence on result
        importances.sort(key=lambda tup: tup[2], reverse = True)
        exp.append(importances)

        #Create list of most important features
        if top is not None:
            # Slicing tolerates top > number of features (indexing raised IndexError)
            rel_feat = [feat for feat in importances[:top] if feat[2] > 0]
        else:
            # Lower edge of the highest of four equal-width bins over |shap|
            bins = pd.cut(abs_values, 4, duplicates = "drop", retbins = True)[-1]
            q1_min = bins[-2]
            rel_feat = [feat for feat in importances if feat[2] > q1_min]
        rel_exp.append(rel_feat)

    return exp, rel_exp

In [8]:
# --- Experiment selection ---------------------------------------------------
dataset_ref = "sepsis_cases"
# os.path.join supplies the separator that plain concatenation dropped when
# PATH comes from os.getcwd() (no trailing slash); it also works when PATH
# already ends with a slash, as in the Colab variant.
params_dir = os.path.join(PATH, "params")
results_dir = "results"
bucket_method = "single"
cls_encoding = "agg"
cls_method = "xgboost"

gap = 1
n_iter = 1

# Sub-directory name used for the saved pipelines, samples and results
method_name = "%s_%s"%(bucket_method, cls_encoding)

# --- Which explainers to evaluate -------------------------------------------
generate_lime = True
generate_shap = True
generate_anchor = False

# --- Evaluation sizes -------------------------------------------------------
sample_size = 2
exp_iter = 10   # number of repeated explanations per instance
max_feat = 10
max_prefix = 20

# Maps a dataset family to the concrete dataset names evaluated for it
dataset_ref_to_datasets = {
    #"bpic2011": ["bpic2011_f%s"%formula for formula in range(1,5)],
    "bpic2015": ["bpic2015_%s_f2"%(municipality) for municipality in range(5,6)],
    "bpic2017" : ["bpic2017_accepted"],
    "bpic2012" : ["bpic2012_accepted"],
    #"insurance": ["insurance_activity", "insurance_followup"],
    "sepsis_cases": ["sepsis_cases_1"],# "sepsis_cases_2", "sepsis_cases_4"]
    "production": ["production"] 
}

# A reference without a mapping entry is treated as a single dataset name
datasets = dataset_ref_to_datasets.get(dataset_ref, [dataset_ref])

datasets

['sepsis_cases_1']

In [9]:
# Load, for every bucket, the fitted pipeline plus the test sample, results
# template and training data saved alongside it, and keep them in parallel
# lists indexed by bucket position.
for dataset_name in datasets:

    min_prefix_length = 1

    dataset_manager = DatasetManager(dataset_name)
    # The dataset is read here but not used again in this cell
    data = dataset_manager.read_dataset()

    all_pipelines = []
    all_cls = []
    all_encoders = []
    all_scalers = []
    all_train = []
    all_samples = []
    all_results = []
    
    # NOTE(review): each pass of this loop appends the same buckets again;
    # the lists are indexed by bucket position in later cells, so only the
    # first pass is actually used when n_iter > 1.
    for ii in range(n_iter):
        pipeline_dir = os.path.join(PATH,'%s/%s/%s/pipelines'% (dataset_ref, cls_method, method_name))
        # Count only saved pipelines: stray entries in the directory (e.g.
        # .DS_Store, .ipynb_checkpoints) would otherwise inflate the bucket
        # count and make the load below fail on a file that does not exist.
        num_buckets = len([name for name in os.listdir(pipeline_dir)
                           if name.startswith("pipeline_bucket_") and name.endswith(".joblib")])

        for bucket in range(num_buckets):
            bucketID = bucket+1
            print ('Bucket', bucketID)

            #import everything needed to sort and predict
            pipeline_path = os.path.join(PATH, "%s/%s/%s/pipelines/pipeline_bucket_%s.joblib" % 
                                         (dataset_ref, cls_method, method_name, bucketID))
            # joblib.load unpickles the file and can execute arbitrary code:
            # only load pipelines produced by a trusted run.
            pipeline = joblib.load(pipeline_path)
            feature_combiner = pipeline['encoder']
            # The scaler step is optional in the saved pipeline
            if 'scaler' in pipeline.named_steps:
                scaler = pipeline['scaler']
            else:
                scaler = None
            cls = pipeline['cls']
            
            all_cls.append(cls)
            all_encoders.append(feature_combiner)
            all_scalers.append(scaler)
            all_pipelines.append(pipeline)

            #find relevant samples for bucket
            bucket_sample = pd.read_csv(os.path.join(PATH, "%s/%s/%s/samples/test_sample_bucket_%s.csv" % 
                                      (dataset_ref, cls_method, method_name, bucketID))).values
            results_template = pd.read_csv(os.path.join(PATH, "%s/%s/%s/samples/results_bucket_%s.csv" % 
                                      (dataset_ref, cls_method, method_name, bucketID)))
    
            # Samples are stored in all_samples already scaled; code that
            # consumes them must not apply the scaler a second time.
            if scaler is not None:
                bucket_sample = scaler.transform(bucket_sample)
            bucket_results = results_template
            
            feat_names = feature_combiner.get_feature_names()
            feat_list = [feat.replace(" ", "_") for feat in feat_names]
            
            all_samples.append(bucket_sample)
            all_results.append(bucket_results)
            
            #import training data for bucket
            train_data = pd.read_csv(os.path.join(PATH, "%s/%s/%s/train_data/train_data_bucket_%s.csv" % 
                                                          (dataset_ref, cls_method, method_name, bucketID))).values
            # Training data is likewise stored already scaled
            if scaler is not None:
                train_data = scaler.transform(train_data)
            
            all_train.append(train_data)

Bucket 1


In [10]:
# Measure how stable SHAP explanations are: explain every sampled instance
# exp_iter times, then score the agreement of the selected feature subsets
# (st.getStability) and the dispersal of the feature weights (dispersal).
if generate_shap:

    for dataset_name in datasets:
        
        for ii in range(n_iter):
            pipeline_dir = os.path.join(PATH,'%s/%s/%s/pipelines'% 
                                        (dataset_ref, cls_method, method_name))
            # Count only saved pipelines so stray directory entries cannot
            # push the bucket index past the loaded artefacts.
            num_buckets = len([name for name in os.listdir(pipeline_dir)
                               if name.startswith("pipeline_bucket_") and name.endswith(".joblib")])
            
            for bucket in range(num_buckets):
                bucketID = bucket+1
                print ('Bucket', bucketID)

                cls = all_cls[bucket]
                feature_combiner = all_encoders[bucket]
                scaler = all_scalers[bucket]
                trainingdata = all_train[bucket]
                sample_instances = all_samples[bucket]
                results = all_results[bucket]
                
                # For xgboost the explainer is built from the model alone;
                # other classifiers also get the training data.
                if cls_method == "xgboost":
                    shap_explainer = shap.Explainer(cls)
                else:
                    shap_explainer = shap.Explainer(cls, trainingdata)
                print(type(shap_explainer))
                
                feat_list = [feat.replace(" ", "_") for feat in feature_combiner.get_feature_names()]

                # Exact name -> position lookup. The previous substring test
                # (`name in other_name`) also matched any feature whose name
                # merely contains another feature's name. setdefault keeps the
                # first position for a repeated name, like list.index did.
                feat_index = {}
                for position, name in enumerate(feat_list):
                    feat_index.setdefault(name, position)
                
                subset_stability = []
                weight_stability = []
                adjusted_weight_stability = []
                    
                #explain the chosen instances and find the stability score
                for instance_no, instance in enumerate(sample_instances, start=1):
                    print("Testing", instance_no, "of", len(sample_instances), ".")
                    
                    # The explainer expects a 2-D, single-row input
                    instance = instance.reshape(1, -1)

                    #Get Tree SHAP explanations for instance
                    # The samples were already scaled when they were loaded, so
                    # no scaler is passed here: passing it would scale the
                    # instance a second time.
                    exp, rel_exp = create_samples(shap_explainer, exp_iter, instance, feat_list)

                    # One 0/1 presence vector per run: 1 where the feature was
                    # among that run's relevant features
                    feat_pres = []
                    for iteration in rel_exp:
                        presence_list = [0]*len(feat_list)
                        for explanation in iteration:
                            presence_list[feat_index[explanation[0]]] = 1
                        feat_pres.append(presence_list)

                    # One weight vector per run, in feat_list order
                    feat_weights = []
                    for iteration in exp:
                        weights = [0]*len(feat_list)
                        for explanation in iteration:
                            weights[feat_index[explanation[0]]] = explanation[1]
                        feat_weights.append(weights)

                    stability = st.getStability(feat_pres)
                    print ("Stability:", round(stability,2))
                    subset_stability.append(stability)

                    # Reported as 1 - mean relative variance, so higher means
                    # more stable; the value is not bounded below.
                    rel_var, second_var = dispersal(feat_weights, feat_list)
                    avg_dispersal = 1-np.mean(rel_var)
                    print ("Dispersal of feature importance:", round(avg_dispersal, 2))
                    weight_stability.append(avg_dispersal)
                    adj_dispersal = 1-np.mean(second_var)
                    print ("Dispersal with no outliers:", round(adj_dispersal, 2))
                    adjusted_weight_stability.append(adj_dispersal)
                    
                results["SHAP Subset Stability"] = subset_stability
                results["SHAP Weight Stability"] = weight_stability
                results["SHAP Adjusted Weight Stability"] = adjusted_weight_stability
                all_results[bucket] = results

Bucket 1
<class 'shap.explainers._tree.Tree'>
Testing 1 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 2 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 3 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 4 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 5 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 6 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 7 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 8 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 9 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 10 of 992 .
Stabili

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 81 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 82 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 83 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 84 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 85 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 86 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 87 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 88 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 89 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal wi

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 160 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 161 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 162 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 163 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 164 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 165 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 166 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 167 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 168 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 239 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 240 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 241 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 242 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 243 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 244 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 245 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 246 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 247 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 318 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 319 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 320 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 321 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 322 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 323 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 324 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 325 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 326 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 397 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 398 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 399 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 400 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 401 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 402 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 403 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 404 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 405 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 476 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 477 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 478 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 479 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 480 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 481 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 482 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 483 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 484 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 555 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 556 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 557 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 558 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 559 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 560 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 561 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 562 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 563 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 634 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 635 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 636 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 637 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 638 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 639 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 640 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 641 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 642 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 713 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 714 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 715 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 716 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 717 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 718 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 719 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 720 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 721 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 792 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 793 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 794 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 795 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 796 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 797 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 798 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 799 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 800 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 871 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 872 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 873 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 874 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 875 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 876 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 877 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 878 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 879 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 950 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 951 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 952 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 953 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 954 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 955 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 956 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 957 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dispersal with no outliers: 1.0
Testing 958 of 992 .
Stability: 1.0
Dispersal of feature importance: 1.0
Dis

In [11]:
# Write the per-bucket result tables as one CSV. Only the relative part of the
# path is formatted: formatting the joined string also ran `%` over PATH, which
# fails or garbles the path when the working directory contains a '%'.
# NOTE(review): this cell sits before the LIME cell, so on a top-to-bottom run
# the file is written before the LIME columns are added - confirm the order.
results_path = os.path.join(PATH, "%s/%s/%s/samples/results.csv" % (dataset_ref, cls_method, method_name))
pd.concat(all_results).to_csv(results_path)

In [12]:
# Measure how stable LIME explanations are: explain every sampled instance
# exp_iter times, then score the agreement of the selected feature subsets
# (st.getStability) and the dispersal of the feature weights (dispersal).
if generate_lime:

    for dataset_name in datasets:
        
        pipeline_dir = os.path.join(PATH,'%s/%s/%s/pipelines'% 
                                    (dataset_ref, cls_method, method_name))
        # Count only saved pipelines so stray directory entries cannot push
        # the bucket index past the loaded artefacts.
        num_buckets = len([name for name in os.listdir(pipeline_dir)
                           if name.startswith("pipeline_bucket_") and name.endswith(".joblib")])
            
        for bucket in range(num_buckets):
            bucketID = bucket+1
            print ('Bucket', bucketID)
            
            cls = all_cls[bucket]
            feature_combiner = all_encoders[bucket]
            scaler = all_scalers[bucket]
            trainingdata = all_train[bucket]
            sample_instances = all_samples[bucket]
            results = all_results[bucket]
            pipeline = all_pipelines[bucket]            

            feat_list = [feat.replace(" ", "_") for feat in feature_combiner.get_feature_names()]
            class_names = ["Negative", "Positive"]

            subset_stability = []
            weight_stability = []
            adjusted_weight_stability = []

            #create explainer now that can be passed later
            lime_explainer = lime.lime_tabular.LimeTabularExplainer(trainingdata,
                                  feature_names = feat_list, class_names=class_names)
            
            print(len(sample_instances))
            #explain the chosen instances and find the stability score
            for instance_no, instance in enumerate(sample_instances, start=1):

                print("Testing", instance_no, "of", len(sample_instances), ".")

                #Get lime explanations for instance
                feat_pres = []
                feat_weights = []
                
                for iteration in range(exp_iter):

                    # Training data and samples were already scaled when they
                    # were loaded, so LIME perturbs in scaled space and no
                    # scaler is passed: passing it would scale every perturbed
                    # sample a second time before prediction.
                    lime_exp = generate_lime_explanations(lime_explainer, instance, cls,
                                                          max_feat = len(feat_list))

                    # (feature index, weight) pairs for label 1, the same label
                    # as_list() reports by default. Using indices avoids
                    # matching feature names as substrings of LIME's
                    # human-readable condition strings, which credited a
                    # feature with another feature's weight whenever one name
                    # is contained in the other.
                    indexed_weights = lime_exp.as_map()[1]

                    # Lower edge of the highest of four equal-width bins.
                    # NOTE(review): the cut-off is computed on signed weights,
                    # so strongly negative contributions are never marked
                    # present, whereas create_samples ranks by absolute value -
                    # confirm this is intended.
                    all_weights = [weight for _, weight in indexed_weights]
                    bins = pd.cut(all_weights, 4, duplicates = "drop", retbins = True)[-1]
                    q1_min = bins[-2]

                    presence_list = [0]*len(feat_list)
                    weights = [0]*len(feat_list)

                    for feat_idx, weight in indexed_weights:
                        if weight > q1_min:
                            presence_list[feat_idx] = 1
                        weights[feat_idx] = weight

                    feat_pres.append(presence_list)
                    feat_weights.append(weights)

                stability = st.getStability(feat_pres)
                print ("Stability:", round(stability,2))
                subset_stability.append(stability)

                # Reported as 1 - mean relative variance, so higher means more
                # stable; the value is not bounded below.
                rel_var, second_var = dispersal(feat_weights, feat_list)
                avg_dispersal = 1-np.mean(rel_var)
                print ("Dispersal of feature importance:", round(avg_dispersal, 2))
                weight_stability.append(avg_dispersal)
                adj_dispersal = 1-np.mean(second_var)
                print ("Dispersal with no outliers:", round(adj_dispersal, 2))
                adjusted_weight_stability.append(adj_dispersal)

            results["LIME Subset Stability"] = subset_stability
            results["LIME Weight Stability"] = weight_stability
            results["LIME Adjusted Weight Stability"] = adjusted_weight_stability
            all_results[bucket] = results

Bucket 1
992
Testing 1 of 992 .
Stability: 0.21
Dispersal of feature importance: 0.24
Dispersal with no outliers: 0.24
Testing 2 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.44
Dispersal with no outliers: 0.44
Testing 3 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.59
Dispersal with no outliers: 0.59
Testing 4 of 992 .
Stability: 0.29
Dispersal of feature importance: 0.17
Dispersal with no outliers: 0.17
Testing 5 of 992 .
Stability: 0.23
Dispersal of feature importance: 0.15
Dispersal with no outliers: 0.15
Testing 6 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.44
Dispersal with no outliers: 0.44
Testing 7 of 992 .
Stability: 0.19
Dispersal of feature importance: 0.48
Dispersal with no outliers: 0.48
Testing 8 of 992 .
Stability: 0.21
Dispersal of feature importance: 0.41
Dispersal with no outliers: 0.41
Testing 9 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.35
Dispersal with no outliers: 0.35
Testing 10 of 992 .
Stability: 0.18

Stability: 0.21
Dispersal of feature importance: 0.59
Dispersal with no outliers: 0.59
Testing 79 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.09
Dispersal with no outliers: 0.09
Testing 80 of 992 .
Stability: 0.16
Dispersal of feature importance: 0.54
Dispersal with no outliers: 0.54
Testing 81 of 992 .
Stability: 0.23
Dispersal of feature importance: -0.67
Dispersal with no outliers: -0.67
Testing 82 of 992 .
Stability: 0.18
Dispersal of feature importance: 0.59
Dispersal with no outliers: 0.59
Testing 83 of 992 .
Stability: 0.21
Dispersal of feature importance: 0.4
Dispersal with no outliers: 0.4
Testing 84 of 992 .
Stability: 0.18
Dispersal of feature importance: 0.26
Dispersal with no outliers: 0.26
Testing 85 of 992 .
Stability: 0.15
Dispersal of feature importance: 0.43
Dispersal with no outliers: 0.43
Testing 86 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.61
Dispersal with no outliers: 0.61
Testing 87 of 992 .
Stability: 0.19
Dispersal of feature 

Stability: 0.26
Dispersal of feature importance: 0.33
Dispersal with no outliers: 0.33
Testing 155 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.14
Dispersal with no outliers: 0.14
Testing 156 of 992 .
Stability: 0.21
Dispersal of feature importance: 0.54
Dispersal with no outliers: 0.54
Testing 157 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.28
Dispersal with no outliers: 0.28
Testing 158 of 992 .
Stability: 0.26
Dispersal of feature importance: -0.01
Dispersal with no outliers: -0.01
Testing 159 of 992 .
Stability: 0.16
Dispersal of feature importance: 0.07
Dispersal with no outliers: 0.07
Testing 160 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.51
Dispersal with no outliers: 0.51
Testing 161 of 992 .
Stability: 0.22
Dispersal of feature importance: -4.68
Dispersal with no outliers: -4.68
Testing 162 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.03
Dispersal with no outliers: 0.03
Testing 163 of 992 .
Stability: 0.18
Dispersa

Stability: 0.23
Dispersal of feature importance: 0.46
Dispersal with no outliers: 0.46
Testing 231 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.39
Dispersal with no outliers: 0.39
Testing 232 of 992 .
Stability: 0.26
Dispersal of feature importance: 0.36
Dispersal with no outliers: 0.36
Testing 233 of 992 .
Stability: 0.19
Dispersal of feature importance: 0.01
Dispersal with no outliers: 0.01
Testing 234 of 992 .
Stability: 0.27
Dispersal of feature importance: -1.2
Dispersal with no outliers: -1.2
Testing 235 of 992 .
Stability: 0.18
Dispersal of feature importance: -0.56
Dispersal with no outliers: -0.56
Testing 236 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.64
Dispersal with no outliers: 0.64
Testing 237 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.24
Dispersal with no outliers: 0.24
Testing 238 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.56
Dispersal with no outliers: 0.56
Testing 239 of 992 .
Stability: 0.25
Dispersal o

Stability: 0.26
Dispersal of feature importance: 0.21
Dispersal with no outliers: 0.21
Testing 307 of 992 .
Stability: 0.16
Dispersal of feature importance: 0.13
Dispersal with no outliers: 0.13
Testing 308 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.14
Dispersal with no outliers: 0.14
Testing 309 of 992 .
Stability: 0.24
Dispersal of feature importance: -0.07
Dispersal with no outliers: -0.07
Testing 310 of 992 .
Stability: 0.25
Dispersal of feature importance: -0.2
Dispersal with no outliers: -0.2
Testing 311 of 992 .
Stability: 0.21
Dispersal of feature importance: 0.64
Dispersal with no outliers: 0.64
Testing 312 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.14
Dispersal with no outliers: 0.14
Testing 313 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.58
Dispersal with no outliers: 0.58
Testing 314 of 992 .
Stability: 0.19
Dispersal of feature importance: 0.07
Dispersal with no outliers: 0.07
Testing 315 of 992 .
Stability: 0.21
Dispersal of

Stability: 0.21
Dispersal of feature importance: 0.54
Dispersal with no outliers: 0.54
Testing 383 of 992 .
Stability: 0.23
Dispersal of feature importance: 0.38
Dispersal with no outliers: 0.38
Testing 384 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.51
Dispersal with no outliers: 0.51
Testing 385 of 992 .
Stability: 0.27
Dispersal of feature importance: 0.41
Dispersal with no outliers: 0.41
Testing 386 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.4
Dispersal with no outliers: 0.4
Testing 387 of 992 .
Stability: 0.26
Dispersal of feature importance: 0.01
Dispersal with no outliers: 0.01
Testing 388 of 992 .
Stability: 0.26
Dispersal of feature importance: -0.71
Dispersal with no outliers: -0.71
Testing 389 of 992 .
Stability: 0.28
Dispersal of feature importance: 0.46
Dispersal with no outliers: 0.46
Testing 390 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.39
Dispersal with no outliers: 0.39
Testing 391 of 992 .
Stability: 0.2
Dispersal of f

Stability: 0.21
Dispersal of feature importance: 0.52
Dispersal with no outliers: 0.52
Testing 459 of 992 .
Stability: 0.23
Dispersal of feature importance: 0.01
Dispersal with no outliers: 0.01
Testing 460 of 992 .
Stability: 0.18
Dispersal of feature importance: 0.45
Dispersal with no outliers: 0.45
Testing 461 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.28
Dispersal with no outliers: 0.28
Testing 462 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.44
Dispersal with no outliers: 0.44
Testing 463 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.08
Dispersal with no outliers: 0.08
Testing 464 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.46
Dispersal with no outliers: 0.46
Testing 465 of 992 .
Stability: 0.17
Dispersal of feature importance: 0.45
Dispersal with no outliers: 0.45
Testing 466 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.52
Dispersal with no outliers: 0.52
Testing 467 of 992 .
Stability: 0.23
Dispersal of f

Stability: 0.31
Dispersal of feature importance: 0.51
Dispersal with no outliers: 0.51
Testing 535 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.4
Dispersal with no outliers: 0.4
Testing 536 of 992 .
Stability: 0.17
Dispersal of feature importance: 0.07
Dispersal with no outliers: 0.07
Testing 537 of 992 .
Stability: 0.25
Dispersal of feature importance: -3.22
Dispersal with no outliers: -3.22
Testing 538 of 992 .
Stability: 0.19
Dispersal of feature importance: -0.02
Dispersal with no outliers: -0.02
Testing 539 of 992 .
Stability: 0.26
Dispersal of feature importance: 0.19
Dispersal with no outliers: 0.19
Testing 540 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.39
Dispersal with no outliers: 0.39
Testing 541 of 992 .
Stability: 0.22
Dispersal of feature importance: -0.03
Dispersal with no outliers: -0.03
Testing 542 of 992 .
Stability: 0.2
Dispersal of feature importance: -2.39
Dispersal with no outliers: -2.39
Testing 543 of 992 .
Stability: 0.21
Dispers

Stability: 0.22
Dispersal of feature importance: 0.13
Dispersal with no outliers: 0.13
Testing 611 of 992 .
Stability: 0.26
Dispersal of feature importance: -0.07
Dispersal with no outliers: -0.07
Testing 612 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.31
Dispersal with no outliers: 0.31
Testing 613 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.43
Dispersal with no outliers: 0.43
Testing 614 of 992 .
Stability: 0.23
Dispersal of feature importance: 0.32
Dispersal with no outliers: 0.32
Testing 615 of 992 .
Stability: 0.18
Dispersal of feature importance: 0.32
Dispersal with no outliers: 0.32
Testing 616 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.49
Dispersal with no outliers: 0.49
Testing 617 of 992 .
Stability: 0.24
Dispersal of feature importance: -2.04
Dispersal with no outliers: -2.04
Testing 618 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.4
Dispersal with no outliers: 0.4
Testing 619 of 992 .
Stability: 0.21
Dispersal o

Stability: 0.26
Dispersal of feature importance: 0.13
Dispersal with no outliers: 0.13
Testing 687 of 992 .
Stability: 0.22
Dispersal of feature importance: -0.47
Dispersal with no outliers: -0.47
Testing 688 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.16
Dispersal with no outliers: 0.16
Testing 689 of 992 .
Stability: 0.28
Dispersal of feature importance: 0.7
Dispersal with no outliers: 0.7
Testing 690 of 992 .
Stability: 0.19
Dispersal of feature importance: -1.32
Dispersal with no outliers: -1.32
Testing 691 of 992 .
Stability: 0.18
Dispersal of feature importance: 0.14
Dispersal with no outliers: 0.14
Testing 692 of 992 .
Stability: 0.18
Dispersal of feature importance: 0.32
Dispersal with no outliers: 0.32
Testing 693 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.19
Dispersal with no outliers: 0.19
Testing 694 of 992 .
Stability: 0.19
Dispersal of feature importance: 0.16
Dispersal with no outliers: 0.16
Testing 695 of 992 .
Stability: 0.2
Dispersal o

Stability: 0.18
Dispersal of feature importance: 0.44
Dispersal with no outliers: 0.44
Testing 763 of 992 .
Stability: 0.23
Dispersal of feature importance: -5.16
Dispersal with no outliers: -5.16
Testing 764 of 992 .
Stability: 0.22
Dispersal of feature importance: 0.49
Dispersal with no outliers: 0.49
Testing 765 of 992 .
Stability: 0.21
Dispersal of feature importance: -0.03
Dispersal with no outliers: -0.03
Testing 766 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.45
Dispersal with no outliers: 0.45
Testing 767 of 992 .
Stability: 0.18
Dispersal of feature importance: -0.43
Dispersal with no outliers: -0.43
Testing 768 of 992 .
Stability: 0.24
Dispersal of feature importance: -0.02
Dispersal with no outliers: -0.02
Testing 769 of 992 .
Stability: 0.29
Dispersal of feature importance: -0.11
Dispersal with no outliers: -0.11
Testing 770 of 992 .
Stability: 0.28
Dispersal of feature importance: 0.46
Dispersal with no outliers: 0.46
Testing 771 of 992 .
Stability: 0.22
Di

Stability: 0.17
Dispersal of feature importance: -0.41
Dispersal with no outliers: -0.41
Testing 839 of 992 .
Stability: 0.25
Dispersal of feature importance: 0.08
Dispersal with no outliers: 0.08
Testing 840 of 992 .
Stability: 0.25
Dispersal of feature importance: -2.77
Dispersal with no outliers: -2.77
Testing 841 of 992 .
Stability: 0.18
Dispersal of feature importance: 0.51
Dispersal with no outliers: 0.51
Testing 842 of 992 .
Stability: 0.23
Dispersal of feature importance: -4.84
Dispersal with no outliers: -4.84
Testing 843 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.15
Dispersal with no outliers: 0.15
Testing 844 of 992 .
Stability: 0.21
Dispersal of feature importance: -0.28
Dispersal with no outliers: -0.28
Testing 845 of 992 .
Stability: 0.21
Dispersal of feature importance: 0.63
Dispersal with no outliers: 0.63
Testing 846 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.43
Dispersal with no outliers: 0.43
Testing 847 of 992 .
Stability: 0.2
Dispe

Stability: 0.23
Dispersal of feature importance: 0.44
Dispersal with no outliers: 0.44
Testing 915 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.21
Dispersal with no outliers: 0.21
Testing 916 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.26
Dispersal with no outliers: 0.26
Testing 917 of 992 .
Stability: 0.23
Dispersal of feature importance: 0.58
Dispersal with no outliers: 0.58
Testing 918 of 992 .
Stability: 0.13
Dispersal of feature importance: 0.32
Dispersal with no outliers: 0.32
Testing 919 of 992 .
Stability: 0.24
Dispersal of feature importance: 0.4
Dispersal with no outliers: 0.4
Testing 920 of 992 .
Stability: 0.23
Dispersal of feature importance: 0.45
Dispersal with no outliers: 0.45
Testing 921 of 992 .
Stability: 0.19
Dispersal of feature importance: 0.6
Dispersal with no outliers: 0.6
Testing 922 of 992 .
Stability: 0.2
Dispersal of feature importance: 0.51
Dispersal with no outliers: 0.51
Testing 923 of 992 .
Stability: 0.25
Dispersal of featu

Stability: 0.23
Dispersal of feature importance: 0.23
Dispersal with no outliers: 0.23
Testing 991 of 992 .
Stability: 0.17
Dispersal of feature importance: 0.41
Dispersal with no outliers: 0.41
Testing 992 of 992 .
Stability: 0.19
Dispersal of feature importance: 0.49
Dispersal with no outliers: 0.49


In [13]:
# Write all collected result frames (all_results) to one CSV file.
# Only the relative part of the path is %-formatted: the original formatted the
# already-joined path, so a literal '%' anywhere in PATH was treated as a format
# directive and either raised or corrupted the destination.
results_path = os.path.join(
    PATH,
    "%s/%s/%s/samples/results.csv" % (dataset_ref, cls_method, method_name),
)
# Ensure the target folder exists so the write cannot fail on a missing
# directory after the long-running loop; no-op when it is already there.
os.makedirs(os.path.dirname(results_path), exist_ok=True)
pd.concat(all_results).to_csv(results_path)

In [18]:
# Display the collected result frames; all_results is filled per bucket by the
# loop above and renders as a list of DataFrames in the cell output.
all_results

[    Case ID  Actual  Prefix Length  Prediction  Prediction Probability  \
 0     AAA_3       1              3           0                0.999815   
 1     AAA_4       1              4           0                0.999460   
 2       ABA       0              1           1                0.919434   
 3     ABA_5       0              5           0                0.999940   
 4    ADA_10       1             10           1                0.989788   
 ..      ...     ...            ...         ...                     ...   
 987    ZU_8       0              8           0                0.997607   
 988    ZW_6       0              6           1                0.906008   
 989    ZW_9       0              9           0                0.993170   
 990    ZX_9       0              9           0                0.946257   
 991   ZZ_12       0             12           0                0.999858   
 
      SHAP Subset Stability  SHAP Weight Stability  \
 0                      1.0                 