In [0]:
pip install interpret

In [0]:
# Common Pkgs
import pandas as pd
import numpy as np

# Data split 
from sklearn.model_selection import train_test_split

# interpret ML
import interpret
from interpret.blackbox import ShapKernel

In [0]:
# Download the open source Bank Marketing Data Set and load it from memory.

import requests
from io import BytesIO
from zipfile import ZipFile


url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip"

# Fail fast on network problems: without a timeout the cell can hang forever,
# and without raise_for_status() an HTTP error page would be handed to ZipFile
# and surface as a confusing BadZipFile error.
response = requests.get(url, timeout=60)
response.raise_for_status()

with ZipFile(BytesIO(response.content), "r") as myzip:
    # Print the content of the zip file.
    print(myzip.namelist())

    with myzip.open("bank-full.csv", "r") as full_data:
        # Load Dataset bank-full (fields are separated by ';').
        df = pd.read_csv(full_data, sep=';')

    with myzip.open("bank-names.txt", "r") as bank_name:
        # Load the dataset description; archive members are read in binary
        # mode, so `info` is a list of bytes lines.
        info = bank_name.readlines()

  

In [0]:
# data info
# The description lines were read from the zip archive as bytes. Decode them
# and drop the trailing newline so the text is printed as-is instead of as
# b'...\n' literals separated by blank lines.
for line in info:
    text = line.decode("utf-8", errors="replace") if isinstance(line, bytes) else line
    print(text.rstrip("\r\n"))

In [0]:
# Dataset dimensions together with its column index
(df.shape, df.columns)

In [0]:
# Inspect the dtype pandas inferred for every column
df.dtypes

In [0]:
# Preview the first five rows of the raw data
df.head(n=5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [0]:
# Check for missing values. The result is printed explicitly because a cell
# only displays its last expression, and more code follows below.
print(df.isnull().sum())

# Integer-encode only the non-numeric columns. Numeric columns (age, balance,
# duration, ...) keep their real values instead of being replaced by
# category codes.
categorical_cols = df.select_dtypes(exclude="number").columns
df1 = df.copy()
label_dict = {}
for col in categorical_cols:
    as_category = df[col].astype('category')
    df1[col] = as_category.cat.codes
    # code -> original label, so encoded values can be translated back
    label_dict[col] = dict(enumerate(as_category.cat.categories))


# Features and Ylabels
Xfeatures = df1[['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
       'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
       'previous', 'poutcome']]
ylabels = df1['y']


# Split Dataset (30% of the rows are held out for testing)
x_train,x_test,y_train,y_test = train_test_split(Xfeatures,ylabels,test_size=0.3,random_state=7)

In [0]:
# Train/test split on the raw frame: every column except the last one is a
# feature, the last column is the target.
label = df.columns[-1]
train_cols = df.columns[:-1]

X, y = df[train_cols], df[label]

seed = 123

# Hold out 30% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=seed,
)

In [0]:
# Train a Glassbox Model: an Explainable Boosting Machine fitted on the raw
# (non one-hot-encoded) training split, seeded with `seed`.

from interpret.glassbox import ExplainableBoostingClassifier

ebm = ExplainableBoostingClassifier(random_state=seed)
ebm.fit(X_train, y_train)

In [0]:
# Explain the Glassbox
# Register the inline provider for interpret's visualizations
# (presumably so that show() renders inside the notebook output - confirm).
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
set_visualize_provider(InlineProvider())

In [0]:
from interpret import show

# Build the global explanation of the fitted EBM and render it.
ebm_global = ebm.explain_global()
show(ebm_global)

In [0]:
# To return the data used for all visualizations, pass the key -1
ebm_global.data(-1)

In [0]:
# Recompute the global explanation and build the figure for key 0
# (presumably the first term of the model - confirm).
ebm_explanation = ebm.explain_global()
plotly_fig = ebm_explanation.visualize(0)

In [0]:
# Display the figure built by ebm_explanation.visualize(0)
plotly_fig

In [0]:
# Train black box model
# NOTE: this cell rebinds y, X_train, X_test, y_train and y_test, which were
# defined by the earlier train/test split cell; from here on they refer to the
# one-hot encoded data with a 0/1 target.

from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

# We have to transform categorical variables to use sklearn models.
# dtype=float keeps the whole frame numeric: newer pandas versions emit bool
# dummy columns by default, which, mixed with the integer columns, turn the
# frame into an object array once it is converted to NumPy.
X_enc = pd.get_dummies(X, prefix_sep='.', dtype=float)
feature_names = list(X_enc.columns)
y = df[label].apply(lambda x: 0 if x == "no" else 1)  # Turning response into 0 and 1
X_train, X_test, y_train, y_test = train_test_split(X_enc, y, test_size=0.30, random_state=seed)

# Blackbox system can include preprocessing, not just a classifier!
# Both steps are seeded so the fitted pipeline, and every explanation derived
# from it, is reproducible across runs.
pca = PCA(random_state=seed)
rf = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=seed)

blackbox_model = Pipeline([('pca', pca), ('rf', rf)])
blackbox_model.fit(X_train, y_train)

In [0]:
from interpret import show
from interpret.blackbox import LimeTabular

# LIME explainer around the pipeline's probability output, using the training
# split as its data.
lime = LimeTabular(
    predict_fn=blackbox_model.predict_proba,
    data=X_train,
    random_state=seed,
)

# Local explanations for the first five test rows and their labels.
lime_local = lime.explain_local(X_test.iloc[:5], y_test.iloc[:5])

show(lime_local)

In [0]:
# Build and display the figure for key 0 of the LIME explanation
# (presumably the first explained test row - confirm).
plotly_fig_lime = lime_local.visualize(0)
plotly_fig_lime

In [0]:
# Kernel SHAP explainer around the pipeline's probability output; only the
# first 1000 training rows are handed over as its data.
shap = ShapKernel(
    predict_fn=blackbox_model.predict_proba,
    data=X_train.iloc[:1000],
)

# Local explanations for the first five test rows and their labels.
shap_local = shap.explain_local(X_test.iloc[:5], y_test.iloc[:5])

show(shap_local)

In [0]:
# Build and display the figure for key 0 of the SHAP explanation
# (presumably the first explained test row - confirm).
plotly_fig_shap = shap_local.visualize(0)
plotly_fig_shap