# SHAP Kernel Explainer for Tabular Data via XAI

### Step 1: Import libraries

In [1]:
# Some auxiliary imports for the tutorial (standard library first, then third-party)
import os
import random
import sys
from pprint import pprint

import numpy as np
from sklearn import datasets
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import shap

# Set seed for reproducibility (seeds NumPy's global random state)
np.random.seed(123456)

# Set the path so that we can import the Explainer.
# NOTE(review): '../../' is resolved relative to the kernel's working directory;
# presumably that is the repository root containing the `xai` package - confirm.
sys.path.append('../../')

# Main XAI imports (must come after the sys.path tweak above)
import xai
from xai.explainer import Explainer

  from numpy.core.umath_tests import inner1d


### Step 2: Train a model on a sample dataset

In [2]:
# Fetch the breast-cancer dataset and hold out 20% of it as a test split
raw_data = datasets.load_breast_cancer()
X = raw_data['data']
y = raw_data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit a random forest on the training split, then score it on the held-out split
clf = RandomForestClassifier().fit(X_train, y_train)
clf.score(X_test, y_test)

0.956140350877193

In [3]:
# Display the dataset's feature names; the same array is passed to
# build_explainer below as `feature_names`.
raw_data['feature_names']

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

### Step 3: Instantiate the explainer

In [4]:
# Instantiate a SHAP explainer for tabular data via the Explainer interface
explainer = Explainer(domain=xai.DOMAIN.TABULAR, algorithm=xai.ALG.SHAP)

### Step 4: Build the explainer

In [5]:
# Build the explainer around the classifier's probability function.
# The full training split is passed as `data`; the warning printed below shows
# SHAP treating all of its rows as background samples, which can be slow.
# `feature_names` supplies the labels that appear in the explanations.
explainer.build_explainer(
    predict_fn=clf.predict_proba,
    data=X_train,
    feature_names=raw_data['feature_names']
)

Using 455 background data samples could cause slower run times. Consider using shap.kmeans(data, K) to summarize the background as K weighted samples.


### Step 5: Generate some explanations

In [6]:
# Class probabilities for the first test instance (the one explained next);
# slicing with [:1] keeps the 2-D (1, n_features) shape predict_proba expects.
clf.predict_proba(X_test[:1])

array([[0., 1.]])

In [7]:
# Explain the first test instance, i.e. the one whose class probabilities
# were shown in the previous cell.
# NOTE(review): nsamples=None presumably defers to SHAP's default sample
# count - confirm against the xai / shap documentation.
# NOTE(review): num_features=10 is requested, yet the output below lists only
# five entries for class 0 - confirm how num_features is applied.
exp = explainer.explain_instance(
    instance=X_test[0],
    nsamples=None,
    num_features=10
)

# Pretty-print the result: a dict keyed by 0 and 1 (presumably the class
# labels), each holding a 'confidence' and a list of feature/score pairs.
pprint(exp)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


{0: {'confidence': 0.0,
     'explanation': [{'feature': 'mean texture = 11.97',
                      'score': -0.05706698133719296},
                     {'feature': 'mean area = 288.5',
                      'score': -0.0599174206393038},
                     {'feature': 'worst radius = 10.62',
                      'score': -0.06305883115185068},
                     {'feature': 'worst perimeter = 66.53',
                      'score': -0.10051281591924938},
                     {'feature': 'worst area = 342.9',
                      'score': -0.08911428062273208}]},
 1: {'confidence': 1.0,
     'explanation': [{'feature': 'mean texture = 11.97',
                      'score': 0.05706698133719329},
                     {'feature': 'mean area = 288.5',
                      'score': 0.059917420639304386},
                     {'feature': 'worst radius = 10.62',
                      'score': 0.06305883115185107},
                     {'feature': 'worst perimeter = 66.53',
         

### Step 6: Save and load the explainer

In [8]:
# Save the explainer to disk so it can be reloaded in a later session.
# Make sure the target directory exists first: the cell would otherwise depend
# on 'artefacts/' already being present in the working directory.
# NOTE(review): unclear whether save_explainer creates missing directories
# itself; exist_ok=True makes this a no-op when the directory is already there.
os.makedirs('artefacts', exist_ok=True)

explainer.save_explainer('artefacts/shap_tabular.pkl')

In [9]:
# Load the saved explainer in a new Explainer instance
# NOTE(review): the .pkl extension suggests pickle-based serialization; if so,
# only load explainer files from sources you trust.

new_explainer = Explainer(domain=xai.DOMAIN.TABULAR, algorithm=xai.ALG.SHAP)
new_explainer.load_explainer('artefacts/shap_tabular.pkl')

# Re-explain the same test instance with the reloaded explainer. The scores
# printed below are close to, but not identical to, the first run - presumably
# because the SHAP kernel estimate relies on random sampling.
exp = new_explainer.explain_instance(
    instance=X_test[0],
    nsamples=None,
    num_features=10
)

pprint(exp)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


{0: {'confidence': 0.0,
     'explanation': [{'feature': 'mean texture = 11.97',
                      'score': -0.05276770912762013},
                     {'feature': 'mean area = 288.5',
                      'score': -0.06534141817645864},
                     {'feature': 'worst radius = 10.62',
                      'score': -0.062258506620849946},
                     {'feature': 'worst perimeter = 66.53',
                      'score': -0.09469370474349056},
                     {'feature': 'worst area = 342.9',
                      'score': -0.09460899100190961}]},
 1: {'confidence': 1.0,
     'explanation': [{'feature': 'mean texture = 11.97',
                      'score': 0.05276770912762074},
                     {'feature': 'mean area = 288.5',
                      'score': 0.06534141817645922},
                     {'feature': 'worst radius = 10.62',
                      'score': 0.06225850662085014},
                     {'feature': 'worst perimeter = 66.53',
        

In [10]:
# Clean up: delete the explainer file written in Step 6.
# Raises FileNotFoundError if the file is already gone (e.g. cell run twice).
os.remove('artefacts/shap_tabular.pkl')