This code allows the visualisation of SHAP values for interpretability measures

In [1]:
import shap
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [8]:
df = pd.read_csv("../IMDB_with_predictions.csv")[0:1000:1]
model = keras.models.load_model("../CNN_Non_Dense")

tokenizer = Tokenizer()
tokenizer.fit_on_texts(df.review)
tokenized_texts = tokenizer.texts_to_sequences(df.review)
padded_texts = pad_sequences(tokenized_texts, padding="post", maxlen=1000)


Training the SHAP model

In [10]:
explainer = shap.Explainer(model.predict, padded_texts[0:250:1], max_evals=2001)
explainer.max_evals = 10000
# print(padded_texts[0])
first_explanation = explainer(padded_texts[0:2:1])



Permutation explainer:  50%|█████     | 1/2 [00:00<?, ?it/s]



Permutation explainer: 3it [00:38, 19.42s/it]               


Creating an object to store explanations vs shap values

In [42]:
first_shaps = first_explanation[0].values
word_list = list(df.iloc[0].review.split(sep=" "))
word_vs_shap = list() # list containing the respective word and its shapley additive explanation value

for i in range(len(word_list)):
    word_vs_shap.append({"Word": word_list[i], "SHAP": first_shaps[i]})

# sorting it so we can visualise it easily

def bubbleSort(arr):
    n = len(arr)
    # optimize code, so if the array is already sorted, it doesn't need
    # to go through the entire process
    swapped = False
    # Traverse through all array elements
    for i in range(n-1):
        # range(n) also work but outer loop will
        # repeat one time more than needed.
        # Last i elements are already in place
        for j in range(0, n-i-1):
 
            # traverse the array from 0 to n-i-1
            # Swap if the element found is greater
            # than the next element
            if arr[j]["SHAP"] < arr[j + 1]["SHAP"]:
                swapped = True
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
         
        if not swapped:
            # if we haven't needed to make a single swap, we 
            # can just exit the main loop.
            return
sorted_word_vs_shap = bubbleSort(word_vs_shap)
print(word_vs_shap)


[{'Word': 'for', 'SHAP': 0.19821724590403395}, {'Word': 'painted', 'SHAP': 0.15581328305081116}, {'Word': 'violence,', 'SHAP': 0.0925168165958894}, {'Word': 'romance...OZ', 'SHAP': 0.0816423126135487}, {'Word': 'their', 'SHAP': 0.051297623365014755}, {'Word': 'and', 'SHAP': 0.033230820278185996}, {'Word': 'Watching', 'SHAP': 0.027766746308188928}, {'Word': 'of', 'SHAP': 0.02457786173594287}, {'Word': 'Italians,', 'SHAP': 0.024425490847061165}, {'Word': 'skills', 'SHAP': 0.023661259703785617}, {'Word': 'so', 'SHAP': 0.0236258314106976}, {'Word': '/>The', 'SHAP': 0.023469660207483672}, {'Word': 'forget', 'SHAP': 0.02181700992747207}, {'Word': 'kill', 'SHAP': 0.01992651870083134}, {'Word': 'say', 'SHAP': 0.018928893630036345}, {'Word': 'and', 'SHAP': 0.018008812061743817}, {'Word': 'high', 'SHAP': 0.016501077777484205}, {'Word': 'prison', 'SHAP': 0.015397845680417938}, {'Word': 'as', 'SHAP': 0.015052941976211959}, {'Word': 'right', 'SHAP': 0.013996227122552274}, {'Word': 'it', 'SHAP': 0.0

Visualising the SHAP values