This code allows the visualisation of SHAP values for interpretability measures

In [1]:
import shap
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [2]:
df = pd.read_csv("../IMDB_with_predictions.csv")[0:1000:1]
model = keras.models.load_model("../CNN_Non_Dense")

tokenizer = Tokenizer()
tokenizer.fit_on_texts(df.review)
tokenized_texts = tokenizer.texts_to_sequences(df.review)
padded_texts = pad_sequences(tokenized_texts, padding="post", maxlen=1000)


Training the SHAP model

In [3]:
explainer = shap.Explainer(model.predict, padded_texts[0:250:1], max_evals=2001)
explainer.max_evals = 10000
# print(padded_texts[0])
first_explanation = explainer(padded_texts[0:2:1])

  21/2181 [..............................] - ETA: 11s

2023-10-08 18:25:51.208868: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz




Permutation explainer:  50%|█████     | 1/2 [00:00<?, ?it/s]



Permutation explainer: 3it [00:38, 19.10s/it]               


Creating an object to store explanations vs shap values

In [4]:
first_shaps = first_explanation[0].values
word_list = list(df.iloc[0].review.split(sep=" "))
word_vs_shap = list() # list containing the respective word and its shapley additive explanation value

for i in range(len(word_list)):
    word_vs_shap.append({"Word": word_list[i], "SHAP": first_shaps[i]})

# sorting it so we can visualise it easily

def bubbleSort(arr):
    n = len(arr)
    # optimize code, so if the array is already sorted, it doesn't need
    # to go through the entire process
    swapped = False
    # Traverse through all array elements
    for i in range(n-1):
        # range(n) also work but outer loop will
        # repeat one time more than needed.
        # Last i elements are already in place
        for j in range(0, n-i-1):
 
            # traverse the array from 0 to n-i-1
            # Swap if the element found is greater
            # than the next element
            if arr[j]["SHAP"] < arr[j + 1]["SHAP"]:
                swapped = True
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
         
        if not swapped:
            # if we haven't needed to make a single swap, we 
            # can just exit the main loop.
            return
sorted_word_vs_shap = bubbleSort(word_vs_shap)
print(word_vs_shap)


[{'Word': 'painted', 'SHAP': 0.21979196392646205}, {'Word': 'for', 'SHAP': 0.10614891833653248}, {'Word': 'violence,', 'SHAP': 0.10118779145937878}, {'Word': 'say', 'SHAP': 0.09047154737312668}, {'Word': 'their', 'SHAP': 0.059566450598749765}, {'Word': 'romance...OZ', 'SHAP': 0.055156040228148406}, {'Word': 'it', 'SHAP': 0.03637700549588768}, {'Word': 'unflinching', 'SHAP': 0.03212859652994665}, {'Word': 'prison', 'SHAP': 0.026642052377574166}, {'Word': 'skills', 'SHAP': 0.026482229044373223}, {'Word': 'Italians,', 'SHAP': 0.026429355219879658}, {'Word': 'first', 'SHAP': 0.026026878334073467}, {'Word': 'of', 'SHAP': 0.02381881422130394}, {'Word': 'Oz,', 'SHAP': 0.023388539068400827}, {'Word': 'middle', 'SHAP': 0.021363169247633762}, {'Word': "couldn't", 'SHAP': 0.019697546963870938}, {'Word': 'pictures', 'SHAP': 0.016679492221983283}, {'Word': 'high', 'SHAP': 0.016181443314271793}, {'Word': '/><br', 'SHAP': 0.015958401661191557}, {'Word': 'on', 'SHAP': 0.015375285387854098}, {'Word': '

Visualising the SHAP values

In [25]:
import tkinter as tk
from tkinter import *
from tkinter import ttk
max_size=64

root = tk.Tk()
frm = ttk.Frame(root, padding=10)
frm.grid()
frm.rowconfigure(max_size*3) 
frm.columnconfigure(max_size)
sorted_word_vs_shap = word_vs_shap
layer = 1; position = 0
for i in range(0, 7):
    if(i != 0):
        position += int(max_size / pow(2, layer-2))
    else:
        position += 32

    if(i > pow(2, layer-2)):
        position = int(max_size / pow(2, layer+1))
        layer += 2
        print("->i: {} position: {}".format(i, position))

    print("i: {} position: {} layer: {}".format(i, position, layer))
    # the starting position is max_size / pow(2, layer)
    ttk.Label(root, text=round(sorted_word_vs_shap[i]["SHAP"], 5), width=30, wraplength=50, justify="center", font=("Arial", 8)).grid(column=position, row=layer)
    ttk.Label(root, text=sorted_word_vs_shap[i]["Word"], width=30, wraplength=50, justify="center", font=("Arial", 8)).grid(column=position, row=layer+1)


# root.mainloop()

i: 0 position: 32 layer: 1
->i: 1 position: 16
i: 1 position: 16 layer: 3
i: 2 position: 48 layer: 3
->i: 3 position: 4
i: 3 position: 4 layer: 5
i: 4 position: 12 layer: 5
i: 5 position: 20 layer: 5
i: 6 position: 28 layer: 5


In [26]:
root.mainloop()

KeyboardInterrupt: 

: 