## What-If Tool and SHAP on COMPAS Keras model

This notebook shows:
- Training of a Keras model on the [COMPAS](https://www.kaggle.com/danofer/compass) dataset.
- Use of What-If Tool on the trained model.
- Explanation of inference results using [SHAP](https://github.com/slundberg/shap).
- Use of What-If Tool to display SHAP values.

Copyright 2019 Google LLC.
SPDX-License-Identifier: Apache-2.0

In [0]:
#@title Install What-If Tool Widget and SHAP library
# Installs (or upgrades) the witwidget and shap packages via pip; --quiet
# keeps pip's output to a minimum.
!pip install --upgrade --quiet witwidget shap

In [0]:
#@title Read training dataset from CSV {display-mode: "form"}
import pandas as pd
import numpy as np
import tensorflow as tf
import witwidget
import os
import pickle

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

from sklearn.utils import shuffle

# Load the COMPAS CSV hosted in the What-If Tool resources bucket into a
# DataFrame; this needs network access.
df = pd.read_csv('https://storage.googleapis.com/what-if-tool-resources/computefest2019/cox-violent-parsed_filt.csv')

In [0]:
# Filter out entries with no indication of recidivism or no COMPAS score.
# Both conditions are applied as one mask and the result is copied, so the
# column assignments below act on an independent frame instead of a slice of
# the loaded data (this avoids pandas' SettingWithCopyWarning).
df = df[(df['is_recid'] != -1) & (df['decile_score'] != -1)].copy()

# Expose the recidivism column under a more descriptive name.
df['recidivism_within_2_years'] = df['is_recid']

# Make the COMPAS label column numeric (0 and 1), for use in our model:
# a score_text of 'Low' maps to 0 and every other value maps to 1.
df['COMPASS_determination'] = np.where(df['score_text'] == 'Low', 0, 1)

# One-hot encode the categorical columns. The dtype is pinned to uint8 (the
# default before pandas 2.0): newer pandas emits bool columns by default, and
# a frame mixing bool and integer columns produces an object array from
# `.values`, which cannot be fed to the model.
df = pd.get_dummies(df, columns=['sex', 'race'], dtype=np.uint8)

# Get list of all columns from the dataset we will use for model input or output.
input_features = ['sex_Female', 'sex_Male', 'age', 'race_African-American', 'race_Caucasian', 'race_Hispanic', 'race_Native American', 'race_Other', 'priors_count', 'juv_fel_count', 'juv_misd_count', 'juv_other_count']

to_keep = input_features + ['recidivism_within_2_years', 'COMPASS_determination']

# Drop everything that is neither a model input nor one of the two labels.
to_remove = [col for col in df.columns if col not in to_keep]
df = df.drop(columns=to_remove)

# input_columns keeps the frame's column order and still includes the two
# label columns; the tf.Example -> vector helpers skip those by name.
input_columns = df.columns.tolist()
labels = df['COMPASS_determination']
df.head()

In [0]:
# Model inputs only: strip both label columns from the feature frame.
df_for_training = df.drop(['COMPASS_determination', 'recidivism_within_2_years'], axis=1)

In [0]:
# Sequential 80/20 split: the leading 80% of rows are used for training and
# the remaining rows are held out for testing. Rows are taken in their
# existing order (no shuffling is applied here).
train_size = int(0.8 * len(df_for_training))

train_data, test_data = df_for_training.iloc[:train_size], df_for_training.iloc[train_size:]
train_labels, test_labels = labels.iloc[:train_size], labels.iloc[train_size:]

# The same held-out rows, with the label columns still attached.
test_data_with_labels = df.iloc[train_size:]

# This is the size of the array we'll be feeding into our model for each example
input_size = len(train_data.iloc[0])

In [0]:
# Fully connected network: three ReLU hidden layers of decreasing width
# (200 -> 50 -> 25) feeding a single sigmoid output unit.
model = Sequential([
    Dense(200, input_shape=(input_size,), activation='relu'),
    Dense(50, activation='relu'),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Trained with the Adam optimizer against mean squared error.
model.compile(optimizer='adam', loss='mean_squared_error')

In [0]:
# Print the model's layer-by-layer summary.
model.summary()

In [0]:
# Train on the raw NumPy arrays for 4 epochs, with 10% of the training rows
# set aside for validation.
model.fit(
    train_data.values,
    train_labels.values,
    epochs=4,
    batch_size=32,
    validation_split=0.1,
)

In [0]:
# Helper methods to convert examples to/from tf.Example and vector for model prediction.
def df_to_tf_examples(df):
  """Converts each row of `df` into a tf.train.Example for display in WIT.

  One-hot encoded `sex_*` / `race_*` columns are collapsed back into a single
  bytes feature (`sex` / `race`) holding the category whose column equals 1;
  one-hot columns that are not set contribute nothing. Every other integer
  column becomes an int64 feature and every floating-point column becomes a
  float feature. Columns of any other dtype are skipped.

  Args:
    df: DataFrame whose rows should be converted.

  Returns:
    A list with one tf.train.Example per row, in row order.
  """
  columns = df.columns.values.tolist()
  # Classify columns once up front rather than re-checking the dtype for every
  # row. Any integer / float width is accepted so that, e.g., int32 columns are
  # not silently dropped.
  int_columns = {c for c in columns if pd.api.types.is_integer_dtype(df[c].dtype)}
  float_columns = {c for c in columns if pd.api.types.is_float_dtype(df[c].dtype)}

  examples = []
  for _, row in df.iterrows():
    example = tf.train.Example()
    features = example.features.feature
    for col in columns:
      if col.startswith('sex_') or col.startswith('race_'):
        # One-hot columns are handled entirely here so that an unset (0)
        # column can never fall through and be emitted as its own feature.
        if row[col] == 1:
          name, _, category = col.partition('_')
          features[name].bytes_list.value.append(category.encode('utf-8'))
      elif col in int_columns:
        features[col].int64_list.value.append(int(row[col]))
      elif col in float_columns:
        features[col].float_list.value.append(row[col])
    examples.append(example)
  return examples

def from_tf_example(example):
  """Rebuilds the model's input vector from a tf.train.Example.

  Walks `input_columns` in order, skipping the two label columns. A one-hot
  `sex*` / `race*` column yields 1 when the example's collapsed `sex` / `race`
  bytes feature is non-empty and its first value decodes to that column's
  category, and 0 otherwise. Every other column yields the first value of its
  int64 feature.

  Args:
    example: tf.train.Example to convert.

  Returns:
    A list of ints, one per non-label column of `input_columns`.
  """
  label_columns = ('recidivism_within_2_years', 'COMPASS_determination')
  features = example.features.feature
  vector = []
  for col in input_columns:
    if col in label_columns:
      continue
    if col.startswith('sex'):
      prefix_len = 3
    elif col.startswith('race'):
      prefix_len = 4
    else:
      # Plain numeric column: copy the stored integer straight through.
      vector.append(features[col].int64_list.value[0])
      continue
    # One-hot column: compare the collapsed category string with this
    # column's category (the text after the prefix and its separator).
    encoded = features[col[:prefix_len]].bytes_list.value
    is_set = bool(encoded) and encoded[0].decode() == col[prefix_len + 1:]
    vector.append(1 if is_set else 0)
  return vector

# For using WIT to display SHAP values, we send each vector entry to WIT as its
# own feature, as opposed to collapsing categorical features into a single
# string for display in the tool. This is because each vector entry for the
# one-hot encodings for the categorical features has its own SHAP value to
# display.
def df_to_shap_tf_examples(df):
  """Converts every row of `df` into a tf.train.Example of int64 features.

  Each column, one-hot columns included, becomes its own single-valued int64
  feature holding the cell value cast to int.

  Args:
    df: DataFrame whose rows should be converted.

  Returns:
    A list with one tf.train.Example per row, in row order.
  """
  def row_to_example(row):
    # One int64 feature per column, keyed by the column name.
    example = tf.train.Example()
    for col, value in row.items():
      example.features.feature[col].int64_list.value.append(int(value))
    return example

  return [row_to_example(row) for _, row in df.iterrows()]

def from_shap_tf_example(example):
  """Rebuilds the model's input vector from a per-column tf.train.Example.

  Reads the first int64 value of every column listed in `input_columns`,
  in order, leaving out the two label columns.

  Args:
    example: tf.train.Example holding one int64 feature per column.

  Returns:
    A list of ints, one per non-label column of `input_columns`.
  """
  label_columns = ('recidivism_within_2_years', 'COMPASS_determination')
  features = example.features.feature
  return [
      features[col].int64_list.value[0]
      for col in input_columns
      if col not in label_columns
  ]

In [0]:
# Convert data to tf.Example format for use in WIT.
# examples_for_wit collapses the one-hot sex/race columns into single string
# features for display; examples_for_shap_wit keeps every column as its own
# int64 feature so that each one can carry its own SHAP value.
examples_for_wit = df_to_tf_examples(test_data_with_labels)
examples_for_shap_wit = df_to_shap_tf_examples(test_data_with_labels)

In [0]:
#@title Show model results in WIT
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder
num_datapoints = 1000  #@param {type: "number"}

def custom_predict(examples_to_infer):
  """Prediction function handed to the What-If Tool.

  Args:
    examples_to_infer: iterable of tf.train.Example to run through the model.

  Returns:
    A list with one `[1 - p, p]` pair per example, where `p` is the model's
    sigmoid output for that example. Empty input yields an empty list.
  """
  model_inputs = [from_tf_example(ex) for ex in examples_to_infer]
  if not model_inputs:
    return []
  # Hand Keras an explicit (num_examples, num_features) array. Wrapping the
  # nested Python list in another list is ambiguous: depending on the Keras
  # version it is read either as a list of model inputs or as one extra
  # leading dimension.
  preds = model.predict(np.array(model_inputs))
  return [[1 - pred[0], pred[0]] for pred in preds]

# Configure WIT step by step: the first `num_datapoints` examples, the custom
# prediction function, and the ground-truth feature to compare against.
config_builder = WitConfigBuilder(examples_for_wit[:num_datapoints])
config_builder = config_builder.set_custom_predict_fn(custom_predict)
config_builder = config_builder.set_target_feature('recidivism_within_2_years')

# Create the What-If Tool widget with that configuration.
ww = WitWidget(config_builder, height=800)

In [0]:
import shap

# Create an explainer by passing a subset of our training data: the first 200
# training rows are handed to DeepExplainer alongside the trained model.
explainer = shap.DeepExplainer(model, train_data.values[:200])

In [0]:
# Explain predictions of the model on the first 5 examples from our test set.
# The bare expression on the last line displays the result as cell output.
shap_values = explainer.shap_values(test_data.values[:5])
shap_values

In [0]:
#@title Show model results and SHAP values in WIT
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder
num_datapoints = 1000  #@param {type: "number"}

# Return model predictions and SHAP values for each inference.
def custom_predict_with_shap(examples_to_infer):
  """Prediction function for WIT that also returns SHAP attributions.

  Args:
    examples_to_infer: iterable of tf.train.Example, each holding one int64
      feature per column.

  Returns:
    A dict with two parallel lists:
      'predictions': one `[1 - p, p]` pair per example, where `p` is the
        model's sigmoid output.
      'attributions': one dict per example mapping each column of
        `df_for_training` to that feature's SHAP value.
    Empty input yields two empty lists.
  """
  model_inputs = [from_shap_tf_example(ex) for ex in examples_to_infer]
  if not model_inputs:
    return {'predictions': [], 'attributions': []}

  # Build the (num_examples, num_features) array once and share it between
  # the model and the explainer. Passing a list-wrapped nested list to
  # predict() is ambiguous across Keras versions.
  input_array = np.array(model_inputs)
  preds = model.predict(input_array)
  preds = [[1 - pred[0], pred[0]] for pred in preds]

  # NOTE(review): the [0] takes the first entry of shap_values()' result,
  # presumably the attributions for the model's single output; confirm
  # against the installed shap version.
  shap_output = explainer.shap_values(input_array)[0]
  feature_names = list(df_for_training.columns)
  # Index by position (rather than zip) so a width mismatch between the SHAP
  # row and the feature columns raises instead of silently truncating.
  attributions = [
      {col: example_shap[i] for i, col in enumerate(feature_names)}
      for example_shap in shap_output
  ]
  return {'predictions': preds, 'attributions': attributions}

# Configure WIT step by step: the first `num_datapoints` per-column examples,
# the prediction function that also returns SHAP attributions, and the
# ground-truth feature to compare against.
config_builder = WitConfigBuilder(examples_for_shap_wit[:num_datapoints])
config_builder = config_builder.set_custom_predict_fn(custom_predict_with_shap)
config_builder = config_builder.set_target_feature('recidivism_within_2_years')

# Create the What-If Tool widget with that configuration.
ww = WitWidget(config_builder, height=800)