In [141]:
import pandas as pd
import altair as alt
import numpy as np
import pmlb

In [142]:
import warnings
# Install a blanket "ignore" filter: with no category/module arguments this
# suppresses every warning raised after this cell runs.
# NOTE(review): this also hides warnings that may be relevant to the analysis
# (e.g. pandas dtype/assignment warnings) — consider narrowing the filter.
warnings.filterwarnings('ignore')

### Load the Dataset 

In [143]:
df = pmlb.fetch_data('pima')

# A zero in any of the columns below is treated as a missing reading and is
# replaced by the median of that column among rows of the same target class.
# NOTE(review): the class median is computed with the zero placeholders still
# included, which pulls it downward — confirm this is intended before changing,
# since downstream artifacts may have been built from the same values.
imputeFeatures = [
    'plasma glucose',
    'Diastolic blood pressure',
    'Triceps skin fold thickness',
    'Body mass index',
    '2-Hour serum insulin',
]
for feature in imputeFeatures:
    for label in (0, 1):
        in_class = df.target == label
        class_median = df.loc[in_class, feature].median()
        df.loc[in_class & (df[feature] == 0), feature] = class_median

In [144]:
# Separate the label column from the inputs, then z-score every input column
# (subtract the column mean, divide by the column sample standard deviation).
y = df['target']
X = df.drop(columns=['target'])
feature_means = X.mean()
feature_stds = X.std()
X_normalized = (X - feature_means) / feature_stds

### Load all the feature attributions

In [145]:
def _load_attr(path):
    """Read a pickled attribution frame and return (wide frame, long-form melt).

    The melt keeps the original row index so the long frame can later be
    matched back to the samples it came from.
    NOTE: unpickling can execute arbitrary code — only load trusted files.
    """
    wide = pd.read_pickle(path)
    return wide, wide.melt(ignore_index=False)


# shap
shap_filename = '../Weights/Attr/Shap_nn.pkl'
shap_values_df, shap_values = _load_attr(shap_filename)

# Lime
lime_weights_filename = '../Weights/Attr/lime_weights.pkl'
lime_weights_df, lime_weights = _load_attr(lime_weights_filename)

# Integrated Gradients
ig_attr_filename = '../Weights/Attr/ig_attr.pkl'
ig_attr_df, ig_attr = _load_attr(ig_attr_filename)

# Deep Lift
deepLift_filename = '../Weights/Attr/deepLift_attr.pkl'
deepLift_df, deepLift_attr = _load_attr(deepLift_filename)


In [146]:
# Collect the raw feature values and every method's attribution in one long
# frame: one row per (sample, feature) pair, indexed by the original sample index.
Attr = X.melt(var_name='feature', value_name='feature_value', ignore_index=False)

# Each long-form attribution frame contributes its 'value' column.
# NOTE(review): this relies on the attribution frames having the same rows and
# the same column order as X; nothing here checks that the feature labels line
# up — verify against the notebook that produced the pickles.
attribution_columns = {
    'shap_values': shap_values,
    'lime_weights': lime_weights,
    'ig_attr': ig_attr,
    'deepLift_attr': deepLift_attr,
}
for column_name, long_frame in attribution_columns.items():
    Attr[column_name] = long_frame['value']

### Mean Absolute Value of Feature Attributions by Each Method

In [147]:
# Per-feature mean of |attribution| for each method, one row per feature.
# The feature labels come from the SHAP frame; the other methods are attached
# positionally (raw arrays), so they are assumed to share SHAP's column order.
mean_attr = (
    shap_values_df.abs().mean()
    .rename_axis('feature')
    .reset_index(name='shap')
    .assign(
        lime=lime_weights_df.abs().mean().to_numpy(),
        ig=ig_attr_df.abs().mean().to_numpy(),
        deepLift=deepLift_df.abs().mean().to_numpy(),
    )
)

In [148]:
# One horizontal bar chart per attribution method, stacked as rows; within each
# chart the features are ordered by descending bar length.
attr_methods = ['shap', 'lime', 'ig', 'deepLift']
mean_attr_bars = alt.Chart(mean_attr).mark_bar().encode(
    x=alt.X(alt.repeat('row'), type='quantitative', bin=False),
    y=alt.Y('feature', sort='-x'),
)
mean_attr_bars.repeat(row=attr_methods)