In [28]:
import pandas as pd
import altair as alt
import pmlb
import warnings
warnings.filterwarnings('ignore')

### Load the dataset and impute missing values

In [29]:
df = pmlb.fetch_data('pima')

# In the columns below a value of 0 is treated as a missing measurement and is
# replaced by the median of that column among rows with the same target class.
# Note: the median is taken over all rows of the class, i.e. with the zero
# placeholders still included.
imputeFeatures = [
    'plasma glucose',
    'Diastolic blood pressure',
    'Triceps skin fold thickness',
    'Body mass index',
    '2-Hour serum insulin',
]
for feature in imputeFeatures:
    for target_class in (0, 1):
        in_class = df.target == target_class
        class_median = df.loc[in_class, feature].median()
        df.loc[in_class & (df[feature] == 0), feature] = class_median

In [30]:
# Separate the label column from the input features (no scaling is applied here)
y = df['target']
X = df.drop(columns=['target'])

### Load the feature attributions and combine them into one table

In [31]:
def _load_attribution(filename):
    """Read a pickled attribution table and return it with its long-form melt.

    Returns a tuple ``(wide, long)`` where ``wide`` is the object stored in the
    pickle and ``long`` is ``wide.melt(ignore_index=False)``, i.e. one row per
    (original index, column) pair with the original index kept.
    """
    wide = pd.read_pickle(filename)
    return wide, wide.melt(ignore_index=False)


# NOTE: unpickling can execute arbitrary code; only load files produced by
# this project.

# SHAP
shap_filename = '../Weights/Attr/Shap_nn.pkl'
shap_values_df, shap_values = _load_attribution(shap_filename)

# LIME
lime_weights_filename = '../Weights/Attr/lime_weights.pkl'
lime_weights_df, lime_weights = _load_attribution(lime_weights_filename)

# Integrated Gradients
ig_attr_filename = '../Weights/Attr/ig_attr.pkl'
ig_attr_df, ig_attr = _load_attribution(ig_attr_filename)

# DeepLIFT
deepLift_filename = '../Weights/Attr/deepLift_attr.pkl'
deepLift_df, deepLift_attr = _load_attribution(deepLift_filename)

In [32]:
# Build one long-form table: a row per (sample, feature) holding the feature
# value together with the attribution each method assigned to it.
# The attribution columns are aligned to the melted feature table by index
# label; this presumably relies on every attribution table having the same
# row index and column order as X -- TODO confirm.
Attr = (
    X.melt(ignore_index=False)
    .set_axis(['feature_name', 'feature_value'], axis='columns')
    .assign(
        shap_value=shap_values['value'],
        lime_weight=lime_weights['value'],
        ig_attr=ig_attr['value'],
        deepLift_attr=deepLift_attr['value'],
    )
)

In [33]:
# Attribution column names in Attr and the wide tables they were melted from.
# The two lists are parallel: entry i of one belongs to entry i of the other.
attr_names = [
    'shap_value',     # SHAP
    'lime_weight',    # LIME
    'ig_attr',        # Integrated Gradients
    'deepLift_attr',  # DeepLIFT
]
attr_dfs = [
    shap_values_df,
    lime_weights_df,
    ig_attr_df,
    deepLift_df,
]

In [34]:
# Flag, per attribution method, whether each attribution is strictly positive.
# method_names must stay in the same order as attr_names
# ('shap_value', 'lime_weight', 'ig_attr', 'deepLift_attr'): listing 'lime'
# before 'shap' would derive lime_is_positive from shap_value and
# shap_is_positive from lime_weight.
method_names = ['shap', 'lime', 'ig', 'deepLift']
assert len(method_names) == len(attr_names), 'method_names and attr_names must be parallel lists'
for method_name, attr_name in zip(method_names, attr_names):
    Attr[method_name + '_is_positive'] = Attr[attr_name] > 0

In [35]:
# Order the features by mean absolute SHAP value, largest first.
# rank() is ascending, so the feature with the largest mean gets the highest
# rank; sorting the (feature, rank) pairs in reverse puts it at the front.
sorted_index = shap_values_df.abs().mean().rank()
feature_rank_pairs = zip(
    list(shap_values_df.columns.values),
    list(sorted_index.values),
)
sorted_features = [
    feature_name
    for feature_name, _ in sorted(feature_rank_pairs, key=lambda pair: pair[1], reverse=True)
]

In [41]:
# Strip-plot template: one facet row per feature (ordered by sorted_features),
# points jittered vertically with a random value, and coloured by feature
# value with an independent colour scale per row. Only a fixed-seed random
# sample of the rows is plotted.
num_samples = 500
plot_sample = Attr.sample(n=num_samples, random_state=42)

feature_row = alt.Row(
    'feature_name',
    sort=sorted_features,
    spacing=0,
    header=alt.Header(labelAngle=0, labelAlign='left'),
)
jitter_axis = alt.Y('jitter:Q', axis=None)
value_colour = alt.Color(
    'feature_value',
    scale=alt.Scale(scheme='viridis'),
    title=None,
)

base = (
    alt.Chart(data=plot_sample)
    .mark_circle()
    .encode(
        row=feature_row,
        y=jitter_axis,
        color=value_colour,
        size=alt.value(32),
    )
    .properties(width=600, height=80)
    .transform_calculate(jitter='random()')
    .resolve_scale(color='independent')
)

# One chart per attribution method: same template, with that method's
# attribution column on the x-axis.
charts = {}
for attr_name in attr_names:
    charts[attr_name] = base.encode(x=attr_name)


### Summary Strip Plot

In [42]:
# Summary strip plot with the SHAP values on the x-axis
charts['shap_value'] 

In [43]:
# Summary strip plot with the LIME weights on the x-axis
charts['lime_weight'] 

In [44]:
# Summary strip plot with the Integrated Gradients attributions on the x-axis
charts['ig_attr']

In [45]:
# Summary strip plot with the DeepLIFT attributions on the x-axis
charts['deepLift_attr']