In [19]:
import pandas as pd
import altair as alt
import pmlb

In [20]:
import warnings
warnings.filterwarnings('ignore')

### Load and prepare the dataset

In [21]:
# Fetch the 'pima' dataset from PMLB (feature columns plus a 'target' column).
df = pmlb.fetch_data('pima')

# In the columns below a value of 0 marks a missing measurement. Each such 0 is
# replaced by the median of that feature among rows with the same target class.
# NOTE(review): the median is taken over all rows of the class, zeros included.
imputeFeatures = [
    'plasma glucose',
    'Diastolic blood pressure',
    'Triceps skin fold thickness',
    'Body mass index',
    '2-Hour serum insulin',
]
for feature in imputeFeatures:
    for label in (0, 1):
        in_class = df.target == label
        class_median = df.loc[in_class, feature].median()
        df.loc[in_class & (df[feature] == 0), feature] = class_median

In [22]:
# Separate the feature matrix from the label column (no scaling is applied here).
X = df.drop(columns=['target'])
y = df['target']


### Load all the feature attributions and the model prediction

In [23]:
def _load_wide_and_long(path):
    """Read a pickled DataFrame and return it with its long-format melt (index kept)."""
    wide = pd.read_pickle(path)
    return wide, wide.melt(ignore_index=False)


# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.

# SHAP
shap_filename = '../Weights/Attr/Shap_nn.pkl'
shap_values_df, shap_values = _load_wide_and_long(shap_filename)

# LIME
lime_weights_filename = '../Weights/Attr/lime_weights.pkl'
lime_weights_df, lime_weights = _load_wide_and_long(lime_weights_filename)

# Integrated Gradients
ig_attr_filename = '../Weights/Attr/ig_attr.pkl'
ig_attr_df, ig_attr = _load_wide_and_long(ig_attr_filename)

# DeepLIFT
deepLift_filename = '../Weights/Attr/deepLift_attr.pkl'
deepLift_df, deepLift_attr = _load_wide_and_long(deepLift_filename)

# Model prediction (kept as stored, not melted)
predictProb_filename = '../Weights/Model/predict_prob.pkl'
predict_prob_df = pd.read_pickle(predictProb_filename)

### One bar chart for each feature attribution method

In [24]:
# Long-format table with one row per (instance, feature); the index keeps the
# instance's label from X so a single instance can be selected with .loc.
Attr_ind = X.melt(ignore_index=False, var_name='feature_name', value_name='feature_value')
Attr_ind['feature_and_value'] = (
    Attr_ind['feature_name'] + ' = ' + Attr_ind['feature_value'].astype(str)
)

# Attach the melted attribution values of every method as its own column.
# NOTE(review): presumably each attribution frame has the same index and column
# order as X, so the melted rows line up with Attr_ind - confirm.
for column, melted in (
    ('shap_value', shap_values),
    ('lime_weight', lime_weights),
    ('ig_attr', ig_attr),
    ('deepLift_attr', deepLift_attr),
):
    Attr_ind[column] = melted['value']

In [25]:
attr_names = ['lime_weight', 'shap_value', 'ig_attr', 'deepLift_attr']
method_names = ['lime', 'shap', 'ig', 'deepLift']
# Per method, flag whether the attribution is strictly positive (zero counts as
# not positive); the flag columns are named '<method>_is_positive'.
for method, attr_column in zip(method_names, attr_names):
    Attr_ind[f'{method}_is_positive'] = Attr_ind[attr_column].gt(0)

In [27]:
def Ind_Attr(values, data_index):
    """Build a 2x2 grid of attribution bar charts for a single instance.

    One horizontal bar chart is drawn per attribution method (SHAP and LIME on
    the first row, Integrated Gradients and DeepLIFT on the second). Bars are
    sorted by attribution value and coloured by sign.

    Parameters
    ----------
    values : pandas.DataFrame
        Rows to plot. Must contain the columns 'feature_and_value',
        'shap_value', 'lime_weight', 'ig_attr', 'deepLift_attr' and the
        matching '<method>_is_positive' boolean columns.
    data_index : int
        Position (used with ``.iloc``) of the instance in the module-level
        ``predict_prob_df``; the first value of that row is shown in the title.

    Returns
    -------
    The vertically concatenated Altair chart.
    """
    # (method prefix, attribution column) in display order: row 1, then row 2.
    panels = [
        ('shap', 'shap_value'),
        ('lime', 'lime_weight'),
        ('ig', 'ig_attr'),
        ('deepLift', 'deepLift_attr'),
    ]

    # Hovering a bar toggles it into the selection; selected bars are opaque,
    # and with empty='none' nothing is highlighted until a bar is hovered.
    brush = alt.selection_multi(on='mouseover', toggle=True, empty='none')

    base = alt.Chart(values).mark_bar().encode(
        opacity=alt.condition(brush, alt.value(1.0), alt.value(0.3))
    ).add_selection(
        brush
    ).properties(
        width=200,
        height=200
    )

    # Pin the domain explicitly. Without it the scale domain is derived from the
    # values present, so a method whose attributions are all positive for this
    # instance would have True mapped to the first range colour (crimson).
    sign_scale = alt.Scale(domain=[False, True], range=['crimson', 'steelblue'])

    def _panel(method, attr_column):
        # One bar chart: features on y (sorted by descending x), attribution on x.
        return base.encode(
            y=alt.Y('feature_and_value', sort='-x', title=''),
            x=attr_column,
            color=alt.Color(method + '_is_positive', legend=None, scale=sign_scale),
        )

    ind_shap_bar, ind_lime_bar, ind_ig_bar, ind_deepLift_bar = [
        _panel(method, attr_column) for method, attr_column in panels
    ]

    row1 = ind_shap_bar | ind_lime_bar
    row2 = ind_ig_bar | ind_deepLift_bar

    # Title shows the stored model output for this instance, rounded to 2 decimals.
    model_predict = round(predict_prob_df.iloc[data_index].values[0], 2)
    ind_bar = alt.vconcat(row1, row2, title='model predict: ' + str(model_predict))

    return ind_bar


In [35]:
# Render the attribution charts for one instance. i is used both as the row
# label in Attr_ind and as the row position in predict_prob_df.
i = 400
Ind_Attr(values=Attr_ind.loc[i], data_index=i)