In [13]:
import pandas as pd
import altair as alt
import pmlb
import warnings
warnings.filterwarnings('ignore')

### Load the dataset and the model prediction

In [14]:
df = pmlb.fetch_data('pima')

# A value of 0 in these columns is treated as a missing measurement and is
# replaced by the median of that column within the row's own target class.
imputeFeatures = ['plasma glucose', 'Diastolic blood pressure', 'Triceps skin fold thickness', 'Body mass index', '2-Hour serum insulin']
for feature in imputeFeatures:
    for target_class in (0, 1):
        in_class = df.target == target_class
        # The median is taken over every row of the class (zeros included),
        # before any of that class's zeros are overwritten.
        class_median = df.loc[in_class, feature].median()
        df.loc[in_class & (df[feature] == 0), feature] = class_median

In [15]:
# Separate the input features from the target labels (no scaling is applied here).
X = df.drop(columns=['target'])
y = df.target

### Load all the feature attributions and the model prediction

In [16]:
# Locations of the pickled attribution tables, one file per explanation method.
attr_dir = '../Weights/Attr/'
shap_filename = attr_dir + 'Shap_nn.pkl'
lime_weights_filename = attr_dir + 'lime_weights.pkl'
ig_attr_filename = attr_dir + 'ig_attr.pkl'
deepLift_filename = attr_dir + 'deepLift_attr.pkl'

In [17]:
# Load each method's attribution table from disk.
# NOTE: unpickling can execute arbitrary code, so these files must come from a
# trusted source.
shap_values_df, lime_weights_df, ig_attr_df, deepLift_df = (
    pd.read_pickle(path)
    for path in (shap_filename, lime_weights_filename, ig_attr_filename, deepLift_filename)
)

In [18]:
# Attribution column names and the method prefixes used for the
# `<method>_is_positive` columns. The two lists are paired by position, so
# attr_names[i] must be the value column that belongs to method_names[i].
attr_names = ['shap_value', 'lime_weight', 'ig_attr', 'deepLift_attr']
method_names = ['shap', 'lime', 'ig', 'deepLift']

In [19]:
def Load_Attr(feature_name):
    """Build a per-sample table of one feature's value and its attributions.

    Parameters
    ----------
    feature_name : str
        Column to extract. It must be present in ``X`` and in each of the
        four attribution frames (``shap_values_df``, ``lime_weights_df``,
        ``ig_attr_df``, ``deepLift_df``).

    Returns
    -------
    pandas.DataFrame
        Indexed like ``X``, with columns ``feature_name``, ``feature_value``,
        ``shap_value``, ``lime_weight``, ``ig_attr``, ``deepLift_attr`` and one
        boolean ``<method>_is_positive`` column per entry of ``method_names``.
    """
    # Use the argument rather than a notebook-level variable, so the result
    # never depends on which cell happened to run last.
    Attr_feature = X.melt(ignore_index=False, value_vars=[feature_name])
    Attr_feature.columns = ['feature_name', 'feature_value']

    attr_frames = {
        'shap_value': shap_values_df,
        'lime_weight': lime_weights_df,
        'ig_attr': ig_attr_df,
        'deepLift_attr': deepLift_df,
    }
    for column, frame in attr_frames.items():
        # Assignment aligns on the index, matching each attribution to its sample.
        Attr_feature[column] = frame[feature_name]

    # method_names and attr_names are paired by position.
    for method, attr in zip(method_names, attr_names):
        Attr_feature[method + '_is_positive'] = Attr_feature[attr] > 0

    return Attr_feature

### View scatter plots for each feature across all attribution methods

In [20]:
# Attribution value columns and their method prefixes, paired by position:
# attr_names[i] is plotted on the y axis and coloured by
# method_names[i] + '_is_positive', so both lists must be in the same order.
attr_names = ['shap_value', 'lime_weight', 'ig_attr', 'deepLift_attr']
method_names = ['shap', 'lime', 'ig', 'deepLift']

In [21]:
# Rank features by mean absolute SHAP value (a larger mean gets a larger rank).
# The already-loaded SHAP table is reused instead of reading the pickle again.
sorted_index = shap_values_df.abs().mean().rank()
# Highest-ranked feature first. Ranks are looked up by column label rather than
# paired with X.columns by position, so a different column order in the SHAP
# table cannot silently mis-sort the features; a missing column raises KeyError.
sorted_features = sorted(X.columns, key=lambda name: sorted_index[name], reverse=True)

In [22]:
# charts[k] holds one scatter chart per attribution method for the k-th entry
# of sorted_features.
charts = []
# Maximum number of rows sampled per feature.
num_samples = 500
# A single interval selection shared by every chart.
brush = alt.selection_interval()
for feature in sorted_features:
    Attr_feature = Load_Attr(feature)
    # Never request more rows than exist: DataFrame.sample raises a ValueError
    # when n exceeds the number of rows (sampling without replacement).
    Attr_sampled = Attr_feature.sample(n=min(num_samples, len(Attr_feature)), random_state=42)
    base = alt.Chart(Attr_sampled).mark_circle().encode(
        x=alt.X('feature_value', title=feature + ' value'),
        # Points inside the brush are fully opaque, the rest are faded.
        opacity=alt.condition(brush, alt.value(1.0), alt.value(0.3))
    ).resolve_scale(
        x="independent",
        y="independent"
    ).add_selection(
        brush
    )

    # One chart per method: attribution on the y axis, coloured by its sign flag.
    charts_per_feature = [
        base.encode(
            y=attr,
            color=alt.Color(method + '_is_positive', legend=None, scale=alt.Scale(range=['crimson', 'steelblue']))
        )
        for attr, method in zip(attr_names, method_names)
    ]
    charts.append(charts_per_feature)

In [23]:
def View_scatter_feature(feature_index):
    """Arrange the four per-method scatter charts of one feature in a 2x2 grid.

    Parameters
    ----------
    feature_index : int
        Position of the feature in the notebook-level ``charts`` list.

    Returns
    -------
    The vertical concatenation of two horizontally concatenated chart pairs:
    charts 0 and 1 on top, charts 2 and 3 below.
    """
    panels = charts[feature_index]
    top_row = panels[0] | panels[1]
    bottom_row = panels[2] | panels[3]
    return alt.vconcat(top_row, bottom_row)

In [24]:
# Display the 2x2 scatter grid for the first entry of `sorted_features`.
View_scatter_feature(feature_index=0)