In [17]:
import pandas as pd
import altair as alt

In [18]:
import warnings
warnings.filterwarnings('ignore')

### Load all the feature attributions and the model predictions

In [19]:
# --- File locations (relative to the notebook's working directory) ---
shap_filename = '../Weights/Attr/Shap_nn.pkl'                # SHAP
lime_weights_filename = '../Weights/Attr/lime_weights.pkl'   # LIME
ig_attr_filename = '../Weights/Attr/ig_attr.pkl'             # Integrated Gradients
deepLift_filename = '../Weights/Attr/deepLift_attr.pkl'      # DeepLift
predictProb_filename = '../Weights/Model/predict_prob.pkl'   # Model prediction

# --- Wide tables, exactly as stored in the pickles ---
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
shap_values_df = pd.read_pickle(shap_filename)
lime_weights_df = pd.read_pickle(lime_weights_filename)
ig_attr_df = pd.read_pickle(ig_attr_filename)
deepLift_df = pd.read_pickle(deepLift_filename)
predict_prob_df = pd.read_pickle(predictProb_filename)

# --- Long-form copies of the attribution tables ---
# Each wide table is unpivoted into (variable, value) pairs;
# ignore_index=False keeps the original row index on every melted row.
shap_values = pd.melt(shap_values_df, ignore_index=False)
lime_weights = pd.melt(lime_weights_df, ignore_index=False)
ig_attr = pd.melt(ig_attr_df, ignore_index=False)
deepLift_attr = pd.melt(deepLift_df, ignore_index=False)


## Generating the charts to be shown at the end of the notebook

### A bar chart for each method: mean absolute value of feature attributions

In [20]:
# Mean absolute attribution per column for every method, collected in one
# table with one row per column of the SHAP frame.
shap_mean = shap_values_df.abs().mean()

# The other methods are attached positionally (raw arrays, no index
# alignment). NOTE(review): this assumes all four attribution frames list
# their columns in the same order as the SHAP frame - confirm upstream.
mean_attr = pd.DataFrame(
    {'feature_name': shap_mean.index, 'shap': shap_mean.to_numpy()}
).assign(
    lime=lime_weights_df.abs().mean().to_numpy(),
    ig=ig_attr_df.abs().mean().to_numpy(),
    deepLift=deepLift_df.abs().mean().to_numpy(),
)

In [21]:
# rename feature names so that the visualization is not cluttered 
mean_attr['feature_name'][2] = 'Blood Pressure'
mean_attr['feature_name'][3] = 'Skin thickness'
mean_attr['feature_name'][4] = 'Insulin Level'
mean_attr['feature_name'][6] = 'Pedigree'

In [22]:
# Column names of the attribution methods compared in this notebook.
attr_methods = [
    'lime',
    'shap',
    'ig',
    'deepLift',
]

In [23]:
# Multi-selection driven by mouse-over events with toggling enabled;
# empty='none' means no mark counts as selected while the selection is empty.
brush = alt.selection_multi(on='mouseover', toggle=True, empty='none')

# Selected bars are drawn green, all others steelblue.
bar_color = alt.condition(brush, alt.value('green'), alt.value('steelblue'))

# Template bar chart over mean_attr. It deliberately has no x/y encodings:
# each per-method chart adds its own on top of this base.
base = (
    alt.Chart(mean_attr)
    .mark_bar()
    .encode(color=bar_color)
    .add_selection(brush)
    .properties(width=200, height=200)
)

In [24]:
def _method_bar(method_column):
    """Return the base bar chart with `method_column` on x and features on y.

    Features are ordered by descending x value and the y axis has no title.
    """
    feature_axis = alt.Y('feature_name', sort='-x', title='')
    return base.encode(x=method_column, y=feature_axis)


# One horizontal bar chart per attribution method.
lime_bar = _method_bar('lime')
shap_bar = _method_bar('shap')
ig_bar = _method_bar('ig')
deepLift_bar = _method_bar('deepLift')

# 2x2 layout: LIME | SHAP on top, Integrated Gradients | DeepLift below.
row1 = alt.hconcat(lime_bar, shap_bar)
row2 = alt.hconcat(ig_bar, deepLift_bar)
mean_attr_bar = alt.vconcat(row1, row2)

### Group Bar Chart Approach

In [25]:
# Rank the features within each method by mean absolute attribution.
# Ranking is ascending, so a higher rank means a more important feature.
ranks_df = mean_attr[['feature_name']].join(mean_attr[attr_methods].rank())

In [26]:
# Long form for plotting: one row per (feature, method) pair.
# value_vars is restricted to the method columns so that the helper 'sum'
# column added below is never melted as if it were a method when this cell
# is re-run.
ranks_melted = ranks_df.melt(
    id_vars=['feature_name'],
    value_vars=attr_methods,
    var_name='method',
    value_name='feature_rank',
)

# Total rank per feature across all methods. Sum only the numeric method
# columns: summing the whole frame would include the string 'feature_name'
# column (an error on recent pandas, silently dropped on older versions) and,
# on re-run, the previous 'sum' column itself.
ranks_df['sum'] = ranks_df[attr_methods].sum(axis=1)

# Feature names ordered from highest to lowest total rank; used to order the
# facets of the grouped bar chart.
sorted_feature = (
    ranks_df.sort_values(by=['sum'], ascending=False)['feature_name'].to_list()
)

In [27]:
# group bar chart visualization 
group_bar = alt.Chart(ranks_melted).mark_bar().encode(
    x = alt.X('method', title=''),
    y = 'feature_rank',
    column=alt.Column('feature_name:N', sort=sorted_feature),
    color='method',
    
)

## Show the charts

In [28]:
# Display the 2x2 grid of per-method mean-absolute-attribution bar charts.
mean_attr_bar

In [29]:
# Display the grouped bar chart of per-method feature ranks.
group_bar