In [253]:
import pandas as pd
import altair as alt
from altair.expr import datum
import warnings
warnings.filterwarnings('ignore')

In [254]:
## Load the dataset and the model predictions
# `df` is whatever object was pickled at `data_filename`; later cells use it as
# a DataFrame that has a 'target' column next to the feature columns.
# NOTE(review): unpickling can execute arbitrary code — only load files that
# were produced by this project.
data_filename = '../Saved/Model/data.pkl'
df =  pd.read_pickle(data_filename)

In [255]:
# Signed feature-rank frames, keyed by attribution method name.
_rank_pickle_by_method = {
    'shap': '../Saved/Attr/shap_rank.pkl',
    'lime': '../Saved/Attr/lime_rank.pkl',
    'ig': '../Saved/Attr/ig_rank.pkl',
    'deepLift': '../Saved/Attr/deepLift_rank.pkl',
}
dict_name_to_rank_df = {
    method: pd.read_pickle(pickle_path)
    for method, pickle_path in _rank_pickle_by_method.items()
}

# Attribution-value frames, keyed by attribution method name.
# NOTE(review): 'lime' reads the same Shap_nn.pkl file as 'shap', so every
# "lime" attribution used downstream is really a shap attribution. This looks
# like a copy-paste slip — confirm the intended lime file and fix the path.
_attr_pickle_by_method = {
    'shap': '../Saved/Attr/Shap_nn.pkl',
    'lime': '../Saved/Attr/Shap_nn.pkl',
    'ig': '../Saved/Attr/ig_attr.pkl',
    'deepLift': '../Saved/Attr/deepLift_attr.pkl',
}
dict_name_to_attr_df = {
    method: pd.read_pickle(pickle_path)
    for method, pickle_path in _attr_pickle_by_method.items()
}

In [256]:
# For every unordered pair of attribution methods, compute the row-wise
# Euclidean distance between their rank frames and store it in a column
# named '<method_i>_<method_j>'.
rank_items = list(dict_name_to_rank_df.items())
distance_signed_rank = pd.DataFrame()
for position, (first_method, first_rank_df) in enumerate(rank_items):
    # Only pair with methods that come later, so each pair appears once.
    for second_method, second_rank_df in rank_items[position + 1:]:
        pair_label = first_method + '_' + second_method
        squared_gap = (first_rank_df - second_rank_df) ** 2
        distance_signed_rank[pair_label] = squared_gap.sum(axis=1) ** 0.5
    

In [257]:
# Place the pairwise rank-distance columns next to the original data columns.
data_and_distance_rank = pd.concat(
    [df, distance_signed_rank],
    axis='columns',
)

In [258]:
# Interval selection attached to `base`; charts derived from `base` let the
# user drag out a range, and other views can filter on it.
brush = alt.selection_interval()

# Shared chart template over the data + rank-distance frame: marks inside the
# brush are fully opaque, everything else is dimmed to 0.2.
base = (
    alt.Chart(data_and_distance_rank)
    .encode(opacity=alt.condition(brush, alt.value(1.0), alt.value(0.2)))
    .add_selection(brush)
    .properties(width=350, height=350)
)

In [259]:
def Get_distance_and_feature_value_plot(feature_name):
    """Build two linked histograms: shap-vs-lime rank distance and feature values.

    The left view is a histogram of the ``shap_lime`` column drawn on the
    module-level ``base`` chart, so it carries the module-level ``brush``
    selection. The right view is a histogram of ``feature_name`` coloured by
    ``target`` and filtered to the rows selected with ``brush``.

    Parameters
    ----------
    feature_name
        Column label of ``df`` / ``data_and_distance_rank`` shown in the
        right-hand view.

    Returns
    -------
    The horizontal concatenation (``|``) of the two views.
    """
    view_distance = base.mark_bar().encode(
        x=alt.X(
            'shap_lime',
            title='distance of signed feature rank between shap and lime attribution method',
            bin=alt.Bin(step=1, extent=[0, 30]),
        ),
        y='count()',
    )

    # Reduce only the requested column rather than the whole frame.
    min_range = df[feature_name].min()
    max_range = df[feature_name].max()
    exact_bin_step = (max_range - min_range) / 30
    # The rounded step is kept for wide-ranged features, but rounding yields 0
    # once the range is about 15 or less; a zero step cannot define bins, so
    # fall back to the exact step in that case.
    feature_value_bin_step = round(exact_bin_step) or exact_bin_step

    view_feature_value = alt.Chart(data_and_distance_rank).mark_bar().encode(
        x=alt.X(
            feature_name,
            bin=alt.Bin(step=feature_value_bin_step, extent=[min_range, max_range]),
        ),
        y=alt.Y('count()'),
        color='target:N',
    ).transform_filter(brush)

    return view_distance | view_feature_value

In [260]:
# Linked rank-distance / feature-value histograms for the glucose column.
Get_distance_and_feature_value_plot(feature_name='glucose')

# Feature values and Signed Rank 

In [261]:
# Read the pickled object stored under Saved/4vis.
# NOTE(review): `Indiv_df` is not referenced by any other cell visible here —
# confirm it is still needed. Presumably it holds per-individual data (going by
# the name only); verify against the notebook that wrote the file.
Indiv_filename = '../Saved/4vis/Indiv.pkl'
Indiv_df = pd.read_pickle(Indiv_filename)

In [262]:
# Rename every feature column of each rank frame to '<feature>_<method>_rank'
# so the ranks of all methods can sit next to the raw feature values.
# NOTE(review): this overwrites the entries of `dict_name_to_rank_df`; cells
# that subtract the rank frames from each other rely on the un-suffixed column
# names and must run before this one.
_feature_columns = df.drop(['target'], axis = 1).columns  # loop-invariant
for method_name, rank_df in dict_name_to_rank_df.items():
    dict_feature_name_to_signed_rank_name = {
        feature_name: feature_name + '_' + method_name + '_rank'
        for feature_name in _feature_columns
    }
    dict_name_to_rank_df[method_name] = rank_df.rename(
        columns=dict_feature_name_to_signed_rank_name
    )

# A single concat over all frames replaces the copy-then-concat-in-a-loop
# pattern; the result is a new frame, so `df` itself is left untouched.
feature_value_and_attr_df = pd.concat(
    [df, *dict_name_to_rank_df.values()],
    axis=1,
)


In [263]:
# Fresh interval selection for the feature-value / signed-rank views.
brush = alt.selection_interval()

# Chart template over the feature + rank frame: marks inside the brush are
# fully opaque, everything else is dimmed to 0.5.
base = (
    alt.Chart(feature_value_and_attr_df)
    .encode(opacity=alt.condition(brush, alt.value(1.0), alt.value(0.5)))
    .add_selection(brush)
    .properties(width=350, height=350)
)

In [264]:
def Get_feature_value_vs_signed_rank_plot(feature_name, method_name='shap'):
    """Build two linked histograms: a feature's signed rank and its raw values.

    The left view is a histogram of the ``<feature>_<method>_rank`` column
    drawn on the module-level ``base`` chart (which carries ``brush``). The
    right view is a histogram of ``feature_name`` coloured by ``target`` and
    filtered to the rows selected with ``brush``.

    Parameters
    ----------
    feature_name
        Column label of ``df`` / ``feature_value_and_attr_df`` to inspect.
    method_name
        Attribution method whose rank column is shown. Defaults to ``'shap'``,
        which is the column the function used before this parameter existed.

    Returns
    -------
    The horizontal concatenation (``|``) of the two views.
    """
    signed_rank_column = feature_name + '_' + method_name + '_rank'
    view_feature_signed_rank = base.mark_bar().encode(
        x=alt.X(signed_rank_column, bin=alt.Bin(step=1)),
        y='count()',
    )

    # Reduce only the requested column, once, and reuse the bounds for both
    # the bin step and the bin extent.
    min_range = df[feature_name].min()
    max_range = df[feature_name].max()
    feature_value_bin_step = (max_range - min_range) / 30

    view_feature_value = alt.Chart(feature_value_and_attr_df).mark_bar().encode(
        x=alt.X(
            feature_name,
            bin=alt.Bin(step=feature_value_bin_step, extent=[min_range, max_range]),
        ),
        y=alt.Y('count()'),
        color='target:N',
    ).transform_filter(brush)

    return view_feature_signed_rank | view_feature_value

In [265]:
# Linked signed-rank / feature-value histograms for the Pedigree column.
Get_feature_value_vs_signed_rank_plot(feature_name='Pedigree')

## Feature Value and Feature Attribution

In [266]:
# Rename every feature column of each attribution frame to
# '<feature>_<method>_attr' and append the frames to the combined table.
_feature_columns = df.drop(['target'], axis = 1).columns  # loop-invariant
_attr_column_names = []
for method_name, attr_df in dict_name_to_attr_df.items():
    dict_feature_name_to_feature_attr_name = {
        feature_name: feature_name + '_' + method_name + '_attr'
        for feature_name in _feature_columns
    }
    _attr_column_names.extend(dict_feature_name_to_feature_attr_name.values())
    dict_name_to_attr_df[method_name] = attr_df.rename(
        columns=dict_feature_name_to_feature_attr_name
    )

# The combined frame is both input and output of this cell, so a second run
# used to append every '_attr' column again. Dropping the columns this cell
# generates (absent on the first run, hence errors='ignore') before the single
# concat keeps the cell idempotent.
feature_value_and_attr_df = pd.concat(
    [
        feature_value_and_attr_df.drop(columns=_attr_column_names, errors='ignore'),
        *dict_name_to_attr_df.values(),
    ],
    axis=1,
)

In [267]:
# Display the combined frame (feature values, '_rank' and '_attr' columns) to
# check that the renamed columns were appended.
feature_value_and_attr_df

Unnamed: 0,Pregnant,glucose,blood pressure,skin thickness,Insulin Level,BMI,Pedigree,Age,target,Pregnant_shap_rank,...,Pedigree_ig_attr,Age_ig_attr,Pregnant_deepLift_attr,glucose_deepLift_attr,blood pressure_deepLift_attr,skin thickness_deepLift_attr,Insulin Level_deepLift_attr,BMI_deepLift_attr,Pedigree_deepLift_attr,Age_deepLift_attr
0,4.0,117.0,62.0,12.0,0.0,29.7,0.380,30.0,1,-1.0,...,-0.080073,-2.042362,-0.138097,0.004110,-0.424711,-0.825093,23.402227,-1.034915,-0.430234,-2.170650
1,4.0,158.0,78.0,27.0,0.0,32.9,0.803,31.0,1,-3.0,...,0.039795,-1.717826,-0.022287,0.749686,0.067303,-0.230801,19.565380,-0.120539,0.130780,-1.729909
2,2.0,118.0,80.0,27.0,0.0,42.9,0.693,21.0,1,3.0,...,1.849635,-1.006955,0.104342,-0.179098,-0.189925,0.055681,15.819101,-0.093092,0.272369,-4.757761
3,13.0,129.0,74.0,30.0,0.0,39.9,0.569,44.0,1,-6.0,...,0.102529,-0.569997,-1.701970,0.143349,-0.262904,0.038227,30.544613,-0.109584,-0.035855,0.780753
4,5.0,162.0,104.0,27.0,0.0,37.7,0.151,52.0,1,-2.0,...,-0.428390,-3.140066,-0.041911,0.328310,0.195018,0.018466,44.763817,0.451743,0.656002,0.051271
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
763,1.0,81.0,74.0,41.0,57.0,46.3,1.096,32.0,0,-4.0,...,4.401906,-2.481000,-0.637750,-0.973293,-0.189659,1.026502,16.927740,0.041972,1.860716,-2.207659
764,4.0,94.0,65.0,22.0,39.0,24.7,0.148,21.0,0,-2.0,...,-1.334914,-1.380617,0.731860,-2.543157,-0.154376,-0.161570,20.912298,-2.004530,-0.954503,-2.835301
765,3.0,158.0,64.0,13.0,387.0,31.2,0.295,24.0,0,1.0,...,-0.746235,-3.234273,-0.504615,2.712229,0.114909,-0.941176,0.099512,-0.710677,-0.346940,-2.501210
766,0.0,57.0,60.0,21.0,39.0,21.7,0.735,67.0,0,-2.0,...,1.279238,-6.828803,-0.474895,-6.431443,-0.799571,0.647194,30.113367,-2.409970,0.248714,-4.987169


In [268]:
# Fresh interval selection for the feature-value / attribution views.
brush = alt.selection_interval()

# Chart template over the combined feature / rank / attribution frame: marks
# inside the brush are fully opaque, everything else is dimmed to 0.2.
base = (
    alt.Chart(feature_value_and_attr_df)
    .encode(opacity=alt.condition(brush, alt.value(1.0), alt.value(0.2)))
    .add_selection(brush)
    .properties(width=350, height=350)
)

In [269]:
def Get_feature_value_vs_attr_plot(feature_name, method_name):
    """Build two linked histograms: a feature's attribution and its raw values.

    The left view is a histogram of the ``<feature>_<method>_attr`` column
    drawn on the module-level ``base`` chart (which carries ``brush``). The
    right view is a histogram of ``feature_name`` coloured by ``target`` and
    filtered to the rows selected with ``brush``.

    Parameters
    ----------
    feature_name
        Feature column of ``feature_value_and_attr_df`` to inspect.
    method_name
        Attribution method; selects the ``<feature>_<method>_attr`` column.

    Returns
    -------
    The horizontal concatenation (``|``) of the two views.
    """
    # The plotted field now follows `method_name`; it used to be fixed to the
    # shap column while the bin step was derived from the requested method.
    feature_attr_name = feature_name + '_' + method_name + '_attr'

    # Reduce only the two columns that are plotted, not the whole frame.
    max_rang_feature_attr = feature_value_and_attr_df[feature_attr_name].max()
    min_range_feature_attr = feature_value_and_attr_df[feature_attr_name].min()
    feature_attr_bin_step = (max_rang_feature_attr - min_range_feature_attr) / 30

    view_feature_attr = base.mark_bar().encode(
        x=alt.X(feature_attr_name, bin=alt.Bin(step=feature_attr_bin_step)),
        y='count()',
    )

    max_rang_feature_value = feature_value_and_attr_df[feature_name].max()
    min_range_feature_value = feature_value_and_attr_df[feature_name].min()
    feature_value_bin_step = (max_rang_feature_value - min_range_feature_value) / 30

    view_feature_value = alt.Chart(feature_value_and_attr_df).mark_bar().encode(
        x=alt.X(
            feature_name,
            bin=alt.Bin(
                step=feature_value_bin_step,
                extent=[min_range_feature_value, max_rang_feature_value],
            ),
        ),
        y=alt.Y('count()'),
        color='target:N',
    ).transform_filter(brush)

    return view_feature_attr | view_feature_value

In [270]:
# Linked attribution / feature-value histograms for BMI using shap.
Get_feature_value_vs_attr_plot(feature_name='BMI', method_name='shap')

In [271]:
def Get_feature_value_vs_attr_plot(feature_name, method_name):
    """Build two linked histograms: a feature's raw values and its attribution.

    Here the brush lives on the feature-value view: the left view is a
    histogram of ``feature_name`` coloured by ``target``, drawn on the
    module-level ``base`` chart (which carries ``brush``). The right view is a
    histogram of the ``<feature>_<method>_attr`` column restricted to the rows
    selected with ``brush``.

    NOTE(review): a function with this exact name is also defined in an
    earlier cell; whichever definition ran last is the one that gets called.
    Consider giving the two variants distinct names.

    Parameters
    ----------
    feature_name
        Feature column of ``feature_value_and_attr_df`` to inspect.
    method_name
        Attribution method; selects the ``<feature>_<method>_attr`` column.

    Returns
    -------
    The horizontal concatenation (``|``) of the two views.
    """
    # Reduce only the two columns that are plotted, not the whole frame.
    max_rang_feature_value = feature_value_and_attr_df[feature_name].max()
    min_range_feature_value = feature_value_and_attr_df[feature_name].min()
    feature_value_bin_step = (max_rang_feature_value - min_range_feature_value) / 30

    view_feature_value = base.mark_bar().encode(
        x=alt.X(
            feature_name,
            bin=alt.Bin(
                step=feature_value_bin_step,
                extent=[min_range_feature_value, max_rang_feature_value],
            ),
        ),
        y=alt.Y('count()'),
        color='target:N',
    )

    # The attribution field follows `method_name` instead of being fixed to shap.
    feature_attr_name = feature_name + '_' + method_name + '_attr'
    max_rang_feature_attr = feature_value_and_attr_df[feature_attr_name].max()
    min_range_feature_attr = feature_value_and_attr_df[feature_attr_name].min()
    feature_attr_bin_step = (max_rang_feature_attr - min_range_feature_attr) / 30

    # Count the brushed rows per attribution bin. The previous version encoded
    # y as a 'value' field that is not a column of the data and piped the rows
    # through an un-grouped aggregate whose output field was literally named
    # 'feature_attr_name', which also removed the field used on x.
    # NOTE(review): if a mean attribution per bin was the goal, encode
    # y as 'mean(<attr column>)' instead — confirm the intended view.
    view_feature_attr = alt.Chart(feature_value_and_attr_df).mark_bar().encode(
        x=alt.X(feature_attr_name, bin=alt.Bin(step=feature_attr_bin_step)),
        y='count()',
    ).transform_filter(brush)

    return view_feature_value | view_feature_attr

In [272]:
# Linked feature-value / attribution histograms for BMI using shap.
Get_feature_value_vs_attr_plot(feature_name='BMI', method_name='shap')

In [273]:
# Display the combined frame again (feature values, '_rank' and '_attr' columns).
feature_value_and_attr_df

Unnamed: 0,Pregnant,glucose,blood pressure,skin thickness,Insulin Level,BMI,Pedigree,Age,target,Pregnant_shap_rank,...,Pedigree_ig_attr,Age_ig_attr,Pregnant_deepLift_attr,glucose_deepLift_attr,blood pressure_deepLift_attr,skin thickness_deepLift_attr,Insulin Level_deepLift_attr,BMI_deepLift_attr,Pedigree_deepLift_attr,Age_deepLift_attr
0,4.0,117.0,62.0,12.0,0.0,29.7,0.380,30.0,1,-1.0,...,-0.080073,-2.042362,-0.138097,0.004110,-0.424711,-0.825093,23.402227,-1.034915,-0.430234,-2.170650
1,4.0,158.0,78.0,27.0,0.0,32.9,0.803,31.0,1,-3.0,...,0.039795,-1.717826,-0.022287,0.749686,0.067303,-0.230801,19.565380,-0.120539,0.130780,-1.729909
2,2.0,118.0,80.0,27.0,0.0,42.9,0.693,21.0,1,3.0,...,1.849635,-1.006955,0.104342,-0.179098,-0.189925,0.055681,15.819101,-0.093092,0.272369,-4.757761
3,13.0,129.0,74.0,30.0,0.0,39.9,0.569,44.0,1,-6.0,...,0.102529,-0.569997,-1.701970,0.143349,-0.262904,0.038227,30.544613,-0.109584,-0.035855,0.780753
4,5.0,162.0,104.0,27.0,0.0,37.7,0.151,52.0,1,-2.0,...,-0.428390,-3.140066,-0.041911,0.328310,0.195018,0.018466,44.763817,0.451743,0.656002,0.051271
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
763,1.0,81.0,74.0,41.0,57.0,46.3,1.096,32.0,0,-4.0,...,4.401906,-2.481000,-0.637750,-0.973293,-0.189659,1.026502,16.927740,0.041972,1.860716,-2.207659
764,4.0,94.0,65.0,22.0,39.0,24.7,0.148,21.0,0,-2.0,...,-1.334914,-1.380617,0.731860,-2.543157,-0.154376,-0.161570,20.912298,-2.004530,-0.954503,-2.835301
765,3.0,158.0,64.0,13.0,387.0,31.2,0.295,24.0,0,1.0,...,-0.746235,-3.234273,-0.504615,2.712229,0.114909,-0.941176,0.099512,-0.710677,-0.346940,-2.501210
766,0.0,57.0,60.0,21.0,39.0,21.7,0.735,67.0,0,-2.0,...,1.279238,-6.828803,-0.474895,-6.431443,-0.799571,0.647194,30.113367,-2.409970,0.248714,-4.987169
