In [178]:
import pandas as pd
import altair as alt
from altair.expr import datum
import warnings
warnings.filterwarnings('ignore')

In [179]:
## Load the dataframes
# Locations of the two pickled inputs, relative to the notebook directory.
subset_analysis_filename = '../Saved/4vis/subset_analysis.pkl'
data_filename = '../Saved/Model/data.pkl'

# Table that the charts below are drawn from.
subset_analysis_df = pd.read_pickle(subset_analysis_filename)

# Model dataset, split into labels (the 'target' column) and features (every other column).
df = pd.read_pickle(data_filename)
y = df['target']
X = df.drop(columns=['target'])

In [183]:
# Column-wise extremes of the analysis table, used to pin the x and y ranges of the plots
min_df = subset_analysis_df.min()
max_df = subset_analysis_df.max()

# Common starting chart: an interval brush drives an opacity condition
# (1.0 vs 0.5), and the chart is sized 350x350.
brush = alt.selection_interval()
base = (
    alt.Chart(subset_analysis_df)
    .encode(opacity=alt.condition(brush, alt.value(1.0), alt.value(0.5)))
    .add_selection(brush)
    .properties(width=350, height=350)
)


In [None]:
# Attribution method name -> suffix used in the column names of that method's attributions
method_name_to_attr_name = dict(
    shap='shap_value',
    lime='lime_weight',
    ig='ig_attr',
    deepLift='deepLift_attr',
)

In [None]:
# Per-method lookup tables between a plain feature name and the name of its
# absolute-attribution column, "<feature>_abs_<attr>", in both directions.
method_to_featNames2abs = {}
method_to_abs2featNames = {}
for method_name, attr_name in method_name_to_attr_name.items():
    abs_suffix = '_abs_' + attr_name
    # forward map: feature name -> absolute-attribution column name
    featNames2abs = {name: name + abs_suffix for name in X.columns}
    # inverse map: absolute-attribution column name -> feature name
    abs2featNames = {abs_name: name for name, abs_name in featNames2abs.items()}
    method_to_featNames2abs[method_name] = featNames2abs
    method_to_abs2featNames[method_name] = abs2featNames


In [None]:
# def Get_sliced_mean_attr_plot(method_name):
    

In [185]:
def Get_feature_value_vs_mean_attr_plot(subset_analysis_df, feature_name, method_name, n_bins=20):
    """Build a linked two-panel Altair chart for one feature and one attribution method.

    Left panel: histogram of the ``<feature_name>_value`` column, coloured by
    ``target``. It is derived from the module-level ``base`` chart, so it
    carries the shared ``brush`` selection.
    Right panel: one bar per column of ``X`` showing the mean of that
    feature's ``<feature>_abs_<attr>`` column over the rows selected by
    ``brush``.

    Parameters
    ----------
    subset_analysis_df : pandas.DataFrame
        Table to plot. Both panels are drawn from this frame; it is not
        modified (``rename`` returns a copy).
    feature_name : str
        Feature whose ``<feature_name>_value`` column is histogrammed.
    method_name : str
        Key of ``method_name_to_attr_name`` (e.g. ``'shap'``).
    n_bins : int, optional
        Number of equal-width histogram bins across the fixed value range.
        Defaults to 20.

    Returns
    -------
    The horizontal concatenation ``histogram | mean-attribution bars``.

    Raises
    ------
    KeyError
        If ``method_name`` is unknown or ``<feature_name>_value`` is not a
        column of the module-level ``min_df`` / ``max_df``.
    ValueError
        If ``n_bins`` is not positive.
    """
    if n_bins <= 0:
        raise ValueError('n_bins must be a positive integer')

    # Look the method up first so an unknown name fails before any chart is built.
    abs2featNames = method_to_abs2featNames[method_name]

    # --- distribution of feature value plot ---
    feature_value_name = feature_name + '_value'
    # The range comes from the module-level min_df/max_df (extremes of the
    # full table), so the x axis stays fixed whatever rows are passed in.
    min_range_feature_value = min_df[feature_value_name]
    max_range_feature_value = max_df[feature_value_name]
    value_span = max_range_feature_value - min_range_feature_value
    if value_span > 0:
        feature_value_bin = alt.Bin(
            step=value_span / n_bins,
            extent=[min_range_feature_value, max_range_feature_value],
        )
    else:
        # Constant (or all-NaN) column: a zero-width step is not a valid bin
        # spec, so let Vega-Lite choose the bins itself.
        feature_value_bin = alt.Bin(maxbins=n_bins)

    # `base` is bound to the module-level table; rebind its data so the
    # histogram shows the frame that was actually passed in.
    view_feature_value = base.properties(data=subset_analysis_df).mark_bar().encode(
        x=alt.X(feature_value_name, bin=feature_value_bin),
        y=alt.Y('count()'),
        color='target:N'
    )

    # --- sliced mean attr plot ---
    # Rename "<feature>_abs_<attr>" columns to the bare feature name so the
    # folded 'key' field (and therefore the y-axis labels) reads as feature names.
    renamed_df = subset_analysis_df.rename(columns=abs2featNames)
    view_sliced_mean_attr_ = alt.Chart(renamed_df).transform_fold(
        X.columns.tolist(),
        as_=['key', 'value']
    ).mark_bar().encode(
        y=alt.Y('key:N', title='feature_name'),
        x=alt.X('mean(value):Q', title='mean absolute ' + method_name)
    ).transform_filter(
        brush
    )
    return view_feature_value | view_sliced_mean_attr_

In [187]:
# Render the linked histogram / mean-absolute-attribution view for 'glucose' under the 'shap' method
Get_feature_value_vs_mean_attr_plot(
    subset_analysis_df,
    feature_name='glucose',
    method_name='shap',
)

Unnamed: 0,Pregnant_value,glucose_value,blood pressure_value,skin thickness_value,Insulin Level_value,BMI_value,Pedigree_value,Age_value,target,Pregnant_shap_value,...,Pedigree_abs_deepLift_attr,Age_abs_deepLift_attr,Pregnant_deepLift_rank,glucose_deepLift_rank,blood pressure_deepLift_rank,skin thickness_deepLift_rank,Insulin Level_deepLift_rank,BMI_deepLift_rank,Pedigree_deepLift_rank,Age_deepLift_rank
0,4.0,117.0,62.0,12.0,0.0,29.7,0.380,30.0,1,-0.005457,...,0.430234,2.170650,-2.0,1.0,-3.0,-5.0,8.0,-6.0,-4.0,-7.0
1,4.0,158.0,78.0,27.0,0.0,32.9,0.803,31.0,1,-0.073003,...,0.130780,1.729909,-1.0,6.0,2.0,-5.0,8.0,-3.0,4.0,-7.0
2,2.0,118.0,80.0,27.0,0.0,42.9,0.693,21.0,1,0.078111,...,0.272369,4.757761,3.0,-4.0,-5.0,1.0,8.0,-2.0,6.0,-7.0
3,13.0,129.0,74.0,30.0,0.0,39.9,0.569,44.0,1,-0.785230,...,0.035855,0.780753,-7.0,4.0,-5.0,2.0,8.0,-3.0,-1.0,6.0
4,5.0,162.0,104.0,27.0,0.0,37.7,0.151,52.0,1,-0.319498,...,0.656002,0.051271,-2.0,5.0,4.0,1.0,8.0,6.0,7.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
763,1.0,81.0,74.0,41.0,57.0,46.3,1.096,32.0,0,-0.558431,...,1.860716,2.207659,-3.0,-4.0,-2.0,5.0,8.0,1.0,6.0,-7.0
764,4.0,94.0,65.0,22.0,39.0,24.7,0.148,21.0,0,-0.066239,...,0.954503,2.835301,3.0,-6.0,-1.0,-2.0,8.0,-5.0,-4.0,-7.0
765,3.0,158.0,64.0,13.0,387.0,31.2,0.295,24.0,0,0.031296,...,0.346940,2.501210,-4.0,8.0,2.0,-6.0,1.0,-5.0,-3.0,-7.0
766,0.0,57.0,60.0,21.0,39.0,21.7,0.735,67.0,0,-0.362057,...,0.248714,4.987169,-2.0,-7.0,-4.0,3.0,8.0,-5.0,1.0,-6.0


In [None]:
#feature_value_vs_attr_plot distribution

In [None]:
# Get_feature_value_vs_signed_rank_plot('Pedigree')