In [99]:
import pandas as pd
import altair as alt
from altair.expr import datum
import warnings
warnings.filterwarnings('ignore')

In [100]:
# Load the subset-analysis table; the charts below are built from this DataFrame.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.
subset_analysis_filename = '../Saved/4vis/subset_analysis.pkl'
subset_analysis_df = pd.read_pickle(subset_analysis_filename)

In [101]:
# Load the dataset and separate the label column from the remaining columns.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.
data_filename = '../Saved/Model/data.pkl'
df = pd.read_pickle(data_filename)
y = df['target']                  # label column
X = df.drop(columns=['target'])   # everything except the label

In [102]:
# Attribution method -> suffix used to build that method's per-feature column
# names, i.e. '<feature>_<suffix>' and 'abs_<feature>_<suffix>'.
method_name_to_attr_name = {
    'shap': 'shap_value',
    'lime': 'lime_weight',
    'ig': 'ig_attr',
    'deepLift': 'deepLift_attr',
}

In [103]:
# For every attribution method, collect the 'abs_<feature>_<suffix>' column name
# of each feature in X, in X's column order.
method2absFeatAttrNames = {}
for method_name, attr_name in method_name_to_attr_name.items():
    abs_attr_columns = [
        'abs_' + (column + '_' + attr_name)
        for column in X.columns
    ]
    # A method only gets an entry once it has at least one column.
    if abs_attr_columns:
        method2absFeatAttrNames[method_name] = abs_attr_columns

In [104]:
# Per-column maximum and minimum of the analysis table; used below to fix the
# bin extent (axis range) of the feature-value histogram.
max_df = subset_analysis_df.max()
min_df = subset_analysis_df.min()

In [105]:
# Interval selection shared by the linked views: marks inside the brush are drawn
# fully opaque, marks outside it at half opacity.
# NOTE(review): newer Altair releases deprecate `add_selection` in favour of
# `add_params` — confirm the pinned Altair version before switching.
brush = alt.selection_interval()
brushed_opacity = alt.condition(brush, alt.value(1.0), alt.value(0.5))

# Template chart (350x350) carrying the brush; individual views add a mark and encodings.
base = (
    alt.Chart(subset_analysis_df)
    .encode(opacity=brushed_opacity)
    .add_selection(brush)
    .properties(width=350, height=350)
)

In [106]:
def Get_feature_value_vs_mean_attr_plot(feature_name, method_name):
    """Build a linked two-panel chart for one feature and one attribution method.

    Left panel: histogram of ``feature_name`` coloured by the ``target`` field.
    It is derived from ``base``, so it carries the interval brush; its bins span
    the column's min..max with a bin width of 1/30 of that range.

    Right panel: bar chart over ``subset_analysis_df`` showing, for every
    column listed in ``method2absFeatAttrNames[method_name]``, the mean of that
    column, filtered by the brush selection.

    Parameters
    ----------
    feature_name : str
        Column to histogram; must be a key of ``max_df`` and ``min_df``.
    method_name : str
        Key of ``method2absFeatAttrNames`` (e.g. ``'shap'``).

    Returns
    -------
    The two charts concatenated horizontally (``histogram | mean bars``).

    Raises
    ------
    KeyError
        If ``feature_name`` is missing from ``max_df``/``min_df`` or
        ``method_name`` is missing from ``method2absFeatAttrNames``.
    """
    # Pin the histogram to the feature's full observed range so the bins do not
    # shift from one call to the next.
    max_range_feature_value = max_df[feature_name]
    min_range_feature_value = min_df[feature_name]
    feature_value_bin_step = (max_range_feature_value - min_range_feature_value) / 30

    feature_value_bins = alt.Bin(
        step=feature_value_bin_step,
        extent=[min_range_feature_value, max_range_feature_value],
    )
    view_feature_value = base.mark_bar().encode(
        x=alt.X(feature_name, bin=feature_value_bins),
        y=alt.Y('count()'),
        color='target:N',
    )

    # Fold the method's abs-attribution columns into (key, value) rows, then keep
    # only the rows selected by the brush before averaging per key.
    view_sliced_mean_attr = alt.Chart(subset_analysis_df).transform_fold(
        method2absFeatAttrNames[method_name],
        as_=['key', 'value'],
    ).mark_bar().encode(
        x='key:N',
        y='mean(value):Q',
    ).transform_filter(
        brush
    )

    return view_feature_value | view_sliced_mean_attr

In [107]:
# Render the linked charts for the 'BMI' feature using the 'shap' attribution columns.
Get_feature_value_vs_mean_attr_plot('BMI', 'shap')