In [67]:
import pandas as pd
import altair as alt
from altair.expr import datum
import warnings
from altair_transform import extract_data
from altair_transform import transform_chart
warnings.filterwarnings('ignore')

In [68]:
# Load the model predictions: the 'model_pred' entry of the pickled object
# becomes a single-column DataFrame.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
predictProb_filename = '../Saved/Attr/lime_infos.pkl'
predict_prob_df = pd.DataFrame(
    data=pd.read_pickle(predictProb_filename)['model_pred'],
    columns=['model_pred'],
)

In [69]:
# Read the pickled frame that the charts below are built from.
Indiv_melted_filename = '../Saved/4vis/Indiv_melted.pkl'
Indiv_melted = pd.read_pickle(filepath_or_buffer=Indiv_melted_filename)

In [70]:
# Maps each attribution method's short name to the value stored in the
# `method` column for that method's attribution rows.
dict_method_name_to_attr_name = dict(
    shap='shap_value',
    lime='lime_weight',
    ig='ig_attr',
    deepLift='deepLift_attr',
)

## Make the Charts

In [71]:
def feature_attr_and_rank_plot(index):
    """Build the attribution / signed-rank dashboard for one instance.

    Parameters
    ----------
    index
        Index label used to select the instance's rows from ``Indiv_melted``
        (via ``.loc``) and, positionally, its row of ``predict_prob_df``
        (via ``.iloc``).
        NOTE(review): this assumes index labels and row positions coincide --
        confirm against how the two frames were built.

    Returns
    -------
    A horizontally concatenated Altair chart. Left: one bar chart of
    ``feature_attr`` per value of ``dict_method_name_to_attr_name`` (the
    first two in the top row, the remaining ones in the bottom row).
    Right: bars of the ``*_rank`` methods, faceted by feature. The title
    shows the model prediction rounded to 4 decimals.
    """
    # Explicit copy: the helper column added below must not be written into a
    # slice of the shared Indiv_melted frame.
    values_melted = Indiv_melted.loc[index].copy()
    values_melted['is_positive'] = values_melted['feature_attr'] > 0

    # Facet order for the rank chart: features by descending 'sum_rank' value.
    sum_rank_rows = values_melted[values_melted['method'] == 'sum_rank']
    sorted_feature = (
        sum_rank_rows
        .sort_values(by='feature_attr', ascending=False)['feature_name']
        .to_list()
    )

    def sign_color():
        # Pin the domain so False is always crimson and True always steelblue,
        # even when a panel only contains values of a single sign (without a
        # domain the first colour goes to whichever value happens to exist).
        return alt.Color(
            'is_positive',
            legend=None,
            scale=alt.Scale(domain=[False, True], range=['crimson', 'steelblue']),
        )

    ## bar chart for feature attribution values
    # Hovering a feature toggles it in the selection; unselected bars are dimmed.
    brush = alt.selection_multi(on='mouseover', toggle=True, fields=['feature_name'])
    base = alt.Chart(values_melted).mark_bar().encode(
        opacity=alt.condition(brush, alt.value(1.0), alt.value(0.2))
    ).add_selection(
        brush
    )

    row1 = alt.hconcat()
    row2 = alt.hconcat()
    for position, attr_name in enumerate(dict_method_name_to_attr_name.values()):
        chart = base.encode(
            y=alt.Y('feature_name', sort='-x', title=''),
            x=alt.X('feature_attr', title=attr_name),
            color=sign_color(),
        ).transform_filter(
            datum.method == attr_name
        ).properties(
            width=200,
            height=200
        )
        # First two panels go to the top row, everything after to the bottom row.
        if position < 2:
            row1 |= chart
        else:
            row2 |= chart
    attr_value_bar = alt.vconcat(row1, row2)

    model_predict = round(predict_prob_df.iloc[index].values[0], 4)

    # bar chart for signed feature rank: one thin bar per method, one facet per feature
    line = base.mark_bar(size=5).encode(
        x=alt.X('method', title=''),
        y=alt.Y('feature_attr', title='feature_rank'),
        color=sign_color(),
    ).transform_filter(
        alt.FieldOneOfPredicate(field='method', oneOf=['shap_rank', 'lime_rank', 'ig_rank', 'deepLift_rank'])
    ).properties(
        width=50,
        height=200
    )
    attr_signed_rank = line.facet(column=alt.Column('feature_name', sort=sorted_feature))

    return alt.hconcat(attr_value_bar, attr_signed_rank, title='model predict: ' + str(model_predict))

## Show the visualization 

In [72]:
# Render the dashboard for the instance selected by index 0.
feature_attr_and_rank_plot(0)

In [73]:
# Inspect the melted frame (columns: feature_name, method, feature_attr).
Indiv_melted

Unnamed: 0,feature_name,method,feature_attr
0,Pregnant,feature_value,4.0
1,Pregnant,feature_value,4.0
2,Pregnant,feature_value,2.0
3,Pregnant,feature_value,13.0
4,Pregnant,feature_value,5.0
...,...,...,...
763,Age,sum_rank,18.0
764,Age,sum_rank,23.0
765,Age,sum_rank,29.0
766,Age,sum_rank,23.0


In [74]:
# Split the melted frame by `method` once and pull out the two groups used below.
# A scalar grouping key is used (not a length-1 list) so that get_group()
# accepts a plain string name across pandas versions.
_by_method = Indiv_melted.groupby("method")
feature_value_group = _by_method.get_group('feature_value')
# Copy: a `feature_value` column is added to shap_group in a later cell, and
# that write should not target a slice of Indiv_melted.
shap_group = _by_method.get_group('shap_value').copy()

In [75]:
# Inspect the rows whose method is 'feature_value'.
feature_value_group

Unnamed: 0,feature_name,method,feature_attr
0,Pregnant,feature_value,4.0
1,Pregnant,feature_value,4.0
2,Pregnant,feature_value,2.0
3,Pregnant,feature_value,13.0
4,Pregnant,feature_value,5.0
...,...,...,...
763,Age,feature_value,32.0
764,Age,feature_value,21.0
765,Age,feature_value,24.0
766,Age,feature_value,67.0


In [76]:
# Put each row's feature value next to its SHAP attribution.
# assign() builds a new frame instead of writing a column into the slice that
# get_group() returned.
# NOTE(review): the column is aligned on the index; this presumably relies on
# both groups carrying identical index labels in the same order -- confirm.
shap_group = shap_group.assign(feature_value=feature_value_group['feature_attr'])

In [77]:
# Keep only index labels 0..599. `.loc` is label-based, so every row that
# carries one of these labels is retained.
shap_group = shap_group.loc[range(0, 600)]
shap_group

Unnamed: 0,feature_name,method,feature_attr,feature_value
0,Pregnant,shap_value,-0.005457,4.000
0,glucose,shap_value,-0.163674,117.000
0,blood pressure,shap_value,-0.729109,62.000
0,skin thickness,shap_value,-0.668367,12.000
0,Insulin Level,shap_value,8.454922,0.000
...,...,...,...,...
599,skin thickness,shap_value,0.178805,24.000
599,Insulin Level,shap_value,-4.415049,44.000
599,BMI,shap_value,-0.080675,29.900
599,Pedigree,shap_value,-0.146945,0.422


In [78]:
# TODO: manually calculate mean_attr for each feature_name within every interval
# of the chosen feature_value. Unfinished helper stub:
# def Get_Filter_df(shap_group, feature_name, interval):
#     shap_group.loc[]


In [79]:
# Mean SHAP attribution of 'Age' over the rows whose feature value exceeds 24.
# The row filter has to run BEFORE the aggregation: Altair applies transforms
# in the order they are chained, and after transform_aggregate only
# `feature_name` and `mean_attr` remain, so a later filter on `feature_value`
# would match nothing and leave the chart empty.
shap_chart = alt.Chart(shap_group).transform_filter(
    (datum.feature_name == 'Age') & (datum.feature_value > 24)
).transform_aggregate(
    mean_attr='mean(feature_attr)',
    groupby=['feature_name']
).mark_bar().encode(
    x=alt.X('mean_attr:Q'),
    y=alt.Y('feature_name'),
)
# shap_chart = transform_chart(shap_chart)

In [80]:
# Render the chart.
shap_chart

In [81]:
# Cross-check in pandas: the 'Age' rows whose feature value is above 60.
shap_group[(shap_group['feature_name'] == 'Age') & (shap_group['feature_value'] > 60)]

Unnamed: 0,feature_name,method,feature_attr,feature_value
48,Age,shap_value,-7.47265,63.0
72,Age,shap_value,-15.349252,81.0
76,Age,shap_value,-4.881092,61.0
82,Age,shap_value,-7.619405,67.0
113,Age,shap_value,-6.612038,67.0
115,Age,shap_value,-9.191104,72.0
153,Age,shap_value,-7.018104,65.0
191,Age,shap_value,-6.125792,62.0
277,Age,shap_value,-6.409915,62.0
317,Age,shap_value,-7.485101,70.0


In [82]:
# Inspect shap_group (columns: feature_name, method, feature_attr, feature_value).
shap_group

Unnamed: 0,feature_name,method,feature_attr,feature_value
0,Pregnant,shap_value,-0.005457,4.000
0,glucose,shap_value,-0.163674,117.000
0,blood pressure,shap_value,-0.729109,62.000
0,skin thickness,shap_value,-0.668367,12.000
0,Insulin Level,shap_value,8.454922,0.000
...,...,...,...,...
599,skin thickness,shap_value,0.178805,24.000
599,Insulin Level,shap_value,-4.415049,44.000
599,BMI,shap_value,-0.080675,29.900
599,Pedigree,shap_value,-0.146945,0.422


In [83]:
# Mean SHAP attribution per feature over all rows of shap_group.
# `mean_attr` is not a column of shap_group, so it has to be produced by an
# aggregate transform before it can be encoded on the x axis.
# NOTE: this rebinds `shap_chart`, replacing the chart assigned to that name earlier.
shap_chart = alt.Chart(shap_group).transform_aggregate(
    mean_attr='mean(feature_attr)',
    groupby=['feature_name']
).mark_bar().encode(
    x=alt.X('mean_attr:Q'),
    y=alt.Y('feature_name'),
)

In [84]:
# Load the pickled data frame and the pickled SHAP rank frame from disk.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
data_filename = '../Saved/Model/data.pkl'
df = pd.read_pickle(filepath_or_buffer=data_filename)
shap_df = pd.read_pickle(filepath_or_buffer='../Saved/Attr/shap_rank.pkl')

In [86]:
# Place the columns of `df` and the SHAP rank columns side by side.
# NOTE(review): this rebinds `shap_df` from itself, so running the cell a
# second time appends the `df` columns again; the two frames also appear to
# share feature column names, which would leave duplicate labels -- confirm.
shap_df = pd.concat(objs=[df, shap_df], axis='columns')

In [87]:
# Inspect the combined frame.
shap_df

Unnamed: 0,Pregnant,glucose,blood pressure,skin thickness,Insulin Level,BMI,Pedigree,Age,target,Pregnant.1,glucose.1,blood pressure.1,skin thickness.1,Insulin Level.1,BMI.1,Pedigree.1,Age.1
0,4.0,117.0,62.0,12.0,0.0,29.7,0.380,30.0,1,-1.0,-3.0,-7.0,-4.0,8.0,-5.0,-2.0,-6.0
1,4.0,158.0,78.0,27.0,0.0,32.9,0.803,31.0,1,-3.0,-5.0,7.0,2.0,8.0,1.0,-6.0,-4.0
2,2.0,118.0,80.0,27.0,0.0,42.9,0.693,21.0,1,3.0,-2.0,-5.0,-1.0,8.0,4.0,6.0,-7.0
3,13.0,129.0,74.0,30.0,0.0,39.9,0.569,44.0,1,-6.0,1.0,3.0,-2.0,8.0,5.0,4.0,-7.0
4,5.0,162.0,104.0,27.0,0.0,37.7,0.151,52.0,1,-2.0,7.0,-3.0,1.0,8.0,4.0,-5.0,-6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
763,1.0,81.0,74.0,41.0,57.0,46.3,1.096,32.0,0,-4.0,-6.0,-1.0,2.0,-8.0,5.0,7.0,3.0
764,4.0,94.0,65.0,22.0,39.0,24.7,0.148,21.0,0,-2.0,-7.0,3.0,-1.0,-8.0,-5.0,-4.0,-6.0
765,3.0,158.0,64.0,13.0,387.0,31.2,0.295,24.0,0,1.0,7.0,2.0,-6.0,5.0,-3.0,-4.0,-8.0
766,0.0,57.0,60.0,21.0,39.0,21.7,0.735,67.0,0,-2.0,-7.0,1.0,3.0,-6.0,-5.0,4.0,-8.0
