In [None]:
import pickle

import dalex as dx
import pandas as pd
import xgboost as xgb

from IPython.display import Image  # for saving plots
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
!pip install -U kaleido

Pakiet `kaleido` jest potrzebny do zapisywania wykresów do pliku.

In [None]:
# Hex colours used for the random forest, XGB and logistic-regression series in the plots.
rf_color, xgb_color, log_reg_color = '#8bdcbe', '#4378bf', '#ae2c87'

In [None]:
# Read the preprocessed dataset straight from the course repository.
input_df = pd.read_csv(
    'https://github.com/mini-pw/2021L-WB-XAI-1/raw/main/PraceDomowe/PracaDomowa3/Sawicki_Bartosz/new_preprocessed_dataset.csv'
)
# 'Attrition' is the target; every remaining column is used as a feature.
X = input_df.drop(columns='Attrition')
y = input_df['Attrition']
# Fixed seed so the train/test partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=14)

In [None]:
path = '../modele/'


def _load_pickle(file_name):
    """Unpickle and return the object stored in ``path + file_name``.

    The file is opened in a ``with`` block so its handle is closed as soon as
    loading finishes instead of being left open.
    """
    # NOTE(review): pickle.load can execute arbitrary code - only load files from a trusted source.
    with open(path + file_name, "rb") as model_file:
        return pickle.load(model_file)


# NOTE: `xgb` is rebound here from the xgboost module alias (top-of-notebook import)
# to the loaded model object; the name is kept so existing references keep working.
xgb = _load_pickle('new_xgb_model.p')
explainer_xgb = dx.Explainer(xgb, X_train, y_train, label='XGB')
rf = _load_pickle('new_random_forest_model.p')
explainer_rf = dx.Explainer(rf, X_train, y_train, label='RF')
reg = _load_pickle('l1_log_reg.p')
explainer_reg = dx.Explainer(reg, X_train, y_train, label='LogisticRegression')

In [None]:
def save_plot(path, plot):
    """Render ``plot`` as a PNG image and write the bytes to ``path``.

    Parameters
    ----------
    path : str or os.PathLike
        Destination file. Missing parent directories are created first.
    plot : object
        Anything exposing ``to_image(format=..., engine=...)`` that returns
        the encoded image as bytes.

    Raises
    ------
    Whatever ``plot.to_image`` raises; in that case no file is created.
    """
    import os

    # Render first: if the export fails, no empty/truncated file is left behind.
    png_bytes = plot.to_image(format="png", engine="kaleido")

    # A bare file name has an empty dirname, which os.makedirs would reject.
    target_dir = os.path.dirname(path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with open(path, "wb") as binary_file:
        binary_file.write(png_bytes)

In [None]:
# Variable importance (dalex `model_parts`) for the XGB and RF explainers, same seed for both.
xgb_parts = explainer_xgb.model_parts(random_state=123)
rf_parts = explainer_rf.model_parts(random_state=123)

# Draw both results in one figure, split by variable, and export it without displaying it.
plot = rf_parts.plot(
    xgb_parts,
    split='variable',
    vertical_spacing=0.001,
    max_vars=9,
    show=False,
)
save_plot('gen_images/1-1-permutational-feature-xgb-rf.png', plot)

In [None]:
# Values of the `C` argument for which an L1-penalised logistic regression is fitted.
# (LogisticRegression and accuracy_score are imported in the notebook's top import cell.)
C = [.1, .2, .25, .3, .35, .4, .45, .5, .55, .6, .7, 1, 1.25, 1.5, 2, 5]

# Fit one model per C value, keyed by C, printing its accuracy on the test split.
lr_models = {}
for c in C:
    lr = LogisticRegression(C=c, penalty='l1', solver='liblinear', max_iter=1000, random_state=123).fit(X_train, y_train)
    print('{:3} accuracy: {:2.2%}'.format(c, accuracy_score(y_test, lr.predict(X_test))))
    lr_models[c] = lr

# Wrap every fitted model in a dalex explainer whose label carries its C value.
explainers = [
    dx.Explainer(model, X_train, y_train, label='log_reg(c={:3})'.format(c_value), verbose=0)
    for c_value, model in lr_models.items()
]

In [None]:
# Importance of "Total_Revolving_Bal" for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=["Total_Revolving_Bal"])
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.001, max_vars=21, show=False)
# The colour list must be as long as the number of bars. One result is plotted per
# explainer, so the length is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * len(model_parts)
plot.data[0]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-log-reg-group-total-revolving-bal.png', plot)

In [None]:
# Importance of "Total_Ct_Chng_Q4_Q1" for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=["Total_Ct_Chng_Q4_Q1"])
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.001, max_vars=21, show=False)
# The colour list must be as long as the number of bars. One result is plotted per
# explainer, so the length is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * len(model_parts)
plot.data[0]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-log-reg-group-total-ct-chng.png', plot)

In [None]:
# Importance of "Gender" for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=["Gender"])
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.001, max_vars=21, show=False)
# The colour list must be as long as the number of bars. One result is plotted per
# explainer, so the length is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * len(model_parts)
plot.data[0]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-log-reg-group-gender.png', plot)

In [None]:
# Importance of "Avg_Utilization_Ratio" for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=["Avg_Utilization_Ratio"])
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.001, max_vars=21, show=False)
# The colour list must be as long as the number of bars. One result is plotted per
# explainer, so the length is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * len(model_parts)
plot.data[0]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-log-reg-group-utilization.png', plot)

## Wszystkie 4 zmienne opisane w artykule na jednym wykresie

In [None]:
# The four variables shown together in one figure.
pfi_variables = ["Avg_Utilization_Ratio", "Gender", "Total_Ct_Chng_Q4_Q1", "Total_Revolving_Bal"]

# Importance of those variables for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=pfi_variables)
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.01, max_vars=21, show=False)
# The colour list must be as long as the number of bars. One result is plotted per
# explainer, so the length is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * len(model_parts)
# Recolour the first len(pfi_variables) traces (indices 0-3), one per requested variable.
for trace_index in range(len(pfi_variables)):
    plot.data[trace_index]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-log-reg-group-4-vars.png', plot)

## Wykresy dla regresji logistycznej oraz XGB i RF

In [None]:
# Add the XGB and RF explainers after the logistic-regression ones.
# The identity check makes the cell idempotent: re-running it no longer appends
# the same explainer objects a second time.
for extra_explainer in (explainer_xgb, explainer_rf):
    if not any(existing is extra_explainer for existing in explainers):
        explainers.append(extra_explainer)

## Dla 4 zmiennych

In [None]:
# The four variables shown together in one figure.
pfi_variables = ["Avg_Utilization_Ratio", "Gender", "Total_Trans_Amt", "Total_Revolving_Bal"]

# Importance of those variables for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=pfi_variables)
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.01, max_vars=21, show=False)
# The colour list must be as long as the number of bars. The last two explainers are
# expected to be XGB and RF (appended in that order); all earlier ones are logistic
# regressions, so their count is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * (len(model_parts) - 2)
new_colors.append(xgb_color)
new_colors.append(rf_color)

# Recolour the first len(pfi_variables) traces (indices 0-3), one per requested variable.
for trace_index in range(len(pfi_variables)):
    plot.data[trace_index]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-all-models-4-vars.png', plot)

## Dla pojedynczych zmiennych

In [None]:
# Importance of "Avg_Utilization_Ratio" for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=["Avg_Utilization_Ratio"])
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.001, max_vars=21, show=False)
# The colour list must be as long as the number of bars. The last two explainers are
# expected to be XGB and RF (appended in that order); all earlier ones are logistic
# regressions, so their count is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * (len(model_parts) - 2)
new_colors.append(xgb_color)
new_colors.append(rf_color)
plot.data[0]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-all-models-utilization-ratio.png', plot)

In [None]:
# Importance of "Total_Trans_Amt" for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=["Total_Trans_Amt"])
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.001, max_vars=21, show=False)
# The colour list must be as long as the number of bars. The last two explainers are
# expected to be XGB and RF (appended in that order); all earlier ones are logistic
# regressions, so their count is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * (len(model_parts) - 2)
new_colors.append(xgb_color)
new_colors.append(rf_color)
plot.data[0]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-all-models-total-trans-amt.png', plot)

In [None]:
# Importance of "Total_Revolving_Bal" for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=["Total_Revolving_Bal"])
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.001, max_vars=21, show=False)
# The colour list must be as long as the number of bars. The last two explainers are
# expected to be XGB and RF (appended in that order); all earlier ones are logistic
# regressions, so their count is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * (len(model_parts) - 2)
new_colors.append(xgb_color)
new_colors.append(rf_color)
plot.data[0]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-all-models-total-revolving-bal.png', plot)

In [None]:
# Importance of "Gender" for every explainer currently in `explainers`.
model_parts = [
    explainer.model_parts(random_state=123, variables=["Gender"])
    for explainer in explainers
]

plot = model_parts[0].plot(model_parts[1:], split='variable', vertical_spacing=0.001, max_vars=21, show=False)
# The colour list must be as long as the number of bars. The last two explainers are
# expected to be XGB and RF (appended in that order); all earlier ones are logistic
# regressions, so their count is derived from `model_parts` rather than hard-coded as 16.
new_colors = [log_reg_color] * (len(model_parts) - 2)
new_colors.append(xgb_color)
new_colors.append(rf_color)
plot.data[0]['marker']['color'] = tuple(new_colors)
save_plot('gen_images/1-1-pfi-all-models-gender.png', plot)

# PDP i ALE

In [None]:
# Model profiles with dalex's default profile type (stored as pdp_*), computed in the
# order XGB, RF, logistic regression with the same seed for each.
pdp_xgb, pdp_rf, pdp_reg = [
    model_explainer.model_profile(random_state=14)
    for model_explainer in (explainer_xgb, explainer_rf, explainer_reg)
]

In [None]:
# Overlay the XGB, RF and logistic-regression profiles for five selected variables.
plot = pdp_xgb.plot(
    [pdp_rf, pdp_reg],
    variables=[
        'Total_Trans_Amt',
        'Total_Revolving_Bal',
        'Contacts_Count_12_mon',
        'Total_Ct_Chng_Q4_Q1',
        'Gender',
    ],
    show=False,
)
# Traces are recoloured in consecutive groups of five: 0-4, 5-9 and 10-14.
for first_trace, model_color in zip((0, 5, 10), (xgb_color, rf_color, log_reg_color)):
    for i in range(first_trace, first_trace + 5):
        plot.data[i].line.color = model_color
save_plot('gen_images/1-1-pdp-chosen-vars.png', plot)

In [None]:
# Model profiles of type 'accumulated' (stored as ale_*), computed in the order
# XGB, RF, logistic regression with the same seed for each.
ale_xgb, ale_rf, ale_reg = [
    model_explainer.model_profile(type='accumulated', random_state=14)
    for model_explainer in (explainer_xgb, explainer_rf, explainer_reg)
]

In [None]:
# Overlay the XGB, RF and logistic-regression accumulated profiles for five selected variables.
plot = ale_xgb.plot(
    [ale_rf, ale_reg],
    variables=[
        'Total_Trans_Amt',
        'Total_Revolving_Bal',
        'Contacts_Count_12_mon',
        'Total_Ct_Chng_Q4_Q1',
        'Gender',
    ],
    show=False,
)
# Traces are recoloured in consecutive groups of five: 0-4, 5-9 and 10-14.
for first_trace, model_color in zip((0, 5, 10), (xgb_color, rf_color, log_reg_color)):
    for i in range(first_trace, first_trace + 5):
        plot.data[i].line.color = model_color
save_plot('gen_images/1-1-ale-chosen-vars.png', plot)

In [None]:
# Relabel both XGB profiles. NOTE: this overwrites the `_label_` column of the stored
# results in place, so the profile objects keep the new labels after this cell.
ale_xgb.result['_label_'] = "ALE_XGB"
pdp_xgb.result['_label_'] = "PDP_XGB"

# Lighter blue for the ALE profile, darker blue for the PDP profile.
ale_xgb_color = '#7aa8e6'
pdp_xgb_color = '#032554'

plot = ale_xgb.plot(
    pdp_xgb,
    variables=[
        'Total_Trans_Amt',
        'Total_Revolving_Bal',
        'Contacts_Count_12_mon',
        'Total_Ct_Chng_Q4_Q1',
        'Gender',
    ],
    show=False,
)

# Traces 0-4 get the ALE colour, traces 5-9 the PDP colour.
for first_trace, profile_color in zip((0, 5), (ale_xgb_color, pdp_xgb_color)):
    for i in range(first_trace, first_trace + 5):
        plot.data[i].line.color = profile_color
save_plot('gen_images/1-1-ale-pdp-xgb.png', plot)

In [None]:
# Relabel both RF profiles. NOTE: this overwrites the `_label_` column of the stored
# results in place, so the profile objects keep the new labels after this cell.
ale_rf.result['_label_'] = "ALE_RF"
pdp_rf.result['_label_'] = "PDP_RF"

# Paler green for the ALE profile, more saturated green for the PDP profile.
ale_rf_color = '#abd4c5'
pdp_rf_color = '#2cbf89'

plot = ale_rf.plot(
    pdp_rf,
    variables=[
        'Total_Trans_Amt',
        'Total_Revolving_Bal',
        'Contacts_Count_12_mon',
        'Total_Ct_Chng_Q4_Q1',
        'Gender',
    ],
    show=False,
)

# Traces 0-4 get the ALE colour, traces 5-9 the PDP colour.
for first_trace, profile_color in zip((0, 5), (ale_rf_color, pdp_rf_color)):
    for i in range(first_trace, first_trace + 5):
        plot.data[i].line.color = profile_color
save_plot('gen_images/1-1-ale-pdp-rf.png', plot)