In [1]:
import fs
import pandas as pd
import numpy as np
from joblib import load
from IPython.core.display import display, HTML
import shap

  from IPython.core.display import display, HTML


In [2]:
# --- Filesystem handles (opened from paths relative to the working directory) ---
PROCESSED_PROJECT_DIR = fs.open_fs("../../data/processed/project")
MODELS_PROJECT_DIR = fs.open_fs("../../models/project")
REPORTS_FIGURE_DIR = fs.open_fs("../../reports/figures/")

# --- OS-level paths resolved from those handles ---
# NOTE: despite the *_DIR suffix, each of these names a single file, not a directory.
VAL_DIR = PROCESSED_PROJECT_DIR.getsyspath("validation.csv")
PIPELINE_MODEL_DIR = MODELS_PROJECT_DIR.getsyspath("pipeline_model_tuning.joblib")
FIG_DIR = REPORTS_FIGURE_DIR.getsyspath("shap_force_plot.html")

In [3]:
# Read the validation split from disk, then preview its first five rows as the cell output.
val_df = pd.read_csv(filepath_or_buffer=VAL_DIR)
val_df.head(n=5)

Unnamed: 0,BAD,LOAN,MORTDUE,VALUE,REASON,JOB,YOJ,DEROG,DELINQ,CLAGE,NINQ,CLNO,DEBTINC
0,0,21300,101524.0,141859.0,HomeImp,ProfExe,10.0,0.0,0.0,162.322119,0.0,23.0,31.987636
1,0,5600,29648.0,39907.0,HomeImp,ProfExe,11.0,0.0,0.0,180.563069,0.0,13.0,21.55495
2,0,11400,84278.0,200480.0,HomeImp,Self,14.0,0.0,0.0,4.41277,1.0,8.0,19.112485
3,0,27900,69589.0,115061.0,DebtCon,Other,5.0,0.0,0.0,97.863845,1.0,32.0,41.473972
4,0,29800,134560.0,187551.0,DebtCon,ProfExe,1.0,0.0,0.0,225.092299,1.0,19.0,39.211937


In [4]:
# Separate the 'BAD' target column from the remaining feature columns.
y_val = val_df['BAD']
X_val = val_df.drop(labels='BAD', axis='columns')

In [5]:
# Load the serialized pipeline from disk (used below without refitting, so presumably already fitted).
# NOTE(review): joblib.load unpickles the file — only point this at trusted artifacts.
model = load(PIPELINE_MODEL_DIR)

In [6]:
# Pull the 'preprocessor' step out of the pipeline so it can be applied on its own.
preprocessing_pipeline = model.named_steps['preprocessor']

In [7]:
# Apply the preprocessing step (transform only, no fit) to the raw validation features.
X_val_processed = preprocessing_pipeline.transform(X_val)

In [8]:
# Pull the 'model' step out of the pipeline; the variable name suggests an XGBoost estimator — TODO confirm.
xgb_model = model.named_steps['model']

In [9]:
# Build a SHAP tree explainer around the estimator taken from the pipeline.
explainer = shap.TreeExplainer(xgb_model)
# SHAP values for the preprocessed validation data; later cells index this per row
# (shap_values[j]), so it is presumably shaped (n_rows, n_features) — TODO confirm.
shap_values = explainer.shap_values(X_val_processed)

In [10]:
# Explain a single validation row with a SHAP force plot and save it as an HTML file.
j = 444  # positional index of the validation row to explain
shap.initjs()
# The attributions were computed on the preprocessed data; the raw row is passed
# as the feature argument so the plot is labelled with the original values.
force_plot = shap.force_plot(explainer.expected_value, shap_values[j], X_val.iloc[[j]])

# Write to FIG_DIR as-is: the target file name has no format placeholders, so the
# previous `.format(j)` call did nothing useful (and would misbehave on a path that
# happened to contain braces). This is also the exact path the following cell reads.
shap.save_html(FIG_DIR, force_plot)


In [11]:
# Read the saved force-plot HTML back from disk and embed it in the cell output.
# Decode explicitly as UTF-8 rather than the platform's locale default, so the cell
# behaves the same on every OS (shap.save_html is understood to write UTF-8 —
# confirm against the installed shap version).
with open(FIG_DIR, 'r', encoding='utf-8') as f:
    html_content = f.read()

display(HTML(html_content))

In [12]:
# Cross-check the pipeline's prediction for row j against the probability implied by SHAP.
# Select the row with a list index so it stays a one-row DataFrame with its original
# column dtypes; rebuilding it from the row Series would turn every column into
# object dtype because the row mixes strings and numbers.
prediction = model.predict(X_val.iloc[[j]])

# Base value plus the row's summed contributions gives the model's raw output for this row.
# NOTE(review): treated as log-odds below — confirm the explainer's output is in margin space.
shap_log_odds = explainer.expected_value + shap_values[j].sum()

# Logistic (sigmoid) transform from log-odds to probability.
probability = 1 / (1 + np.exp(-shap_log_odds))

print(f"Predicted Value: {prediction[0]}")
print(f"Predicted Probability for Loan Default: {probability: .3f}")

Predicted Value: 0
Predicted Probability for Loan Default:  0.249


In [13]:
# Side-by-side view for row j: raw feature values on top, SHAP contributions beneath.
# Lining the SHAP values up under the raw column names assumes preprocessing keeps
# the column count and order — TODO confirm.
pd.DataFrame(
    data=[X_val.iloc[j].to_numpy(), shap_values[j]],
    index=['values', 'shap_values'],
    columns=X_val.columns,
)

Unnamed: 0,LOAN,MORTDUE,VALUE,REASON,JOB,YOJ,DEROG,DELINQ,CLAGE,NINQ,CLNO,DEBTINC
values,10800.0,11950.0,46780.0,HomeImp,Office,24.0,0.0,0.0,268.9,0.0,20.0,36.158718
shap_values,0.114452,0.11725,0.241528,-0.042627,-0.044111,-0.344267,-0.516866,-0.038045,0.020859,0.999818,-0.103286,0.033779


In [14]:
# Stacked force plot across the validation rows.
shap.initjs()
shap.plots.force(
    explainer.expected_value,
    # Use every row: the previous `[:-1]` slice silently dropped the last validation sample.
    shap_values,
    # Column labels are names, not feature values, so pass them as `feature_names`
    # instead of relying on SHAP to reinterpret a 1-D `features` argument.
    feature_names=list(X_val.columns),
)