In [2]:
from initialise import *

%matplotlib inline

# Restore the modelling artefacts written to modelling_output.pickle.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
with open("modelling_output.pickle", mode = "rb") as pickle_file:
    modelling_output = pickle.load(pickle_file)

In [3]:
# Classification report (precision / recall / f1-score / support) for each model.
predictions = modelling_output["predictions"]

for model_name in ("logreg", "rf", "xgb"):

    # Select this model's rows once, then read the true labels and the
    # thresholded predictions from the same subset.
    model_rows = predictions.query(f"model == '{model_name}'")
    y_true = model_rows["actual"]
    y_pred = model_rows["predicted_binary"]

    print(f"Classification report for {model_name}\n{'-' * 60}")
    print(metrics.classification_report(y_true, y_pred))
    # Closing rule followed by blank lines to separate consecutive reports.
    print("=" * 60, end = "\n" * 4)

Classification report for logreg
------------------------------------------------------------
              precision    recall  f1-score   support

           0       0.61      0.41      0.49       190
           1       0.80      0.90      0.85       506

    accuracy                           0.77       696
   macro avg       0.71      0.65      0.67       696
weighted avg       0.75      0.77      0.75       696




Classification report for rf
------------------------------------------------------------
              precision    recall  f1-score   support

           0       0.66      0.40      0.50       190
           1       0.80      0.92      0.86       506

    accuracy                           0.78       696
   macro avg       0.73      0.66      0.68       696
weighted avg       0.76      0.78      0.76       696




Classification report for xgb
------------------------------------------------------------
              precision    recall  f1-score   support

          

In [None]:
# This is definitely overkill...

# classification_metrics = {}

# y_true = modelling_output["predictions"].query(f"model == 'rf'")["actual"]
# y_pred = modelling_output["predictions"].query(f"model == 'rf'")["predicted_binary"]


##### [Explainer Dashboard](https://explainerdashboard.readthedocs.io/en/latest/)

In [4]:
# Refit the random-forest model on the training split, then wrap it in an
# explainerdashboard ClassifierExplainer evaluated on the held-out test split.
rf_fitted = modelling_output["rf_model"].fit(
    modelling_output["X_train"],
    modelling_output["y_train"],
)
explainer = explainerdashboard.ClassifierExplainer(
    rf_fitted,
    modelling_output["X_test"],
    modelling_output["y_test"],
)

splitting pipeline...
Detected sklearn/imblearn Pipeline and succesfully extracted final output dataframe with column names and final model...
Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)


In [None]:
# Build the confusion-matrix, feature-importance and ROC-AUC plots from the
# explainer (in that order) and keep each result in its own variable.
cf, imp, roc_auc = (
    explainer.plot_confusion_matrix(),
    explainer.plot_importances(),
    explainer.plot_roc_auc(),
)

In [11]:
# Bundle the three explainer plots under descriptive keys ...
explanations_output = dict(
    confusion_matrix = cf,
    feature_importance = imp,
    roc_auc = roc_auc,
)

# ... and persist the bundle to explanations_output.pickle using the highest
# pickle protocol available.
with open("explanations_output.pickle", mode = "wb") as pickle_file:
    pickle.dump(explanations_output, pickle_file, protocol = pickle.HIGHEST_PROTOCOL)

##### [Shapley Values](https://shap.readthedocs.io/en/latest/index.html)

In [None]:
# Refit the XGBoost model on the training split and explain its bound
# `predict` method (not the model object itself; see the linked issue),
# using the training features as the background data.
# NOTE(review): this rebinds `explainer`, shadowing the ClassifierExplainer
# created earlier in the notebook; re-running the earlier plot cells after
# this one will hit the shap explainer instead.
xgb_fitted = modelling_output["xgb_model"].fit(
    modelling_output["X_train"],
    modelling_output["y_train"],
)
explainer = shap.Explainer(
    xgb_fitted.predict,  # https://github.com/shap/shap/issues/2399
    modelling_output["X_train"],
)

# SHAP values for every row of the training features.
shap_values = explainer(modelling_output["X_train"])

In [None]:
# Bar plot summarising the SHAP values per feature over all explained rows
# (per the shap docs the default aggregation is the mean absolute value).
shap.plots.bar(shap_values)

In [None]:
# Same bar plot, but each feature is scored by its largest absolute SHAP value:
# `.abs` takes absolute values and `.max(0)` reduces over axis 0 (the rows).
shap.plots.bar(shap_values.abs.max(0))

In [None]:
# Waterfall plot for a single explanation: the first explained row (index 0).
shap.plots.waterfall(shap_values[0])

In [None]:
# Initialise shap's JavaScript support in the notebook before drawing the
# force plot (see the linked issue), then show the force plot for the first
# explained row (index 0).
shap.initjs() # https://github.com/shap/shap/issues/279
shap.plots.force(shap_values[0])

In [None]:
# Hyper-parameters keyed with the "xgb__" pipeline-step prefix.
best_params_ = {
    'xgb__subsample': 0.6,
    'xgb__scale_pos_weight': 2,
    'xgb__reg_lambda': 100,
    'xgb__reg_alpha': 0.1,
    'xgb__n_estimators': 100,
    'xgb__min_child_weight': 10,
    'xgb__max_depth': 3,
    'xgb__learning_rate': 0.1,
    'xgb__gamma': 0.3,
    'xgb__colsample_bytree': 0.8,
}

# Strip the step prefix so the values can be passed straight to XGBClassifier.
tuned_xgb_kwargs = {
    param_name.replace("xgb__", ""): param_value
    for param_name, param_value in best_params_.items()
}

# Scaler followed by the tuned classifier.
xgb_tuned_pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("xgb_tuned", XGBClassifier(**tuned_xgb_kwargs)),
])

In [None]:
# Force plot over the first 500 explained rows (`[:500,]` slices the first axis;
# the trailing comma only makes the index a one-element tuple).
# NOTE(review): presumably relies on shap.initjs() having been run in an
# earlier cell; confirm on a fresh kernel.
shap.force_plot(shap_values[:500,])

In [None]:
# Beeswarm plot of the SHAP values for all explained rows.
shap.plots.beeswarm(shap_values)

In [None]:
# Beeswarm plot of the absolute SHAP values, drawn with the "shap_red" colour.
shap.plots.beeswarm(shap_values.abs, color = "shap_red")

In [None]:
# Heatmap of the SHAP values, restricted to the first 1000 explained rows.
shap.plots.heatmap(shap_values[:1000])

In [None]:
# Scatter plot of the SHAP values for the "age" feature across all rows.
shap.plots.scatter(shap_values[:, "age"])

In [None]:
# Scatter plot for "age" again, passing the full set of SHAP values as `color`
# (per the shap docs this lets shap choose which feature to colour by; confirm
# for the installed version).
shap.plots.scatter(shap_values[:, "age"], color = shap_values)

In [None]:
# Scatter plot for "age", coloured by the "mask wearing" feature's explanation.
shap.plots.scatter(shap_values[:, "age"], color = shap_values[:, "mask wearing"])