In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, classification_report, accuracy_score, f1_score, precision_score, recall_score
from xgboost import XGBClassifier
import numpy as np
import pandas as pd

from explainers import EBM, MimicExpl, LIME
from explainer_utilities import create_metrics_dataframe

from lime import lime_tabular
import sys

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [2]:
# Fetch the scikit-learn Breast Cancer bunch (feature matrix, labels, feature names)
data = load_breast_cancer()

# Features as a labelled DataFrame, one column per feature name
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Labels as a Series called 'target'
y = pd.Series(data=data.target, name='target')


# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [3]:
# Initialize and train the XGBoost classifier (the black-box model that the
# explainers below are built around).
# The target has two classes (0/1), so the evaluation metric is the binary
# 'logloss'; 'mlogloss' is the multiclass variant and does not match this task.
# The seed is pinned explicitly, mirroring the seeded train/test split, so the
# fit stays reproducible even if sampling options are enabled later.
model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,
)
model.fit(X_train, y_train)

In [16]:
# Probability of class 1 for every test row (second column of predict_proba).
model_pred_proba = model.predict_proba(X_test)[:, 1]

predict_function = model.predict

# Predict once and reuse the result instead of re-running inference for the
# type check and again for each branch of the conditional.
raw_pred_class = predict_function(X_test)

# Normalise to a plain NumPy array: unwrap pandas containers (including
# subclasses), pass anything else through untouched.
model_pred_class = (
    raw_pred_class.values
    if isinstance(raw_pred_class, (pd.DataFrame, pd.Series))
    else raw_pred_class
)

model_pred_class

array([1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1])

In [17]:
# Per-class precision / recall / F1 of the model's hard predictions versus the true test labels
print(
    classification_report(y_true=y_test, y_pred=model_pred_class)
)

              precision    recall  f1-score   support

           0       0.95      0.97      0.96        63
           1       0.98      0.97      0.98       108

    accuracy                           0.97       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.97      0.97      0.97       171



## LIME

In [7]:
# Requires the `lime` package. To install it into this kernel's environment, run: %pip install lime

In [18]:
# Build the project's LIME wrapper from the trained model and the training data.
# NOTE(review): LIME.LIME is a project-local class whose internals are not visible
# here — presumably it approximates `model` with LIME explanations; confirm in explainers.
lime = LIME.LIME(model, X_train, y_train, mode='classification')
# Probability-style estimates for the test rows. Unlike the EBM cell, the result is
# not sliced with [:, 1] — presumably this wrapper already returns one value per row;
# TODO confirm the returned shape.
lime_pred_estimation = lime.predict_proba(X_test)
# Hard class predictions from the LIME wrapper for the same test rows.
y_pred_class_lime = lime.predict(X_test)

## Explainable Boosting Machine

In [12]:
# Build the project's EBM wrapper from the trained model and the training data.
# NOTE(review): EBM.EBM is a project-local class whose internals are not visible
# here — presumably an Explainable Boosting Machine surrogate; confirm in explainers.
ebm = EBM.EBM(model, X_train, y_train, mode='classification')
# Keep only the second column of predict_proba (index 1), i.e. one value per test row.
ebm_pred_estimation = ebm.predict_proba(X_test)[:, 1]
# Hard class predictions from the EBM wrapper for the same test rows.
y_pred_class_ebm = ebm.predict(X_test)

## Mimic Explainer

In [13]:
# Build the project's Mimic explainer wrapper from the trained model and the training data.
# NOTE(review): MimicExpl.MimicExpl is a project-local class whose internals are not
# visible here — presumably a global surrogate trained to imitate `model`; confirm in explainers.
mimic = MimicExpl.MimicExpl(model, X_train, y_train, mode='classification')
# Hard class predictions from the mimic wrapper for the test rows.
y_pred_class_mimic = mimic.predict(X_test)
# Probability-style estimates for the same rows. Unlike the EBM cell, the result is
# not sliced with [:, 1] — presumably this wrapper already returns one value per row;
# TODO confirm the returned shape.
mimic_pred_estimation = mimic.predict_proba(X_test)

## Results

In [19]:
# One entry per explainer: (label, probability estimates, predicted classes)
surrogate_outputs = [
    ('lime', lime_pred_estimation, y_pred_class_lime),
    ('ebm', ebm_pred_estimation, y_pred_class_ebm),
    ('mimic', mimic_pred_estimation, y_pred_class_mimic),
]

# Unzip the table into the three parallel lists that create_metrics_dataframe takes
estimator_names, pred_estimations, y_pred_classes = (
    list(column) for column in zip(*surrogate_outputs)
)

# The model's own probabilities and classes are passed as the reference values
# (not y_test), so the explainers are compared against the model itself.
results = create_metrics_dataframe(
    estimator_names,
    pred_estimations,
    y_pred_classes,
    model_pred_proba,
    model_pred_class,
)
results

Unnamed: 0,lime,ebm,mimic
MSE,0.012911,0.009377,0.006083
ACCURACY,0.964912,0.97076,0.976608
F1_SCORE,0.972477,0.976744,0.981308
PRECISION,0.990654,0.981308,0.981308
RECALL,0.954955,0.972222,0.981308
