In [1]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, classification_report, accuracy_score, f1_score, precision_score, recall_score
from xgboost import XGBClassifier
import numpy as np
import pandas as pd

from explainers import EBM, MimicExpl, LIME
from explainer_utilities import create_metrics_dataframe
from ExplainerFactory import ExplainerFactory


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [2]:
# Load the Breast Cancer dataset
data = load_wine()

# Create a DataFrame with feature names
X = pd.DataFrame(data.data, columns=data.feature_names)

# Create a Series for the target variable
y = pd.Series(data.target, name='target')


# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [3]:
# Initialize and train the XGBoost Classifier
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train)

In [4]:
model_pred_proba = model.predict_proba(X_test)[:,1]

predict_function = model.predict
model_pred_class = predict_function(X_test).values \
    if type(predict_function(X_test)) in [pd.DataFrame, pd.Series] \
    else predict_function(X_test)

model_pred_class

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 2, 2, 0, 2, 0, 1, 1, 1,
       2, 0, 1, 1, 2, 0, 1, 0, 0, 2], dtype=int64)

In [5]:
print(classification_report(y_test, model_pred_class))

              precision    recall  f1-score   support

           0       0.95      1.00      0.97        19
           1       0.95      0.95      0.95        21
           2       1.00      0.93      0.96        14

    accuracy                           0.96        54
   macro avg       0.97      0.96      0.96        54
weighted avg       0.96      0.96      0.96        54



In [6]:
expl_fctry = ExplainerFactory(model, X_train, X_test, y_train, y_test)

## LIME

In [7]:
#!pip install lime

In [6]:
lime = LIME.LIME(model, X_train, y_train, mode='classification')
lime_pred_proba = lime.predict_proba(X_test)
lime_pred_class = lime.predict(X_test)

[2, 1, 0]


[0.87512857]
[0.9956     0.00186535 0.00253461]
[0.81585521]
[0.99557555 0.00187574 0.00254872]
[0.02661226]
[0.01521357 0.01966006 0.96512634]
[0.81243637]
[0.99402976 0.00287319 0.00309711]
[0.13258863]
[0.008872   0.98093975 0.01018826]
[0.8298006]
[0.9946203  0.00228068 0.00309895]
[-0.12624534]
[8.8241749e-04 9.9810421e-01 1.0133333e-03]
[0.28496536]
[0.00672182 0.00168492 0.99159324]
[0.29088542]
[0.00321803 0.9954353  0.00134668]
[0.30098329]
[0.0733278  0.01228685 0.9143854 ]
[0.50998323]
[0.563806   0.43231773 0.00387629]
[0.22058511]
[0.04981271 0.7612247  0.18896261]
[0.65008642]
[0.99453604 0.00274246 0.00272154]
[-0.07941815]
[0.00186526 0.9694276  0.0287072 ]
[0.86462968]
[0.99552417 0.002154   0.00232187]
[-0.06714876]
[0.00122445 0.99776256 0.00101299]
[-0.27982372]
[0.00120577 0.99774927 0.00104492]
[-0.19797307]
[9.1019092e-04 9.9804461e-01 1.0452272e-03]
[0.91270012]
[0.9954281  0.00225028 0.00232164]
[-0.09414906]
[0.00126298 0.9976922  0.00104486]
[0.87428774]
[0.9

In [None]:
lime_pred_proba

array([0.21067498, 1.        , 1.        , 0.        , 0.        ,
       1.        , 1.        , 0.77561677, 0.40924218, 0.        ,
       0.03854717, 1.        , 0.02902513, 0.83607084, 0.        ,
       1.        , 0.        , 0.        , 0.        , 1.        ,
       0.13588466, 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.51938812, 0.        , 1.        , 0.05395775,
       0.        , 0.7612285 , 0.        , 0.        , 0.06668949,
       0.06796503, 0.        , 0.01346122, 0.24697287, 0.        ,
       1.        , 1.        , 0.37317492, 0.0536356 , 0.        ,
       0.        , 0.        , 1.        , 0.83924354, 0.        ,
       0.        , 1.        , 1.        , 0.10935959, 0.        ,
       0.20720475, 1.        , 1.        , 0.        , 0.        ,
       1.        , 1.        , 0.        , 0.93948208, 0.     

## Explainable Boosting Machine

In [None]:
ebm = EBM.EBM(model, X_train, y_train, mode='classification')
ebm_pred_proba = ebm.predict_proba(X_test)
ebm_pred_class = ebm.predict(X_test)

In [None]:
ebm_pred_proba

array([[1.60266668e-02, 9.83973333e-01],
       [9.99616850e-01, 3.83149669e-04],
       [9.95264357e-01, 4.73564289e-03],
       [5.66185440e-03, 9.94338146e-01],
       [3.63842908e-04, 9.99636157e-01],
       [9.99954382e-01, 4.56180378e-05],
       [9.99635464e-01, 3.64536187e-04],
       [9.73877603e-01, 2.61223970e-02],
       [8.31393855e-01, 1.68606145e-01],
       [1.36419627e-03, 9.98635804e-01],
       [2.33251030e-02, 9.76674897e-01],
       [9.97224691e-01, 2.77530899e-03],
       [7.34262701e-03, 9.92657373e-01],
       [9.79071454e-01, 2.09285465e-02],
       [1.67147207e-04, 9.99832853e-01],
       [9.99059224e-01, 9.40776413e-04],
       [5.33933473e-03, 9.94660665e-01],
       [4.12458150e-04, 9.99587542e-01],
       [3.43290969e-04, 9.99656709e-01],
       [9.99373134e-01, 6.26865554e-04],
       [4.05998937e-02, 9.59400106e-01],
       [2.22904667e-03, 9.97770953e-01],
       [9.99970765e-01, 2.92350166e-05],
       [6.80670999e-04, 9.99319329e-01],
       [1.968642

## Mimic Explainer

In [None]:
mimic = MimicExpl.MimicExpl(model, X_train, y_train, mode='classification')
mimic_pred_proba = mimic.predict_proba(X_test)
mimic_pred_class = mimic.predict(X_test)

## Results

In [None]:
estimator_names = ['lime', 'ebm', 'mimic']
pred_probabilities = [lime_pred_proba, ebm_pred_proba, mimic_pred_proba]
pred_classes = [lime_pred_class, ebm_pred_class, mimic_pred_class]

results = create_metrics_dataframe(estimator_names, pred_probabilities, pred_classes, model_pred_proba, model_pred_class)
results

ValueError: y_true and y_pred have different number of output (2!=1)

In [None]:
import numpy as np
import lime
import lime.lime_tabular
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load data and train a model
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Instantiate LIME explainer
explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=data.feature_names, class_names=data.target_names, discretize_continuous=True)

# Choose an instance to explain
instance = X_test[0].reshape(1, -1)
model_probabilities = model.predict_proba(instance)

# Generate explanations
explanation = explainer.explain_instance(instance[0], model.predict_proba, num_features=len(data.feature_names))

# Extract local model probabilities (for binary classification, need adjustments for multiclass)
local_model = explanation.local_exp[1]
local_features = [feat[0] for feat in local_model]
local_weights = [feat[1] for feat in local_model]

# Estimate local probabilities
local_prob_estimate = sum(weight * instance[0][feature] for feature, weight in zip(local_features, local_weights))
local_prob_estimate = 1 / (1 + np.exp(-local_prob_estimate))  # Logistic function for binary classification

# Compare with model probabilities
print("Model probabilities:", model_probabilities)
print("LIME estimated local probabilities:", local_prob_estimate)


In [None]:
import numpy as np
import lime
import lime.lime_tabular
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load data and train a model
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Instantiate LIME explainer
explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=data.feature_names, class_names=data.target_names, discretize_continuous=True)

# Choose an instance to explain
instance = X_test[10].reshape(1, -1)
model_probabilities = model.predict_proba(instance)

# Generate explanations
explanation = explainer.explain_instance(instance[0], model.predict_proba, num_features=len(data.feature_names))

# Extract local model probabilities for each class
local_prob_estimates = []
for class_idx in range(len(data.target_names)):
    print(explanation.local_exp)
    local_model = explanation.local_exp[class_idx]
    local_features = [feat[0] for feat in local_model]
    local_weights = [feat[1] for feat in local_model]

    # Calculate the weighted sum for this class
    weighted_sum = sum(weight * instance[0][feature] for feature, weight in zip(local_features, local_weights))
    local_prob_estimate = 1 / (1 + np.exp(-weighted_sum))  # Logistic function for binary classification
    local_prob_estimates.append(local_prob_estimate)

# Normalize probabilities to sum to 1 (Softmax function)
local_prob_estimates = np.exp(local_prob_estimates)
local_prob_estimates = local_prob_estimates / np.sum(local_prob_estimates)

# Compare with model probabilities
print("Model probabilities:", model_probabilities)
print("LIME estimated local probabilities:", local_prob_estimates)
