In [1]:
import pandas as pd 
pd.set_option('display.max_columns', None)
from sklearn.model_selection import train_test_split
from utils import DataLoader

from interpret.glassbox import LogisticRegression, ClassificationTree, ExplainableBoostingClassifier
from interpret import show
from sklearn.metrics import f1_score, accuracy_score

## Load Dataset

In [2]:
# Seed reused by the seeded models fitted later in the notebook.
SEED = 2021
# Relative path to the stroke dataset CSV.
STROKE_PATH = "data/stroke.csv"

# Build the loader and run its preprocessing step before asking for the split.
data_loader = DataLoader(STROKE_PATH)
data_loader.preprocess_data()

# NOTE(review): SEED is not passed to the loader here — confirm that
# utils.DataLoader seeds its train/test split internally.
(X_train, X_test, y_train, y_test) = data_loader.get_data_split()

## LoR: Logistic Regression

In [4]:

# Train an L1-penalised logistic regression (liblinear solver), seeded with SEED
lr = LogisticRegression(
    feature_names=X_train.columns,
    penalty='l1',
    solver='liblinear',
    random_state=SEED,
)
lr.fit(X_train, y_train)
print("Training finished.")

# Score on the test split; macro averaging gives every class equal weight
y_pred = lr.predict(X_test)
lr_f1 = f1_score(y_test, y_pred, average='macro')
print(f"F1 Score {lr_f1}")
lr_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy {lr_accuracy}")

# Local explanations for the first 100 test samples
lr_local = lr.explain_local(X_test[:100], y_test[:100], name='Logistic Regression')
show(lr_local)

# Global explanation of the fitted logistic regression model
lr_global = lr.explain_global(name='Logistic Regression')
show(lr_global)

Training finished.
F1 Score 0.5048585401526579
Accuracy 0.9481409001956947


## Classification Tree

In [5]:
# %% Fit decision tree model
# random_state is forwarded to the underlying scikit-learn decision tree so that
# repeated runs build the same tree, consistent with the seeded LR and EBM models.
tree = ClassificationTree(random_state=SEED)
tree.fit(X_train, y_train)
print("Training finished.")

# Score on the test split; macro averaging gives every class equal weight
y_pred = tree.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

# %% Explain local prediction
# Local explanations for the first 100 test samples
tree_local = tree.explain_local(X_test[:100], y_test[:100], name='Tree')
show(tree_local)

# %% Explain globally
# Kept under a tree-specific name (matching tree_local) so it is not mistaken
# for, or stored in the same variable as, the EBM explanation.
tree_global = tree.explain_global(name='Classification Tree')
show(tree_global)

Training finished.
F1 Score 0.5073148175476915
Accuracy 0.952054794520548


## EBM: Explainable Boosting Machine

In [6]:
# %% Fit Explainable Boosting Machine
# Seeded with SEED, like the logistic regression model
ebm = ExplainableBoostingClassifier(random_state=SEED)
ebm.fit(X_train, y_train)
print("Training finished.")

# Score on the test split; macro averaging gives every class equal weight
y_pred = ebm.predict(X_test)
ebm_f1 = f1_score(y_test, y_pred, average='macro')
print(f"F1 Score {ebm_f1}")
ebm_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy {ebm_accuracy}")

# %% Explain locally
# Local explanations for the first 100 test samples
ebm_local = ebm.explain_local(X_test[:100], y_test[:100], name='EBM')
show(ebm_local)

# %% Explain globally
# Global explanation of the fitted EBM
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)

Training finished.
F1 Score 0.5386324964406015
Accuracy 0.949119373776908
