In [1]:
import warnings
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from interpret.blackbox import LimeTabular
from interpret import show


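# `%run utils.ipynb` executes that notebook and loads the names it defines into
# this kernel -- the notebook equivalent of importing helpers from a .py module.
# NOTE(review): DataLoader (used below) is presumably defined there -- confirm.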
%run utils.ipynb

In [2]:
# %% Load and preprocess data
# DataLoader is not imported in this notebook's import cell; it has to be in
# scope already (see the `%run utils.ipynb` line) before this cell runs.
data_loader = DataLoader()
data_loader.load_dataset()
data_loader.preprocess_data()

# Hold-out split used for evaluation.
X_train, X_test, y_train, y_test = data_loader.get_data_split()

# Only the training split is passed through oversample(); the test split is
# kept exactly as returned by get_data_split().
X_train, y_train = data_loader.oversample(X_train, y_train)

# Report the shape of the (oversampled) train features, then the test features.
for features in (X_train, X_test):
    print(features.shape)

(7778, 21)
(1022, 21)


In [7]:

# Class distribution of each split. value_counts() returns one row per label
# with the number of samples carrying it, so what is printed is the per-class
# count (useful for checking class balance), not a single number of unique labels.
print("Number of unique labels in y_train:", pd.Series(y_train).value_counts())
# Label text fixed: the duplicated word ("Unique unique") is removed.
print("Unique labels in y_test:", pd.Series(y_test).value_counts())


Number of unique labels in y_train: stroke
0    3889
1    3889
Name: count, dtype: int64
Unique labels in y_test: stroke
0    972
1     50
Name: count, dtype: int64


In [8]:
# Random forest used as the black-box model; its predict_proba is what the
# LIME explainer later in this notebook is built on.
# random_state is pinned because forest training is randomized: without a seed
# every Restart & Run All yields a different model and different scores.
rf = RandomForestClassifier(random_state=1)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Metrics on the untouched test split.
# NOTE: F1 is macro-averaged (each class counts equally) while precision and
# recall are support-weighted. Support-weighted recall is mathematically equal
# to accuracy, so the "Recall" line repeats the "Accuracy" value and says
# little about the minority class on an imbalanced test set.
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")
print(f"Precision {precision_score(y_test, y_pred, average='weighted')}")
print(f"Recall {recall_score(y_test, y_pred, average='weighted')}")


F1 Score 0.5200444008026299
Accuracy 0.9461839530332681
Prescision 0.9168824417171251
Recall 0.9461839530332681


In [None]:
## LIME Explainer

In [11]:

# Initialize LIME for tabular data: the explainer wraps the forest's
# predict_proba and receives the training split as its `data` argument.
# The seed is fixed via random_state.
lime = LimeTabular(model=rf.predict_proba, data=X_train, random_state=1)

# Local explanations for the last 20 rows of the test split, passed together
# with their matching labels.
X_explain = X_test[-20:]
y_explain = y_test[-20:]
lime_local = lime.explain_local(X_explain, y_explain, name='LIME')

# Render the explanation in the notebook.
show(lime_local)

# %%