In [1]:
import numpy as np
import pandas as pd
from expybox import ExpyBox

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

In [2]:
# Load the Titanic data set from a CSV file; the path is relative to the
# notebook's working directory.
# NOTE(review): every value shown in the preview below is an integer code, so
# the file looks already preprocessed/encoded — confirm how it was produced.
train_df = pd.read_csv('data/titanic.csv')

In [3]:
# Bare expression: the frame is rendered as this cell's output so the columns
# and their encoded values can be inspected.
train_df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title
0,0,3,0,22,1,0,7,1,0
1,1,1,1,38,1,0,71,0,1
2,1,3,1,26,0,0,7,1,2
3,1,1,1,35,1,0,53,1,1
4,0,3,0,35,0,0,8,1,0
...,...,...,...,...,...,...,...,...,...
886,0,2,0,27,0,0,13,1,5
887,1,1,1,19,0,0,30,1,2
888,0,3,1,21,1,2,23,1,2
889,1,1,0,26,0,0,30,0,0


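Prepare `class_names` (labels for the target classes) and `categorical_names` (labels for the integer-encoded categorical features); both are passed to ExpyBox below.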

In [4]:
# Human-readable labels for the two target classes.
# NOTE(review): presumably list position == value of `Survived`
# (0 -> 'Died', 1 -> 'Survived') — confirm.
class_names = ['Died', 'Survived']

# Human-readable labels for the integer-coded categorical features.
# NOTE(review): the keys look like positional column indices of the feature
# frame (with `Survived` dropped: 1 -> Sex, 6 -> Embarked, 7 -> Title) and each
# list position the encoded value — confirm against the ExpyBox documentation.
categorical_names = {
    1: ['male', 'female'],
    6: ['Cherbourg', 'Southampton', 'Queenstown'],
    7: [
        'Mr', 'Mrs', 'Miss', 'Master',
        'Don', 'Rev', 'Dr', 'Mme',
        'Ms', 'Major', 'Lady', 'Sir',
        'Mlle', 'Col', 'Capt', 'Countess',
        'Jonkheer',
    ],
}


### Train model
For example, a random forest classifier.

In [5]:
# Separate the target column from the feature columns. Neither statement
# modifies `train_df`: `drop` returns a new frame.
Y_train = train_df["Survived"]
X_train = train_df.drop(columns=["Survived"])

In [6]:
# Random forest with a fixed seed so the run is repeatable. `fit` returns the
# estimator itself, so construction and training are chained.
clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=4,
    random_state=42,
).fit(X_train, Y_train)

# Score on the same data the model was fitted on (not a held-out estimate);
# as the last expression, this value is the cell's output.
clf.score(X_train, Y_train)

0.8428731762065096

In [7]:
# 5-fold cross-validation of the classifier on the full training set.
scores = cross_val_score(estimator=clf, X=X_train, y=Y_train, cv=5)

# Report the mean fold score together with twice the standard deviation
# across folds as a rough spread.
display("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), 2 * scores.std()))

'Accuracy: 0.82 (+/- 0.03)'

## Initialize ExpyBox instance

In [8]:
# Create the ExpyBox instance used by all explainer cells below. It receives
# the feature frame, the model's probability function, this kernel's globals,
# and the label mappings for the classes and categorical features.
expybox = ExpyBox(
    train_data=X_train,
    predict_function=clf.predict_proba,
    kernel_globals=globals(),
    categorical_names=categorical_names,
    mode='classification',
    class_names=class_names,
)

## Partial Dependence Plots

In [9]:
# Open ExpyBox's partial dependence plot dialog. The saved output shows a
# 'Feature to plot:' selector and a 'Run Interact' button.
expybox.pdplot()

Tab(children=(GridspecLayout(children=(Combobox(value='Pclass', description='Feature to plot:', ensure_option=…

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

## LIME

In [10]:
# Open ExpyBox's LIME dialog. The saved output shows an 'Instance id:' input
# and a 'Run Interact' button.
expybox.lime()

Tab(children=(VBox(children=(Accordion(children=(BoundedIntText(value=0, description='Instance id:', descripti…

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

## Anchors

In [11]:
# Open ExpyBox's Anchors dialog. The saved output shows an 'Instance id:' input
# and a 'Run Interact' button.
expybox.anchors()

Tab(children=(VBox(children=(Accordion(children=(BoundedIntText(value=0, description='Instance id:', descripti…

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

## SHAP
For a random forest we can use TreeSHAP.

In [12]:
# Open ExpyBox's SHAP dialog. The saved output shows an 'Instance id:' input
# and a 'Run Interact' button.
expybox.shap()

Tab(children=(VBox(children=(Accordion(children=(BoundedIntText(value=0, description='Instance id:', descripti…

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

### SHAP feature importance

In [13]:
# Open ExpyBox's SHAP feature-importance dialog. The saved output shows a
# 'Class to plot:' dropdown and a 'Run Interact' button.
expybox.shap_feature_importance()

Tab(children=(GridspecLayout(children=(Dropdown(description='Class to plot:', description_tooltip='For classif…

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…