In [2]:
# Enable IPython's autoreload extension; mode 2 picks up edits to imported
# modules live, so changes to local source are visible without a kernel restart.
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from sklearn.metrics import roc_auc_score, r2_score, accuracy_score, f1_score, precision_score, recall_score, log_loss

In [4]:
from explainerdashboard.explainers import *
from explainerdashboard.dashboards import *
from explainerdashboard.datasets import *

In [5]:
# Human-readable description for each feature column; handed to the explainers
# through their `descriptions=` argument.
feature_descriptions = dict(
    Sex="Gender of passenger",
    Deck="The deck the passenger had their cabin on",
    PassengerClass="The class of the ticket: 1st, 2nd or 3rd class",
    Fare="The amount of money people paid",
    No_of_relatives_on_board="number of siblings, spouses, parents plus children on board",
    Embarked="the port where the passenger boarded the Titanic. Either Southampton, Cherbourg or Queenstown",
    Age="Age of the passenger",
    No_of_siblings_plus_spouses_on_board="The sum of the number of siblings plus the number of spouses on board",
    No_of_parents_plus_children_on_board="The sum of the number of parents plus the number of children on board",
)

# ClassifierExplainer example

## Load classifier data:
    - predicting the probability that a passenger on the Titanic survived

In [28]:
# Survival dataset: train/test features and targets, in the order returned
# by titanic_survive().
(X_train, y_train, X_test, y_test) = titanic_survive()

# Passenger names for the train and test rows; used below as dashboard indexes.
(train_names, test_names) = titanic_names()

## RandomForestClassifier

In [29]:
# Fit a small random forest. random_state is pinned so that Restart & Run All
# reproduces the same trees (and therefore the same dashboard contents).
model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Wrap the fitted forest in the random-forest specific explainer so the
# decision_trees tab of the dashboard is available.
explainer = RandomForestClassifierExplainer(
    model, X_test, y_test,
    metric=roc_auc_score,
    shap='tree',
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,  # names of passengers
    descriptions=feature_descriptions,
    labels=['Not survived', 'Survived'],
)

Generating self.shap_explainer = shap.TreeExplainer(model)
You can always monkeypatch self.shap_explainer if desired...


In [None]:
# Build the dashboard with every tab switched on, then serve it on port 8052.
db = ExplainerDashboard(
    explainer,
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=True,
    decision_trees=True,
)
db.run(8052)

## LogisticRegression

In [30]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression with default settings on the survival data.
model = LogisticRegression()
model.fit(X_train, y_train)

# Generic classifier explainer using the linear SHAP backend.
explainer = ClassifierExplainer(
    model, X_test, y_test,
    metric=roc_auc_score,
    shap='linear',
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,  # names of passengers
    descriptions=feature_descriptions,
    labels=['Not survived', 'Survived'],
)

Generating self.shap_explainer = shap.LinearExplainer(model, X)...
You can always monkeypatch self.shap_explainer if desired...


In [None]:
# Dashboard for the linear model, served on port 8052.
db = ExplainerDashboard(
    explainer,
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=False,  # Linear models have no interactions
    decision_trees=False,    # Linear models have no decision trees
)
db.run(8052)

## XGBClassifier

In [31]:
from xgboost import XGBClassifier

# Fit an XGBoost classifier with default hyperparameters.
model = XGBClassifier()
model.fit(X_train, y_train)

# Explain in log-odds space using the tree SHAP backend.
explainer = ClassifierExplainer(
    model, X_test, y_test, roc_auc_score,
    shap='tree',
    model_output="logodds",
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,  # names of passengers
    labels=['Not survived', 'Survived'],
)



Generating self.shap_explainer = shap.TreeExplainer(model)
You can always monkeypatch self.shap_explainer if desired...


In [95]:
# Dashboard for the XGBoost classifier (interaction tab enabled), on port 8052.
db = ExplainerDashboard(
    explainer,
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=True,
)
db.run(8052)

Calculating shap values...
Calculating predictions...
Calculating pred_percentiles...
Calculating prediction probabilities...
Calculating importances...
Calculating shap interaction values...
Running Model Explainer on http://localhost:8052
 * Serving Flask app "explainerdashboard.dashboards" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8052/ (Press CTRL+C to quit)
127.0.0.1 - - [03/Jun/2020 11:01:40] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:01:41] "[36mGET /assets/main.css?m=1590232199.1879547 HTTP/1.1[0m" 304 -
127.0.0.1 - - [03/Jun/2020 11:01:41] "[36mGET /assets/bootstrap.css?m=1570631370.0 HTTP/1.1[0m" 304 -
127.0.0.1 - - [03/Jun/2020 11:01:41] "[36mGET /assets/stylesheet.css?m=1570631370.0 HTTP/1.1[0m" 304 -
127.0.0.1 - - [03/Jun/2020 11:01:41] "[36mGET /assets/util.css?m=1513660272.0 HTTP/1.1[0m" 304 -
127.0.0.1 - - [03/Jun/2020 11:01:41] "[36mGET /assets/dash_bootstrap_components.min.js?m=1570631370.0 HTTP/1.1[0m" 304 -
127.0.0.1 - - [03/Jun/2020 11:01:42] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:01:42] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:01:42] "[37mGET /_dash-component-suites/dash_core_components/async-plotlyjs.v1_10_0m1588696753.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/

## LGBMClassifier

In [32]:
from lightgbm.sklearn import LGBMClassifier

# Fit a LightGBM classifier with default hyperparameters.
model = LGBMClassifier()
model.fit(X_train, y_train)

# Explain in probability space with the tree SHAP backend.
explainer = ClassifierExplainer(
    model, X_test, y_test, roc_auc_score,
    shap='tree',
    #X_background=X_train,
    model_output="probability",  # alternative: 'logodds'
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,  # names of passengers
    labels=['Not survived', 'Survived'],
)

Generating self.shap_explainer = shap.TreeExplainer(model, X, model_output='probability', feature_perturbation='interventional')...
Shap interaction values will not be available. If shap values in probability space are not necessary you can pass model_output='logodds' to get shap interation values back...
You can always monkeypatch self.shap_explainer if desired...


In [None]:
# Dashboard for the LightGBM classifier, served on port 8052.
db = ExplainerDashboard(
    explainer,
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=False,  # not supported for model_output = 'probability'
)
db.run(8052)

## CatBoostClassifier

In [None]:
from catboost import CatBoostClassifier

# learning_rate is a boosting step size; the previous value of 100 was not a
# sensible setting. 0.1 and verbose=0 mirror the CatBoostRegressor example in
# this notebook (verbose=0 suppresses the per-iteration training log).
model = CatBoostClassifier(iterations=100, learning_rate=0.1, verbose=0)
model.fit(X_train, y_train)

# Use ClassifierExplainer, the same explainer class the other non-forest
# classifier examples in this notebook use (ClassifierBunch was a stale name).
explainer = ClassifierExplainer(
    model, X_test, y_test, roc_auc_score,
    shap='tree',
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,  # names of passengers
    labels=['Not survived', 'Survived'],
)

In [None]:
# Dashboard for the CatBoost classifier (interaction tab off), on port 8052.
db = ExplainerDashboard(
    explainer,
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=False,
)
db.run(8052)

# RegressionExplainer example

## Load regression data:
    - predicting the fare that a Titanic passenger paid for their ticket

In [21]:
# Fare dataset: train/test features and targets, in the order returned
# by titanic_fare(). This rebinds the names used by the classifier section.
(X_train, y_train, X_test, y_test) = titanic_fare()

# Passenger names for the train and test rows; used below as dashboard indexes.
(train_names, test_names) = titanic_names()

## RandomForestRegressor

In [22]:
# Fit a small random forest regressor. random_state is pinned so that
# Restart & Run All reproduces the same trees and dashboard contents.
model = RandomForestRegressor(n_estimators=50, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Random-forest specific explainer, so the decision_trees tab is available;
# predictions are displayed with "$" as the unit.
explainer = RandomForestRegressionExplainer(
    model, X_test, y_test, r2_score,
    shap='tree',
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,
    units="$",
)

Generating self.shap_explainer = shap.TreeExplainer(model)
You can monkeypatch self.shap_explainer if desired...


In [None]:
# Build the regression dashboard with every tab switched on; serve on port 8052.
db = ExplainerDashboard(
    explainer,
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=True,
    decision_trees=True,
)
db.run(8052)

## LinearRegression

In [23]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares fit on the fare data.
model = LinearRegression()
model.fit(X_train, y_train)

# Generic regression explainer using the linear SHAP backend.
explainer = RegressionExplainer(
    model, X_test, y_test, r2_score,
    shap='linear',
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,
    units="$",
)

Generating self.shap_explainer = shap.LinearExplainer(model, X)...
You can monkeypatch self.shap_explainer if desired...


In [None]:
# Dashboard for the linear regression model, served on port 8052.
# (A stray non-ASCII character before `contributions` made this cell a
# SyntaxError; it has been removed.)
db = ExplainerDashboard(explainer,
                        model_summary=True,
                        contributions=True,
                        shap_dependence=True,
                        shap_interaction=False)
db.run(8052)

## XGBRegressor

In [24]:
from xgboost import XGBRegressor

# Fit an XGBoost regressor with default hyperparameters.
model = XGBRegressor()
model.fit(X_train, y_train)

# Generic regression explainer using the tree SHAP backend.
explainer = RegressionExplainer(
    model, X_test, y_test, r2_score,
    shap='tree',
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,
    units="$",
)

Generating self.shap_explainer = shap.TreeExplainer(model)
You can monkeypatch self.shap_explainer if desired...


In [None]:
# Dashboard for the XGBoost regressor, served on port 8052.
db = ExplainerDashboard(
    explainer,
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=False,  # only model_output = "raw" supported with interaction values for now
)
db.run(8052)

## LGBMRegressor

In [27]:
from lightgbm.sklearn import LGBMRegressor

# Fit a LightGBM regressor with default hyperparameters.
model = LGBMRegressor()
model.fit(X_train, y_train)

# Generic regression explainer using the tree SHAP backend.
explainer = RegressionExplainer(
    model, X_test, y_test, r2_score,
    shap='tree',
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,
    units="$",
)

Generating self.shap_explainer = shap.TreeExplainer(model)
You can monkeypatch self.shap_explainer if desired...


In [None]:
# Dashboard for the LightGBM regressor (interaction tab enabled), on port 8052.
db = ExplainerDashboard(
    explainer,
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=True,
)
db.run(8052)

## CatBoostRegressor

In [14]:
from catboost import CatBoostRegressor

# 100 boosting iterations at step size 0.1; verbose=0 silences the training log.
model = CatBoostRegressor(iterations=100, learning_rate=0.1, verbose=0)
model.fit(X_train, y_train)

# Generic regression explainer using the tree SHAP backend.
explainer = RegressionExplainer(
    model, X_test, y_test, r2_score,
    shap='tree',
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,
    units="$",
)

Generating self.shap_explainer = shap.TreeExplainer(model)...
You can monkeypatch self.shap_explainer if desired...


In [15]:
# Dashboard for the CatBoost regressor, served on port 8052.
db = ExplainerDashboard(
    explainer,
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=False,  # interactions not supported for now due to this issue: https://github.com/slundberg/shap/issues/480
)
db.run(8052)

Calculating shap values...
Calculating predictions...
Calculating prediction percentiles...
Calculating shap values...
Calculating importances...
Running Model Explainer on http://localhost:8052
 * Serving Flask app "explainerdashboard.dashboards" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8052/ (Press CTRL+C to quit)
127.0.0.1 - - [03/Jun/2020 11:45:15] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:45:17] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:45:17] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:45:18] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:45:18] "[37mPOST /_dash-update-component HTTP/1.1[0m" 204 -
127.0.0.1 - - [03/Jun/2020 11:45:18] "[37mPOST /_dash-update-component HTTP/1.1[0m" 204 -
127.0.0.1 - - [03/Jun/2020 11:45:18] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:45:18] "[37mPOST /_dash-update-component HTTP/1.1[0m" 204 -
127.0.0.1 - - [03/Jun/2020 11:45:18] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:45:18] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Jun/2020 11:45:18] "[37mPOST /_dash-upda