## Set up a regression experiment

In [2]:
pip install interpret

Collecting interpret
  Downloading interpret-0.5.0-py3-none-any.whl (1.4 kB)
Collecting interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.0 (from interpret)
  Downloading interpret_core-0.5.0-py3-none-any.whl (15.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.8/15.8 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
Collecting dash>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.0->interpret)
  Downloading dash-2.14.2-py3-none-any.whl (10.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.2/10.2 MB 51.4 MB/s eta 0:00:00
Collecting dash-core-components>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.0->interpret)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-html-components>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.0->interpret)
  Downloading 

In [3]:

pip install lime

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 275.7/275.7 kB 3.2 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... done
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283835 sha256=b34a452573823ca2930bda9dabc422d4818d1852bc6ae33104f0d61bffbb1eab
  Stored in directory: /root/.cache/pip/wheels/fd/a2/af/9ac0a1a85a27f314a06b39e1f492bee1547d52549a4606ed89
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1


In [4]:
# install interpret if not already installed
try:
    import interpret
except ModuleNotFoundError:
    !pip install --quiet interpret pandas scikit-learn

In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from interpret import show
from interpret.perf import RegressionPerf

# One seed for the whole notebook: it feeds NumPy's global RNG here and the
# train/test split below.
seed = 42
np.random.seed(seed)

# Load the diabetes regression data as (features, target); as_frame=True asks
# for pandas objects rather than raw arrays.
X, y = load_diabetes(as_frame=True, return_X_y=True)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

## Explore the dataset

In [6]:
from interpret import show
from interpret.data import Marginal

# Build a data explanation of the training features against the target,
# labelled "Train Data", and render it.
marginal = Marginal().explain_data(X_train, y_train, name="Train Data")
show(marginal)

## Train the Explainable Boosting Machine (EBM)

In [7]:
from interpret.glassbox import ExplainableBoostingRegressor, LinearRegression, RegressionTree

# Pass the notebook-wide seed explicitly so the EBM fit is pinned to the same
# seed as the train/test split and RegressionTree(random_state=seed), instead
# of relying on whatever default random_state the library ships with.
ebm = ExplainableBoostingRegressor(random_state=seed)
ebm.fit(X_train, y_train)

## EBMs are glassbox models, so we can edit them

In [8]:
# post-process monotonize the age feature:
# edits the already-fitted `ebm` so that its "age" term is increasing.
# NOTE(review): the return value is not reassigned, so this appears to modify
# `ebm` in place — the explanations in later cells describe the edited model.
ebm.monotonize("age", increasing=True)

## Global Explanations: What the model learned overall

In [9]:
# Global explanation of the fitted EBM, labelled "EBM"; the same object is
# reused in the final dashboard cell.
ebm_global = ebm.explain_global(name="EBM")
show(ebm_global)



## Local Explanations: How an individual prediction was made

In [10]:
# Explain the first five held-out rows (selected by position) together with
# their true targets.
ebm_local = ebm.explain_local(
    X_test.iloc[:5],
    y_test.iloc[:5],
    name="EBM",
)
# The second argument is the key passed to show — presumably it selects the
# first explained row; confirm against the interpret docs.
show(ebm_local, 0)

## Evaluate EBM performance

In [11]:
# Score the EBM on the held-out split and render the performance explanation.
ebm_perf = RegressionPerf(ebm).explain_perf(
    X_test,
    y_test,
    name="EBM",
)
show(ebm_perf)

## Let's test out a few other Explainable Models

In [12]:
from interpret.glassbox import LinearRegression, RegressionTree

# Linear model, fitted on the same training split as the EBM.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Regression tree, seeded with the notebook-wide `seed`.
# This fit call is the cell's last expression, so its return value is what
# the cell displays.
rt = RegressionTree(random_state=seed)
rt.fit(X_train, y_train)

<interpret.glassbox._decisiontree.RegressionTree at 0x7fe8fee91b10>

## Compare performance using the Dashboard

In [13]:
# Held-out performance of the linear model; reused in the dashboard cell.
lr_perf = RegressionPerf(lr).explain_perf(
    X_test,
    y_test,
    name="Linear Regression",
)
show(lr_perf)

In [14]:
# Held-out performance of the regression tree; reused in the dashboard cell.
rt_perf = RegressionPerf(rt).explain_perf(
    X_test,
    y_test,
    name="Regression Tree",
)
show(rt_perf)

## Glassbox: All of our models have global and local explanations

In [15]:
# Global explanation of the fitted linear model, labelled "Linear Regression".
lr_global = lr.explain_global(name="Linear Regression")
show(lr_global)

In [16]:
# Global explanation of the fitted regression tree, labelled "Regression Tree".
rt_global = rt.explain_global(name="Regression Tree")
show(rt_global)

## Dashboard: look at everything at once

In [17]:
# Passing a list to show opens the InterpretML Dashboard with every
# explanation built above: the data summary first, then the global and
# performance explanation of each model.
show(
    [
        marginal,
        lr_global,
        lr_perf,
        rt_global,
        rt_perf,
        ebm_global,
        ebm_perf,
    ]
)