### Make required imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from sklearn.model_selection import train_test_split
from interpret import show
from interpret import data
from interpret.glassbox import ExplainableBoostingRegressor, LinearRegression, RegressionTree
from interpret.perf import RegressionPerf

### Load dataframe and separate feature and target sets

In [2]:
# Load the pickled dataframe and split it into a feature frame `x` and a target series `y`.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
df = pd.read_pickle('COS_Seesaw_dataframe.pkl')
print(df)

target_column = 'COS_JFJ'
if target_column not in df.columns:
    # Raise instead of print + quit(): in a notebook kernel quit() shuts the
    # kernel down, discarding all state and hiding why the cell stopped.
    raise KeyError(
        f"target column {target_column!r} not in dataframe; "
        f"found columns: {list(df.columns)}"
    )

# Features are every column except the target and the 'time' column.
columns = [name for name in df.columns if name not in (target_column, 'time')]

x = df[columns]
y = df[target_column]

           time  COS_JFJ     NH_sst     SH_sst
0    2000-01-07   444.18  12.305551  14.371146
1    2000-01-08   435.34  12.273712  14.423678
2    2000-01-11   435.61  12.221418  14.517519
3    2000-01-12   431.03  12.208945  14.525194
4    2000-01-13   434.76  12.200154  14.538042
...         ...      ...        ...        ...
1885 2018-09-26   437.56  15.888455  12.344624
1886 2018-09-27   438.04  15.837394  12.360606
1887 2018-09-28   433.50  15.800682  12.368045
1888 2018-09-29   443.24  15.764831  12.371663
1889 2018-09-30   437.52  15.745292  12.375092

[1890 rows x 4 columns]


### Make the train/test split; the random seed ensures consistent results across multiple runs
### Change the seed value to get a different split

In [3]:
# Fixed seed so the split below is reproducible from run to run.
seed = 2
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=seed,
    test_size=0.3,
)

# Report the size of each subset: train features/target, then test features/target.
for subset in (X_train, y_train, X_test, y_test):
    print(len(subset))

1323
1323
567
567


### Show Marginal

In [4]:
# Create the Marginal data explainer from interpret's `data` module.
marginal = data.Marginal()
# Explain the training features against the training target; the resulting
# explanation object is also passed to the combined show() call at the end.
marginal_explanation = marginal.explain_data(X_train, y_train)
# Render the explanation in the notebook.
show(marginal_explanation)

### Train Models

In [5]:
# Instantiate the three glassbox regressors, all seeded with the same value.
ebm = ExplainableBoostingRegressor(interactions=3, random_state=seed)
lr = LinearRegression(random_state=seed)
rt = RegressionTree(random_state=seed)

# Fit in the order EBM -> linear regression -> regression tree.
for estimator in (ebm, lr):
    estimator.fit(X_train, y_train)

# Kept as the final bare expression so the cell still displays the fitted tree.
rt.fit(X_train, y_train)

<interpret.glassbox.decisiontree.RegressionTree at 0x7f3beeb0d410>

### What the models learned

In [6]:
# One global explanation per fitted model, labelled for display.
ebm_global, lr_global, rt_global = (
    fitted.explain_global(name=label)
    for fitted, label in (
        (ebm, 'EBM'),
        (lr, 'Linear Regression'),
        (rt, 'Regression Tree'),
    )
)

### Explanation of individual predictions

In [7]:
# Per-sample explanations on the held-out test set, one per fitted model.
ebm_local, lr_local, rt_local = (
    fitted.explain_local(X_test, y_test, name=label)
    for fitted, label in (
        (ebm, 'EBM'),
        (lr, 'Linear Regression'),
        (rt, 'Regression Tree'),
    )
)

### Evaluate performance

In [8]:
# Performance explanation on the test set for each model's predict function.
ebm_perf, lr_perf, rt_perf = (
    RegressionPerf(fitted.predict).explain_perf(X_test, y_test, name=label)
    for fitted, label in (
        (ebm, 'EBM'),
        (lr, 'Linear Regression'),
        (rt, 'Regression Tree'),
    )
)

### Everything at once

In [9]:
# Combined view: the data explanation first, then global / performance / local
# explanations for each model (linear regression, regression tree, EBM).
all_explanations = [
    marginal_explanation,
    lr_global, lr_perf, lr_local,
    rt_global, rt_perf, rt_local,
    ebm_global, ebm_perf, ebm_local,
]
show(all_explanations)