In [None]:
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from aix360.algorithms.rbm import FeatureBinarizer, LogisticRuleRegression

from aif360.datasets import MEPSDataset19

import aix360

## Getting some data
### p 208

### Bring in the dataset

In [None]:
## p 208
# Instantiate the MEPS dataset wrapper from aif360 with its default arguments.
# NOTE(review): presumably requires the raw MEPS data files to be available
# to aif360 locally — confirm the environment setup before running.
med_data19 = MEPSDataset19()

#### some brief exploration not shown in book

In [None]:
# Sanity check: dimensions of the feature matrix.
med_data19.features.shape

In [None]:
# Sanity check: dimensions of the label array (it is indexed as 2-D with
# `[:, 0]` further down in this notebook).
med_data19.labels.shape

In [None]:
# Name(s) of the target column(s) stored on the dataset object.
med_data19.label_names

In [None]:
# Confirm which class the loaded dataset object is an instance of.
type(med_data19)

In [None]:
# Preview only the first few rows; echoing the whole feature array adds a
# large, unreadable output to the notebook.
med_data19.features[:5]

In [None]:
# Preview only the first few labels rather than echoing the whole array.
med_data19.labels[:5]

#### back to loading data and continuing

In [None]:
## p 208
# Split features and labels into train/test partitions.
# - stratify on the labels so the split is stratified by class
# - fixed random_state so the split is the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    med_data19.features,
    med_data19.labels,
    stratify=med_data19.labels,
    random_state=0,
)

In [None]:
# Size of the training partition produced by the split.
X_train.shape

In [None]:
# Wrap the training features in a DataFrame labelled with the dataset's
# feature names, then peek at the first rows.
X_train = pd.DataFrame(X_train, columns=med_data19.feature_names)
X_train.head()

In [None]:
# Same treatment for the test features: DataFrame labelled with the
# dataset's feature names.
X_test = pd.DataFrame(X_test, columns=med_data19.feature_names)

In [None]:
# First rows of the labelled test features.
X_test.head()

In [None]:
# Keep only the first label column as a 1-D vector.
# The ndim guard makes the cell safe to re-run: once y_train is already 1-D,
# `y_train[:, 0]` would raise IndexError.
y_train = y_train[:, 0] if y_train.ndim > 1 else y_train

In [None]:
# Keep only the first label column as a 1-D vector.
# The ndim guard makes the cell safe to re-run: once y_test is already 1-D,
# `y_test[:, 0]` would raise IndexError.
y_test = y_test[:, 0] if y_test.ndim > 1 else y_test

## Building interpretable models

In [None]:
## preparation: we need to binarize inputs
## p 209
# Binarizer that turns the raw feature columns into binary features for the
# rule-based models.
# NOTE(review): presumably negations=True also emits the negated version of
# each binary feature — confirm against the aix360 docs.
# returnOrd=True: fit_transform/transform are unpacked into two objects in
# the next cell (the binarized frame plus a second frame named *_std/*Std).
feat_bin = FeatureBinarizer(negations=True, returnOrd=True)

In [None]:
# Fit the binarizer on the training data only, then apply the fitted
# binarizer to the test data.
# NOTE: this rebinds X_train / X_test to the binarized frames. Re-running
# this cell without first re-running the cells that build the raw frames
# feeds already-binarized data back into the binarizer.
# NOTE(review): the second return value is presumably the standardized
# ordinal features — confirm. The names X_train_std / X_testStd are
# inconsistent, but later cells rely on them.
X_train, X_train_std = feat_bin.fit_transform(X_train)
X_test, X_testStd    = feat_bin.transform(X_test)

In [None]:
# Inspect what the binarizer produced for the 'AGE' feature (first rows).
X_train['AGE'].head()

## GLRM (generalized linear rule models)
### p 207 and onwards

In [None]:
## p 210
# Baseline logistic rule regression.
#   lambda0: fixed cost charged for each rule
#   lambda1: additional cost charged for each literal in a rule
# The third fit argument is the second frame returned by the binarizer.
lrr = LogisticRuleRegression(
    lambda0=0.005,
    lambda1=0.001,
    useOrd=True,
)
lrr.fit(X_train, y_train, X_train_std)

In [None]:
# Accuracy of the baseline model on the data it was fit on vs. held-out data.
train_acc = accuracy_score(y_train, lrr.predict(X_train, X_train_std))
test_acc = accuracy_score(y_test, lrr.predict(X_test, X_testStd))
print('Train accuracy: %0.2f      Test accuracy: %0.2f' % (train_acc, test_acc))

In [None]:
## p 211
# Table of the fitted model's rules/features and their coefficients.
df = lrr.explain()

# This notebook addresses the rule column under two names
# ('rule/numerical feature' here, 'rule' in other cells). Resolve it at
# runtime so the cell works with whichever name explain() returned.
rule_col = 'rule/numerical feature' if 'rule/numerical feature' in df.columns else 'rule'

# A bare expression in the middle of a cell is evaluated and discarded, so
# print the row-1 rule text explicitly. `.loc` replaces the chained
# `df[col][1]` lookup with a single label-based access.
print(df.loc[1, rule_col])

# Widen the rule column so long rules stay readable in the rendered table.
df.style.set_properties(subset=[rule_col], **{'width': '300px'})

In [None]:
## p 212
# Same explanation table, restricted via highDegOnly=True.
df = lrr.explain(highDegOnly = True)

# This notebook addresses the rule column as both 'rule' and
# 'rule/numerical feature'. Prefer 'rule' (the name used here originally)
# and fall back to the other name instead of failing.
rule_col = 'rule' if 'rule' in df.columns else 'rule/numerical feature'

# Widen the rule column so long rules stay readable in the rendered table.
df.style.set_properties(subset=[rule_col], **{'width': '300px'})

In [None]:
## From the aix360 LogisticRuleRegression documentation:
## lambda0 (float, optional) – Regularization - fixed cost of each rule
## lambda1 (float, optional) – Regularization - additional cost of each literal in rule

In [None]:
## p 215
# Alternative model: accept more complex rules in exchange for fewer rules.
# Compared with the baseline, lambda0 (fixed cost per rule) is raised and
# lambda1 (cost per literal) is lowered; the next cells check what this does
# to performance.
lrr_alt = LogisticRuleRegression(
    lambda0=0.01,
    lambda1=0.0001,
    useOrd=True,
)
lrr_alt.fit(X_train, y_train, X_train_std)

In [None]:
# Accuracy of the alternative model on the fit data vs. held-out data.
train_acc_alt = accuracy_score(y_train, lrr_alt.predict(X_train, X_train_std))
test_acc_alt = accuracy_score(y_test, lrr_alt.predict(X_test, X_testStd))
print('Train accuracy: %0.2f      Test accuracy: %0.2f' % (train_acc_alt, test_acc_alt))

In [None]:
## p 216
# Table of the alternative model's rules/features and their coefficients.
df_alt = lrr_alt.explain()

# This notebook addresses the rule column as both 'rule' and
# 'rule/numerical feature' for the same explain() call. Prefer 'rule' (the
# name used here originally) and fall back to the other name instead of
# raising KeyError.
rule_col = 'rule' if 'rule' in df_alt.columns else 'rule/numerical feature'

# A bare expression in the middle of a cell is evaluated and discarded, so
# print the row-1 rule text explicitly. `.loc` replaces the chained
# `df_alt[col][1]` lookup with a single label-based access.
print(df_alt.loc[1, rule_col])

# Widen the rule column so long rules stay readable in the rendered table.
df_alt.style.set_properties(subset=[rule_col], **{'width': '300px'})