# BNN with tabnet and TyXe
We use tabnet to train a tabular neural network on kaggle's insurance dataset and the lift the module to make it bayesian usinng TyXe package (based on pyro)

In [1]:
import pandas as pd
import numpy as np
import tyxe

from pytorch_tabnet.tab_model import TabNetRegressor
import torch
import torch.nn as nn
from torch.utils import data as torch_data

import pyro
import pyro.distributions as dist
import pyro.infer.autoguide as ag

from sklearn.preprocessing import StandardScaler
from category_encoders.ordinal import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from functools import partial
from copy import deepcopy
from utils import get_categs_mappings, get_emb_size

## Feature configuration

In [2]:
numerical_features = ['bmi', 'age']
categorical_features = ['sex', 'smoker', 'children', 'region']
features = categorical_features+numerical_features
target = 'charges'

## Load data
- load data
- define train/test split
- define normalization of target column using a 95 quantile division
- transform categorical variables in category dtypes

In [3]:
data = pd.read_csv('./data/insurance.csv')#.drop(['Unnamed: 0'], axis=1)

test_index = data.sample(frac=.2, random_state=1312).index
data['is_test'] = 0 
data.loc[test_index, 'is_test'] = 1

q95 = data.query('is_test == 0')[target].quantile(.95)

def transform(x):
    return x/q95

def inv_transf(x):
    return q95*x

data[target+'_transf'] = transform(data[target])

df_train = data.query('is_test == 0')
df_test = data.query('is_test == 1')

df_test.loc[:, categorical_features] = df_test.loc[:, categorical_features].astype('category').copy()
df_train.loc[:, categorical_features] = df_train.loc[:, categorical_features].astype('category').copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, val, pi)


In [4]:
data.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges,is_test,charges_transf
0,19,female,27.9,0,yes,southwest,16884.924,0,0.419684
1,18,male,33.77,1,no,southeast,1725.5523,0,0.042889
2,28,male,33.0,3,no,southeast,4449.462,0,0.110594
3,33,male,22.705,0,no,northwest,21984.47061,0,0.546435
4,32,male,28.88,0,no,northwest,3866.8552,0,0.096113


## Define preprocessing pipeline
- numerical scaler and imputer
- ordinal encoder and imputers for categorical data

In [5]:
categs_mappings = get_categs_mappings(df_train, categorical_features)

numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('imputer', SimpleImputer(strategy='median', fill_value=np.NaN)), 
    ])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value="N/A")),
    ('label', OrdinalEncoder(handle_unknown="value", mapping=categs_mappings)), #uknown values goes to -1
    ('imputer_uknow', SimpleImputer(missing_values=-1, strategy='constant', fill_value=0)), #uknown values go to 0, the index for NaN
    ])

preprocessor = ColumnTransformer(#preprocessor rearenge columns
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', numeric_transformer, numerical_features)]
)

preprocessor.fit(df_train[features])

ColumnTransformer(transformers=[('cat',
                                 Pipeline(steps=[('imputer',
                                                  SimpleImputer(fill_value='N/A',
                                                                strategy='constant')),
                                                 ('label',
                                                  OrdinalEncoder(mapping=[{'col': 0,
                                                                           'mapping': {'N/A': 0,
                                                                                       'female': 1,
                                                                                       'male': 2}},
                                                                          {'col': 1,
                                                                           'mapping': {'N/A': 0,
                                                                                       'no': 1,
           

## Define embedding size
Embedding size for categorical features

In [6]:
cat_dims = [df_train[feature].nunique()+1 for i, feature in enumerate(categorical_features)]
cat_idxs = [i for i, f in enumerate(features) if f in categorical_features]
cat_emb_dim = [get_emb_size(n_cat) for n_cat in cat_dims]

## Training deterministic model

In [7]:
regressor = TabNetRegressor(
    cat_dims=cat_dims, 
    cat_emb_dim=cat_emb_dim, 
    cat_idxs=cat_idxs
)

Device used : cpu


In [8]:
X_train = preprocessor.transform(df_train[features])
y_train = (df_train[target+'_transf']).values.reshape((-1, 1))

X_test = preprocessor.transform(df_test[features])
y_test = (df_test[target+'_transf']).values.reshape((-1, 1))

In [9]:
max_epochs = 100
regressor.fit(
    X_train=X_train, y_train=y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    eval_name=['train', 'test'],
    eval_metric=['mae'],
    max_epochs=max_epochs,
    patience=50,
    batch_size=128,
    num_workers=0,
    drop_last=False
)
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                       ('regressor', regressor)])

epoch 0  | loss: 0.48997 | train_mae: 0.77396 | test_mae: 0.69482 |  0:00:00s
epoch 1  | loss: 0.10518 | train_mae: 0.2907  | test_mae: 0.31867 |  0:00:01s
epoch 2  | loss: 0.08003 | train_mae: 0.26908 | test_mae: 0.30182 |  0:00:01s
epoch 3  | loss: 0.05918 | train_mae: 0.25625 | test_mae: 0.27893 |  0:00:02s
epoch 4  | loss: 0.05276 | train_mae: 0.20856 | test_mae: 0.23479 |  0:00:03s
epoch 5  | loss: 0.04156 | train_mae: 0.1962  | test_mae: 0.22004 |  0:00:03s
epoch 6  | loss: 0.03858 | train_mae: 0.18923 | test_mae: 0.21784 |  0:00:04s
epoch 7  | loss: 0.03794 | train_mae: 0.16298 | test_mae: 0.19171 |  0:00:04s
epoch 8  | loss: 0.0337  | train_mae: 0.15236 | test_mae: 0.1743  |  0:00:05s
epoch 9  | loss: 0.03259 | train_mae: 0.15398 | test_mae: 0.18334 |  0:00:06s
epoch 10 | loss: 0.03134 | train_mae: 0.13707 | test_mae: 0.16174 |  0:00:06s
epoch 11 | loss: 0.02803 | train_mae: 0.13164 | test_mae: 0.15224 |  0:00:07s
epoch 12 | loss: 0.02766 | train_mae: 0.1246  | test_mae: 0.1482

## Prediction and performance
- we compute **MAPE** and **MedianApe**

In [10]:
data[f'{target}_predict'] = inv_transf(pipeline.predict(data[features]))
data['error'] = data.eval(f'{target}_predict- {target}')
data['perc_error'] = data.eval(f'error/{target}')
data['abs_perc_error'] = data.eval('abs(perc_error)')

In [11]:
data.query('is_test == 1').abs_perc_error.mean(), data.query('is_test == 1').abs_perc_error.median()


(0.2498319248375792, 0.12326607586110114)

## Bayesian model using TyXe

In [12]:
net = deepcopy(regressor.network)

In [13]:
device = regressor.device

We don't want distributions over embedding or batchnorm layers

In [14]:
hide_module_types = (
    nn.modules.sparse.Embedding,
    nn.modules.BatchNorm1d
)

prior_kwargs = dict(
    expose_all=False, 
    hide_module_types=hide_module_types
)

In [15]:
prior = tyxe.priors.IIDPrior(
    dist.Normal(torch.zeros(1, device=device), torch.ones(1, device=device)),
    **prior_kwargs)

guide = partial(
    tyxe.guides.AutoNormal,
    init_loc_fn=tyxe.guides.PretrainedInitializer.from_net(net), 
    init_scale=1e-4,
    max_guide_scale=1, 
    train_loc=True
)

likelihood = tyxe.likelihoods.HomoskedasticGaussian(len(df_train), scale=0.1)
bnn = tyxe.VariationalBNN(net, prior, likelihood, guide)

In [16]:
X_train_torch = torch.Tensor(X_train)
y_train_torch = torch.Tensor(y_train)

In [17]:
dataset = torch_data.TensorDataset(X_train_torch, y_train_torch)
loader = torch_data.DataLoader(dataset, batch_size=len(X_train_torch))

In [18]:
pyro.clear_param_store()
optim = pyro.optim.Adam({"lr": 1e-3})
elbos = []
def callback(bnn, i, e):
    elbos.append(e)
    
#with tyxe.poutine.local_reparameterization():
bnn.fit(loader, optim, 10000, callback)

KeyError: 'tabnet.encoder.feat_transformers.2.shared.glu_layers.0.fc.weight'

In [None]:
getattr(net, "tabnet.encoder")

In [None]:
net.tabnet.encoder.feat_transformers[2].shared.glu_layers[0].fc.weight