Install dependencies and libraries

In [None]:
# Install the released pytorch_tabular package from PyPI.
! pip install pytorch_tabular
# Clone the upstream repository and make the checkout the working directory.
! git clone https://github.com/manujosephv/pytorch_tabular
%cd pytorch_tabular

# Install the cloned checkout as well.
!python setup.py install
# Pin setuptools to 59.5.0.
# NOTE(review): the reason for this pin is not recorded here — confirm it is still needed.
!pip install setuptools==59.5.0





In [None]:
# Import libraries

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
# Detect Google Colab: the import below only succeeds inside a Colab runtime.
try:
  import google.colab  # imported purely as an environment probe
  IN_COLAB = True
except ImportError:
  # Only "package not available" means "not Colab"; any other failure
  # (KeyboardInterrupt, a broken install, ...) should surface instead of
  # being silently swallowed.
  IN_COLAB = False
if not IN_COLAB:
    # Outside Colab the notebook steps up to the parent directory.
    # NOTE(review): this is not idempotent — re-running the cell moves up
    # one more level each time.
    os.chdir("..")
%reload_ext autoreload
%autoreload 2

from pytorch_tabular import TabularModel
from pytorch_tabular.models import CategoryEmbeddingModelConfig, FTTransformerConfig, FTTransformerModel, TabNetModelConfig, TabNetModel, AutoIntConfig, AutoIntConfig, TabTransformerConfig, TabTransformerModel
from pytorch_tabular.models import AutoIntModel, AutoIntConfig, NodeConfig, NODEModel
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig, ModelConfig
from pytorch_tabular.models import BaseModel

import torch
import torch.nn as nn
import torch.nn.functional as F
from omegaconf import DictConfig
from typing import Dict
from dataclasses import dataclass, field


from sklearn.preprocessing import MinMaxScaler
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

# Text styles shared by the matplotlib plots below:
# `font` is passed to axis labels, `font_title` to figure titles.
font = dict(family='serif', color='darkred', weight='normal', size=16)

# Identical to `font` except for the larger size.
font_title = dict(font, size=20)

# Define functions

def set_total_seed(seed = 42):
  """Seed Python's ``random``, NumPy's global RNG and PyTorch with one value.

  Args:
    seed: Integer seed handed to each of the three generators.
  """
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)

def make_data(num_samples = 10000,
              x_1 = 10, x_2 = 6, x_3 = -1.5, x_error_2 = 1,
              sin_koef = 3):
  """Generate a synthetic one-feature heteroscedastic regression dataset.

  ``x`` is drawn from ``np.random.sample`` and scaled by 5 (float32 column
  vector). The target is the cubic ``x_1*x + x_2*x**2 + x_3*x**3`` plus
  standard-normal noise multiplied by ``x_error_2 * x**2 * sin(sin_koef * x)``.

  Args:
    num_samples: Number of points to draw before splitting.
    x_1, x_2, x_3: Coefficients of the linear, quadratic and cubic terms.
    x_error_2: Scale of the ``x**2`` factor in the noise amplitude.
    sin_koef: Frequency inside the sine factor of the noise amplitude.

  Returns:
    Tuple ``(df_train, df_valid, df_test)`` of DataFrames with columns
    ``col1`` (feature), ``cat1`` (constant 0) and ``target``.
  """
  x = np.random.sample(num_samples)[:, np.newaxis].astype(np.float32)*5

  # Deterministic trend and input-dependent noise, combined exactly as y = trend + noise.
  trend = x_1 * x+x_2 * x**2+x_3 * x**3
  noise_amplitude = x_error_2*(x)**2
  noise = np.multiply(noise_amplitude, np.sin(sin_koef*x)*np.random.standard_normal(x.shape))
  y = np.add(trend, noise)

  # First split is pinned with random_state=42; the second one passes no
  # random_state, so it follows the global NumPy RNG state.
  x_fit, x_rest, y_fit, y_rest = train_test_split(x, y, random_state = 42)
  x_val, x_holdout, y_val, y_holdout = train_test_split(x_rest, y_rest)

  def to_frame(features, targets):
    # 'cat1' is a constant placeholder categorical column.
    return pd.DataFrame({'col1':features.flatten(), 'cat1':0, 'target':targets.flatten()})

  return to_frame(x_fit, y_fit), to_frame(x_val, y_val), to_frame(x_holdout, y_holdout)

def make_additional_data(num_samples = 10000,
              x_1 = -2, x_2 = 4, x_3 = -0.5, x_error_2 = 2,
              sin_koef = 1, shift = 7):
  """Generate a second synthetic dataset whose feature column is shifted.

  The samples come from ``make_data`` (same generator, same split logic,
  same use of the random state) with this function's own default
  coefficients. Targets are computed from the unshifted ``x``; afterwards
  ``shift`` is added to ``col1`` of every split.

  Args:
    num_samples: Number of points to draw before splitting.
    x_1, x_2, x_3: Coefficients of the linear, quadratic and cubic terms.
    x_error_2: Scale of the ``x**2`` factor in the noise amplitude.
    sin_koef: Frequency inside the sine factor of the noise amplitude.
    shift: Constant added to ``col1`` after the targets are computed.

  Returns:
    Tuple ``(df_train, df_valid, df_test)`` of DataFrames with columns
    ``col1``, ``cat1`` and ``target``.
  """
  frames = make_data(num_samples = num_samples,
                     x_1 = x_1, x_2 = x_2, x_3 = x_3, x_error_2 = x_error_2,
                     sin_koef = sin_koef)

  for frame in frames:
    frame['col1'] = frame['col1'] + shift

  # Earlier revisions also did ``df['col1'] = df['col1'].iloc[::-1]``.
  # Assigning a Series to a column aligns on the index, so that statement left
  # the column unchanged; it is intentionally not reproduced here.
  return frames


def setting_model_and_getting_predictions(model_config,
                                          optimizer_config,
                                          N_in_ensemble = 5, epochs = 10, 
                                          batch_size = 32,
                                          num_col_names = ['col1'],
                                          cat_col_names = ['cat1'], 
                                          auto_lr_find_for_trainer_config = False,
                                          early_stopping_patience_for_trainer_config = 5,
                                          GPU = -1,
                                          train_df = None,
                                          valid_df = None,
                                          test_df = None):
  """Fit a model on random subsamples and predict the test frame once per fit.

  For each of ``N_in_ensemble`` rounds, a subsample of
  ``len(train) // N_in_ensemble`` training rows (and likewise for validation)
  is drawn without replacement via ``random.sample``; the model is fitted on
  it and the full test frame is predicted.

  Args:
    model_config: pytorch_tabular model config passed to ``TabularModel``.
    optimizer_config: pytorch_tabular ``OptimizerConfig``.
    N_in_ensemble: Number of fit/predict rounds.
    epochs: ``max_epochs`` for the trainer.
    batch_size: Trainer batch size.
    num_col_names: Continuous feature columns (not mutated here).
    cat_col_names: Categorical feature columns (not mutated here).
    auto_lr_find_for_trainer_config: Forwarded to ``TrainerConfig.auto_lr_find``.
    early_stopping_patience_for_trainer_config: Forwarded to
      ``TrainerConfig.early_stopping_patience``.
    GPU: Forwarded to ``TrainerConfig.gpus``.
    train_df, valid_df, test_df: Optional explicit data frames. When left as
      ``None`` the notebook-level ``df_train`` / ``df_valid`` / ``df_test``
      are used, which is the historical behaviour of this helper.

  Returns:
    List with one prediction DataFrame (output of ``TabularModel.predict``)
    per ensemble round.
  """
  # Fall back to the notebook-level frames so existing calls keep working.
  if train_df is None:
    train_df = df_train
  if valid_df is None:
    valid_df = df_valid
  if test_df is None:
    test_df = df_test

  # sample subsamples for ensemble (all train draws first, then all validation draws)
  train_subsample_size = int(len(train_df)/N_in_ensemble)
  valid_subsample_size = int(len(valid_df)/N_in_ensemble)
  indexes = [random.sample(range(0, len(train_df)), train_subsample_size) for _ in range(N_in_ensemble)]
  indexes_valid = [random.sample(range(0, len(valid_df)), valid_subsample_size) for _ in range(N_in_ensemble)]

  # data settings
  data_config = DataConfig(
    target=['target'],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names)

  # define trainer configuration
  trainer_config = TrainerConfig(
    auto_lr_find = auto_lr_find_for_trainer_config, # Runs the LRFinder to automatically derive a learning rate
    batch_size = batch_size,
    max_epochs = epochs,
    early_stopping_patience = early_stopping_patience_for_trainer_config,
    gpus=GPU,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
  )

  # construct model
  # NOTE(review): one TabularModel instance is fitted repeatedly; whether the
  # weights are re-initialised between fits depends on pytorch_tabular's
  # fit() — confirm against the installed version.
  tabular_model = TabularModel(
      data_config = data_config,
      model_config = model_config,
      optimizer_config = optimizer_config,
      trainer_config = trainer_config
  )

  # make predictions: one fit on a subsample, one prediction of the full test frame
  pred_df = []
  for train_rows, valid_rows in zip(indexes, indexes_valid):
    tabular_model.fit(train = train_df.iloc[train_rows, :], validation=valid_df.iloc[valid_rows, :])
    pred_df.append(tabular_model.predict(test_df, ret_logits=False))
  
  return pred_df



FT-Transformer

In [None]:
# x_data = np.arange(-5.1, 5, 0.01)
# y_data = np.add(x_data ** 2, np.multiply(0.3 * (x_data)**2, np.random.standard_normal(x_data.shape)))

# plt.scatter(x_data, y_data);

# X_train, X_val, y_train, y_val = train_test_split(x_data, y_data, random_state = 42)
# X_val, X_test, y_val, y_test = train_test_split(X_val, y_val)

# df_train = pd.DataFrame({'col1':X_train.flatten(), 'cat1':0, 'target':y_train.flatten()})
# df_valid = pd.DataFrame({'col1':X_val.flatten(), 'cat1':0, 'target':y_val.flatten()})
# df_test = pd.DataFrame({'col1':X_test.flatten(), 'cat1':0, 'target':y_test.flatten()})

# # parameters
# N_in_ensemble = 2
# batch_size = 32
# num_col_names = ['col1']
# cat_col_names = ['cat1']
# epochs = 100
# GPU = -1
# early_stopping_patience_for_trainer_config = 5
# auto_lr_find_for_trainer_config = False


# # indexes
# indexes = np.array([random.sample(range(0, int(len(df_train))), int(len(df_train)/N_in_ensemble)) for i in range(N_in_ensemble)])
# indexes_valid = np.array([random.sample(range(0, int(len(df_valid))), int(len(df_valid)/N_in_ensemble)) for i in range(N_in_ensemble)])

# steps_per_epoch = int((len(df_train)//batch_size)*0.9)


# # data settings
# data_config = DataConfig(
#   target=['target'],
#   continuous_cols=num_col_names,
#   categorical_cols=cat_col_names)

# # define trainer configuration
# trainer_config = TrainerConfig(
#   auto_lr_find = auto_lr_find_for_trainer_config, # Runs the LRFinder to automatically derive a learning rate
#   batch_size = batch_size,
#   max_epochs = epochs,
#   early_stopping_patience = early_stopping_patience_for_trainer_config,
#   gpus=GPU,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
# )
# optimizer_config_fttransformer = OptimizerConfig(lr_scheduler="ExponentialLR", lr_scheduler_params={"gamma":0.9})

# model_config_fttransformer = FTTransformerConfig(task = "regression",
#                                                   learning_rate=1e-4,
#                                                   seed = 42,
#                                                   input_embed_dim = 16,
#                                                   num_heads = 8,
#                                                   num_attn_blocks = 20,
#                                                   ff_dropout = 0.25,
#                                                   out_ff_layers = "1024-1024-512",
#                                                   out_ff_activation = "LeakyReLU",
#                                                   attn_dropout=0.25,
#                                                   embedding_dropout = 0.25,
#                                                   #out_ff_activation = "ReLU",
#                                                   out_ff_initialization="kaiming",
#                                                   batch_norm_continuous_input=False,
#                                                   output_dim = 2                                               
#                                               )


# # construct model
# tabular_model = TabularModel(
#     data_config = data_config,
#     model_config = model_config_fttransformer,
#     optimizer_config = optimizer_config_fttransformer,
#     trainer_config = trainer_config)

# pred_df_x2 = []
# for i in range(N_in_ensemble):
#   tabular_model.fit(train = df_train.iloc[indexes[i], :], validation=df_valid.iloc[indexes_valid[i], :])
#   pred_df_x2.append(tabular_model.predict(df_test, ret_logits=False))

In [None]:
# plt.scatter(pred_df_x2[0]['col1'], pred_df_x2[0]['target_prediction'])

In [None]:
# plt.scatter(df_test['col1'], df_test['target'])

In [None]:
# Placeholder test rows for x in [5, 7) with a dummy target of 5.
# 'cat1' and 'target' are given as scalars so their length always follows
# np.arange (a float-stepped arange can be off by one versus a hard-coded count).
df_test_gap = pd.DataFrame({'col1':np.arange(5,7, 0.02), 'cat1':0, 'target':5})


In [None]:
# FT-Transformer ensemble with dropout 0.25 on the two-piece dataset (plus gap rows in the test set)
set_total_seed(seed = 42)
df_train, df_valid, df_test = make_data(num_samples=3000)
df_train_1, df_valid_1, df_test_1 = make_additional_data(num_samples=3000)

df_train_new = pd.concat([df_train, df_train_1], axis = 0)
df_valid_new = pd.concat([df_valid, df_valid_1], axis = 0)
df_test_new = pd.concat([df_test, df_test_1, df_test_gap], axis = 0)

# setting_model_and_getting_predictions reads the notebook-level df_train /
# df_valid / df_test, so the combined frames have to be bound to those names;
# otherwise the second piece and the gap rows are silently ignored.
df_train, df_valid, df_test = df_train_new, df_valid_new, df_test_new


optimizer_config_fttransformer = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
model_config_fttransformer = FTTransformerConfig(task = "regression",
                                                  learning_rate=1e-4,
                                                  seed = 42,
                                                  input_embed_dim = 16,
                                                  num_heads = 8,
                                                  num_attn_blocks = 20,
                                                  ff_dropout = 0.25,
                                                  out_ff_layers = "1024-1024-512",
                                                  out_ff_activation = "LeakyReLU",
                                                  attn_dropout=0.25,
                                                  embedding_dropout = 0.25,
                                                  #out_ff_activation = "ReLU",
                                                  out_ff_initialization="kaiming",
                                                  batch_norm_continuous_input=False
                                              )

pred_fttransformer_1 = setting_model_and_getting_predictions(model_config = model_config_fttransformer,
                                                            optimizer_config = optimizer_config_fttransformer)

plt.figure(figsize = (15,12))
plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')

# Derive the ensemble size from the result instead of repeating the helper's default.
N_in_ensemble = len(pred_fttransformer_1)
for i in range(N_in_ensemble):
  plt.scatter(pred_fttransformer_1[i]['col1'], pred_fttransformer_1[i]['target_prediction'], label = 'FT-Transformer prediction (with LeakyReLU activation. {})'.format(i));


plt.xlabel("X", fontdict = font);
# Raw string: "\e" is not a valid Python escape sequence.
plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font);
plt.title("Heteroscedastic and continuous dataset. FT-Transformer", fontdict = font_title);
plt.legend(fontsize = 14);

In [None]:
# Ensemble-mean prediction against x, computed directly from the ensemble
# output so the cell runs on a fresh kernel without relying on helper lists
# that are only built further down the notebook.
plt.scatter(np.array(pred_fttransformer_1[0]['col1']),
            np.array([member['target_prediction'] for member in pred_fttransformer_1]).mean(axis = 0))

In [None]:


# Gather each ensemble member's predictions and x column.
pred_1 = [member['target_prediction'] for member in pred_fttransformer_1]
col_1 = [member['col1'] for member in pred_fttransformer_1]

# Point-wise ensemble mean and a +/- 2 standard deviation band.
pred_1_arr = np.array(pred_1)
mean = pred_1_arr.mean(axis = 0)
upper = mean + 2 * pred_1_arr.std(axis = 0)
lower = mean - 2 * pred_1_arr.std(axis = 0)

plt.figure(figsize = (14,7))
x_values = col_1[0]
plt.scatter(x_values, upper, color = "green", alpha=.8, label = 'Mean + 2Std')
plt.scatter(x_values, lower, color = "green", alpha=.8, label = 'Mean - 2Std')
plt.scatter(x_values, mean, color = "blue", alpha = 1, label='Mean')
plt.scatter(x_values,  df_test['target'], label = 'True', color = 'black', alpha = 0.6)

#plt.fill_between(np.arange(0, 5, 5/188), (mean-(mean - lower)), (mean+(upper-mean)), color='green', alpha=.3, label = "Cl=95%")
plt.xlabel("X")
plt.ylabel("Y")
plt.legend();

In [None]:
# FT-Transformer ensemble with every dropout rate set to 0.0
# (two-piece data with shift = 9, test set extended by gap rows for x in [5, 9))

set_total_seed(seed = 42)

# Scalars for 'cat1' / 'target' keep the frame length tied to np.arange.
df_test_gap = pd.DataFrame({'col1':np.arange(5,9, 0.02), 'cat1':0, 'target':5})
df_train, df_valid, df_test = make_data(num_samples=2000)
df_train_1, df_valid_1, df_test_1 = make_additional_data(num_samples=2000, shift = 9)

df_train = pd.concat([df_train, df_train_1], axis = 0)
df_valid = pd.concat([df_valid, df_valid_1], axis = 0)
df_test = pd.concat([df_test, df_test_1, df_test_gap], axis = 0)


optimizer_config_fttransformer = OptimizerConfig(lr_scheduler="ExponentialLR", lr_scheduler_params={"gamma":0.98})
model_config_fttransformer = FTTransformerConfig(task = "regression",
                                                  learning_rate=1e-4,
                                                  seed = 42,
                                                  input_embed_dim = 16,
                                                  num_heads = 8,
                                                  num_attn_blocks = 20,
                                                  ff_dropout = 0.0,
                                                  out_ff_layers = "1024-1024-512",
                                                  out_ff_activation = "LeakyReLU",
                                                  attn_dropout=0.0,
                                                  embedding_dropout = 0.0,
                                                  #out_ff_activation = "ReLU",
                                                  out_ff_initialization="kaiming",
                                                  batch_norm_continuous_input=False
                                              )

pred_fttransformer = setting_model_and_getting_predictions(model_config = model_config_fttransformer,
                                                            optimizer_config = optimizer_config_fttransformer)

plt.figure(figsize = (15,12))
plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')

# Derive the ensemble size from the result instead of repeating the helper's default.
N_in_ensemble = len(pred_fttransformer)
for i in range(N_in_ensemble):
  plt.scatter(pred_fttransformer[i]['col1'], pred_fttransformer[i]['target_prediction'], label = 'FT-Transformer prediction (with LeakyReLU activation. {})'.format(i));


plt.xlabel("X", fontdict = font);
# Raw string: "\e" is not a valid Python escape sequence.
plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font);
plt.title("Heteroscedastic and continuous dataset. FT-Transformer", fontdict = font_title);
plt.legend(fontsize = 14);

In [None]:
# Gather each ensemble member's predictions and x column. Both lists are built
# per member, so nothing depends on a loop variable leaking out of a loop.
pred_2 = [member['target_prediction'] for member in pred_fttransformer]
col_1 = [member['col1'] for member in pred_fttransformer]

# Point-wise ensemble mean and a +/- 2 standard deviation band.
pred_2_arr = np.array(pred_2)
mean = pred_2_arr.mean(axis = 0)
upper = mean + 2 * pred_2_arr.std(axis = 0)
lower = mean - 2 * pred_2_arr.std(axis = 0)

plt.figure(figsize = (14,7))
plt.scatter(col_1[0],  df_test['target'], label = "True", color = 'black', alpha = 0.6)
plt.scatter(col_1[0], upper, color = "green", alpha=.8, label = 'Mean + 2Std')
plt.scatter(col_1[0], lower, color = "green", alpha=.8, label = 'Mean - 2Std')
plt.scatter(col_1[0], mean, color = "blue", alpha = 1, label='Mean')


#plt.fill_between(np.arange(0, 5, 5/188), (mean-(mean - lower)), (mean+(upper-mean)), color='green', alpha=.3, label = "Cl=95%")
plt.xlabel("X")
plt.ylabel("Y")
plt.legend();

In [None]:
# Fresh "true" samples without the gap placeholder rows. The generator
# arguments mirror the ones used to train pred_fttransformer (2000 samples,
# second piece shifted by 9) so the reference cloud covers the same x ranges
# as the predictions.
dt = pd.concat([make_data(num_samples=2000)[2], make_additional_data(num_samples=2000, shift = 9)[2]], axis = 0)

# Gather each ensemble member's predictions and x column.
pred_2 = [member['target_prediction'] for member in pred_fttransformer]
col_1 = [member['col1'] for member in pred_fttransformer]

# Point-wise ensemble mean and a +/- 2 standard deviation band.
pred_2_arr = np.array(pred_2)
mean = pred_2_arr.mean(axis = 0)
upper = mean + 2 * pred_2_arr.std(axis = 0)
lower = mean - 2 * pred_2_arr.std(axis = 0)

plt.figure(figsize = (14,7))
plt.scatter(dt['col1'],  dt['target'], label = "True", color = 'black', alpha = 0.6)
plt.scatter(col_1[0], upper, color = "green", alpha=.8, label = 'Mean + 2Std')
plt.scatter(col_1[0], lower, color = "green", alpha=.8, label = 'Mean - 2Std')
plt.scatter(col_1[0], mean, color = "blue", alpha = 1, label='Mean')


#plt.fill_between(np.arange(0, 5, 5/188), (mean-(mean - lower)), (mean+(upper-mean)), color='green', alpha=.3, label = "Cl=95%")
plt.xlabel("X")
plt.ylabel("Y")
plt.legend();

In [None]:
# Checking epistemic uncertainty: the test set is extended with x ranges the
# model never sees during training (between the two pieces and on both sides).

# Scalars for 'cat1' / 'target' keep each frame's length tied to np.arange.
df_test_gap = pd.DataFrame({'col1':np.arange(5,7, 0.02), 'cat1':0, 'target':5})
df_test_gap_1 = pd.DataFrame({'col1':np.arange(-6,0, 0.02), 'cat1':0, 'target':5})
df_test_gap_2 = pd.DataFrame({'col1':np.arange(14,20, 0.02), 'cat1':0, 'target':5})
df_test_gap = pd.concat([df_test_gap, df_test_gap_1, df_test_gap_2], axis = 0)

set_total_seed(seed = 42)
df_train, df_valid, df_test = make_data(num_samples=3000)
df_train_1, df_valid_1, df_test_1 = make_additional_data(num_samples=3000)

df_train = pd.concat([df_train, df_train_1], axis = 0)
df_valid = pd.concat([df_valid, df_valid_1], axis = 0)
df_test = pd.concat([df_test, df_test_1, df_test_gap], axis = 0)




optimizer_config_fttransformer = OptimizerConfig(lr_scheduler="ExponentialLR", lr_scheduler_params={"gamma":0.98})
model_config_fttransformer = FTTransformerConfig(task = "regression",
                                                  learning_rate=1e-4,
                                                  seed = 42,
                                                  input_embed_dim = 16,
                                                  num_heads = 8,
                                                  num_attn_blocks = 20,
                                                  ff_dropout = 0.0,
                                                  out_ff_layers = "1024-1024-512",
                                                  out_ff_activation = "LeakyReLU",
                                                  attn_dropout=0.0,
                                                  embedding_dropout = 0.0,
                                                  #out_ff_activation = "ReLU",
                                                  out_ff_initialization="kaiming",
                                                  batch_norm_continuous_input=False
                                              )

pred_fttransformer_epistemic = setting_model_and_getting_predictions(model_config = model_config_fttransformer,
                                                            optimizer_config = optimizer_config_fttransformer)

plt.figure(figsize = (15,12))
plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')

# Derive the ensemble size from the result instead of repeating the helper's default.
N_in_ensemble = len(pred_fttransformer_epistemic)
for i in range(N_in_ensemble):
  plt.scatter(pred_fttransformer_epistemic[i]['col1'], pred_fttransformer_epistemic[i]['target_prediction'],
              label = 'FT-Transformer prediction (with LeakyReLU activation. {})'.format(i));


plt.xlabel("X", fontdict = font);
# Raw string: "\e" is not a valid Python escape sequence.
plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font);
plt.title("Heteroscedastic and continuous dataset. FT-Transformer", fontdict = font_title);
plt.legend(fontsize = 14);

In [None]:
# Fresh "true" samples (same generator arguments as the epistemic training cell),
# without the placeholder rows for the unseen x ranges.
df_test_new_1 = pd.concat([make_data(num_samples=3000)[2], make_additional_data(num_samples=3000)[2]], axis = 0)

# Gather each ensemble member's predictions and x column. Both lists are built
# per member, so nothing depends on a loop variable leaking out of a loop.
pred_3 = [member['target_prediction'] for member in pred_fttransformer_epistemic]
col_1 = [member['col1'] for member in pred_fttransformer_epistemic]

# Point-wise ensemble mean and a +/- 2 standard deviation band.
pred_3_arr = np.array(pred_3)
mean = pred_3_arr.mean(axis = 0)
upper = mean + 2 * pred_3_arr.std(axis = 0)
lower = mean - 2 * pred_3_arr.std(axis = 0)

plt.figure(figsize = (18,15))
plt.scatter(df_test_new_1['col1'],  df_test_new_1['target'], label = "True", color = 'black', alpha = 0.6)
plt.scatter(col_1[0], upper, color = "green", alpha=.8, label = 'Mean + 2Std')
plt.scatter(col_1[0], lower, color = "green", alpha=.8, label = 'Mean - 2Std')
plt.scatter(col_1[0], mean, color = "blue", alpha = 1, label='Mean')
plt.ylim((-25, 75))


#plt.fill_between(np.arange(0, 5, 5/188), (mean-(mean - lower)), (mean+(upper-mean)), color='green', alpha=.3, label = "Cl=95%")
plt.xlabel("X")
plt.ylabel("Y")
plt.legend();

In [None]:
# FT-Transformer ensemble with dropout 0.25 and a fast-decaying ExponentialLR (gamma = 0.7)
# (two-piece data with shift = 9, test set extended by gap rows for x in [5, 9))

set_total_seed(seed = 42)

# Scalars for 'cat1' / 'target' keep the frame length tied to np.arange.
df_test_gap = pd.DataFrame({'col1':np.arange(5,9, 0.02), 'cat1':0, 'target':5})
df_train, df_valid, df_test = make_data(num_samples=2000)
df_train_1, df_valid_1, df_test_1 = make_additional_data(num_samples=2000, shift = 9)

df_train = pd.concat([df_train, df_train_1], axis = 0)
df_valid = pd.concat([df_valid, df_valid_1], axis = 0)
df_test = pd.concat([df_test, df_test_1, df_test_gap], axis = 0)


optimizer_config_fttransformer = OptimizerConfig(lr_scheduler="ExponentialLR", lr_scheduler_params={"gamma":0.7})
model_config_fttransformer = FTTransformerConfig(task = "regression",
                                                  learning_rate=1e-4,
                                                  seed = 42,
                                                  input_embed_dim = 16,
                                                  num_heads = 8,
                                                  num_attn_blocks = 20,
                                                  ff_dropout = 0.25,
                                                  out_ff_layers = "1024-1024-512",
                                                  out_ff_activation = "LeakyReLU",
                                                  attn_dropout=0.25,
                                                  embedding_dropout = 0.25,
                                                  #out_ff_activation = "ReLU",
                                                  out_ff_initialization="kaiming",
                                                  batch_norm_continuous_input=False
                                              )

pred_fttransformer = setting_model_and_getting_predictions(model_config = model_config_fttransformer,
                                                            optimizer_config = optimizer_config_fttransformer)

plt.figure(figsize = (15,12))
plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')

# Derive the ensemble size from the result instead of repeating the helper's default.
N_in_ensemble = len(pred_fttransformer)
for i in range(N_in_ensemble):
  plt.scatter(pred_fttransformer[i]['col1'], pred_fttransformer[i]['target_prediction'], label = 'FT-Transformer prediction (with LeakyReLU activation. {})'.format(i));


plt.xlabel("X", fontdict = font);
# Raw string: "\e" is not a valid Python escape sequence.
plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font);
plt.title("Heteroscedastic and continuous dataset. FT-Transformer", fontdict = font_title);
plt.legend(fontsize = 14);

In [None]:
# FT-Transformer ensemble on the single-piece dataset (10000 samples)
set_total_seed(seed = 42)
df_train, df_valid, df_test = make_data(num_samples=10000)

optimizer_config_fttransformer = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
model_config_fttransformer = FTTransformerConfig(task = "regression",
                                                  learning_rate=1e-4,
                                                  seed = 42,
                                                  input_embed_dim = 40,
                                                  num_heads = 10,
                                                  num_attn_blocks = 20,
                                                  ff_dropout = 0.2,
                                                  out_ff_layers = "256-128-128",
                                                  out_ff_activation = "LeakyReLU",
                                                  attn_dropout=0.2,
                                                  embedding_dropout = 0.2,
                                                  #out_ff_activation = "ReLU",
                                                  out_ff_initialization="kaiming",
                                                  batch_norm_continuous_input=False
                                              )

pred_fttransformer = setting_model_and_getting_predictions(model_config = model_config_fttransformer,
                                                             optimizer_config = optimizer_config_fttransformer)

plt.figure(figsize = (15,12))
plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')

# Derive the ensemble size from the result instead of repeating the helper's default.
N_in_ensemble = len(pred_fttransformer)
for i in range(N_in_ensemble):
  # The legend names the activation actually configured above (LeakyReLU).
  plt.scatter(pred_fttransformer[i]['col1'], pred_fttransformer[i]['target_prediction'], label = 'FT-Transformer prediction (with LeakyReLU activation. {})'.format(i))


plt.xlabel("X", fontdict = font)
# Raw string: "\e" is not a valid Python escape sequence.
plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font)
plt.title("Heteroscedastic and continuous dataset. FT-Transformer", fontdict = font_title)
plt.legend(fontsize = 14);

TabTransformer

In [None]:
# TabTransformer ensemble on the single-piece dataset (10000 samples)
set_total_seed(seed = 42)
df_train, df_valid, df_test = make_data(num_samples=10000)

optimizer_config_tabtransformer = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
model_config_tabtransformer = TabTransformerConfig(task = "regression", loss = "MSELoss")

pred_tab_transformer = setting_model_and_getting_predictions(model_config = model_config_tabtransformer,
                                                             optimizer_config = optimizer_config_tabtransformer)

plt.figure(figsize = (15,12))
plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')

# Derive the ensemble size from the result instead of repeating the helper's default.
N_in_ensemble = len(pred_tab_transformer)
for i in range(N_in_ensemble):
  plt.scatter(pred_tab_transformer[i]['col1'], pred_tab_transformer[i]['target_prediction'], label = 'TabTransformer prediction (with ReLU activation. {})'.format(i))


plt.xlabel("X", fontdict = font)
# Raw string: "\e" is not a valid Python escape sequence.
plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font)
plt.title("Heteroscedastic and continuous dataset. TabTransformer", fontdict = font_title)
plt.legend(fontsize = 14);

In [None]:
# AutoInt ensemble on the single-piece dataset (10000 samples)
set_total_seed(seed = 42)
df_train, df_valid, df_test = make_data(num_samples=10000)

optimizer_config_autoint = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
model_config_autoint = AutoIntConfig(task = "regression" )

pred_autoint = setting_model_and_getting_predictions(model_config = model_config_autoint,
                                                           optimizer_config = optimizer_config_autoint)

plt.figure(figsize = (15,12))
plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')

# Derive the ensemble size from the result instead of repeating the helper's default.
N_in_ensemble = len(pred_autoint)
for i in range(N_in_ensemble):
  plt.scatter(pred_autoint[i]['col1'], pred_autoint[i]['target_prediction'], label = 'AutoInt prediction (with ReLU activation. {})'.format(i))


plt.xlabel("X", fontdict = font)
# Raw string: "\e" is not a valid Python escape sequence.
plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font)
plt.title("Heteroscedastic and continuous dataset. AutoInt", fontdict = font_title)
plt.legend(fontsize = 14);





In [None]:
# NODE ensemble on the single-piece dataset (10000 samples)

set_total_seed(seed = 42)
df_train, df_valid, df_test = make_data(num_samples=10000)

optimizer_config_node = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
model_config_node = NodeConfig(task = "regression")

pred_node = setting_model_and_getting_predictions(model_config = model_config_node,
                                                  optimizer_config = optimizer_config_node)

plt.figure(figsize = (15,12))
plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')

# Derive the ensemble size from the result instead of repeating the helper's default.
N_in_ensemble = len(pred_node)
for i in range(N_in_ensemble):
  plt.scatter(pred_node[i]['col1'], pred_node[i]['target_prediction'], label = 'Node prediction (with ReLU activation. {})'.format(i))


plt.xlabel("X", fontdict = font)
# Raw string: "\e" is not a valid Python escape sequence.
plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font)
plt.title("Heteroscedastic and continuous dataset. Node", fontdict = font_title)
plt.legend(fontsize = 14);





In [None]:
# def setting_model_and_getting_predictions_dif_seeds(N_in_ensemble = 7, epochs = 10, 
#                                                     batch_size = 32,
#                                                     num_col_names = ['col1'],
#                                                     cat_col_names = ['cat1'], 
#                                                     auto_lr_find_for_trainer_config = False,
#                                                     early_stopping_patience_for_trainer_config = 5,
#                                                     GPU = -1):
  
#   pred_df = []
#   for seed in np.arange(N_in_ensemble):
#     # set seed
#     set_total_seed(seed = seed)
    
#     # Data settings
#     data_config = DataConfig(
#       target=['target'],
#       continuous_cols=num_col_names,
#       categorical_cols=cat_col_names)
    
#     # Optimizer configuration
#     optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})

#     # define trainer configuration
#     trainer_config = TrainerConfig(
#       auto_lr_find = auto_lr_find_for_trainer_config, # Runs the LRFinder to automatically derive a learning rate
#       batch_size = batch_size,
#       max_epochs = epochs,
#       early_stopping_patience = early_stopping_patience_for_trainer_config,
#       gpus=GPU,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
#     )

    

#     # construct model
#     tabular_model = TabularModel(
#         data_config = data_config,
#         model_config = model_config,
#         optimizer_config = optimizer_config,
#         trainer_config = trainer_config
#     )

#     #make predictions

#     tabular_model.fit(train = df_train, validation=df_valid)
#     pred_df.append(tabular_model.predict(df_test, ret_logits=False))

#   return pred_df

Experiment 1 (FT-Transformer, AutoInt, TabTransformer)

In [None]:

# data making
df_train, df_valid, df_test = make_data(num_samples=10000)
###### Parameters
N_in_ensemble = 7
epochs = 10
batch_size = 32
num_col_names = ['col1']
cat_col_names = ['cat1']
auto_lr_find_for_trainer_config = False
early_stopping_patience_for_trainer_config = 5
GPU = -1

######



# One prediction frame per seed, for each of the three architectures.
pred_df_fttransformer = []
pred_df_autoint = []
pred_df_tabtransformer = []


# Seeds 1..N_in_ensemble: tying the seed list to N_in_ensemble keeps the
# prediction lists exactly as long as the plotting cell expects.
for seed in range(1, N_in_ensemble + 1):
    
    # set seed
    #set_total_seed(seed = seed)
    
    # Data settings
    data_config = DataConfig(
      target=['target'],
      continuous_cols=num_col_names,
      categorical_cols=cat_col_names)
    
    # Optimizer configuration
    optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})

    # Model config (the seed is the only thing that varies between ensemble members)
    model_config_ftt_ransfomer = FTTransformerConfig(task = "regression",
                                                    learning_rate=1e-4,
                                                    seed = seed,
                                                    input_embed_dim = 40,                                          
                                                    num_heads = 10,
                                                    num_attn_blocks = 20,
                                                    ff_dropout = 0.2,
                                                    out_ff_layers = "256-128-128",
                                                    out_ff_activation = "LeakyReLU",
                                                    attn_dropout=0.2,
                                                    embedding_dropout = 0.2,
                                                    #out_ff_activation = "ReLU",
                                                    out_ff_initialization="kaiming",
                                                    batch_norm_continuous_input=False)
    
    model_config_autoint = AutoIntConfig(task = "regression", seed = seed)
    model_config_tabtransformer = TabTransformerConfig(task = "regression", loss = "MSELoss", seed = seed)


    # define trainer configuration
    trainer_config = TrainerConfig(
      auto_lr_find = auto_lr_find_for_trainer_config, # Runs the LRFinder to automatically derive a learning rate
      batch_size = batch_size,
      max_epochs = epochs,
      early_stopping_patience = early_stopping_patience_for_trainer_config,
      gpus=GPU,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
    )

    

    # construct model
    tabular_model_fttransformer = TabularModel(
        data_config = data_config,
        model_config = model_config_ftt_ransfomer,
        optimizer_config = optimizer_config,
        trainer_config = trainer_config
    )

    tabular_model_autoint = TabularModel(
        data_config = data_config,
        model_config = model_config_autoint,
        optimizer_config = optimizer_config,
        trainer_config = trainer_config
    )

    tabular_model_tabtransformer = TabularModel(
        data_config = data_config,
        model_config = model_config_tabtransformer,
        optimizer_config = optimizer_config,
        trainer_config = trainer_config
    )

    
    # fit (full train / validation frames; only the seed differs per member)
    tabular_model_fttransformer.fit(train = df_train, validation=df_valid)
    tabular_model_autoint.fit(train = df_train, validation=df_valid)
    tabular_model_tabtransformer.fit(train = df_train, validation=df_valid)

    # make predictions
    pred_df_fttransformer.append(tabular_model_fttransformer.predict(df_test, ret_logits=False))
    pred_df_autoint.append(tabular_model_autoint.predict(df_test, ret_logits=False))
    pred_df_tabtransformer.append(tabular_model_tabtransformer.predict(df_test, ret_logits=False))

In [None]:
# Plotting: one panel per architecture, true test points in black with every
# ensemble member's predictions drawn on top.

# (panel title, list of prediction frames, legend label template)
experiment_1_panels = [
    ('FT-Transformer', pred_df_fttransformer, 'FT-Transformer prediction (with LeakyReLU activation. {})'),
    ('AutoInt', pred_df_autoint, 'AutoInt prediction (with ReLU activation. {})'),
    ('TabTransformer', pred_df_tabtransformer, 'TabTransformer prediction (with ReLU activation. {})'),
]

# Set figure size
plt.figure(figsize=(22, 12))

# Single row of three panels
for panel_position, (panel_title, panel_predictions, label_template) in enumerate(experiment_1_panels, start = 1):
  plt.subplot(1, 3, panel_position)
  if panel_position == 1:
    # Only the left-most panel carries the y label.
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font)
  plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')
  # Iterate over the stored predictions themselves so the panel never indexes
  # past the list when N_in_ensemble has been changed by another cell.
  for i, member in enumerate(panel_predictions):
    plt.scatter(member['col1'], member['target_prediction'], label = label_template.format(i))
  plt.legend(fontsize = 12);
  plt.title(panel_title);

In [None]:
def c(y_pred, y_true):
    """Heteroscedastic ("aleatoric") regression loss.

    Computes ``0.5 * (mean(exp(-s) * (y - mu)**2) + mean(s))`` where
    ``mu = y_pred[:, 0]``, ``s = y_pred[:, 1]`` and ``y = y_true[:, 0]``.
    With ``s`` read as a log-variance this is the Gaussian negative
    log-likelihood up to an additive constant.

    Args:
        y_pred: 2-D tensor; column 0 is the point prediction, column 1 the
            uncertainty term ``s``.
        y_true: 2-D tensor; column 0 holds the targets.

    Returns:
        Scalar tensor with the loss value.
    """
    squared_error = torch.pow(y_true[:, 0] - y_pred[:, 0], 2)
    # exp(-s) down-weights the squared error where the model predicts high
    # uncertainty; the mean(s) term below penalises inflating s everywhere.
    inv_var = torch.exp(-y_pred[:, 1])
    weighted_mse = torch.mean(inv_var * squared_error)
    reg = torch.mean(y_pred[:, 1])
    return 0.5 * (weighted_mse + reg)

In [None]:
# simpler data

# aleatoric loss function
def aleatoric_loss(y_pred, y_true):
    """Heteroscedastic ("aleatoric") regression loss for torch tensors.

    Computes ``0.5 * (mean(exp(-s) * (y - mu)**2) + mean(s))`` where
    ``mu = y_pred[:, 0]``, ``s = y_pred[:, 1]`` and ``y = y_true[:, 0]``.
    With ``s`` read as a log-variance this is the Gaussian negative
    log-likelihood up to an additive constant.

    Args:
        y_pred: 2-D tensor; column 0 is the point prediction, column 1 the
            uncertainty term ``s``.
        y_true: 2-D tensor; column 0 holds the targets.

    Returns:
        Scalar tensor with the loss value.
    """
    squared_error = torch.pow(y_true[:, 0] - y_pred[:, 0], 2)
    # exp(-s) down-weights the squared error where the model predicts high
    # uncertainty; the mean(s) term below penalises inflating s everywhere.
    inv_var = torch.exp(-y_pred[:, 1])
    weighted_mse = torch.mean(inv_var * squared_error)
    reg = torch.mean(y_pred[:, 1])
    return 0.5 * (weighted_mse + reg)


# Trains one FT-Transformer, one AutoInt and one TabTransformer per seed with
# `aleatoric_loss` set on each model config, and collects the test-set
# predictions of every run in the three `pred_df_*` lists below.

# data making
df_train, df_valid, df_test = make_data(num_samples=10000)
###### Parameters
# NOTE(review): N_in_ensemble is not used by the loop in this cell (the seeds are
# hard-coded as [1,2,3]); keep the two in sync if either is changed.
N_in_ensemble = 3
epochs = 10
batch_size = 64
num_col_names = ['col1']
cat_col_names = ['cat1']
auto_lr_find_for_trainer_config = False
early_stopping_patience_for_trainer_config = 5
GPU = -1

######



# One prediction frame is appended per seed, in seed order.
pred_df_fttransformer = []
pred_df_autoint = []
pred_df_tabtransformer = []


for seed in [1,2,3]:
    
    # set seed
    # Global seeding is disabled; the seed is only passed through the model configs.
    #set_total_seed(seed = seed)
    
    # Data settings
    data_config = DataConfig(
      target=['target'],
      continuous_cols=num_col_names,
      categorical_cols=cat_col_names)
    
    # Optimizer configuration
    optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})

    # Model config
    # NOTE(review): `loss` receives a Python callable here. Confirm the config
    # accepts callables; a custom loss may instead need to be passed to
    # `TabularModel.fit` - TODO verify against the installed pytorch_tabular.
    # NOTE(review): aleatoric_loss reads y_pred[:, 1], i.e. it assumes the model
    # emits two output columns while only one target is configured - TODO confirm.
    model_config_ftt_ransfomer = FTTransformerConfig(task = "regression",
                                                    learning_rate=1e-4,
                                                    seed = seed,
                                                    input_embed_dim = 40,                                          
                                                    num_heads = 10,
                                                    num_attn_blocks = 20,
                                                    ff_dropout = 0.2,
                                                    out_ff_layers = "256-128-128",
                                                    out_ff_activation = "LeakyReLU",
                                                    attn_dropout=0.2,
                                                    embedding_dropout = 0.2,
                                                    #out_ff_activation = "ReLU",
                                                    out_ff_initialization="kaiming",
                                                    batch_norm_continuous_input=False,
                                                    loss = aleatoric_loss)
    
    # AutoInt and TabTransformer use library defaults apart from task, loss and seed.
    model_config_autoint = AutoIntConfig(task = "regression", loss = aleatoric_loss, seed = seed)
    model_config_tabtransformer = TabTransformerConfig(task = "regression", loss = aleatoric_loss, seed = seed)


    # define trainer configuration
    trainer_config = TrainerConfig(
      auto_lr_find = auto_lr_find_for_trainer_config, # Runs the LRFinder to automatically derive a learning rate
      batch_size = batch_size,
      max_epochs = epochs,
      early_stopping_patience = early_stopping_patience_for_trainer_config,
      gpus=GPU,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
    )

    

    # construct model
    # The three models share the same data, optimizer and trainer config objects.
    tabular_model_fttransformer = TabularModel(
        data_config = data_config,
        model_config = model_config_ftt_ransfomer,
        optimizer_config = optimizer_config,
        trainer_config = trainer_config
    )

    tabular_model_autoint = TabularModel(
        data_config = data_config,
        model_config = model_config_autoint,
        optimizer_config = optimizer_config,
        trainer_config = trainer_config
    )

    tabular_model_tabtransformer = TabularModel(
        data_config = data_config,
        model_config = model_config_tabtransformer,
        optimizer_config = optimizer_config,
        trainer_config = trainer_config
    )

    
    # fit
    tabular_model_fttransformer.fit(train = df_train, validation=df_valid)
    tabular_model_autoint.fit(train = df_train, validation=df_valid)
    tabular_model_tabtransformer.fit(train = df_train, validation=df_valid)

    # make predictions
    pred_df_fttransformer.append(tabular_model_fttransformer.predict(df_test, ret_logits=False))
    pred_df_autoint.append(tabular_model_autoint.predict(df_test, ret_logits=False))
    pred_df_tabtransformer.append(tabular_model_tabtransformer.predict(df_test, ret_logits=False))

Adding TabNet and NODE

In [None]:
# Trains one NODE and one TabNet regressor per seed on freshly generated data
# and collects the test-set predictions of every run in `pred_df_node` and
# `pred_df_tabnet`.
df_train, df_valid, df_test = make_data(num_samples=5000)


epochs = 10
batch_size = 16
num_col_names = ['col1']
cat_col_names = ['cat1']
auto_lr_find_for_trainer_config = False
early_stopping_patience_for_trainer_config = 5
GPU = -1

# One prediction frame is appended per seed, in seed order.
pred_df_tabnet = []
pred_df_node = []


# NOTE(review): the seed list is hard-coded; the plotting cell that consumes these
# lists indexes them with range(N_in_ensemble), so the two must stay in sync.
for seed in [1,2,3]:
    
    
    # Data settings
    data_config = DataConfig(
      target=['target'],
      continuous_cols=num_col_names,
      categorical_cols=cat_col_names)
    
    # Optimizer configuration
    optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})

    # Model config
    # No `loss` is given here, so both models use the library's default regression loss.
    model_config_node = NodeConfig(num_layers = 5, num_trees = 100, input_dropout = 0.2,
                                   depth = 10, task = "regression", seed = seed)
    model_config_tabnet = TabNetModelConfig(learning_rate = 0.001, n_d = 1, n_steps = 20,
                                            task = "regression", seed = seed)


    # define trainer configuration
    trainer_config = TrainerConfig(
      auto_lr_find = auto_lr_find_for_trainer_config, # Runs the LRFinder to automatically derive a learning rate
      batch_size = batch_size,
      max_epochs = epochs,
      early_stopping_patience = early_stopping_patience_for_trainer_config,
      gpus=GPU,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
    )

    

    # construct model
    # Both models share the same data, optimizer and trainer config objects.
    tabular_model_node = TabularModel(
        data_config = data_config,
        model_config = model_config_node,
        optimizer_config = optimizer_config,
        trainer_config = trainer_config
    )

    tabular_model_tabnet = TabularModel(
        data_config = data_config,
        model_config = model_config_tabnet,
        optimizer_config = optimizer_config,
        trainer_config = trainer_config
    )


    
    # fit
    tabular_model_node.fit(train = df_train, validation=df_valid)
    tabular_model_tabnet.fit(train = df_train, validation=df_valid)
    

    # make predictions
    pred_df_tabnet.append(tabular_model_tabnet.predict(df_test, ret_logits=False))
    pred_df_node.append(tabular_model_node.predict(df_test, ret_logits=False))
    

In [None]:
# Plotting: one panel per model family; every ensemble member is overlaid on
# the ground-truth test targets.
N_in_ensemble = 3
# Set figure size
plt.figure(figsize=(22, 12))

# (panel title, legend prefix, list of per-seed prediction frames)
ensemble_panels = [
    ('Node', 'Node', pred_df_node),
    ('Tabnet', 'Tabnet', pred_df_tabnet),
]

# First row
for panel_idx, (panel_title, legend_prefix, ensemble_preds) in enumerate(ensemble_panels, start=1):
    plt.subplot(1, 2, panel_idx)
    if panel_idx == 1:
        # Raw string: "\e" is not a valid Python escape sequence, so the LaTeX
        # command must not be written in a normal string literal.
        plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font)
    plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')
    for i in range(N_in_ensemble):
        plt.scatter(ensemble_preds[i]['col1'], ensemble_preds[i]['target_prediction'],
                    label = '{} prediction (with ReLU activation. {})'.format(legend_prefix, i))
    plt.legend(fontsize = 12)
    plt.title(panel_title)


In [None]:
# Data definition
# (The extra plt.figure(...) that used to open this cell only produced an empty
# figure; the plotting section below creates the figure that is actually drawn.)
df_train, df_valid, df_test = make_data(num_samples=1000)

N_in_ensemble = 7

# Set configurations
model_config_ftt_ransfomer = FTTransformerConfig(task = "regression",
                                          learning_rate=1e-4,
                                          input_embed_dim = 40,
                                          num_heads = 10,
                                          num_attn_blocks = 20,
                                          ff_dropout = 0.2,
                                          out_ff_layers = "256-128-128",
                                          out_ff_activation = "LeakyReLU",
                                          attn_dropout=0.2,
                                          embedding_dropout = 0.2,
                                          #out_ff_activation = "ReLU",
                                          out_ff_initialization="kaiming",
                                          batch_norm_continuous_input=False)

model_config_autoint = AutoIntConfig(task = "regression")
model_config_tabtransformer = TabTransformerConfig(task = "regression", loss = "MSELoss")

# Predictions
# NOTE(review): the helper is defined earlier in the notebook; the plotting loop
# below assumes it returns a sequence with at least N_in_ensemble prediction
# frames - TODO confirm.
pred_ftt_ransformer = setting_model_and_getting_predictions_dif_seeds(model_config = model_config_ftt_ransfomer)
pred_autoint = setting_model_and_getting_predictions_dif_seeds(model_config = model_config_autoint)
pred_tab_transformer = setting_model_and_getting_predictions_dif_seeds(model_config = model_config_tabtransformer)



# Plotting: one panel per model family; every ensemble member is overlaid on
# the ground-truth test targets.

# Set figure size
plt.figure(figsize=(22, 12))

# (panel title / legend prefix, list of per-seed prediction frames)
ensemble_panels = [
    ('FT-Transformer', pred_ftt_ransformer),
    ('AutoInt', pred_autoint),
    ('TabTransformer', pred_tab_transformer),
]

# First row
for panel_idx, (model_name, ensemble_preds) in enumerate(ensemble_panels, start=1):
    plt.subplot(1, 3, panel_idx)
    if panel_idx == 1:
        # Raw string: "\e" is not a valid Python escape sequence, so the LaTeX
        # command must not be written in a normal string literal.
        plt.ylabel(r"$Y = 10X+6X^{2}-1.5X^{3} + X^{2} *  sin(3X)*\epsilon$", fontdict = font)
    plt.scatter(df_test['col1'], df_test['target'], label = 'True', color = 'black')
    for i in range(N_in_ensemble):
        plt.scatter(ensemble_preds[i]['col1'], ensemble_preds[i]['target_prediction'],
                    label = '{} prediction (with ReLU activation. {})'.format(model_name, i))
    plt.legend(fontsize = 12)
    plt.title(model_name)



Epistemic uncertainty

In [None]:
from tensorflow.keras import backend as K
import tensorflow as tf

# aleatoric loss function (Keras backend version)
def aleatoric_loss(y_true, y_pred):
    """Heteroscedastic ("aleatoric") regression loss written with the Keras backend.

    Computes ``0.5 * (mean(exp(-s) * (y - mu)**2) + mean(s))`` where
    ``mu = y_pred[:, 0]``, ``s = y_pred[:, 1]`` and ``y = y_true[:, 0]``.

    NOTE: the argument order is ``(y_true, y_pred)``, the reverse of the
    torch ``aleatoric_loss(y_pred, y_true)`` defined earlier in this notebook.
    Running this cell rebinds the name, so any later use of ``aleatoric_loss``
    gets this Keras version.

    Args:
        y_true: 2-D tensor; column 0 holds the targets.
        y_pred: 2-D tensor; column 0 is the point prediction, column 1 the
            uncertainty term ``s``.

    Returns:
        Scalar tensor with the loss value.
    """
    squared_error = K.pow((y_true[:,0]-y_pred[:,0]),2)
    # exp(-s) down-weights the squared error where the predicted uncertainty
    # is high; the mean(s) term penalises inflating s everywhere.
    inv_var = K.exp(-y_pred[:,1])
    weighted_mse = K.mean(inv_var*squared_error)
    reg = K.mean(y_pred[:,1])
    return 0.5*(weighted_mse + reg)

In [None]:
def own_mse_loss(input, target, size_average=True):
    """Squared-error loss between ``input`` and ``target``.

    Args:
        input: predicted values.
        target: reference values, broadcastable against ``input``.
        size_average: when true the squared errors are averaged, otherwise
            they are summed.

    Returns:
        Scalar tensor holding the mean (or sum) of the element-wise squared
        differences.
    """
    reduce_fn = torch.mean if size_average else torch.sum
    return reduce_fn((input - target) ** 2)

In [None]:
#Homoskedastic and continuous

In [None]:
# FT - Transformer (new data)
num_col_names = ['col1']
cat_col_names = ['cat1']

# Synthetic 1-D regression data: sin(0.1 x) + 0.03 x, with a constant offset of
# 3 added to the second half of the samples (i.e. a step at the midpoint).
samples = 20000
x_data = np.linspace(0,100, samples)
step_offset = np.concatenate((np.zeros(samples // 2), 3*np.ones(samples // 2)))
y_data = step_offset + np.sin(0.1*x_data) + 0.03*x_data

plt.scatter(x_data, y_data);


# 75/25 train/hold-out split, then the hold-out part is split again into
# validation and test. Both splits are seeded so the cell is reproducible.
X_train, X_val, y_train, y_val = train_test_split(x_data, y_data, random_state = 42)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, random_state = 42)

# 'cat1' is a constant dummy categorical column.
df_train = pd.DataFrame({'col1':X_train.flatten(), 'cat1':0, 'target':y_train.flatten()})
df_valid = pd.DataFrame({'col1':X_val.flatten(), 'cat1':0, 'target':y_val.flatten()})
df_test = pd.DataFrame({'col1':X_test.flatten(), 'cat1':0, 'target':y_test.flatten()})



# FT-Transformer with a LeakyReLU output head.
model_config = FTTransformerConfig(
    task = "regression",
    learning_rate=1e-4,
    seed = 42,
    input_embed_dim = 32,
    num_heads = 16,
    num_attn_blocks = 10,
    ff_dropout = 0.2,
    out_ff_layers = "1024-512-256",
    out_ff_activation = "LeakyReLU",
    #out_ff_activation = "ReLU",
    out_ff_initialization="kaiming",
    batch_norm_continuous_input=False,
    #         target_range=[(df_train[col].min(),df_train[col].max()) for col in ['target']]
)


# NOTE(review): data_config, optimizer_config and trainer_config are not defined
# in this cell; whatever values an earlier cell left in the kernel are used.
tabular_model_leakeyrelu = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

tabular_model_leakeyrelu.fit(train=df_train, validation=df_valid)

pred_df_leakeyrelu = tabular_model_leakeyrelu.predict(df_test, ret_logits=False)
# A bare `.head()` in the middle of a cell is evaluated and discarded; display()
# is needed for the preview to actually be rendered.
display(pred_df_leakeyrelu.head())


# Same architecture with a ReLU output head. This rebinds `model_config`; the
# next cell builds its model from this object.
model_config = FTTransformerConfig(
    task = "regression",
    learning_rate=1e-4,
    seed = 42,
    input_embed_dim = 32,
    num_heads = 16,
    num_attn_blocks = 10,
    ff_dropout = 0.2,
    out_ff_layers = "1024-512-256",
    #out_ff_activation = "LeakyReLU",
    out_ff_activation = "ReLU",
    out_ff_initialization="kaiming",
    batch_norm_continuous_input=False,
    #         target_range=[(df_train[col].min(),df_train[col].max()) for col in ['target']]
)





In [None]:
# Fit the FT-Transformer described by the current `model_config` and predict on
# the test frame.
# NOTE(review): all four config objects come from earlier cells. `model_config`
# is rebound by several later cells, so this cell depends on execution order.
tabular_model_relu_fft = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

tabular_model_relu_fft.fit(train=df_train, validation=df_valid)

# ret_logits=False is passed explicitly; the plotting cells read the
# 'col1', 'target' and 'target_prediction' columns of the returned frame.
pred_df_fft = tabular_model_relu_fft.predict(df_test, ret_logits=False)
pred_df_fft.head()

In [None]:


# Overlay both FT-Transformer variants on the ground truth.
plt.figure(figsize=(15, 12))

# (frame, y column, legend label, colour) in drawing order.
scatter_specs = (
    (pred_df_fft, 'target_prediction', 'FT-Transform prediction (with ReLU activation)', 'red'),
    (pred_df_leakeyrelu, 'target_prediction', 'FT-Transform prediction (with LeakeyRelu activation)', 'blue'),
    (pred_df_fft, 'target', 'True', 'black'),
)
for frame, y_column, legend_label, colour in scatter_specs:
    plt.scatter(frame['col1'], frame[y_column], label=legend_label, color=colour)

plt.xlabel("X", fontdict=font)
plt.ylabel("Y = sin(0.1X) + 0.03X", fontdict=font)
plt.title("Homoskedastic and continuous dataset", fontdict=font_title)
plt.legend(fontsize=14);

TabNet

In [None]:
# TabNet
# Rebuilds the data/trainer/optimizer configs and fits a TabNet regressor with
# library-default hyper-parameters on df_train / df_valid.

epochs = 15
batch_size = 64
# Only referenced by the commented-out OneCycleLR configuration below.
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names,
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=False, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})

optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})


# Rebinds the notebook-wide `model_config` name.
model_config = TabNetModelConfig(
    task = "regression"
)



tabular_model_tabnet = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

tabular_model_tabnet.fit(train=df_train, validation=df_valid)

In [None]:
# Test-set predictions of the single TabNet model fitted above.
# Note: this rebinds `pred_df_tabnet`, which the earlier ensemble cell used for a
# list of per-seed frames; the ensemble plot cannot be re-run after this cell.
pred_df_tabnet = tabular_model_tabnet.predict(df_test, ret_logits=False)
pred_df_tabnet.head()

In [None]:
# Compare both FT-Transformer variants and TabNet against the ground truth.
plt.figure(figsize = (15,12))
plt.scatter(pred_df_fft['col1'], pred_df_fft['target_prediction'], label = 'FT-Transform prediction (with ReLU activation)', color = 'red')
plt.scatter(pred_df_leakeyrelu['col1'], pred_df_leakeyrelu['target_prediction'], label = 'FT-Transform prediction (with LeakeyRelu activation)', color = 'blue')
plt.scatter(pred_df_fft['col1'], pred_df_fft['target'], label = 'True', color = 'black')
plt.scatter(pred_df_tabnet['col1'], pred_df_tabnet['target_prediction'], label = 'TabNet prediction', color = 'green')
plt.xlabel("X", fontdict = font)
plt.ylabel("Y = sin(0.1X) + 0.03X", fontdict = font)
# Title typo fixed ("Hohoskedastic" -> "Homoskedastic").
plt.title("Homoskedastic and continuous dataset", fontdict = font_title)
plt.legend(fontsize = 14);

Node

In [None]:
# Node
# Rebuilds the data/trainer/optimizer configs, fits a NODE regressor with
# library-default hyper-parameters and predicts on the test frame.

epochs = 15
batch_size = 64
# Only referenced by the commented-out OneCycleLR configuration below.
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names,
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=False, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})

optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})


# Rebinds the notebook-wide `model_config` name.
model_config = NodeConfig(
    task = "regression"
)



tabular_model_node = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

tabular_model_node.fit(train=df_train, validation=df_valid)
# Rebinds `pred_df_node`, which the earlier ensemble cell used for a list of
# per-seed frames.
pred_df_node = tabular_model_node.predict(df_test, ret_logits=False)

In [None]:
# Compare FT-Transformer (both heads), TabNet and NODE against the ground truth.
plt.figure(figsize = (15,12))
plt.scatter(pred_df_fft['col1'], pred_df_fft['target_prediction'], label = 'FT-Transform prediction (with ReLU activation)', color = 'red')
plt.scatter(pred_df_leakeyrelu['col1'], pred_df_leakeyrelu['target_prediction'], label = 'FT-Transform prediction (with LeakeyRelu activation)', color = 'blue')
plt.scatter(pred_df_fft['col1'], pred_df_fft['target'], label = 'True', color = 'black')
plt.scatter(pred_df_tabnet['col1'], pred_df_tabnet['target_prediction'], label = 'TabNet prediction', color = 'green')
# NODE gets its own colour: 'red' is already taken by the FT-Transformer (ReLU)
# series, which made the two indistinguishable in the figure and legend.
plt.scatter(pred_df_node['col1'], pred_df_node['target_prediction'], label = 'Node prediction', color = 'orange')
plt.xlabel("X", fontdict = font)
plt.ylabel("Y = sin(0.1X) + 0.03X", fontdict = font)
# NOTE(review): the sibling figures are titled "continuous" while this one says
# "discontinuous" - confirm which wording is intended for this dataset.
plt.title("Homoskedastic and discontinuous dataset", fontdict = font_title)
plt.legend(fontsize = 14);

TabTransformer

In [None]:
# TabTransformer
# Rebuilds the data/trainer/optimizer configs, fits a TabTransformer regressor
# with library-default hyper-parameters and predicts on the test frame.


epochs = 15
batch_size = 64
# Only referenced by the commented-out OneCycleLR configuration below.
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names,
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=False, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})

optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})


# Rebinds the notebook-wide `model_config` name.
model_config = TabTransformerConfig(
    task = "regression"
)



tabular_model_tabtransformer = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

tabular_model_tabtransformer.fit(train=df_train, validation=df_valid)
# Rebinds `pred_df_tabtransformer`, which the earlier ensemble cell used for a
# list of per-seed frames.
pred_df_tabtransformer = tabular_model_tabtransformer.predict(df_test, ret_logits=False)


In [None]:
# Compare FT-Transformer (both heads), TabNet, NODE and TabTransformer against
# the ground truth.
plt.figure(figsize = (15,12))
plt.scatter(pred_df_fft['col1'], pred_df_fft['target_prediction'], label = 'FT-Transform prediction (with ReLU activation)', color = 'red')
plt.scatter(pred_df_leakeyrelu['col1'], pred_df_leakeyrelu['target_prediction'], label = 'FT-Transform prediction (with LeakeyRelu activation)', color = 'blue')
plt.scatter(pred_df_fft['col1'], pred_df_fft['target'], label = 'True', color = 'black')
plt.scatter(pred_df_tabnet['col1'], pred_df_tabnet['target_prediction'], label = 'TabNet prediction', color = 'green')
# NODE gets its own colour: 'red' is already taken by the FT-Transformer (ReLU)
# series, which made the two indistinguishable in the figure and legend.
plt.scatter(pred_df_node['col1'], pred_df_node['target_prediction'], label = 'Node prediction', color = 'orange')
plt.scatter(pred_df_tabtransformer['col1'], pred_df_tabtransformer['target_prediction'], label = 'TabTransformer prediction', color = 'yellow')
plt.xlabel("X", fontdict = font)
plt.ylabel("Y = sin(0.1X) + 0.03X", fontdict = font)
plt.title("Homoskedastic and continuous dataset", fontdict = font_title)
plt.legend(fontsize = 14);

AutoInt

In [None]:
# AutoInt
# Rebuilds the data/trainer/optimizer configs and fits an AutoInt regressor with
# library-default hyper-parameters on df_train / df_valid.

epochs = 15
batch_size = 64
# Only referenced by the commented-out OneCycleLR configuration below.
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names,
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=False, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})

optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})


# Rebinds the notebook-wide `model_config` name.
model_config = AutoIntConfig(
    task = "regression"
)



tabular_model_autoint = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

tabular_model_autoint.fit(train=df_train, validation=df_valid)



In [None]:
# Test-set predictions of the single AutoInt model fitted above.
# Note: this rebinds `pred_df_autoint`, which the earlier ensemble cell used for a
# list of per-seed frames; the ensemble plot cannot be re-run after this cell.
pred_df_autoint = tabular_model_autoint.predict(df_test, ret_logits=False)
pred_df_autoint.head()



In [None]:
# Final comparison: every model's test predictions, with the ground truth drawn
# last so it stays visible on top.
plt.figure(figsize = (15,12))
plt.scatter(pred_df_fft['col1'], pred_df_fft['target_prediction'], label = 'FT-Transform prediction (with ReLU activation)', color = 'red')
plt.scatter(pred_df_leakeyrelu['col1'], pred_df_leakeyrelu['target_prediction'], label = 'FT-Transform prediction (with LeakeyRelu activation)', color = 'blue')

plt.scatter(pred_df_tabnet['col1'], pred_df_tabnet['target_prediction'], label = 'TabNet prediction', color = 'green')
# NODE gets its own colour: 'red' is already taken by the FT-Transformer (ReLU)
# series, which made the two indistinguishable in the figure and legend.
plt.scatter(pred_df_node['col1'], pred_df_node['target_prediction'], label = 'Node prediction', color = 'orange')
plt.scatter(pred_df_tabtransformer['col1'], pred_df_tabtransformer['target_prediction'], label = 'TabTransformer prediction', color = 'yellow')
plt.scatter(pred_df_autoint['col1'], pred_df_autoint['target_prediction'], label = 'AutoInt prediction', color = 'pink')

plt.scatter(pred_df_fft['col1'], pred_df_fft['target'], label = 'True', color = 'black')
plt.xlabel("X", fontdict = font)
plt.ylabel("Y = sin(0.1X) + 0.03X", fontdict = font)
plt.title("Homoskedastic and continuous dataset", fontdict = font_title)
plt.legend(fontsize = 14);

In [None]:
def uncertainity_estimate(x, model, num_samples, l2, n_obs=None):
    """Monte-Carlo estimate of the predictive mean and standard deviation.

    ``model.predict(x, ret_logits=False)`` is evaluated ``num_samples`` times
    and the results are stacked column-wise. The per-row mean and variance
    over the repeated passes are computed, and ``1 / tau`` is added to the
    variance, with ``tau = l2 * (1 - dropout_rate) / (2 * N * decay)``.

    Args:
        x: model input, forwarded unchanged to ``model.predict``.
        model: object exposing ``predict(x, ret_logits=...)`` plus the
            attributes ``dropout_rate`` and ``decay``.
            NOTE(review): ``predict`` must return a torch tensor for
            ``.detach()`` to work - confirm against the model actually passed.
        num_samples: number of repeated forward passes.
        l2: numerator factor of ``tau`` (used exactly as given).
        n_obs: value of ``N`` in the ``tau`` formula (presumably the number
            of training observations - TODO confirm). When omitted, the
            module-level global ``N`` is used, as in the original code.

    Returns:
        Tuple ``(y_mean, y_std)`` of NumPy arrays, one entry per row.
    """
    n_train = N if n_obs is None else n_obs
    # Each pass is assumed to yield a column of shape (n_points, 1), so hstack
    # produces an (n_points, num_samples) array - TODO confirm output shape.
    # .cpu() makes the conversion work for tensors living on a GPU as well.
    outputs = np.hstack([model.predict(x, ret_logits=False).detach().cpu().numpy()
                         for _ in range(num_samples)])
    y_mean = outputs.mean(axis=1)
    y_variance = outputs.var(axis=1)
    tau = l2 * (1. - model.dropout_rate) / (2. * n_train * model.decay)
    y_variance += (1. / tau)
    y_std = np.sqrt(y_variance)
    return y_mean, y_std


# Normalise data with the training-set statistics.
x_mean, x_std = df_train['col1'].mean(), df_train['col1'].std()
y_mean, y_std = df_train['target'].mean(), df_train['target'].std()
x_obs = (df_train['col1'] - x_mean) / x_std
y_obs = (df_train['target'] - y_mean) / y_std
# Sort the validation rows by x first: the mean/true curves and fill_between
# below are drawn as lines, which only make sense over monotonically ordered x.
df_valid_sorted = df_valid.sort_values('col1')
x_test = (df_valid_sorted['col1'] - x_mean) / x_std
y_test = (df_valid_sorted['target'] - y_mean) / y_std

iters_uncertainty = 200

lengthscale = 0.01

# Fall back to the CPU when no CUDA device is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_std = 2 # number of standard deviations to plot
# All arguments are passed by keyword: the original call placed positional
# arguments after `model=...`, which is a SyntaxError.
# NOTE(review): `tabular_model` is not defined in this part of the notebook -
# confirm which fitted model is meant and that it exposes what
# uncertainity_estimate needs (dropout_rate, decay, tensor outputs).
# From here on y_mean / y_std hold the per-point predictive mean and std, not
# the scalar normalisation statistics computed above.
y_mean, y_std = uncertainity_estimate(x = torch.Tensor(x_test.to_numpy()).view(-1,1).to(device),
                                      model = tabular_model,
                                      num_samples = iters_uncertainty,
                                      l2 = lengthscale)

plt.figure(figsize=(12,6))
plt.plot(x_obs, y_obs, ls="none", marker="o", color="0.1", alpha=0.8, label="observed")
plt.plot(x_test, y_mean, ls="-", color="b", label="mean")
plt.plot(x_test, y_test, ls='--', color='r', label='true')
# Shade +/- 1 ... n_std standard deviations around the predictive mean.
for i in range(n_std):
    plt.fill_between( x_test,
        y_mean - y_std * ((i+1.)),
        y_mean + y_std * ((i+1.)),
        color="b",
        alpha=0.1)
plt.legend()
plt.grid()

In [None]:
def uncertainity_estimate(x, model, num_samples, l2, n_obs=None):
    """Monte-Carlo estimate of the predictive mean and standard deviation.

    ``model(x)`` is evaluated ``num_samples`` times and the results are
    stacked column-wise. The per-row mean and variance over the repeated
    passes are computed, and ``1 / tau`` is added to the variance, with
    ``tau = l2 * (1 - dropout_rate) / (2 * N * decay)``.

    Args:
        x: model input, forwarded unchanged to ``model``.
        model: callable returning a torch tensor and exposing the attributes
            ``dropout_rate`` and ``decay``.
        num_samples: number of repeated forward passes.
        l2: numerator factor of ``tau`` (used exactly as given).
        n_obs: value of ``N`` in the ``tau`` formula (presumably the number
            of training observations - TODO confirm). When omitted, the
            module-level global ``N`` is used, as in the original code.

    Returns:
        Tuple ``(y_mean, y_std)`` of NumPy arrays, one entry per row.
    """
    n_train = N if n_obs is None else n_obs
    # Each pass is assumed to yield a column of shape (n_points, 1), so hstack
    # produces an (n_points, num_samples) array - TODO confirm output shape.
    outputs = np.hstack([model(x).cpu().detach().numpy() for _ in range(num_samples)])
    y_mean = outputs.mean(axis=1)
    y_variance = outputs.var(axis=1)
    tau = l2 * (1. - model.dropout_rate) / (2. * n_train * model.decay)
    y_variance += (1. / tau)
    y_std = np.sqrt(y_variance)
    return y_mean, y_std