In [None]:
!pip install tensorflow
!pip install pytorch-widedeep

In [None]:
from __future__ import print_function

import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("ggplot")
from matplotlib.pyplot import imshow
%matplotlib inline


# PyTorch (the pytorch-widedeep models are imported further down)
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader

import sklearn.datasets as dt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


import warnings
warnings.filterwarnings("ignore")


from pytorch_widedeep.models import FTTransformer, SAINT, TabFastFormer, TabPerceiver, TabTransformer

from tqdm import tqdm
import random

def set_total_seed(seed = 42):
    """Seed every random number generator used in this notebook.

    The same value is handed to Python's ``random`` module, NumPy's global
    generator and PyTorch's default generator.

    Args:
        seed: Integer seed shared by all three generators (default 42).

    Returns:
        None.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

#functions

def epistemic_uncertainty(model, X_test, x_train, y_train, T):
    """Plot repeated-forward-pass predictions on top of the training data.

    ``model`` is evaluated ``T`` times on ``X_test``; the per-sample mean and
    variance over those passes are drawn as blue points with error bars, and
    the training points are drawn as a black scatter plot. The passes only
    differ if the model itself is stochastic (e.g. dropout left active); this
    function does not change the model's mode.

    Args:
        model: Callable returning one prediction per row of ``X_test``.
        X_test: Inputs to predict on. They are also used as the x positions of
            the error bars, so they must be one-dimensional or have a single
            feature column for the plot to work.
        x_train: x positions of the training points to scatter.
        y_train: Targets of the training points to scatter.
        T: Number of forward passes.

    Returns:
        None; the function only draws on the current matplotlib axes.
    """
    # Reuse the sampling routine instead of duplicating the loop here.
    predictive_mean, predictive_variance = epistemic_uncertainty_wo_image(
        model, X_test, x_train, y_train, T
    )

    # Plot against the inputs that were actually passed in; the previous
    # version silently read a global named `x_test` instead of `X_test`.
    if torch.is_tensor(X_test):
        x_plot = X_test.detach().cpu().numpy()
    else:
        x_plot = np.asarray(X_test)
    if x_plot.ndim == 2 and x_plot.shape[1] == 1:
        x_plot = x_plot[:, 0]  # (N, 1) model input -> (N,) plot positions

    plt.scatter(x_train, y_train, color='black')
    # Error bars show the variance across passes (not the standard deviation).
    plt.errorbar(x_plot, predictive_mean, yerr=predictive_variance, fmt='.', color="blue")

def epistemic_uncertainty_wo_image(model, X_test, x_train, y_train, T):
    """Return the mean and variance of ``T`` forward passes, without plotting.

    Each pass evaluates ``model(X_test)`` and flattens the result to one value
    per row. The statistics are taken over the passes, so they only reflect
    model uncertainty if the model is stochastic between calls.

    Args:
        model: Callable returning exactly one value per row of ``X_test``.
        X_test: Inputs forwarded to ``model``.
        x_train: Unused; accepted so the signature matches the plotting variant.
        y_train: Unused; accepted so the signature matches the plotting variant.
        T: Number of forward passes.

    Returns:
        Tuple ``(predictive_mean, predictive_variance)`` of NumPy arrays with
        one entry per row of ``X_test``.
    """
    passes = []
    for _ in tqdm(range(T)):
        prediction = model(X_test).detach()
        passes.append(prediction.view(prediction.shape[0]))

    stacked = np.array([single_pass.detach().numpy() for single_pass in passes])

    return stacked.mean(axis=0), stacked.var(axis=0)

def aleatoric_loss(y_true, y_pred):
    """Regression loss for a model that predicts a value and its uncertainty.

    Column 0 of ``y_pred`` is the predicted value. Column 1 acts as a
    log-variance term ``s``: the squared error is weighted by ``exp(-s)`` and
    ``s`` itself is added as a penalty, giving

        0.5 * (mean(exp(-s) * (y - value)^2) + mean(s))

    Args:
        y_true: Tensor whose column 0 holds the targets.
        y_pred: Tensor with the predicted value in column 0 and ``s`` in
            column 1.

    Returns:
        Scalar tensor with the loss.
    """
    predicted_value, log_variance = y_pred[:, 0], y_pred[:, 1]
    squared_error = (y_true[:, 0] - predicted_value) ** 2
    weighted_error = (torch.exp(-log_variance) * squared_error).mean()
    variance_penalty = log_variance.mean(dim=0)
    return 0.5 * (weighted_error + variance_penalty)

def pred_mean_and_epistemic_std(model, X_test, x_train, y_train, T):
    """Return the mean prediction and its spread over ``T`` forward passes.

    Only column 0 of the model output (the predicted value) is used. The
    spread is the standard deviation across the passes, as the function name
    states; the previous version returned the variance, which is not in the
    same units as the value returned by ``aleatoric_std``. The passes only
    differ if the model is stochastic between calls (e.g. dropout left
    active); this function does not change the model's mode.

    Args:
        model: Callable returning a 2-D tensor whose column 0 is the
            predicted value.
        X_test: Inputs forwarded to ``model``.
        x_train: Unused; kept for backward compatibility with callers.
        y_train: Unused; kept for backward compatibility with callers.
        T: Number of forward passes.

    Returns:
        Tuple ``(predictive_mean, predictive_std)`` of NumPy arrays with one
        entry per row of ``X_test``.
    """
    value_samples = []
    for _ in tqdm(range(T)):
        # Column 0 is the predicted value; the other output column is ignored.
        value_samples.append(model(X_test).detach()[:, 0].numpy())
    stacked = np.array(value_samples)

    predictive_mean = np.mean(stacked, axis=0)
    predictive_std = np.std(stacked, axis=0)

    return predictive_mean, predictive_std


def aleatoric_std(model, X_test, T=40):
    """Estimate the aleatoric standard deviation predicted by ``model``.

    Column 1 of the model output is averaged over ``T`` forward passes and
    mapped through ``exp(0.5 * value)``, i.e. it is treated as a log-variance.

    Args:
        model: Callable returning a 2-D tensor whose column 1 holds the
            log-variance term.
        X_test: Inputs forwarded to ``model``.
        T: Number of forward passes (default 40).

    Returns:
        NumPy array with one standard deviation per row of ``X_test``.
    """
    log_variance_samples = np.array(
        [model(X_test).detach()[:, 1].numpy() for _ in range(T)]
    )
    return np.exp(0.5 * log_variance_samples.mean(axis=0))

In [None]:
# parameters
n = 6000
test_n = 1000
seed = 11
rand_state = 11
test_rand_state = 13

# Seed python / numpy / torch so the np.random draws below are reproducible
# (previously `seed` was defined but never used).
set_total_seed(seed)


### auxiliary features
# Synthetic regression / Friedman data sets. Only their feature matrices are
# used as extra model inputs below; the targets are not used in this cell.
x,y = dt.make_regression(n_samples=n,
                         n_features=4,
                         noise=0,
                         random_state=rand_state)

x_1,y_1 = dt.make_friedman1(n_samples=n,n_features=5,random_state=rand_state)
x_2,y_2 = dt.make_friedman2(n_samples=n,random_state=rand_state)
x_3,y_3 = dt.make_friedman3(n_samples=n,random_state=rand_state)

x_t,y_t = dt.make_regression(n_samples=test_n,
                         n_features=4,
                         noise=0,
                         random_state=test_rand_state)

x_t1,y_t1 = dt.make_friedman1(n_samples=test_n, n_features=5, random_state=test_rand_state)
x_t2,y_t2 = dt.make_friedman2(n_samples=test_n, random_state=test_rand_state)
x_t3,y_t3 = dt.make_friedman3(n_samples=test_n, random_state=test_rand_state)


### 1-D training signal: three separated x-regions with noisy targets
n_per_region = n // 3
assert 3 * n_per_region == n, "n must be divisible by the number of x-regions (3)"

x_train_poly = np.random.uniform(-7, -2, n_per_region)
y_train_poly = np.random.normal(scale=.05, size=n_per_region) + 3

# NOTE: the "+ 3" is applied to the whole concatenated array each time, so the
# three regions end up centred at 9, 6 and 3 respectively.
x_train_poly = np.concatenate([x_train_poly, np.random.uniform(5, 10, n_per_region)])
y_train_poly = np.concatenate([y_train_poly, np.random.normal(scale=.1, size=n_per_region)]) + 3

x_train_poly = np.concatenate([x_train_poly, np.random.uniform(15, 20, n_per_region)])
y_train_poly = np.concatenate([y_train_poly, np.random.normal(scale=.1, size=n_per_region)]) + 3


### training matrix: [scaled auxiliary features | x_poly | cat_0 | cat_1]
train_cat_0 = np.repeat([2, 0, 1], n_per_region)   # region code per row
train_cat_1 = np.zeros(n, dtype=np.int64)          # constant second category

# Standard scaling: statistics are estimated on the training features only and
# reused for the test features, so both sets share the same scale.
aux_train = np.hstack([x, x_1, x_2, x_3])
scaler = StandardScaler().fit(aux_train)

train_data = torch.tensor(
    np.column_stack([scaler.transform(aux_train), x_train_poly, train_cat_0, train_cat_1])
)

x_train = train_data
y_train = y_train_poly


### test matrix with the same column layout
x_test_poly = np.linspace(-10, 25, test_n)
test_region_sizes = [test_n // 3, test_n // 3, test_n - 2 * (test_n // 3)]  # 333, 333, 334
test_cat_0 = np.repeat([2, 0, 1], test_region_sizes)
test_cat_1 = np.zeros(test_n, dtype=np.int64)

aux_test = np.hstack([x_t, x_t1, x_t2, x_t3])
x_test = torch.tensor(
    np.column_stack([scaler.transform(aux_test), x_test_poly, test_cat_0, test_cat_1])
)

assert x_train.shape[0] == n and x_test.shape[0] == test_n
assert x_train.shape[1] == x_test.shape[1], "train and test must share the column layout"


plt.scatter(x_train_poly, y_train_poly);
# Training hyper-parameters
verbose = False
EPOCHS = 40
BATCH_SIZE = 16

# Fix the torch random number generator before the model is created.
torch.manual_seed(42)


# Column layout of the model input: 18 continuous columns followed by
# 2 categorical columns.
continuous_cols = [f"num_{i}" for i in range(1, 19)]
categorical_cols = ['cat_0', 'cat_1']
colnames = continuous_cols + categorical_cols

# (column name, number of categories) pairs.
# NOTE(review): both columns are declared with a single category — confirm
# this matches the codes stored in the data.
cat_embed_input = [(col, 1) for col in categorical_cols]
column_idx = {col: position for position, col in enumerate(colnames)}


# Dropout is only enabled in the feed-forward part (ff_dropout = 0.1).
# The last MLP width is 2, matching the two output columns that
# `aleatoric_loss` reads — TODO confirm against the widedeep docs.
model = FTTransformer(
    column_idx=column_idx,
    cat_embed_input=cat_embed_input,
    continuous_cols=continuous_cols,
    n_blocks=2,
    n_heads=8,
    input_dim=32,
    cat_embed_dropout=0.0,
    cont_embed_dropout=0.0,
    attn_dropout=0.0,
    ff_dropout=0.1,
    mlp_activation="leaky_relu",
    mlp_dropout=0.0,
    mlp_hidden_dims=[64, 32, 16, 2],
)


# Optimizer
optimizer = Adam(model.parameters(), lr=1e-4)


# Data set and loader.
# NOTE(review): shuffle is off, so batches follow the row order of x_train.
dataset = TensorDataset(torch.tensor(x_train), torch.tensor(y_train))
data_train = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=False)

# Device and per-epoch loss log (the loss itself is `aleatoric_loss`).
device = 'cpu'
loss_dict = {}

# Run the training loop for EPOCHS epochs.
LOG_EVERY = 10  # mini-batches between progress messages when `verbose` is on

for epoch in tqdm(range(EPOCHS)):

    if verbose:
        print(f'Starting epoch {epoch+1}')

    # Two separate accumulators: the epoch total that is stored in `loss_dict`
    # and a short running window used only for progress messages. Previously a
    # single counter served both purposes, so switching `verbose` on changed
    # what ended up in `loss_dict`.
    epoch_loss = 0.0
    running_loss = 0.0

    # Iterate over the DataLoader for training data
    for i, (inputs, targets) in enumerate(data_train):

        # Targets as a float column vector, as expected by `aleatoric_loss`
        targets = targets.float()
        targets = targets.reshape((targets.shape[0], 1))

        # A 1-D batch of inputs becomes a single-feature column
        if len(inputs.shape) == 1:
            inputs = inputs.reshape(inputs.shape[0], 1)

        # Zero the gradients, then forward / loss / backward / update
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = aleatoric_loss(targets, outputs)
        loss.backward()
        optimizer.step()

        # Statistics
        batch_loss = loss.item()
        epoch_loss += batch_loss
        running_loss += batch_loss
        if verbose and (i + 1) % LOG_EVERY == 0:
            # Average over exactly the batches accumulated since the last message
            print('Loss after mini-batch %5d: %.3f' %
                  (i + 1, running_loss / LOG_EVERY))
            running_loss = 0.0

    # Sum of the mini-batch losses of this epoch
    loss_dict[epoch] = epoch_loss

In [None]:
# Training loss recorded per epoch in `loss_dict`.
epochs_logged = list(loss_dict)
losses_logged = [loss_dict[logged_epoch] for logged_epoch in epochs_logged]

plt.plot(epochs_logged, losses_logged)
plt.xlabel("epoch")
plt.ylabel("loss");

In [None]:
# Repeated forward passes on the test inputs: mean prediction plus the two
# uncertainty estimates.
mean, epistemic_std = pred_mean_and_epistemic_std(model = model,
                                                  X_test = x_test,
                                                  x_train = x_train,
                                                  y_train = y_train,
                                                  T = 50)

a_std = aleatoric_std(model, X_test=x_test, T=50)

# Project the inputs onto one dimension for plotting. The projection is fitted
# on the training inputs only and then applied unchanged to the test inputs, so
# both sets share the same x axis. (Fitting a second PCA on the test inputs
# gives an unrelated axis whose sign is arbitrary, which the earlier version
# tried to compensate for by reversing the test projection.)
pca = PCA(n_components=1)
train_pca_x = pca.fit_transform(np.asarray(x_train))
test_pca_x = pca.transform(np.asarray(x_test))

plt.figure(figsize = (12,7))
plt.scatter(train_pca_x.ravel(), y_train, color='black', label='train data')
plt.errorbar(test_pca_x.ravel(), mean, yerr=a_std, fmt='.', color="green",
             alpha=0.3, label='aleatoric')
plt.errorbar(test_pca_x.ravel(), mean, yerr=epistemic_std, fmt='.', color="blue",
             alpha=0.3, label='epistemic')
plt.title('Prediction with Aleatoric and Epistemic std')
plt.xlabel('first principal component of the inputs')
plt.ylabel('target / prediction')
plt.legend();

In [None]:
# Both uncertainty estimates per test sample, plotted against the sample's
# position in the test set.
plt.figure(figsize = (12,7))
plt.plot(epistemic_std, label = "epistemic")
plt.plot(a_std, label = "aleatoric")
plt.xlabel("test sample index")
plt.ylabel("uncertainty estimate")
plt.title("Epistemic vs. aleatoric uncertainty on the test inputs")
plt.legend();
