In [1]:
import numpy as np
import math
import random

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torch.multiprocessing as mp
from datetime import datetime

import pdb

from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

# Utils
import data_generator
from variational_layer import VariationalLayer

In [2]:
# Setting the seeds to allow replication
# Changing the seed might require hyperparameter tuning again
# Because it changes the deterministic parameters
seed_number = 0
np.random.seed(seed_number)
torch.manual_seed(seed_number)
random.seed(seed_number)

In [3]:
is_cuda = False
dev = torch.device('cpu')  
if torch.cuda.is_available():
    is_cuda = True
    dev = torch.device('cuda')  

In [4]:
N = 5000
N_train = 3000
N_valid = N - N_train

In [5]:
X, y = data_generator.data_4to4(N)
X, y_perfect = data_generator.data_4to4(N, noise_level=0)

### Plot if output data is 1D
pca = PCA(n_components=1)
pca.fit(X)
X_plot = pca.transform(X)

fig,ax = plt.subplots(figsize=(6,3), dpi=120)
plt.scatter(X_plot, y, c='blue', alpha=0.2, s=5)
plt.scatter(X_plot, y_perfect, c='r', alpha=0.7, s=0.3)

In [6]:
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

In [7]:
class ArtificialDataset(torch.utils.data.Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        return

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        X_i = self.X[idx]
        y_i = self.y[idx]

        return X_i, y_i

In [8]:
dataset = ArtificialDataset(X, y)

data_train, data_valid = torch.utils.data.random_split(
    dataset, [N_train, N_valid])

BATCH_SIZE_LOADER = 64

training_loader = torch.utils.data.DataLoader(
    data_train, batch_size=BATCH_SIZE_LOADER,
    shuffle=True, num_workers=mp.cpu_count())

validation_loader = torch.utils.data.DataLoader(
    data_valid, batch_size=BATCH_SIZE_LOADER,
    shuffle=True, num_workers=mp.cpu_count())

In [9]:
class VariationalNet(nn.Module):
    # Initialize the layers
    def __init__(self, n_samples, input_size, output_size, K):
        super().__init__()
        self.n_samples = n_samples
        self.act1 = nn.ReLU()
        self.act2 = nn.Tanh()
        self.act3 = nn.Sigmoid()
        self.linear1 = VariationalLayer(input_size, 32, 0, 3, n_samples)
        #self.bn = nn.BatchNorm1d(16)
        self.linear2 = VariationalLayer(32, 16, 0, 3, n_samples)
        self.linear3 = VariationalLayer(16, output_size, 0, 3, n_samples)
        self.K = K
    
    # Perform the computation
    def forward(self, x):
        #pdb.set_trace()
        x = torch.unsqueeze(x, 0)
        x = x.expand((self.n_samples, x.shape[1], x.shape[2]))
        x = self.linear1(x)
        #x = self.bn(x)
        x = self.act1(x)
        x = self.linear2(x)
        x = self.act1(x)
        x = self.linear3(x)
        x = self.act1(x)
        return x
    
    def kl_divergence_NN(self):
        kl = (
            self.linear1.kl_divergence_layer() 
            + self.linear2.kl_divergence_layer()
            + self.linear3.kl_divergence_layer()
        )
        return kl

In [10]:
n_samples = 10
input_size = X.shape[1]
output_size = y.shape[1]

K = 1

In [11]:
h = VariationalNet(n_samples, input_size, output_size, K).to(dev)
#h1 = ANN(input_size, output_size).to(dev)
#h2 = BayesianNetwork(1).to(dev)
opt_h = torch.optim.Adam(h.parameters(), lr=0.005)

mse_loss_mean = nn.MSELoss(reduction='mean')
mse_loss_sum = nn.MSELoss(reduction='sum')

In [12]:
BATCH_SIZE = 64

In [13]:
def train_one_epoch(epoch_index):
    
    mse_running_loss = 0.
    kl_running_loss = 0.
    
    n = len(training_loader.dataset)

    for i, data in enumerate(training_loader):
        
        x_batch, y_batch = data
        opt_h.zero_grad()

        y_preds = h(x_batch)
        y_preds_mean = y_preds.mean(axis=0).squeeze()

        mse_loss_ = mse_loss_sum(y_preds_mean, y_batch)
        kl_loss_ = h.K*h.kl_divergence_NN()
        
        elbo_loss = mse_loss_ + kl_loss_
        elbo_loss.backward()
        
        opt_h.step()
        
        mse_running_loss += mse_loss_.item()
        kl_running_loss += kl_loss_.item()
        
    mse = mse_running_loss/n
    kl = kl_running_loss/n

    return mse, kl

In [14]:
# Initializing in a separate cell so we can easily add more epochs to the same run
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
epoch_number = 0

EPOCHS = 150

for epoch in range(EPOCHS):
    print('------------------EPOCH {}------------------'.format(
        epoch_number + 1))

    h.train(True)
    avg_mse_loss, avg_kl_loss = train_one_epoch(
        epoch_number)
    avg_loss = avg_mse_loss + avg_kl_loss
    
    h.train(False)
    mse_running_loss_v = 0.0
    kl_running_loss_v = 0.0
    
    n = len(validation_loader.dataset)
    
    for i, vdata in enumerate(validation_loader):
        
        x_val_batch, y_val_batch = vdata

        y_val_preds = h(x_val_batch)
        y_val_preds_mean = y_val_preds.mean(axis=0).squeeze()
        
        mse_loss_ = mse_loss_sum(y_val_preds_mean, y_val_batch)
        kl_loss_ = h.K*h.kl_divergence_NN()
        elbo_loss = mse_loss_ + kl_loss_
        
        mse_running_loss_v += mse_loss_
        kl_running_loss_v += kl_loss_
        
    avg_vmseloss = (mse_running_loss_v / n).item()
    avg_vklloss = (kl_running_loss_v / n).item()
    
    avg_vloss = avg_vmseloss + avg_vklloss
    
    print('MSE LOSS \t train {} valid {}'.format(
        round(avg_mse_loss, 2), round(avg_vmseloss, 2)))
    print('KL LOSS \t train {} valid {}'.format(
        round(avg_kl_loss, 2), round(avg_vklloss, 2)))
    print('ELBO LOSS \t train {} valid {}'.format(
        round(avg_loss, 2), round(avg_vloss, 2)))

    epoch_number += 1

------------------EPOCH 1------------------
MSE LOSS 	 train 516.66 valid 455.9
KL LOSS 	 train 59.48 valid 59.51
ELBO LOSS 	 train 576.15 valid 515.4
------------------EPOCH 2------------------
MSE LOSS 	 train 438.98 valid 439.74
KL LOSS 	 train 57.1 valid 57.09
ELBO LOSS 	 train 496.09 valid 496.83
------------------EPOCH 3------------------
MSE LOSS 	 train 427.64 valid 431.58
KL LOSS 	 train 54.78 valid 54.72
ELBO LOSS 	 train 482.42 valid 486.3
------------------EPOCH 4------------------
MSE LOSS 	 train 423.38 valid 431.85
KL LOSS 	 train 52.46 valid 52.38
ELBO LOSS 	 train 475.83 valid 484.23
------------------EPOCH 5------------------
MSE LOSS 	 train 422.76 valid 427.86
KL LOSS 	 train 50.17 valid 50.1
ELBO LOSS 	 train 472.93 valid 477.96
------------------EPOCH 6------------------
MSE LOSS 	 train 421.26 valid 427.49
KL LOSS 	 train 48.0 valid 47.94
ELBO LOSS 	 train 469.26 valid 475.43
------------------EPOCH 7------------------
MSE LOSS 	 train 420.34 valid 428.37
KL LOSS

MSE LOSS 	 train 419.15 valid 432.98
KL LOSS 	 train 18.23 valid 18.52
ELBO LOSS 	 train 437.37 valid 451.49
------------------EPOCH 56------------------
MSE LOSS 	 train 419.48 valid 430.28
KL LOSS 	 train 18.11 valid 18.41
ELBO LOSS 	 train 437.6 valid 448.68
------------------EPOCH 57------------------
MSE LOSS 	 train 419.4 valid 429.83
KL LOSS 	 train 17.96 valid 18.32
ELBO LOSS 	 train 437.36 valid 448.15
------------------EPOCH 58------------------
MSE LOSS 	 train 417.84 valid 431.82
KL LOSS 	 train 17.88 valid 18.18
ELBO LOSS 	 train 435.72 valid 450.0
------------------EPOCH 59------------------
MSE LOSS 	 train 418.54 valid 429.59
KL LOSS 	 train 17.76 valid 18.1
ELBO LOSS 	 train 436.3 valid 447.69
------------------EPOCH 60------------------
MSE LOSS 	 train 418.74 valid 430.53
KL LOSS 	 train 17.66 valid 17.98
ELBO LOSS 	 train 436.39 valid 448.5
------------------EPOCH 61------------------
MSE LOSS 	 train 418.13 valid 430.57
KL LOSS 	 train 17.56 valid 17.87
ELBO LOSS 	

MSE LOSS 	 train 419.26 valid 430.81
KL LOSS 	 train 14.28 valid 14.58
ELBO LOSS 	 train 433.53 valid 445.39
------------------EPOCH 110------------------
MSE LOSS 	 train 417.4 valid 431.38
KL LOSS 	 train 14.22 valid 14.49
ELBO LOSS 	 train 431.63 valid 445.86
------------------EPOCH 111------------------
MSE LOSS 	 train 419.22 valid 431.26
KL LOSS 	 train 14.19 valid 14.42
ELBO LOSS 	 train 433.41 valid 445.68
------------------EPOCH 112------------------
MSE LOSS 	 train 417.49 valid 431.64
KL LOSS 	 train 14.15 valid 14.43
ELBO LOSS 	 train 431.65 valid 446.07
------------------EPOCH 113------------------
MSE LOSS 	 train 417.8 valid 430.93
KL LOSS 	 train 14.1 valid 14.37
ELBO LOSS 	 train 431.89 valid 445.3
------------------EPOCH 114------------------
MSE LOSS 	 train 417.97 valid 431.13
KL LOSS 	 train 14.06 valid 14.3
ELBO LOSS 	 train 432.03 valid 445.43
------------------EPOCH 115------------------
MSE LOSS 	 train 417.61 valid 431.56
KL LOSS 	 train 13.96 valid 14.24
ELBO

def plot_test_curve(X_test, y_test, h):
    xt = torch.hstack(
    (X_test[:,0], 
     torch.tensor(np.arange(-1.4, -1, 1/500), dtype = torch.float32), 
     torch.tensor(np.arange(1, 1.4, 1/500), dtype = torch.float32)))
    
    plt.scatter(X_test[:,0].detach(), y_test.detach())
    for i in range(0, 100):
        plt.scatter(xt.detach(), 
                    h(xt.unsqueeze(1)).detach().squeeze()[0], 
                    color='r', alpha=0.01)
        plt.ylim([-10, 10])
        
        
plot_test_curve(validation_loader.dataset.dataset.X,
               validation_loader.dataset.dataset.y,
               h)

In [43]:
M = 100

In [44]:
Y_pred = h(validation_loader.dataset.dataset.X)
for i in range(1,int(M/h.n_samples)):
    Y_pred = torch.vstack((Y_pred, h(validation_loader.dataset.dataset.X)))
    
y_test = validation_loader.dataset.dataset.y

In [55]:
# Demand distribution
demand_pred_distribution = Y_pred[:,:,0]
demand_true = y_test[:,0]

# Classical newsvendor problem
sell_price = torch.tensor(200)
cost_price = torch.tensor(20)

quantile_cut = (sell_price - cost_price)/sell_price

optimal_pred_order = torch.quantile(demand_pred_distribution, quantile_cut, dim=0)
optimal_order = demand_true

In [56]:
def profit(order):
    return sell_price*torch.minimum(order, demand_true) - cost_price*order

In [57]:
profit(optimal_pred_order).sum()

tensor(5114623.5000, grad_fn=<SumBackward0>)

In [58]:
profit(optimal_order).sum()

tensor(8101537.5000)

In [59]:
profit(demand_pred_distribution.mean()).sum()

tensor(4531089.5000, grad_fn=<SumBackward0>)

In [23]:
demand_pred_distribution.min()

tensor(0., grad_fn=<MinBackward1>)

In [24]:
torch.quantile(demand_pred_distribution, quantile_cut)

tensor(4.6937, grad_fn=<SqueezeBackward3>)

In [54]:
cy

tensor([[ 9.5101, 11.8222],
        [ 9.3594, 10.2748],
        [ 8.5177, 14.6552],
        [ 8.1419, 13.0726],
        [ 7.9186, 10.3805],
        [ 7.7878,  9.2600],
        [12.7521,  9.3533],
        [11.0066, 10.7209]], grad_fn=<CopySlices>)

In [29]:
Qy[,,]

tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

In [24]:
torch.zeros((BATCH_SIZE_OP, 2, 2)).shape

torch.Size([8, 2, 2])

In [None]:
# Quadratic Programming (Q, c, A, b):



In [None]:
y