In [1]:
import torch
import torch.multiprocessing as mp
import torch.nn as nn

import numpy as np
import random

import data_generator
import params_newsvendor as params

from sklearn.preprocessing import StandardScaler

from model import VariationalLayer, VariationalNet, StandardNet, VariationalNet2

from train import TrainDecoupled


import joblib

from train_normflow import TrainFlowDecoupled

from qpth.qp import QPFunction

In [2]:
is_cuda = False
dev = torch.device('cpu')  
if torch.cuda.is_available():
    is_cuda = True
    dev = torch.device('cuda')

In [3]:
# Setting the seeds to allow replication
# Changing the seed might require hyperparameter tuning again
# Because it changes the deterministic parameters
seed_number = 9
np.random.seed(seed_number)
torch.manual_seed(seed_number)
random.seed(seed_number)

In [4]:
# Setting parameters (change if necessary)
N = 8000 # Total data size
N_train = 5000 # Training data size
N_SAMPLES = 16 # Sampling size while training
BATCH_SIZE_LOADER = 32 # Standard batch size
EPOCHS = 80 

In [5]:
nl = 1

In [6]:
# Data manipulation
N_valid = N - N_train
X, Y_original = data_generator.data_4to8(N_train, noise_level=nl)

In [7]:
# Output normalization
scaler = StandardScaler()
scaler.fit(Y_original)
tmean = torch.tensor(scaler.mean_)
tstd = torch.tensor(scaler.scale_)
joblib.dump(scaler, 'scaler_multi.gz')

['scaler_multi.gz']

In [8]:
def inverse_transform(yy):
    return yy*tstd + tmean

Y = scaler.transform(Y_original).copy()
X = torch.tensor(X, dtype=torch.float32)
Y = torch.tensor(Y, dtype=torch.float32)

In [9]:
data_train = data_generator.ArtificialDataset(X, Y)
training_loader = torch.utils.data.DataLoader(
    data_train, batch_size=BATCH_SIZE_LOADER,
    shuffle=False, num_workers=mp.cpu_count())

X_val, Y_val_original = data_generator.data_4to8(N_valid, noise_level=nl)
Y_val = scaler.transform(Y_val_original).copy()
X_val = torch.tensor(X_val, dtype=torch.float32).to(dev)
Y_val_original = torch.tensor(Y_val_original, dtype=torch.float32).to(dev)
Y_val = torch.tensor(Y_val, dtype=torch.float32).to(dev)

data_valid = data_generator.ArtificialDataset(X_val, Y_val)
validation_loader = torch.utils.data.DataLoader(
    data_valid, batch_size=BATCH_SIZE_LOADER,
    shuffle=False, num_workers=mp.cpu_count())

input_size = X.shape[1]
output_size = Y.shape[1]

In [10]:
class SolveNewsvendorWithKKT():
    def __init__(self, params_t, n_samples):
        super(SolveNewsvendorWithKKT, self).__init__()
            
        n_items = len(params_t['c'])
        self.n_items = n_items  
        self.n_samples = n_samples
            
        # Torch parameters for KKT         
        ident = torch.eye(n_items)
        ident_samples = torch.eye(n_items*n_samples)
        ident3 = torch.eye(n_items + 2*n_items*n_samples)
        zeros_matrix = torch.zeros((n_items*n_samples, n_items*n_samples))
        zeros_array = torch.zeros(n_items*n_samples)
        ones_array = torch.ones(n_items*n_samples)
             
        self.Q = torch.diag(
            torch.hstack(
                (
                    params_t['q'], 
                    (1/n_samples)*params_t['qs'].repeat_interleave(n_samples), 
                    (1/n_samples)*params_t['qw'].repeat_interleave(n_samples)
                )
            )).to(dev)
        
        
        self.lin = torch.hstack(
                                (
                                    params_t['c'], 
                                    (1/n_samples)*params_t['cs'].repeat_interleave(n_samples), 
                                    (1/n_samples)*params_t['cw'].repeat_interleave(n_samples)
                                )).to(dev)
             
            
        shortage_ineq = torch.hstack(
            (
                -ident.repeat_interleave(n_samples, 0), 
                -ident_samples, 
                zeros_matrix
            )
        )  
        
        
        excess_ineq = torch.hstack(
            (
                ident.repeat_interleave(n_samples, 0), 
                zeros_matrix, 
                -ident_samples
            )
        )
        
        
        price_ineq = torch.hstack(
            (
                params_t['pr'], 
                zeros_array, 
                zeros_array
            )
        )
        
        
        positive_ineq = -ident3
        
        
        self.ineqs = torch.vstack(
            (
                shortage_ineq, 
                excess_ineq, 
                price_ineq, 
                positive_ineq
            )
        ).to(dev)
 
        self.uncert_bound = torch.hstack((-ones_array, ones_array)).to(dev)
        
        self.determ_bound = torch.tensor([params_t['B']]) 
        
        self.determ_bound = torch.hstack((self.determ_bound, 
                                          torch.zeros(n_items), 
                                          torch.zeros(n_items*n_samples), 
                                          torch.zeros(n_items*n_samples))).to(dev)
        
        
        
    def forward(self, y):
        """
        Applies the qpth solver for all batches and allows backpropagation.
        Formulation based on Priya L. Donti, Brandon Amos, J. Zico Kolter (2017).
        Note: The quadratic terms (Q) are used as auxiliar terms only to allow the backpropagation through the 
        qpth library from Amos and Kolter. 
        We will set them as a small percentage of the linear terms (Wilder, Ewing, Dilkina, Tambe, 2019)
        """
        
        batch_size, n_samples_items = y.size()
                
        assert self.n_samples*self.n_items == n_samples_items 

        Q = self.Q
        Q = Q.expand(batch_size, Q.size(0), Q.size(1))
        
        lin = self.lin
        lin = lin.expand(batch_size, lin.size(0))

        ineqs = torch.unsqueeze(self.ineqs, dim=0)
        ineqs = ineqs.expand(batch_size, ineqs.shape[1], ineqs.shape[2])       

        uncert_bound = (self.uncert_bound*torch.hstack((y, y)))
        determ_bound = self.determ_bound.unsqueeze(dim=0).expand(
            batch_size, self.determ_bound.shape[0])
        bound = torch.hstack((uncert_bound, determ_bound))     
        
        e = torch.DoubleTensor().to(dev)
        
        argmin = QPFunction(verbose=-1)\
            (Q.double(), lin.double(), ineqs.double(), 
             bound.double(), e, e).double()
            
        return argmin[:,:n_items]

In [11]:
cost_per_item = lambda Z, Y : params_t['q'].to(dev)*Z.to(dev)**2 \
                            + params_t['qs'].to(dev)*(torch.max(torch.zeros((n_items)).to(dev),Y.to(dev)-Z.to(dev)))**2 \
                            + params_t['qw'].to(dev)*(torch.max(torch.zeros((n_items)).to(dev),Z.to(dev)-Y.to(dev)))**2 \
                            + params_t['c'].to(dev)*Z.to(dev) \
                            + params_t['cs'].to(dev)*torch.max(torch.zeros((n_items)).to(dev),Y.to(dev)-Z.to(dev)) \
                            + params_t['cw'].to(dev)*torch.max(torch.zeros((n_items)).to(dev),Z.to(dev)-Y.to(dev))


def reshape_outcomes(y_pred):
    n_samples = y_pred.shape[0]
    batch_size = y_pred.shape[1]
    n_items = y_pred.shape[2]

    y_pred = y_pred.permute((1, 2, 0)).reshape((batch_size, n_samples*n_items))

    return y_pred

def calc_f_por_item(y_pred, y):
    y_pred = reshape_outcomes(y_pred)
    z_star =  argmin_solver(y_pred)
    f_per_item = cost_per_item(z_star, y)
    return f_per_item

def calc_f_per_day(y_pred, y):
    f_per_item = calc_f_por_item(y_pred, y)
    f = torch.sum(f_per_item, 1)
    return f

def cost_fn(y_pred, y):
    f = calc_f_per_day(y_pred, y)
    f_total = torch.mean(f)
    return f_total

In [12]:
h_ann = StandardNet(input_size, output_size, 0).to(dev)
h_bnn = VariationalNet2(N_SAMPLES, input_size, output_size, 1.0).to(dev)

opt_h_ann = torch.optim.Adam(h_ann.parameters(), lr=0.0010)
opt_h_bnn = torch.optim.Adam(h_bnn.parameters(), lr=0.0015)

mse_loss = nn.MSELoss(reduction='none')

In [13]:
train_ANN = TrainDecoupled(
                    bnn = False,
                    model=h_ann,
                    opt=opt_h_ann,
                    loss_data=mse_loss,
                    K=0.0,
                    training_loader=training_loader,
                    validation_loader=validation_loader
                )

train_ANN.train(EPOCHS=120)
model_ann = train_ANN.model

------------------EPOCH 1------------------


RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/repository/code/pao_uncertainty/data_generator.py", line 16, in __getitem__
    X_i = self.X[idx]
RuntimeError: CUDA error: initialization error
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.


In [13]:
train_BNN = TrainDecoupled(
                    bnn = True,
                    model=h_bnn,
                    opt=opt_h_bnn,
                    loss_data=mse_loss,
                    K=1.0,
                    training_loader=training_loader,
                    validation_loader=validation_loader
                )

train_BNN.train(EPOCHS=150)
model_bnn = train_BNN.model

AttributeError: module 'torch' has no attribute 'pi'

In [13]:
train_flow = TrainFlowDecoupled(steps=4000, input_size=4, output_size=8)
model_flow = train_flow.train(X, Y, X_val, Y_val)

step: 0, train loss: 5.12326, val loss: 5.14346
step: 50, train loss: 2.13778, val loss: 2.15931
step: 100, train loss: 1.8985, val loss: 1.9508
step: 150, train loss: 1.8401, val loss: 1.88361
step: 200, train loss: 1.73643, val loss: 1.82557
step: 250, train loss: 1.73839, val loss: 1.77618
step: 300, train loss: 1.63881, val loss: 1.70825
step: 350, train loss: 1.64095, val loss: 1.73542
step: 400, train loss: 1.58708, val loss: 1.63954
step: 450, train loss: 1.52352, val loss: 1.5941
step: 500, train loss: 1.5568, val loss: 1.61974
step: 550, train loss: 1.46915, val loss: 1.59045
step: 600, train loss: 1.52485, val loss: 1.60449
step: 650, train loss: 1.45155, val loss: 1.53815
step: 700, train loss: 1.45144, val loss: 1.53169
step: 750, train loss: 1.44863, val loss: 1.53663
step: 800, train loss: 1.48449, val loss: 1.53341
step: 850, train loss: 1.40554, val loss: 1.51814
step: 900, train loss: 1.41138, val loss: 1.49453
step: 950, train loss: 1.46639, val loss: 1.53898
step: 10

In [13]:
n_items = output_size

In [14]:
params_t, _ = params.get_params(n_items, seed_number)

In [18]:
# Propagating predictions to Newsvendor Problem
M = 4

Y_pred_ANN = train_ANN.model(X_val).unsqueeze(0)
Y_pred_ANN = inverse_transform(Y_pred_ANN)

train_BNN.model.update_n_samples(n_samples=M)
Y_pred_BNN = train_BNN.model.forward_dist(X_val)
Y_pred_BNN = inverse_transform(Y_pred_BNN)
M = Y_pred_BNN.shape[0]

N = X_val.shape[0]
Y_pred_flow = torch.zeros((M, N, n_items))
for i in range(0, N):
    Y_pred_flow[:,i,:] = model_flow.condition(X_val[i]).sample(torch.Size([M,])).squeeze()
Y_pred_flow = inverse_transform(Y_pred_flow)

In [19]:
mse_loss = nn.MSELoss()
print(mse_loss(Y_pred_ANN.mean(axis=0), Y_val_original))
print(mse_loss(Y_pred_BNN.mean(axis=0), Y_val_original))
print(mse_loss(Y_pred_flow.mean(axis=0), Y_val_original))

tensor(1.7952, dtype=torch.float64, grad_fn=<MseLossBackward0>)
tensor(1.8478, dtype=torch.float64, grad_fn=<MseLossBackward0>)
tensor(2.2457, dtype=torch.float64)


In [15]:
# Construct the solver
newsvendor_solve_kkt = SolveNewsvendorWithKKT(params_t, 1)
newsvendor_solve_kkt_M = SolveNewsvendorWithKKT(params_t, M)

NameError: name 'M' is not defined

In [21]:
def argmin_solver(y_pred):
    z_star = newsvendor_solve_kkt.forward(y_pred)
    return z_star

n_batches = int(np.ceil(Y_pred_ANN.shape[1]/BATCH_SIZE_LOADER))

f_total = 0
f_total_best = 0

for b in range(0, n_batches):
    i_low = b*BATCH_SIZE_LOADER
    i_up = (b+1)*BATCH_SIZE_LOADER
    if b == n_batches-1:
        i_up = n_batches*Y_pred_ANN.shape[1]
    f_total += cost_fn(Y_pred_ANN[:,i_low:i_up,:], Y_val_original[i_low:i_up,:])/n_batches
    print(b, f_total)

0 tensor(368.8417, dtype=torch.float64, grad_fn=<AddBackward0>)
1 tensor(719.3489, dtype=torch.float64, grad_fn=<AddBackward0>)
2 tensor(1074.0162, dtype=torch.float64, grad_fn=<AddBackward0>)
3 tensor(1392.0964, dtype=torch.float64, grad_fn=<AddBackward0>)


torch.linalg.eig returns complex tensors of dtype cfloat or cdouble rather than real tensors mimicking complex tensors.
L, _ = torch.eig(A)
should be replaced with
L_complex = torch.linalg.eigvals(A)
and
L, V = torch.eig(A, eigenvectors=True)
should be replaced with
L_complex, V_complex = torch.linalg.eig(A) (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:2910.)
  e, _ = torch.eig(Q[i])


4 tensor(1755.9449, dtype=torch.float64, grad_fn=<AddBackward0>)
5 tensor(2138.9331, dtype=torch.float64, grad_fn=<AddBackward0>)
6 tensor(2484.1407, dtype=torch.float64, grad_fn=<AddBackward0>)
7 tensor(2769.4024, dtype=torch.float64, grad_fn=<AddBackward0>)
8 tensor(3173.2590, dtype=torch.float64, grad_fn=<AddBackward0>)
9 tensor(3488.9858, dtype=torch.float64, grad_fn=<AddBackward0>)
10 tensor(3853.0679, dtype=torch.float64, grad_fn=<AddBackward0>)
11 tensor(4192.5381, dtype=torch.float64, grad_fn=<AddBackward0>)
12 tensor(4540.0599, dtype=torch.float64, grad_fn=<AddBackward0>)
13 tensor(4868.4100, dtype=torch.float64, grad_fn=<AddBackward0>)
14 tensor(5141.7237, dtype=torch.float64, grad_fn=<AddBackward0>)
15 tensor(5485.1656, dtype=torch.float64, grad_fn=<AddBackward0>)
16 tensor(5810.2758, dtype=torch.float64, grad_fn=<AddBackward0>)
17 tensor(6150.4915, dtype=torch.float64, grad_fn=<AddBackward0>)
18 tensor(6511.3681, dtype=torch.float64, grad_fn=<AddBackward0>)
19 tensor(6844.5

In [22]:
def argmin_solver(y_pred):
    z_star = newsvendor_solve_kkt_M.forward(y_pred)
    return z_star

n_batches = int(np.ceil(Y_pred_BNN.shape[1]/BATCH_SIZE_LOADER))

f_total = 0
f_total_best = 0

for b in range(0, n_batches):
    i_low = b*BATCH_SIZE_LOADER
    i_up = (b+1)*BATCH_SIZE_LOADER
    if b == n_batches-1:
        i_up = n_batches*Y_pred_BNN.shape[1]
    f_total += cost_fn(Y_pred_BNN[:,i_low:i_up,:], Y_val_original[i_low:i_up,:])/n_batches
    print(b, f_total)

0 tensor(360.6398, dtype=torch.float64, grad_fn=<AddBackward0>)
1 tensor(702.9200, dtype=torch.float64, grad_fn=<AddBackward0>)
2 tensor(1052.3830, dtype=torch.float64, grad_fn=<AddBackward0>)
3 tensor(1365.4478, dtype=torch.float64, grad_fn=<AddBackward0>)
4 tensor(1724.8241, dtype=torch.float64, grad_fn=<AddBackward0>)
5 tensor(2107.8547, dtype=torch.float64, grad_fn=<AddBackward0>)
6 tensor(2445.2796, dtype=torch.float64, grad_fn=<AddBackward0>)
7 tensor(2721.7141, dtype=torch.float64, grad_fn=<AddBackward0>)
8 tensor(3121.2553, dtype=torch.float64, grad_fn=<AddBackward0>)
9 tensor(3431.9031, dtype=torch.float64, grad_fn=<AddBackward0>)
10 tensor(3787.1780, dtype=torch.float64, grad_fn=<AddBackward0>)
11 tensor(4118.1884, dtype=torch.float64, grad_fn=<AddBackward0>)
12 tensor(4460.6765, dtype=torch.float64, grad_fn=<AddBackward0>)
13 tensor(4784.5170, dtype=torch.float64, grad_fn=<AddBackward0>)
14 tensor(5053.0189, dtype=torch.float64, grad_fn=<AddBackward0>)
15 tensor(5393.8698, d

In [23]:
def argmin_solver(y_pred):
    z_star = newsvendor_solve_kkt_M.forward(y_pred)
    return z_star

n_batches = int(np.ceil(Y_pred_flow.shape[1]/BATCH_SIZE_LOADER))

f_total = 0
f_total_best = 0

for b in range(0, n_batches):
    i_low = b*BATCH_SIZE_LOADER
    i_up = (b+1)*BATCH_SIZE_LOADER
    if b == n_batches-1:
        i_up = n_batches*Y_pred_flow.shape[1]
    f_total += cost_fn(Y_pred_flow[:,i_low:i_up,:], Y_val_original[i_low:i_up,:])/n_batches
    print(b, f_total)

0 tensor(354.4790, dtype=torch.float64)
1 tensor(689.6819, dtype=torch.float64)
2 tensor(1031.8333, dtype=torch.float64)
3 tensor(1340.4632, dtype=torch.float64)
4 tensor(1690.9033, dtype=torch.float64)
5 tensor(2065.6534, dtype=torch.float64)
6 tensor(2398.2139, dtype=torch.float64)
7 tensor(2671.9200, dtype=torch.float64)
8 tensor(3067.8034, dtype=torch.float64)
9 tensor(3370.2477, dtype=torch.float64)
10 tensor(3720.3259, dtype=torch.float64)
11 tensor(4047.8493, dtype=torch.float64)
12 tensor(4380.8409, dtype=torch.float64)
13 tensor(4700.4676, dtype=torch.float64)
14 tensor(4963.4861, dtype=torch.float64)
15 tensor(5294.0947, dtype=torch.float64)
16 tensor(5606.7736, dtype=torch.float64)
17 tensor(5938.9002, dtype=torch.float64)
18 tensor(6289.8334, dtype=torch.float64)
19 tensor(6611.9189, dtype=torch.float64)
20 tensor(6908.5777, dtype=torch.float64)
21 tensor(7239.0349, dtype=torch.float64)
22 tensor(7579.4811, dtype=torch.float64)
23 tensor(8024.3282, dtype=torch.float64)
24 t

In [24]:
Y_val_original.shape

torch.Size([3000, 8])

In [16]:
def argmin_solver(y_pred):
    z_star = newsvendor_solve_kkt.forward(y_pred)
    return z_star

n_batches = int(np.ceil(Y_val_original.shape[0]/BATCH_SIZE_LOADER))

f_total = 0
f_total_best = 0

for b in range(0, n_batches):
    i_low = b*BATCH_SIZE_LOADER
    i_up = (b+1)*BATCH_SIZE_LOADER
    if b == n_batches-1:
        i_up = n_batches*Y_val_original.shape[0]
    f_total += cost_fn(Y_val_original[i_low:i_up,:].unsqueeze(0), Y_val_original[i_low:i_up,:])/n_batches
    print(f_total)

torch.linalg.eig returns complex tensors of dtype cfloat or cdouble rather than real tensors mimicking complex tensors.
L, _ = torch.eig(A)
should be replaced with
L_complex = torch.linalg.eigvals(A)
and
L, V = torch.eig(A, eigenvectors=True)
should be replaced with
L_complex, V_complex = torch.linalg.eig(A) (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:2910.)
  e, _ = torch.eig(Q[i])


tensor(289.7633, dtype=torch.float64)
tensor(629.3350, dtype=torch.float64)
tensor(937.4621, dtype=torch.float64)
tensor(1242.4037, dtype=torch.float64)
tensor(1539.6558, dtype=torch.float64)
tensor(1824.8601, dtype=torch.float64)
tensor(2110.9020, dtype=torch.float64)
tensor(2328.5754, dtype=torch.float64)
tensor(2645.5299, dtype=torch.float64)
tensor(2951.5173, dtype=torch.float64)
tensor(3240.8417, dtype=torch.float64)
tensor(3481.8489, dtype=torch.float64)
tensor(3837.9704, dtype=torch.float64)
tensor(4042.0496, dtype=torch.float64)
tensor(4283.3652, dtype=torch.float64)
tensor(4554.5071, dtype=torch.float64)
tensor(4897.7962, dtype=torch.float64)
tensor(5231.1427, dtype=torch.float64)
tensor(5496.6919, dtype=torch.float64)
tensor(5855.5249, dtype=torch.float64)
tensor(6165.3436, dtype=torch.float64)
tensor(6462.6735, dtype=torch.float64)
tensor(6764.1950, dtype=torch.float64)
tensor(7143.3074, dtype=torch.float64)
tensor(7493.8135, dtype=torch.float64)
tensor(7807.1558, dtype=torc

In [25]:
(argmin_solver(reshape_outcomes(Y_val_original[0:50,:].unsqueeze(0)))*params_t['pr']).sum(1)

tensor([1911.3810, 3548.2932, 3548.2932, 3548.2932, 2453.8240, 3548.2932,
        3548.2932, 3548.2932, 3548.2932, 3548.2932, 3548.2932, 3548.2932,
        3548.2932, 3548.2932, 3548.2932, 3548.2932, 3548.2932, 3548.2932,
        3548.2932, 3548.2932, 3548.2932, 3548.2932, 3548.2932, 3548.2932,
        3077.1531, 1905.3210, 3548.2932, 3548.2932, 3548.2932, 3548.2932,
        3548.2932, 3548.2932, 2699.9140, 3548.2932, 1427.2790, 3548.2932,
        3548.2932, 3548.2932, 3455.7500, 3548.2932, 3548.2932, 3548.2932,
        3548.2932, 3548.2932, 3548.2932, 3548.2932, 3347.9950, 3135.0530,
        3548.2932, 3548.2932], dtype=torch.float64)

1.83 1.64 2.43 28104 27870 27787 13598
1.87 1.89 2.46 42607 42070 41574 28124
1.80 1.85 2.25 34329 33855 33395 15849


