# Imports

In [1]:
# Import utils
import numpy as np
import pandas as pd
import copy
import time
import datetime as dt
import pickle
import json
from pathlib import Path
import joblib
from joblib import dump, load, Parallel, delayed
import os
import itertools
import contextlib
from tqdm import tqdm

# Import Weights Model
import WeightsModel3
from WeightsModel3 import PreProcessing
from WeightsModel3 import RandomForestWeightsModel


# Import (Rolling Horizon) Weighted SAA models
from WeightedSAA6 import WeightedSAA
from WeightedSAA6 import RobustWeightedSAA
from WeightedSAA6 import RollingHorizonOptimization

# General paths and parameters

In [387]:
# Set folder names as module-level variables.
# (No `global` statement is needed: names assigned at module level are already global.)
PATH_BASE = '/home/fesc/DataDrivenDynamicInventoryControl'

# Switch the working directory to the project folder
os.chdir(PATH_BASE + '/')

# All data folders are derived from the single project root above
PATH_DATA = PATH_BASE + '/Data'
PATH_WEIGHTSMODEL = PATH_DATA + '/WeightsModel'
PATH_RESULTS = PATH_DATA + '/Results'

In [388]:
# Time period and SKU ranges
T = 13                      # Planning horizon T
ts = range(1, T + 1)        # Periods t=1,...,T (derived from T so the two cannot drift apart)
taus = range(0, 4 + 1)      # Look-aheads tau=0,...,4 to use
es = [1, 3, 6, 9, 12]       # Uncertainty set specifications e to use (out of e=1,...,12)
SKUs = range(1, 460 + 1)    # Products (SKUs) k=1,...,M

# Train/test split: first time period (sale_yearweek) of the testing horizon
test_start = 114

# Cost parameter settings (one dict per setting).
# In every setting listed here, CR equals b / (b + h).
cost_params = [
    {'CR': 0.50, 'K': 100, 'u': 0.5, 'h': 1, 'b': 1},
    {'CR': 0.75, 'K': 100, 'u': 0.5, 'h': 1, 'b': 3},
    {'CR': 0.90, 'K': 100, 'u': 0.5, 'h': 1, 'b': 9},
]

# Training and Sampling

## Global Training and Sampling

The two global models (using 'Global Training and Sampling') are **Rolling Horizon Global Weighted SAA (GwSAA)**, which is our model, and **Rolling Horizon Global Robust Weighted SAA (GwSAA-R)**, which is the analogous model with robust extension.

Given product $k$, period $t$, and look-ahead $\tau$, both models apply Weighted SAA over the 'global' distribution $\{\{w_{j,t,\tau}^{\,i}(x_{k,t}^{\,i}),(d_{j,t}^{\,i},...,d_{j,t+\tau}^{\,i})\}_{i=1}^{N_{j,t,\tau}}\}_{j=1}^{M}$, with weight functions $w_{j,t,\tau}(\,\cdot\,)$ trained (once for all products) on data $S_{t,\tau}^{\,\text{Global}}=\{\{(x_{j,t}^{\,i},d_{j,t}^{\,i},...,d_{j,t+\tau}^{\,i})\}_{i=1}^{N_{j,t,\tau}}\}_{j=1}^{M}$.

### Data Preprocessing

We first load and pre-process the data. This includes reshaping demand time series into $(\tau+1)$-periods rolling look-ahead horizon sequences.

- **ID_Data** (pd.DataFrame) stores identifiers (in particular the product (SKU) identifier and the timePeriod (sale_yearweek) identifier)
- **X_Data** (pd.DataFrame) is the 'feature matrix', i.e., each row is a feature vector $x_{j,n}$, where $n$ indexes the training observations (rows) in the data
- **Y_Data** (pd.DataFrame) is the demand data $d_{j,n}$ (a time series per product)
- **X_Data_Columns** (pd.DataFrame) provides 'selectors' for local vs. global feature sets

In [215]:
# Weights model names (reshaped and scaled variant);
# the cross-validation artefacts use the same name prefixed with 'cv_'
weightsmodel_name = 'rfwm_global_r_z'
weightsmodel_cv_name = 'cv_' + weightsmodel_name

In [389]:
# Load the identifier, feature, feature-selector, and demand tables from PATH_DATA
ID_Data, X_Data, X_Data_Columns, Y_Data = (
    pd.read_csv(PATH_DATA + file_name)
    for file_name in ('/ID_Data.csv', '/X_Data.csv', '/X_Data_Columns4.csv', '/Y_Data.csv')
)

In [390]:
# Keep only the features flagged 'YES' in the Global column of the selector table
is_global = X_Data_Columns['Global'] == 'YES'
X_Data_Columns = X_Data_Columns.loc[is_global]
X_Data = X_Data[X_Data_Columns['Feature'].values]

In [391]:
# Sort the three tables jointly by SKU and sale_yearweek (required by the
# preprocessing below), then split them back into their original column sets
id_cols, x_cols, y_cols = ID_Data.columns, X_Data.columns, Y_Data.columns
data = pd.concat([ID_Data, X_Data, Y_Data], axis=1)
data = data.sort_values(by=['SKU', 'sale_yearweek']).reset_index(drop=True)
ID_Data, X_Data, Y_Data = data[id_cols], data[x_cols], data[y_cols]

In [243]:
# Get feature scalers
# For every period t, SKU, and 'scale-with' feature, the scaler is the largest
# q-quantile (closest observation) over the available lag columns of that feature,
# using only rows of the SKU with sale_yearweek <= test_start+t-1.
# Non-positive quantiles fall back to the column maximum; a non-positive (or
# missing) result falls back to 1 so that scaling never divides by zero.
features_to_scale_with = X_Data_Columns.loc[X_Data_Columns.Scale == 'YES'].ScaleWith.unique()

q = 0.975
lags = [1,2,3,4,5]

x_scalers = {}
for t in ts:
    x_scalers[t] = {}

    # Rows available at period t (does not depend on the SKU)
    in_window = ID_Data.sale_yearweek <= test_start+t-1

    for SKU in SKUs:
        x_scalers[t][SKU] = {}

        # Select the SKU's rows once instead of once per feature and lag
        rows = X_Data.loc[in_window & (ID_Data.SKU == SKU)]

        for feature_to_scale_with in features_to_scale_with:
            z = []
            for l in lags:
                col = feature_to_scale_with+'_lag'+str(l)
                if col not in rows:
                    continue
                values = rows[col]
                z_ = np.quantile(values, q, method='closest_observation')
                z.append(z_ if z_ > 0 else max(values))

            # `z` is empty if the feature has no lag column at all -> default to 1
            x_scalers[t][SKU][feature_to_scale_with] = max(z) if z and max(z) > 0 else 1

_ = joblib.dump(x_scalers, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_x_scalers.joblib')

In [348]:
# Get demand scalers
# Per period t and SKU: q-quantile of the demand 'Y' over the rows with
# sale_yearweek <= test_start+t-1; if that is not positive use the maximum,
# and if the maximum is not positive either use 1.
q = 0.975

y_scalers = {}
for t in ts:
    y_scalers[t] = {}
    for SKU in SKUs:
        sel = (ID_Data.sale_yearweek <= test_start+t-1) & (ID_Data.SKU == SKU)
        demand = Y_Data.loc[sel]['Y']
        scale = np.quantile(demand, q, method='closest_observation')
        if not scale > 0:
            largest = max(demand)
            scale = largest if largest > 0 else 1
        y_scalers[t][SKU] = scale

_ = joblib.dump(y_scalers, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_y_scalers.joblib')

In [352]:
class qMaxScaler:
    """Quantile-based 'max' scaler.

    ``fit`` stores one strictly positive scaling value per feature in
    ``self.scalers``. A scaling value is the ``q_outlier``-quantile of the data
    (closest observation); if that quantile is not positive the maximum is used
    instead, and if the result is still not positive the scaling value is 1.
    """

    #### Initialize
    def __init__(self, q_outlier=0.999, **kwargs):
        """Store the quantile and the lags used to look up scaling columns.

        q_outlier: quantile up to which data is considered for fitting the scaler
        kwargs: accepted for interface compatibility; not used
        """

        # Quantile up to which data should be considered for fitting the scaler
        self.q_outlier = q_outlier

        # Lags to consider for features used for scaling
        self.lags = [1,2,3,4,5]


    @staticmethod
    def _candidate(values, q):
        """Return the q-quantile of `values`, or their maximum if the quantile is not positive."""

        values = np.asarray(values).ravel()

        # No observations: return a non-positive value so that the caller falls back to 1
        if values.size == 0:
            return 0

        z_ = np.quantile(values, q, method='closest_observation')
        return z_ if z_ > 0 else values.max()


    @staticmethod
    def _positive_or_one(value):
        """Return `value` if it is strictly positive, else 1 (this also covers NaN)."""

        return value if value > 0 else 1


    def fit(self, X, features=None, features_to_scale_with=None, **kwargs):

        """
        Fit the scaling values and store them in self.scalers.

        X: array-like of observations; a 1-D input is treated as a single column
        features: names of the columns of X (list, array, or pandas Index), or None
        features_to_scale_with: base names of the features used for scaling; for
            each base name the columns '<name>_lag<l>' (l in self.lags) found in
            'features' are used and the largest value over these columns is stored
            under the key '<name>'. If no such column exists, 1 is stored.
        kwargs: can provide q_outlier to overwrite default / initialized value of q_outlier

        If 'features' or 'features_to_scale_with' is None, every column of X gets
        its own scaling value, keyed by the column position.

        Returns self.
        """

        # Update q_outlier if provided
        self.q_outlier = kwargs.get('q_outlier', self.q_outlier)

        # Store features and features_to_scale_with
        self.features = features
        self.features_to_scale_with = features_to_scale_with

        # Initialize
        self.scalers = {}

        # Work on a 2-D array (rows = observations, columns = features)
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        # If both 'features' and 'features_to_scale_with' are provided
        if features is not None and features_to_scale_with is not None:

            # Array form so that '==' compares element-wise (also for plain lists)
            names = np.asarray(features)

            # For each feature used for scaling, get the scaling value (max of q-quantile per lag of the feature)
            for feature_to_scale_with in features_to_scale_with:
                candidates = []
                for lag in self.lags:
                    sel = (names == feature_to_scale_with+'_lag'+str(lag))
                    if sel.any():
                        candidates.append(self._candidate(X[:, sel], self.q_outlier))

                # An empty candidate list (no lag column present) falls back to 1
                self.scalers[feature_to_scale_with] = self._positive_or_one(max(candidates, default=0))

        # Else, use all columns in X for scaling
        else:

            # For each column in X, get the scaling value (max of q-quantile)
            for col in range(X.shape[1]):
                self.scalers[col] = self._positive_or_one(self._candidate(X[:, col], self.q_outlier))

        return self



    def transform(self, **kwargs):
        """Placeholder: not implemented, always returns None.

        Callers in this notebook divide by the values in self.scalers directly.
        """

        return None

In [353]:
# Fit feature scaler
# One qMaxScaler per period t and SKU, fit on the SKU's rows with
# sale_yearweek <= test_start+t-1.

# Column names and scaling bases depend neither on t nor on the SKU -> look them up once
features = X_Data.columns
features_to_scale_with = X_Data_Columns.loc[X_Data_Columns.Scale == 'YES'].ScaleWith.unique()

x_scalers = {}
for t in ts:
    x_scalers[t] = {}

    # Rows available at period t (does not depend on the SKU)
    in_window = ID_Data.sale_yearweek <= test_start+t-1

    for SKU in SKUs:
        scaler = qMaxScaler(q_outlier=0.975)
        X = np.array(X_Data.loc[in_window & (ID_Data.SKU == SKU)])
        x_scalers[t][SKU] = scaler.fit(X, features, features_to_scale_with)

_ = joblib.dump(x_scalers, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_x_scalers.joblib')

In [354]:
# Fit demand scaler
# One qMaxScaler per period t and SKU, fit on all columns of Y_Data for the
# SKU's rows with sale_yearweek <= test_start+t-1 (scalers keyed by column position)
y_scalers = {t: {} for t in ts}
for t, scalers_t in y_scalers.items():
    cutoff = test_start+t-1
    for SKU in SKUs:
        rows = (ID_Data.sale_yearweek <= cutoff) & (ID_Data.SKU == SKU)
        y = np.array(Y_Data.loc[rows])
        scaler = qMaxScaler(q_outlier=0.975)
        scalers_t[SKU] = scaler.fit(y)

_ = joblib.dump(y_scalers, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_y_scalers.joblib')

In [123]:
# Scale

In [355]:
# Period t whose fitted scalers (x_scalers[t]) are applied in the scaling cell that follows
t=1

In [356]:
# Work on a copy so that the unscaled X_Data stays available
X_Data_z = copy.deepcopy(X_Data)

# Features to scale and, aligned row by row, the feature each one is scaled with
to_scale = X_Data_Columns.loc[X_Data_Columns.Scale == 'YES']
vars_to_scale = to_scale.Feature
vars_to_scale_with = to_scale.ScaleWith

In [357]:
# Divide each feature to scale by the SKU-specific scaler of its 'scale-with' feature
for SKU in SKUs:
    sku_rows = ID_Data.SKU == SKU
    sku_scalers = x_scalers[t][SKU].scalers
    for var_to_scale, var_to_scale_with in zip(vars_to_scale, vars_to_scale_with):
        divisor = sku_scalers[var_to_scale_with]
        X_Data_z.loc[sku_rows, var_to_scale] = X_Data.loc[sku_rows, var_to_scale] / divisor

In [427]:
# Create multi-period demand vectors:
# column 'Y<tau>' holds, per SKU, the demand 'Y' shifted tau periods ahead
data = pd.concat([ID_Data, Y_Data], axis=1)
by_sku = data.groupby(['SKU'])
Y = {'Y'+str(tau): by_sku.shift(-tau)['Y'] for tau in taus}
Y_Data = pd.DataFrame(Y)

### Weights model

The weights models - and thus the data used, weight functions, and weights per sample - are the same for the two global models **GwSAA** and **GwSAA-R**. First, we tune the hyper parameters of the random forest weights model for each given look-ahead $\tau$ (as for each look-ahead $\tau$ we have a different response for the multi-output random forest regressor). Second, we fit all weight functions (for each look-ahead $\tau=0,...,4$ and over periods $t=1,...,T$) and generate all weights (for each look-ahead $\tau=0,...,4$, over periods $t=1,...,T$, and for each product (SKU) $k=1,...,M$).

#### Tune weights model

To tune the hyper parameters of the global random forest weights model, we use 3-fold rolling timeseries cross-validation on the training data and perform random search with 100 iterations over the specified hyper parameter search grid.

In [None]:
# Fixed settings passed to the weights model during tuning
model_params = dict(oob_score=True, random_state=12345, n_jobs=4, verbose=0)

# Hyper parameter search grid (single-element lists keep that parameter fixed)
hyper_params_grid = dict(
    n_estimators=[1000],
    max_depth=[None],
    min_samples_split=list(range(20, 1000, 20)),
    min_samples_leaf=list(range(10, 1000, 10)),
    max_features=list(range(8, 256, 8)),
    max_leaf_nodes=[None],
    min_impurity_decrease=[0.0],
    bootstrap=[True],
    max_samples=[0.75, 0.80, 0.85, 0.90, 0.95, 1.00],
)

# Settings of the search itself (100 iterations, scored and refit on negative MSE)
tuning_params = dict(
    n_iter=100,
    scoring=dict(MSE='neg_mean_squared_error'),
    return_train_score=True,
    refit='MSE',
    random_state=12345,
    n_jobs=8,
    verbose=2,
)

# Flags passed to the tuning call
random_search = True
print_status = True

In [358]:
# Preprocess features: scale a copy of X_Data with the scalers of period t=1
X_Data_z = copy.deepcopy(X_Data)
to_scale = X_Data_Columns.loc[X_Data_Columns.Scale == 'YES']
features_to_scale = to_scale.Feature
features_to_scale_with = to_scale.ScaleWith

t=1
for SKU in SKUs:
    sku_rows = ID_Data.SKU == SKU
    sku_scalers = x_scalers[t][SKU].scalers
    for feature_to_scale, feature_to_scale_with in zip(features_to_scale, features_to_scale_with):
        divisor = sku_scalers[feature_to_scale_with]
        X_Data_z.loc[sku_rows, feature_to_scale] = X_Data.loc[sku_rows, feature_to_scale] / divisor

In [361]:
# Preprocess demands: divide column 'Y' of a copy of Y_Data by the SKU's
# demand scaler of period t=1
Y_Data_z = copy.deepcopy(Y_Data)

t=1
for SKU in SKUs:
    sku_rows = ID_Data.SKU == SKU
    divisor = y_scalers[t][SKU].scalers[0]
    Y_Data_z.loc[sku_rows, 'Y'] = Y_Data.loc[sku_rows, 'Y'] / divisor

In [362]:
# Create multi-period demand vectors for the unscaled demands:
# column 'Y<tau>' holds, per SKU, the demand 'Y' shifted tau periods ahead
data = pd.concat([ID_Data, Y_Data], axis=1)
by_sku = data.groupby(['SKU'])
Y = {'Y'+str(tau): by_sku.shift(-tau)['Y'] for tau in taus}
Y_Data = pd.DataFrame(Y)

# Same reshaping for the scaled demands
data = pd.concat([ID_Data, Y_Data_z], axis=1)
by_sku = data.groupby(['SKU'])
Y_z = {'Y'+str(tau): by_sku.shift(-tau)['Y'] for tau in taus}
Y_Data_z = pd.DataFrame(Y_z)

In [363]:
# Settings forwarded to the weights model (64 parallel jobs, silent)
model_params = dict(n_jobs=64, verbose=0)

# Status flag
print_status = True

In [364]:
# Fixed hyper parameters per look-ahead tau (only tau=0 is defined here)
hyper_params = {
    0: dict(max_features=144, min_samples_leaf=10, n_estimators=500),
}

In [365]:
# Single trial run on the scaled data: look-ahead tau=0 in period t=1
tau, t = 0, 1

# Result containers, keyed by period t
samples, weightfunctions, weightfunctions_times, weights, weights_times = ({} for _ in range(5))

# Look-ahead cannot reach beyond the end of the planning horizon
tau_ = min(tau, T-t)

# Status
print(f"#### Look-ahead tau={tau} (tau'={tau_}), period t={t}...")
start_time = dt.datetime.now().replace(microsecond=0)

# Generate samples, fit the weight function, and generate weights with the fixed hyper parameters
weightsmodel = RandomForestWeightsModel(hyper_params=hyper_params[tau_])
res = weightsmodel.training_and_sampling(
    ID_Data, X_Data_z, Y_Data_z,
    tau=tau_,
    timePeriods=ID_Data.sale_yearweek,
    timePeriodsTestStart=test_start+t-1,
    model_params=model_params,
)
samples[t], weightfunctions[t], weightfunctions_times[t], weights[t], weights_times[t] = res

#### Look-ahead tau=0 (tau'=0), period t=1...


In [367]:
# In-sample predictions of the model fit on scaled data (period t=1 training features)
y_pred_z = weightsmodel.weightsmodel.predict(samples[1]['X_train'])

In [267]:
# Inspect the shape of the training identifiers for period t=1
samples[1]['id_train'].shape

(47312, 4)

In [368]:
# Rescale the predictions back to the original demand scale, per SKU,
# using the period-1 demand scaler of that SKU
train_skus = samples[1]['id_train'].SKU
y_pred_zz = {
    SKU: y_pred_z[train_skus == SKU] * y_scalers[1][SKU].scalers[0]
    for SKU in SKUs
}

In [370]:
# Load the stored samples of the unscaled reference run (tau=0)
# NOTE(review): joblib.load unpickles arbitrary objects; only load files from trusted sources
samples_not_scaled = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_not_reshaped_old_rf_params_samples_tau0.joblib')

In [371]:
# Load the stored weight functions of the unscaled reference run (tau=0)
# NOTE(review): joblib.load unpickles arbitrary objects; only load files from trusted sources
weightfunctions_not_scaled = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_not_reshaped_old_rf_params_weightfunctions_tau0.joblib')

In [372]:
# In-sample predictions of the unscaled reference model (period t=1 training features)
y_pred = weightfunctions_not_scaled[1].weightsmodel.predict(samples_not_scaled[1]['X_train'])

In [373]:
# Inspect the shape of the reference predictions
y_pred.shape

(47312,)

In [374]:
# Training demands of the unscaled reference run (period t=1), used as ground truth below
y = samples_not_scaled[1]['y_train']

In [375]:
# Inspect the shape of the training demands
y.shape

(47312,)

In [376]:
# Per-SKU in-sample mean squared error of the rescaled predictions (mse_z)
# and of the unscaled reference predictions (mse)
# NOTE(review): y_pred_zz was sliced with samples[1]['id_train'] while y is sliced
# with samples_not_scaled[1]['id_train'] - this assumes both have the same row order
train_ids = samples_not_scaled[1]['id_train']
mse_z, mse = [], []
for SKU in SKUs:
    sku_rows = train_ids.SKU == SKU
    actual = y[sku_rows]
    mse_z.append(np.mean((y_pred_zz[SKU]-actual)**2))
    mse.append(np.mean((y_pred[sku_rows]-actual)**2))

In [377]:
# Collect both error measures per SKU; column 'diff' holds the ratio mse_z / mse
test = pd.DataFrame({'SKU': SKUs, 'mse_z': mse_z, 'mse': mse})
test['diff'] = test['mse_z'].div(test['mse'])

In [291]:
# Summary statistics of the per-SKU comparison
test.describe()

Unnamed: 0,SKU,mse_z,mse,diff
count,460.0,460.0,460.0,460.0
mean,230.5,1490397.0,1559714.0,0.925545
std,132.934821,18211210.0,18603560.0,0.12616
min,1.0,0.001744011,0.01119469,0.155789
25%,115.75,15.18587,16.16273,0.869915
50%,230.5,216.2103,238.5135,0.938197
75%,345.25,4062.745,4355.276,0.986222
max,460.0,328225800.0,331642800.0,1.51361


In [378]:
# Summary statistics of the per-SKU comparison (re-run)
test.describe()

Unnamed: 0,SKU,mse_z,mse,diff
count,460.0,460.0,460.0,460.0
mean,230.5,1490397.0,1559714.0,0.925545
std,132.934821,18211210.0,18603560.0,0.12616
min,1.0,0.001744011,0.01119469,0.155789
25%,115.75,15.18587,16.16273,0.869915
50%,230.5,216.2103,238.5135,0.938197
75%,345.25,4062.745,4355.276,0.986222
max,460.0,328225800.0,331642800.0,1.51361


In [230]:
# Number of infinite entries per column of the scaled feature matrix
np.isinf(np.array(X_Data_z)).sum(axis=0)

array([  0,   0,   0,   0,   3,   4,   4,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  79,
         0,  76,   0,  83,   0,  78,   0,   9,   0,   0,   0,   0,   0,
         0,   0,   0,   0,  79,   0,  76,   0,  82,   0,  77,   0,   8,
         0,   0,   0,   0,   0,   0,   0,   0,   0,  80,   0,  77,   0,
        82,   0,  77,   0,   7,   0,   0,   0,   0,   0,   0,   0,   0,
         0,  82,   0,  79,   0,  83,   0,  77,   0,   7,   0,   0,   0,
         0,   0,   0,   0,   0,   0,  84,   0,  81,   0,  85,   

In [191]:
# Display the scaled feature matrix
X_Data_z

Unnamed: 0,n_pharmacies_lag1,n_SKU_lag1,n_SKU_x_brand_lag1,n_SKU_x_brand_x_strength_lag1,SKU_n_variants_lag1,SKU_n_pharmacies_lag1,SKU_avg_variants_lag1,SKU_dqs_min_lag1,SKU_dqs_median_lag1,SKU_dqs_mean_lag1,...,nwt_movav_cluster1,nwt_movav_cluster2,nwt_movav_cluster3,nwt_movav_cluster4,nwt_movav_cluster5,nwt_movav_cluster6,nwt_movav_cluster7,nwt_movav_cluster8,nwt_movav_cluster9,nwt_movav_cluster10
0,13,297,1638,2060,0.151515,0.098901,0.694444,0.0,0.000000,0.025112,...,68.2,2.0,93.4,0.6,1.2,0.6,0.4,0.4,188.0,0.2
1,15,308,1741,2206,0.121212,0.098901,0.763889,0.0,0.015625,0.023996,...,61.0,2.6,89.6,0.2,0.6,0.4,0.2,0.0,181.0,0.0
2,20,316,1867,2373,0.121212,0.098901,0.833333,0.0,0.000000,0.013393,...,63.0,3.8,89.6,0.2,1.8,0.4,0.0,0.0,180.2,0.0
3,19,324,2006,2557,0.090909,0.109890,0.812500,0.0,0.015625,0.013393,...,68.0,4.0,100.8,0.6,1.4,0.0,0.2,0.0,188.0,0.0
4,22,337,2129,2722,0.151515,0.142857,0.817308,0.0,0.046875,0.066406,...,71.4,4.2,109.4,0.6,1.8,0.2,0.2,0.0,198.8,0.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53287,176,449,11205,15210,0.666667,1.000000,1.000000,0.0,0.000000,0.039683,...,847.0,24.2,1270.2,23.4,34.0,2.4,0.6,0.0,1302.0,39.4
53288,179,449,10919,14827,1.000000,0.800000,1.000000,0.0,0.000000,0.126984,...,846.6,27.4,1265.8,26.4,29.8,3.2,0.4,0.0,1301.2,38.6
53289,182,450,10753,14613,1.000000,1.000000,1.000000,0.0,0.055556,0.158730,...,845.0,26.0,1283.2,27.4,27.8,3.0,0.8,0.0,1331.0,39.0
53290,180,452,10495,14209,0.000000,0.400000,1.000000,0.0,0.000000,0.000000,...,836.6,29.2,1277.4,26.6,28.0,4.0,1.4,0.0,1373.4,40.0


In [232]:
# Identifiers of the rows whose scaled SKU_n_variants_lag1 is +inf
ID_Data.loc[(X_Data_z.SKU_n_variants_lag1 == np.inf)]

Unnamed: 0,SKU,sale_year,sale_week,sale_yearweek
13558,117,2019.0,13,117.0
13565,117,2019.0,20,124.0
13566,117,2019.0,21,125.0


In [233]:
# Inspect the period-1 feature scalers of SKU 117 (the SKU with infinite scaled values)
x_scalers[1][117]

{'SKU_n_variants': 0,
 'SKU_n_pharmacies': 0,
 'SKU_avg_variants': 0.0,
 'SKU_dqs_max': 1,
 'SKU_wqs': 1,
 'SKU_tbt_max': 707.1138657,
 'SKU_ndt_max': 1,
 'SKU_nwt': 1,
 'SKU_unit_discount_max': 0.0,
 'SKU_unit_price_max': 900.0}

In [201]:
# Display the unscaled features of SKU 117
X_Data.loc[ID_Data.SKU==117]

Unnamed: 0,n_pharmacies_lag1,n_SKU_lag1,n_SKU_x_brand_lag1,n_SKU_x_brand_x_strength_lag1,SKU_n_variants_lag1,SKU_n_pharmacies_lag1,SKU_avg_variants_lag1,SKU_dqs_min_lag1,SKU_dqs_median_lag1,SKU_dqs_mean_lag1,...,nwt_movav_cluster1,nwt_movav_cluster2,nwt_movav_cluster3,nwt_movav_cluster4,nwt_movav_cluster5,nwt_movav_cluster6,nwt_movav_cluster7,nwt_movav_cluster8,nwt_movav_cluster9,nwt_movav_cluster10
13461,37,386,3373,4298,0,0,0.0,0,0.0,0.000000,...,101.2,3.6,183.0,1.4,3.6,0.4,0.2,0.0,318.0,0.8
13462,40,394,3540,4514,0,0,0.0,0,0.0,0.000000,...,104.0,3.4,205.2,1.6,4.2,0.4,0.2,0.0,344.6,0.4
13463,40,399,3745,4778,0,0,0.0,0,0.0,0.000000,...,110.8,5.0,233.4,2.2,4.2,0.4,0.4,0.0,377.0,0.4
13464,40,407,3867,4936,0,0,0.0,0,0.0,0.000000,...,122.4,5.8,254.8,2.6,4.4,0.6,0.4,0.0,420.2,0.6
13465,41,414,4007,5102,0,0,0.0,0,0.0,0.000000,...,131.2,6.2,284.2,2.2,4.6,0.6,0.8,0.4,449.2,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13563,176,449,11205,15210,0,0,0.0,0,0.0,0.000000,...,847.0,24.2,1270.2,23.4,34.0,2.4,0.6,0.0,1302.0,39.4
13564,179,449,10919,14827,0,0,0.0,0,0.0,0.000000,...,846.6,27.4,1265.8,26.4,29.8,3.2,0.4,0.0,1301.2,38.6
13565,182,450,10753,14613,1,1,1.0,0,0.0,0.142857,...,845.0,26.0,1283.2,27.4,27.8,3.0,0.8,0.0,1331.0,39.0
13566,180,452,10495,14209,1,1,1.0,0,0.0,0.142857,...,836.6,29.2,1277.4,26.6,28.0,4.0,1.4,0.0,1373.4,40.0


In [203]:
# Summary statistics of the demands of SKU 117
Y_Data.loc[ID_Data.SKU==117].describe()

Unnamed: 0,Y0,Y1,Y2,Y3,Y4
count,107.0,106.0,105.0,104.0,103.0
mean,0.028037,0.028302,0.028571,0.028846,0.029126
std,0.165856,0.166622,0.167398,0.168185,0.168983
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0


In [None]:
# Tune the weights model separately for each look-ahead tau=0,...,4
for tau in taus:

    # Preprocessing helper
    pp = PreProcessing()

    # Select the training data: all periods before test_start - tau
    is_train = ID_Data.sale_yearweek < test_start - tau

    id_train = pp.train_test_split(ID_Data, train=is_train)
    X_train = pp.train_test_split(X_Data_z, train=is_train, to_array=True)
    y_train = pp.train_test_split(
        Y_Data_z, train=is_train, rolling_horizon=list(range(tau + 1)), to_array=True
    )

    # Weights model to tune
    weightsmodel = RandomForestWeightsModel(model_params)

    # Cross-validated search over 3 time series splits; the result is stored per tau
    cv_folds = pp.split_timeseries_cv(n_splits=3, timePeriods=id_train.sale_yearweek)
    cv_results = weightsmodel.tune(
        X_train, y_train, cv_folds, hyper_params_grid, tuning_params, random_search, print_status
    )
    weightsmodel.save_cv_result(path=f'{PATH_WEIGHTSMODEL}/{weightsmodel_cv_name}_tau{tau}.joblib')

#### Fit weight functions and generate weights

We now fit the global random forest weights model (i.e., the weight functions) for each $\tau=0,...,4$ and over periods $t=1,...,T$. This is done across all products at once (global training). Then, for each $\tau=0,...,4$ and over periods $t=1,...,T$, we generate for each product (SKU) $k=1,...,M$ the weights given the test feature $x_{k,t}$. This is done *jointly* across products (by using $x_{t}=(x_{1,t},...,x_{M,t})^{\top}$) for computational efficiency - the weights for each individual product can be extracted afterwards.

In [None]:
# Settings forwarded to the weights model (32 parallel jobs, silent)
model_params = dict(n_jobs=32, verbose=0)

# Status flag
print_status = True

In [None]:
# Fit weight functions and generate weights for each look-ahead tau=0,...,4
for tau in taus:

    # Result containers, keyed by period t
    samples, weightfunctions, weightfunctions_times, weights, weights_times = ({} for _ in range(5))

    # For each period t=1,...,T
    for t in ts:

        # Look-ahead cannot reach beyond the end of the planning horizon
        tau_ = min(tau, T-t)

        # Status
        print(f"#### Look-ahead tau={tau} (tau'={tau_}), period t={t}...")
        start_time = dt.datetime.now().replace(microsecond=0)

        # Weights model configured from the stored CV result of the effective look-ahead tau_
        weightsmodel = RandomForestWeightsModel()
        weightsmodel.load_cv_result(path=f'{PATH_WEIGHTSMODEL}/{weightsmodel_cv_name}_tau{tau_}.joblib')

        # Generate samples, fit the weight function, and generate weights
        res = weightsmodel.training_and_sampling2(
            ID_Data, X_Data, Y_Data,
            tau=tau_,
            timePeriods=ID_Data.sale_yearweek,
            timePeriodsTestStart=test_start+t-1,
            model_params=model_params,
        )
        samples[t], weightfunctions[t], weightfunctions_times[t], weights[t], weights_times[t] = res

        # Status
        print('...done in', dt.datetime.now().replace(microsecond=0) - start_time)

    # Save one file per result type and look-ahead tau
    for label, artefact in (
        ('samples', samples),
        ('weightfunctions', weightfunctions),
        ('weightfunctions_times', weightfunctions_times),
        ('weights', weights),
        ('weights_times', weights_times),
    ):
        _ = joblib.dump(artefact, f'{PATH_WEIGHTSMODEL}/{weightsmodel_name}_{label}_tau{tau}.joblib')

In [None]:
##### Testing with "old" hyper params for global RF models ...

In [18]:
# Weights model names (not reshaped, "old" random forest parameters);
# the cross-validation artefacts use the same name prefixed with 'cv_'
weightsmodel_name = 'rfwm_global_not_reshaped_old_rf_params'
weightsmodel_cv_name = 'cv_' + weightsmodel_name

In [None]:
# Load five previously stored model files (rf_mv_t1 ... rf_mv_t5) as rfwm_0 ... rfwm_4
# NOTE(review): presumably one model per look-ahead tau=0,...,4 - confirm
# NOTE(review): joblib.load unpickles arbitrary objects; only load files from trusted sources
rfwm_0 = joblib.load('/home/fesc/DataDrivenDynamicInventoryControl/rf_mv_t1.joblib')
rfwm_1 = joblib.load('/home/fesc/DataDrivenDynamicInventoryControl/rf_mv_t2.joblib')
rfwm_2 = joblib.load('/home/fesc/DataDrivenDynamicInventoryControl/rf_mv_t3.joblib')
rfwm_3 = joblib.load('/home/fesc/DataDrivenDynamicInventoryControl/rf_mv_t4.joblib')
rfwm_4 = joblib.load('/home/fesc/DataDrivenDynamicInventoryControl/rf_mv_t5.joblib')

In [392]:
# Settings forwarded to the weights model (96 parallel jobs, silent)
model_params = dict(n_jobs=96, verbose=0)

# Status flag
print_status = True

In [393]:
# Fixed hyper parameters per look-ahead tau=0,...,4:
# only max_features differs between look-aheads
hyper_params = {
    tau: {
        'max_features': max_features,
        'min_samples_leaf': 10,
        'n_estimators': 500,
    }
    for tau, max_features in enumerate([144, 160, 96, 112, 48])
}

In [22]:
# Fit weight functions and generate weights for each look-ahead tau=0,...,4,
# using the fixed hyper parameters instead of a stored CV result
for tau in taus:

    # Result containers, keyed by period t
    samples, weightfunctions, weightfunctions_times, weights, weights_times = ({} for _ in range(5))

    # For each period t=1,...,T
    for t in ts:

        # Look-ahead cannot reach beyond the end of the planning horizon
        tau_ = min(tau, T-t)

        # Status
        print(f"#### Look-ahead tau={tau} (tau'={tau_}), period t={t}...")
        start_time = dt.datetime.now().replace(microsecond=0)

        # Generate samples, fit the weight function, and generate weights
        weightsmodel = RandomForestWeightsModel(hyper_params=hyper_params[tau_])
        res = weightsmodel.training_and_sampling2(
            ID_Data, X_Data, Y_Data,
            tau=tau_,
            timePeriods=ID_Data.sale_yearweek,
            timePeriodsTestStart=test_start+t-1,
            model_params=model_params,
        )
        samples[t], weightfunctions[t], weightfunctions_times[t], weights[t], weights_times[t] = res

        # Status
        print('...done in', dt.datetime.now().replace(microsecond=0) - start_time)

    # Save one file per result type and look-ahead tau
    for label, artefact in (
        ('samples', samples),
        ('weightfunctions', weightfunctions),
        ('weightfunctions_times', weightfunctions_times),
        ('weights', weights),
        ('weights_times', weights_times),
    ):
        _ = joblib.dump(artefact, f'{PATH_WEIGHTSMODEL}/{weightsmodel_name}_{label}_tau{tau}.joblib')

#### Look-ahead tau=0 (tau'=0), period t=1...
...done in 0:01:24
#### Look-ahead tau=0 (tau'=0), period t=2...
...done in 0:01:28
#### Look-ahead tau=0 (tau'=0), period t=3...
...done in 0:01:26
#### Look-ahead tau=0 (tau'=0), period t=4...
...done in 0:01:30
#### Look-ahead tau=0 (tau'=0), period t=5...
...done in 0:05:12
#### Look-ahead tau=0 (tau'=0), period t=6...
...done in 0:01:33
#### Look-ahead tau=0 (tau'=0), period t=7...
...done in 0:01:30
#### Look-ahead tau=0 (tau'=0), period t=8...
...done in 0:01:35
#### Look-ahead tau=0 (tau'=0), period t=9...
...done in 0:01:32
#### Look-ahead tau=0 (tau'=0), period t=10...
...done in 0:01:34
#### Look-ahead tau=0 (tau'=0), period t=11...
...done in 0:01:35
#### Look-ahead tau=0 (tau'=0), period t=12...
...done in 0:01:39
#### Look-ahead tau=0 (tau'=0), period t=13...
...done in 0:01:36
#### Look-ahead tau=1 (tau'=1), period t=1...
...done in 0:01:32
#### Look-ahead tau=1 (tau'=1), period t=2...
...done in 0:01:29
#### Look-ahead tau=1

In [None]:
## Why does it take longer for t=5 (tau=0)?? Check for other tau's... is there a data issue?

# tau=0, t=5
# tau=1, t=6
# tau=2, t=7
# tau=3, t=8
# tau=4, t=9

# ... what is happening here??

# it happens when t-tau == 5

In [433]:
#### Variant: reshaped data with the old hyper parameters and scaling
weightsmodel_name = 'rfwm_global_r_z_old_hyper_params'

In [434]:
# Fit weight functions and generate weights on scaled data.
# For every period t the features and demands are re-scaled per SKU with the
# scalers stored for that period (x_scalers[t][SKU], y_scalers[t][SKU]).

# The (feature, scale-with) pairs and the row mask of each SKU depend neither on
# tau nor on t, so they are computed once instead of once per (tau, t, SKU, feature)
scale_info = X_Data_Columns.loc[X_Data_Columns.Scale == 'YES']
features_to_scale = scale_info.Feature
features_to_scale_with = scale_info.ScaleWith
scale_pairs = list(zip(features_to_scale, features_to_scale_with))
sku_rows = {SKU: ID_Data.SKU == SKU for SKU in SKUs}

# For each look-ahead tau=0,...,4
for tau in taus:

    # Initialize
    samples, weightfunctions, weightfunctions_times, weights, weights_times = {}, {}, {}, {}, {}

    # For each period t=1,...,T
    for t in ts:

        # Adjust look-ahead tau to account for end of horizon
        tau_ = min(tau,T-t)

        # Status
        print('#### Look-ahead tau='+str(tau)+' (tau\'='+str(tau_)+'), period t='+str(t)+'...')
        start_time = dt.datetime.now().replace(microsecond=0)

        # Weights model with the fixed hyper parameters of the effective look-ahead tau_
        weightsmodel = RandomForestWeightsModel(hyper_params=hyper_params[tau_])

        # Preprocess features and demands on copies so the unscaled data stays intact
        X_Data_z = copy.deepcopy(X_Data)
        Y_Data_z = copy.deepcopy(Y_Data)

        for SKU in SKUs:
            rows = sku_rows[SKU]

            # Features: divide by the scaler of the associated 'scale-with' feature
            scalers = x_scalers[t][SKU].scalers
            for feature_to_scale, feature_to_scale_with in scale_pairs:
                X_Data_z.loc[rows, feature_to_scale] = (
                    X_Data.loc[rows, feature_to_scale] / scalers[feature_to_scale_with]
                )

            # Demands: all demand columns share the SKU's single demand scaler
            Y_Data_z.loc[rows] = Y_Data.loc[rows] / y_scalers[t][SKU].scalers[0]

        # Generate samples, fit weight functions, and generate weights
        res = weightsmodel.training_and_sampling(ID_Data, X_Data_z, Y_Data_z, tau=tau_, timePeriods=ID_Data.sale_yearweek,
                                                 timePeriodsTestStart=test_start+t-1, model_params=model_params)
        samples[t], weightfunctions[t], weightfunctions_times[t], weights[t], weights_times[t] = res

        # Status
        print('...done in', dt.datetime.now().replace(microsecond=0) - start_time)

    # Save
    _ = joblib.dump(samples, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
    _ = joblib.dump(weightfunctions, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_tau'+str(tau)+'.joblib')
    _ = joblib.dump(weightfunctions_times, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_times_tau'+str(tau)+'.joblib')
    _ = joblib.dump(weights, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')
    _ = joblib.dump(weights_times, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_times_tau'+str(tau)+'.joblib')

#### Look-ahead tau=0 (tau'=0), period t=1...
...done in 0:02:13
#### Look-ahead tau=0 (tau'=0), period t=2...
...done in 0:02:18
#### Look-ahead tau=0 (tau'=0), period t=3...
...done in 0:02:16
#### Look-ahead tau=0 (tau'=0), period t=4...
...done in 0:02:19
#### Look-ahead tau=0 (tau'=0), period t=5...
...done in 0:05:43
#### Look-ahead tau=0 (tau'=0), period t=6...
...done in 0:02:22
#### Look-ahead tau=0 (tau'=0), period t=7...
...done in 0:02:19
#### Look-ahead tau=0 (tau'=0), period t=8...
...done in 0:02:22
#### Look-ahead tau=0 (tau'=0), period t=9...
...done in 0:02:20
#### Look-ahead tau=0 (tau'=0), period t=10...
...done in 0:02:24
#### Look-ahead tau=0 (tau'=0), period t=11...
...done in 0:02:23
#### Look-ahead tau=0 (tau'=0), period t=12...
...done in 0:02:27
#### Look-ahead tau=0 (tau'=0), period t=13...
...done in 0:02:22
#### Look-ahead tau=1 (tau'=1), period t=1...
...done in 0:01:34
#### Look-ahead tau=1 (tau'=1), period t=2...
...done in 0:01:32
#### Look-ahead tau=1

## Local Training and Sampling

The two local models (using 'Local Training and Sampling') are **Rolling Horizon Local Weighted SAA (wSAA)**, and **Rolling Horizon Local Robust Weighted SAA (wSAA-R)**, which is the analogous model with robust extension.

Given product $k$, period $t$, and look-ahead $\tau$, both models apply Weighted SAA over the 'local' distribution $\{w_{k,t,\tau}^{\,i}(x_{k,t}^{\,i}),(d_{k,t}^{\,i},...,d_{k,t+\tau}^{\,i})\}_{i=1}^{N_{k,t,\tau}}$, with weight functions $w_{k,t,\tau}(\,\cdot\,)$ trained on data $S_{k,t,\tau}^{\,\text{Local}}=\{(x_{k,t}^{\,i},d_{k,t}^{\,i},...,d_{k,t+\tau}^{\,i})\}_{i=1}^{N_{k,t,\tau}}$ for each product $k=1,...,M$ separately.

### Data Preprocessing

We first load and pre-process the data. This includes reshaping demand time series into $(\tau+1)$-periods rolling look-ahead horizon sequences.

- **ID_Data** (pd.DataFrame) stores identifiers (in particular the product (SKU) identifier and the timePeriod (sale_yearweek) identifier)
- **X_Data** (pd.DataFrame) is the 'feature matrix', i.e., each row is a feature vector $x_{j,n}$, where $n$ indexes the training observations (rows) in the data
- **Y_Data** (pd.DataFrame) is the demand data $d_{j,n}$ (a time series per product)
- **X_Data_Columns** (pd.DataFrame) provides 'selectors' for local vs. global feature sets

In [532]:
# Weights model names (file-name stems for the artifacts stored under PATH_WEIGHTSMODEL)
weightsmodel_cv_name = 'cv_rfwm_local_r'  # stem of the cross-validation result files
weightsmodel_name = 'rfwm_local_r'  # stem of the samples / weight functions / weights files

In [533]:
# Read data (CSV files under PATH_DATA): identifiers, features, feature selectors, and demand
ID_Data = pd.read_csv(PATH_DATA+'/ID_Data.csv')
X_Data = pd.read_csv(PATH_DATA+'/X_Data.csv')
X_Data_Columns = pd.read_csv(PATH_DATA+'/X_Data_Columns4.csv')
Y_Data = pd.read_csv(PATH_DATA+'/Y_Data.csv')

In [534]:
# Select features: keep the selector rows flagged Local == 'YES' and restrict X_Data
# to the columns named in their 'Feature' column
X_Data_Columns = X_Data_Columns.loc[X_Data_Columns.Local == 'YES']
X_Data = X_Data[X_Data_Columns.Feature.values]

In [535]:
# Ensure data is sorted by SKU and sale_yearweek for preprocessing
# (the three frames are concatenated column-wise so that they are reordered by the same
# row permutation; this assumes their rows are aligned on load - TODO confirm)
data = pd.concat([ID_Data, X_Data, Y_Data], axis=1).sort_values(by=['SKU', 'sale_yearweek']).reset_index(drop=True)

# Split the sorted frame back into its three parts by their original column names
ID_Data = data[ID_Data.columns]
X_Data = data[X_Data.columns]
Y_Data = data[Y_Data.columns]

In [536]:
# Create multi-period demand vectors: column 'Y<tau>' holds, for each row, the demand 'Y'
# found tau rows later within the same SKU (missing where the SKU has no such row)
data = pd.concat([ID_Data, Y_Data], axis=1)

# Group once and shift only the demand column, instead of re-grouping and shifting
# every column of `data` for each look-ahead and then discarding all but 'Y'
demand_by_sku = data.groupby(['SKU'])['Y']

Y = {}
for tau in taus:
    Y['Y'+str(tau)] = demand_by_sku.shift(-tau)
Y_Data = pd.DataFrame(Y)

### Weights model

The weights model - and thus the data used, weight functions, and weights per sample - are the same for the two local models **wSAA** and **wSAA-R**. First, we tune the hyper parameters of the random forest weights model for each given look-ahead $\tau$ (as for each look-ahead $\tau$ we have a different response for the multi-output random forest regressor) and for each product (SKU) $k=1,...,M$ separately. Second, we fit all weight functions (for each look-ahead $\tau=0,...,4$ and over periods $t=1,...,T$) for each product (SKU) $k=1,...,M$ separately and generate all weights (for each look-ahead $\tau=0,...,4$, over periods $t=1,...,T$, and for each product (SKU) $k=1,...,M$ separately).

#### Tune weights model

To tune the hyper parameters of the local random forest weights model for each product (SKU) $k=1,...,M$, we use 3-fold rolling timeseries cross-validation on the training data and perform random search with 100 iterations over the specified hyper parameter search grid.

In [538]:
# Settings for tuning the random forest weights model

# Fixed settings handed to RandomForestWeightsModel(model_params)
model_params = dict(oob_score=True, random_state=12345, n_jobs=1, verbose=0)

# Hyper parameter search grid handed to weightsmodel.tune(...)
hyper_params_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None],
    min_samples_split=list(range(2, 20)),
    min_samples_leaf=list(range(2, 10)),
    max_features=list(range(8, 256, 8)),
    max_leaf_nodes=[None],
    min_impurity_decrease=[0.0],
    bootstrap=[True],
    max_samples=[0.75, 0.80, 0.85, 0.90, 0.95, 1.00],
)

# Search settings handed to weightsmodel.tune(...)
tuning_params = dict(
    n_iter=100,
    scoring={'MSE': 'neg_mean_squared_error'},
    return_train_score=True,
    refit='MSE',
    random_state=12345,
    n_jobs=32,
    verbose=0,
)

# Further positional arguments of weightsmodel.tune(...)
random_search = True
print_status = False

In [None]:
# Tune the local weights model: one hyper parameter search per look-ahead tau and product (SKU)
for tau in taus:

    # Status
    print('Look-ahead tau='+str(tau)+'...')
    start_time = dt.datetime.now().replace(microsecond=0)

    # CV results of this look-ahead, keyed by SKU
    cv_results = {}

    for SKU in SKUs:

        # Preprocessing module
        pp = PreProcessing()

        # Training rows: this SKU, periods before the test start shortened by the look-ahead
        args = {'train': (ID_Data.SKU == SKU) & (ID_Data.sale_yearweek < test_start - tau)}

        # Select and reshape training data (the response covers look-aheads 0,...,tau)
        id_train = pp.train_test_split(ID_Data, **args)
        X_train = pp.train_test_split(X_Data, **args, to_array=True)
        y_train = pp.train_test_split(Y_Data, **args, rolling_horizon=list(range(tau + 1)), to_array=True)

        # Weights model with the fixed model params
        weightsmodel = RandomForestWeightsModel(model_params)

        # CV search over 3 time series folds built from the training periods
        cv_folds = pp.split_timeseries_cv(n_splits=3, timePeriods=id_train.sale_yearweek)
        cv_results[SKU] = weightsmodel.tune(
            X_train, y_train, cv_folds, hyper_params_grid, tuning_params, random_search, print_status
        )

        # Status (carriage return keeps the progress on a single line)
        elapsed = dt.datetime.now().replace(microsecond=0) - start_time
        print('SKU '+str(SKU)+' of '+str(len(SKUs))+' in', elapsed, end='\r', flush=True)

    # Save the CV results of this look-ahead
    _ = joblib.dump(cv_results, PATH_WEIGHTSMODEL+'/'+weightsmodel_cv_name+'_tau'+str(tau)+'.joblib')
    print('')

#### Fit weight functions and generate weights

We now fit a local random forest weights model (i.e., the weight functions) for each $\tau=0,...,4$, period $t=1,...,T$, and product (SKU) $k=1,...,M$ separately (local training). Then, for each $\tau=0,...,4$, period $t=1,...,T$, and product (SKU) $k=1,...,M$ separately, we generate the weights given the test feature $x_{k,t}$. This is done *separately* for each product (SKU) $k=1,...,M$.

In [None]:
# Set parameters
# (rebinds the `model_params` and `print_status` names that the tuning step also uses;
# `model_params` is passed to training_and_sampling(...) when fitting the weight functions)
model_params = {
    'n_jobs': 32,
    'verbose': 0
}

print_status = True

In [None]:
# Fit the local weight functions and generate the weights:
# one weights model per look-ahead tau, product (SKU), and period t
for tau in taus:
    
    # Status
    print('Look-ahead tau='+str(tau)+'...')
    start_time = dt.datetime.now().replace(microsecond=0)
    
    # Initialize (results of this look-ahead, keyed by SKU and then by period t)
    samples, weightfunctions, weightfunctions_times, weights, weights_times = {}, {}, {}, {}, {}
    
    # For each product (SKU) k=1,...,M
    for SKU in SKUs:
        
        # Initialize
        samples[SKU], weightfunctions[SKU], weightfunctions_times[SKU], weights[SKU], weights_times[SKU] = {}, {}, {}, {}, {}
        
        # Row selector of this product: it does not depend on t, so it is built once per SKU
        # instead of four times for every period
        is_sku = ID_Data.SKU == SKU
        
        # For each period t=1,...,T
        for t in ts:
        
            # Adjust look-ahead tau to account for end of horizon
            tau_ = min(tau,T-t)

            # Generate samples, fit weight functions, and generate weights (based on tuned weights model).
            # The CV result is loaded for the adjusted look-ahead tau_, while the outputs below are
            # saved under the nominal look-ahead tau.
            weightsmodel = RandomForestWeightsModel()
            weightsmodel.load_cv_result(path=PATH_WEIGHTSMODEL+'/'+weightsmodel_cv_name+'_tau'+str(tau_)+'.joblib', SKU=SKU)
            res = weightsmodel.training_and_sampling(ID_Data.loc[is_sku], X_Data.loc[is_sku], Y_Data.loc[is_sku], 
                                                     tau=tau_, timePeriods=ID_Data.loc[is_sku].sale_yearweek, 
                                                     timePeriodsTestStart=test_start+t-1, model_params=model_params)
            samples[SKU][t], weightfunctions[SKU][t], weightfunctions_times[SKU][t], weights[SKU][t], weights_times[SKU][t] = res

        # Status (carriage return keeps the progress on a single line)
        print('SKU '+str(SKU)+' of '+str(len(SKUs))+' in', dt.datetime.now().replace(microsecond=0) - start_time, end='\r', flush=True)
        
    # Save
    _ = joblib.dump(samples, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')  
    _ = joblib.dump(weightfunctions, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_tau'+str(tau)+'.joblib')    
    _ = joblib.dump(weightfunctions_times, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_times_tau'+str(tau)+'.joblib')    
    _ = joblib.dump(weights, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')    
    _ = joblib.dump(weights_times, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_times_tau'+str(tau)+'.joblib')
    print('')

In [None]:
##### RESHAPED #####

# Rolling Horizon Optimization

The code below runs an experiment for all given products (SKUs) $k=1,...,M$ over a test planning horizon $t=1,...,T$ with $T=13$ for three different cost parameter settings $\{K, u, h, b\}$ that vary the critical ratio ($CR=\frac{b}{b+h}$) of holding and backlogging yielding
- $CR=0.50$: $\{K=100, u=0.5, h=1, b=1\}$
- $CR=0.75$: $\{K=100, u=0.5, h=1, b=3\}$
- $CR=0.90$: $\{K=100, u=0.5, h=1, b=9\}$

We run experiments for different choices of the look-ahead $\tau=0,...,4$.

## Experiment functions

We first define several functions for experiments over different choices for the look-ahead $\tau=0,...,4$, cost parameter settings $\{K,u,h,b\}$, products (SKUs) $k=1,...,M$, and periods $t=1,...,T$ of the planning horizon.

In [515]:
### Function to extract historical demand samples, weights, and actuals for each product and test period
def prep_samples_and_weights(samples, weights=None, e=None, **kwargs):

    """
    Reshape weights model output into per-product / per-period dictionaries.

    The layout of `samples` is inferred from its length: with exactly one entry per
    product it is treated as 'local' (samples[SKU][t]), otherwise as 'global'
    (samples[t], with rows attributed to products via the 'id_test' / 'id_train'
    frames stored next to 'y_test' / 'y_train').

    Arguments:
        samples: dict of sample dicts providing 'y_train' and 'y_test' (global layout:
            also 'id_test', and 'id_train' if e is given)
        weights: optional weights in the same layout as samples
        e: optional multiplier for the epsilons, computed as
            e * np.std(y_train, axis=0).flatten()[0] on the product's training demand
        SKUs (keyword): products to extract, default 1,...,460
        ts (keyword): periods to extract, default 1,...,13

    Returns:
        Tuple (samples_, actuals_), followed by weights_ if weights is given and by
        epsilons_ if e is given. actuals_, weights_, and epsilons_ are keyed [SKU][t];
        samples_ is keyed [SKU][t] for the local layout and [t] for the global layout.
    """

    SKUs = kwargs.get('SKUs', range(1,460+1))
    ts = kwargs.get('ts', range(1,13+1))

    with_weights = weights is not None
    with_epsilons = e is not None
    weights_, epsilons_ = None, None

    if len(samples) == len(SKUs):

        # Local: everything is already stored per product and period
        samples_ = {SKU: {t: samples[SKU][t]['y_train'] for t in ts} for SKU in SKUs}
        actuals_ = {SKU: {t: samples[SKU][t]['y_test'].flatten() for t in ts} for SKU in SKUs}

        if with_weights:
            weights_ = {SKU: {t: weights[SKU][t].flatten() for t in ts} for SKU in SKUs}

        if with_epsilons:
            epsilons_ = {
                SKU: {t: e*np.std(samples[SKU][t]['y_train'], axis=0).flatten()[0] for t in ts}
                for SKU in SKUs
            }

    else:

        # Global: samples are shared by all products, the rest is selected per product
        samples_ = {t: samples[t]['y_train'] for t in ts}
        actuals_ = {
            SKU: {t: samples[t]['y_test'][samples[t]['id_test'].SKU==SKU].flatten() for t in ts}
            for SKU in SKUs
        }

        if with_weights:
            weights_ = {
                SKU: {t: weights[t][samples[t]['id_test'].SKU==SKU].flatten() for t in ts}
                for SKU in SKUs
            }

        if with_epsilons:
            epsilons_ = {
                SKU: {
                    t: e*np.std(samples[t]['y_train'][samples[t]['id_train'].SKU == SKU], axis=0).flatten()[0]
                    for t in ts
                }
                for SKU in SKUs
            }

    # Return only what was requested, always in the order samples, actuals, weights, epsilons
    output = [samples_, actuals_]
    if with_weights:
        output.append(weights_)
    if with_epsilons:
        output.append(epsilons_)
    return tuple(output)

In [520]:
### Function to extract historical demand samples, weights, and actuals for each product and test period
def prep_samples_and_weights(samples, weights=None, e=None, scalers=None, **kwargs):

    """
    Reshape weights model output into per-product / per-period dictionaries.

    Two layouts of `samples` are supported:
      - local:  samples[SKU][t] is a dict with 'y_train' and 'y_test'
      - global: samples[t] is a dict with 'y_train', 'y_test', 'id_test' (and 'id_train'
        if e is given); rows are attributed to products via the SKU column of the id frames

    The layout is detected from the structure of `samples` (a global entry carries the
    key 'y_train' directly, a local entry is keyed by period). Only if that cannot be
    inspected (empty `samples` or non-dict entries), the former rule
    len(samples) == len(SKUs) decides. This keeps global samples from being mistaken
    for local ones when the number of periods equals the number of products, and lets
    local samples be extracted for a subset of their products.

    Arguments:
        samples: sample dicts in local or global layout (see above)
        weights: optional weights in the same layout as samples
        e: optional multiplier; epsilon = e * standard deviation of the product's
            flattened training demand (after rescaling, if scalers apply)
        scalers: optional, used in the global layout only (ignored for local samples):
            training demand and actuals are multiplied by scalers[t][SKU].scalers[0]
        SKUs (keyword): products to extract, default 1,...,460
        ts (keyword): periods to extract, default 1,...,13

    Returns:
        Tuple (samples_, actuals_), followed by weights_ if weights is given and by
        epsilons_ if e is given. actuals_, weights_, and epsilons_ are keyed [SKU][t].
        samples_ is keyed [SKU][t] for the local layout and for the global layout with
        scalers, and [t] for the global layout without scalers.
    """

    SKUs = kwargs.get('SKUs', range(1,460+1))
    ts = kwargs.get('ts', range(1,13+1))

    # Detect the layout structurally; fall back to the length rule if not inspectable
    first_entry = next(iter(samples.values()), None) if isinstance(samples, dict) else None
    if isinstance(first_entry, dict):
        is_local = 'y_train' not in first_entry
    else:
        is_local = len(samples) == len(SKUs)

    weights_, epsilons_ = None, None

    # Local
    if is_local:

        # Samples
        samples_ = {SKU: {t: samples[SKU][t]['y_train'] for t in ts} for SKU in SKUs}

        # Actuals
        actuals_ = {SKU: {t: samples[SKU][t]['y_test'].flatten() for t in ts} for SKU in SKUs}

        # Weights
        if weights is not None:
            weights_ = {SKU: {t: weights[SKU][t].flatten() for t in ts} for SKU in SKUs}

        # Epsilons
        if e is not None:
            epsilons_ = {
                SKU: {t: e*np.std(samples[SKU][t]['y_train'].flatten()) for t in ts}
                for SKU in SKUs
            }

    # Global
    else:

        if scalers is not None:

            # Samples - rescaling (all training samples, scaled with the product's factor)
            samples_ = {
                SKU: {t: samples[t]['y_train'] * scalers[t][SKU].scalers[0] for t in ts}
                for SKU in SKUs
            }

            # Actuals - rescaling
            actuals_ = {
                SKU: {
                    t: samples[t]['y_test'][samples[t]['id_test'].SKU==SKU].flatten() * scalers[t][SKU].scalers[0]
                    for t in ts
                }
                for SKU in SKUs
            }

        else:

            # Samples (shared by all products)
            samples_ = {t: samples[t]['y_train'] for t in ts}

            # Actuals
            actuals_ = {
                SKU: {t: samples[t]['y_test'][samples[t]['id_test'].SKU==SKU].flatten() for t in ts}
                for SKU in SKUs
            }

        # Weights
        if weights is not None:
            weights_ = {
                SKU: {t: weights[t][samples[t]['id_test'].SKU==SKU].flatten() for t in ts}
                for SKU in SKUs
            }

        # Epsilons (based on the product's own training demand, rescaled if scalers is given)
        if e is not None:
            epsilons_ = {}
            for SKU in SKUs:
                epsilons_[SKU] = {}
                for t in ts:
                    y_train = samples[t]['y_train'][samples[t]['id_train'].SKU == SKU].flatten()
                    if scalers is not None:
                        y_train = y_train * scalers[t][SKU].scalers[0]
                    epsilons_[SKU][t] = e*np.std(y_train)

    # Return only what was requested, always in the order samples, actuals, weights, epsilons
    output = [samples_, actuals_]
    if weights is not None:
        output.append(weights_)
    if e is not None:
        output.append(epsilons_)
    return tuple(output)

In [447]:
### Function to run an experiment over a list of given cost parameter settings and the specified model
def run_experiment(wsaamodel, cost_params, actuals, samples=None, weights=None, epsilons=None, print_progress=False,
                   path_to_save=None, name_to_save=None, return_results=True, **kwargs):
    
    """
    Run the rolling horizon optimization once per cost parameter setting.

    Arguments:
        wsaamodel: model whose set_params(...) is called with kwargs merged with the
            current cost parameter setting (the setting wins on duplicate keys)
        cost_params: list of dicts; each needs the key 'CR' for the result meta data
        actuals: actual demands passed to the optimization
        samples, weights, epsilons: passed to RollingHorizonOptimization.run(...); if
            samples is None, run_expost(...) is applied to the actuals instead
        print_progress: print status messages
        path_to_save, name_to_save: if both are given, results are written to
            '<path_to_save>/<name_to_save>_SKU<SKU>_tau<tau>.csv'
        return_results: return the results (True) or only SKU and timings (False)
        kwargs: forwarded to wsaamodel.set_params(...) and added as constant columns
            to the results; 'SKU' and 'tau' are also used for the file name

    Returns:
        pd.DataFrame of results (meta columns followed by the optimization output, rows
        of all cost parameter settings stacked) if return_results, else a dict with
        keys 'SKU', 'exec_time_sec', and 'cpu_time_sec'.

    Raises:
        ValueError: if cost_params is not a list
    """
    
    # Guard: cost_params has to be a list of dict(s)
    if type(cost_params) is not list:
        raise ValueError('Argument cost_params has to be a list of at least one dict with keys {K, u, h, b}')  
    
    # Timer (wall clock and CPU)
    wall_start = time.time()
    cpu_start = time.process_time()

    # Status
    if print_progress and 'SKU' in kwargs:
        print('SKU:', kwargs['SKU'])
    
    # Initialize
    optimizer = RollingHorizonOptimization()
    results = pd.DataFrame()
    samples_given = samples is not None

    # For each cost param setting
    for setting in cost_params:

        # Print progress
        if print_progress:
            print('...cost param setting:', setting)
        
        # Configure the model for this setting
        wsaamodel.set_params(**{**kwargs, **setting})

        if samples_given:
            # Apply (Weighted) SAA model; one result row per entry of samples
            result = optimizer.run(wsaamodel, samples, actuals, weights, epsilons)
            n_rows = len(samples)
        else:
            # Apply ex-post clairvoyant model; one result row per column of actuals
            result = optimizer.run_expost(wsaamodel, actuals)
            n_rows = actuals.shape[1]

        # Store results (meta data repeated on every row, joined column-wise with the output)
        meta = pd.DataFrame({'CR': setting['CR'], **kwargs}, index=list(range(n_rows)))
        block = pd.concat([meta, result], axis=1)
        results = pd.concat([results, block], axis=0)

    # Save result as csv file
    if path_to_save is not None and name_to_save is not None:
        sku_tag = str(kwargs.get('SKU', None))
        tau_tag = str(kwargs.get('tau', None))
        target = path_to_save+'/'+name_to_save+'_SKU'+sku_tag+'_tau'+tau_tag+'.csv'
        results.to_csv(path_or_buf=target, sep=',', index=False)

    # Timer
    exec_time_sec = time.time() - wall_start
    cpu_time_sec = time.process_time() - cpu_start
    
    # Status
    if print_progress:
        print('>>>> Done:', str(np.around(exec_time_sec/60,1)), 'minutes')

    # Return  
    if return_results:
        return results
    return {'SKU': kwargs.get('SKU', None), 'exec_time_sec': exec_time_sec, 'cpu_time_sec': cpu_time_sec}

In [448]:
### Context manager (Credits: 'https://stackoverflow.com/questions/24983493/tracking-progress-of-joblib-parallel-execution')
@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """
    Temporarily patch joblib so that completed batches advance the given tqdm progress bar.

    Yields the progress bar; on exit joblib's original callback class is restored
    and the progress bar is closed, also if the body raised.
    """
    original_callback = joblib.parallel.BatchCompletionCallBack

    class ProgressReportingCallback(original_callback):
        """Advance the progress bar by the batch size, then run the original callback."""

        def __call__(self, *args, **kwargs):
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    # Patch joblib for the duration of the with-block
    joblib.parallel.BatchCompletionCallBack = ProgressReportingCallback
    try:
        yield tqdm_object
    finally:
        joblib.parallel.BatchCompletionCallBack = original_callback
        tqdm_object.close()

## (a) Rolling Horizon Global Weighted SAA (GwSAA)

...

In [31]:
# Weights model names
# NOTE: this first pair has no effect, it is immediately overridden by the pair below
weightsmodel_cv_name = 'cv_rfwm_global_not_reshaped'
weightsmodel_name = 'rfwm_global_not_reshaped'

# Weights model names (effective values; file-name stems of the stored weights model artifacts)
weightsmodel_cv_name = 'cv_rfwm_global_not_reshaped_old_rf_params'
weightsmodel_name = 'rfwm_global_not_reshaped_old_rf_params'

In [32]:
# Define experiment parameters (unpacked as keyword arguments into run_experiment)
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,

    # Gurobi meta params
    # (no named parameters of run_experiment: they end up in its **kwargs, are forwarded
    # to wsaamodel.set_params, and are added as constant columns to the results)
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params (named parameters of run_experiment: result folder, file-name
    # stem, status printing, and whether results or only timings are returned)
    'path_to_save': PATH_RESULTS+'/GwSAA_old_params',
    'name_to_save': 'GwSAA_old_params',
    'print_progress': False,
    'return_results': False

}

In [33]:
# Set path: make sure the results folder exists (also creates missing parent folders and
# does not fail if the folder already exists)
os.makedirs(experiment_params['path_to_save'], exist_ok=True)

# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Print:
    print('Look-ahead tau='+str(tau)+'...')
    
    # Prepare data: load the stored samples and weights of this look-ahead ...
    samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
    weights = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')

    # ... and extract samples, actuals, and weights (the loaded objects are replaced)
    samples, actuals, weights = prep_samples_and_weights(samples, weights, SKUs=SKUs, ts=ts)
    
    # For each product (SKU) k=1,...,M: one parallel job per SKU. All jobs receive the same
    # samples; weights and actuals are SKU-specific.
    # NOTE: resultslog is overwritten for every tau, so only the log of the last look-ahead
    # remains after the loop (the results themselves are written to csv by run_experiment)
    with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
        resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(tau=tau, SKU=SKU, wsaamodel=WeightedSAA(), 
                                                                 samples=samples, weights=weights[SKU], actuals=actuals[SKU], 
                                                                 **experiment_params) for SKU in SKUs)

Look-ahead tau=0...


Progress: 100%|██████████| 460/460 [00:52<00:00,  8.68it/s]


Look-ahead tau=1...


Progress: 100%|██████████| 460/460 [07:57<00:00,  1.04s/it]


Look-ahead tau=2...


Progress: 100%|██████████| 460/460 [29:13<00:00,  3.81s/it] 


Look-ahead tau=3...


Progress: 100%|██████████| 460/460 [47:53<00:00,  6.25s/it]  


Look-ahead tau=4...


Progress: 100%|██████████| 460/460 [1:25:29<00:00, 11.15s/it]


In [493]:
#### RESHAPED WITH OLD HYPER PARAMS AND SCALING
# File-name stem of the stored samples and weights loaded for this experiment
weightsmodel_name = 'rfwm_global_r_z_old_hyper_params'

In [494]:
# Define experiment parameters (unpacked as keyword arguments into run_experiment)
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,

    # Gurobi meta params
    # (no named parameters of run_experiment: they end up in its **kwargs, are forwarded
    # to wsaamodel.set_params, and are added as constant columns to the results)
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params (named parameters of run_experiment: result folder, file-name
    # stem, status printing, and whether results or only timings are returned)
    'path_to_save': PATH_RESULTS+'/GwSAA_r_z_old_params',
    'name_to_save': 'GwSAA_r_z_old_params',
    'print_progress': False,
    'return_results': False

}

In [495]:
# Set path: make sure the results folder exists (also creates missing parent folders and
# does not fail if the folder already exists)
os.makedirs(experiment_params['path_to_save'], exist_ok=True)

# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Print:
    print('Look-ahead tau='+str(tau)+'...')
    
    # Prepare data

    # Scaled samples and weights
    samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
    weights = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')

    samples_, actuals_, weights = prep_samples_and_weights(samples, weights, SKUs=SKUs, ts=ts)

    # Unscaled samples, unscaled actuals: multiply with the scaling factor of each product
    # and period, which turns the shared samples into one sample set per SKU
    # NOTE(review): y_scalers is not defined in this part of the notebook - it has to be
    # provided by an earlier cell
    samples = {}
    actuals = {}
    for SKU in SKUs:
        samples[SKU] = {}
        actuals[SKU] = {}
        for t in ts:
            samples[SKU][t] = samples_[t] * y_scalers[t][SKU].scalers[0]
            actuals[SKU][t] = actuals_[SKU][t] * y_scalers[t][SKU].scalers[0]
    
    # For each product (SKU) k=1,...,M: one parallel job per SKU with its own samples,
    # weights, and actuals
    # NOTE: resultslog is overwritten for every tau, so only the log of the last look-ahead
    # remains after the loop (the results themselves are written to csv by run_experiment)
    with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
        resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(tau=tau, SKU=SKU, wsaamodel=WeightedSAA(), 
                                                                 samples=samples[SKU], weights=weights[SKU], actuals=actuals[SKU], 
                                                                 **experiment_params) for SKU in SKUs)

Look-ahead tau=0...


Progress: 100%|██████████| 460/460 [03:00<00:00,  2.55it/s]


Look-ahead tau=1...


Progress: 100%|██████████| 460/460 [14:42<00:00,  1.92s/it]


Look-ahead tau=2...


Progress: 100%|██████████| 460/460 [30:44<00:00,  4.01s/it]


Look-ahead tau=3...


Progress: 100%|██████████| 460/460 [39:51<00:00,  5.20s/it]  


Look-ahead tau=4...


Progress: 100%|██████████| 460/460 [52:11<00:00,  6.81s/it]  


In [None]:
# for tau in taus:
#     for t in ts:
#         tau_=min(tau,T-t)
        
#         prep data ...
        
#         weightsmodel.fit(X, y)
        
#         for SKU in SKUs:
            
#             prep data ...
            
#             weightsmodel.predict(x)
            
#             q, status, solutions, gap = weightedsaa.apply()
            
#             I = I + q - d

## (b) Rolling Horizon Global Robust Weighted SAA (GwSAA-R)

...

In [37]:
# Weights model names
# NOTE: this first pair has no effect, it is immediately overridden by the pair below
weightsmodel_cv_name = 'cv_rfwm_global_not_reshaped'
weightsmodel_name = 'rfwm_global_not_reshaped'

# Weights model names (effective values; file-name stems of the stored weights model artifacts)
weightsmodel_cv_name = 'cv_rfwm_global_not_reshaped_old_rf_params'
weightsmodel_name = 'rfwm_global_not_reshaped_old_rf_params'

In [38]:
# Define experiment parameters (unpacked as keyword arguments into run_experiment)
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,

    # Gurobi meta params
    # (no named parameters of run_experiment: they end up in its **kwargs, are forwarded
    # to wsaamodel.set_params, and are added as constant columns to the results)
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params
    # NOTE: 'name_to_save_prefix' is not a named parameter of run_experiment, so it is
    # treated like the Gurobi params above; the experiment loop derives 'name_to_save' from it
    'path_to_save': PATH_RESULTS+'/GwSAAR',
    'name_to_save_prefix': 'GwSAAR',
    'print_progress': False,
    'return_results': False

}

In [None]:
# Set path: make sure the results folder exists (also creates missing parent folders and
# does not fail if the folder already exists). The folder depends neither on e nor on tau,
# so this is done once up front.
os.makedirs(experiment_params['path_to_save'], exist_ok=True)

# For each uncertainty set specification
for e in [1,3,6,9,12]:
    
    # Print:
    print('Uncertainty set parameter e='+str(e)+'...')
    
    # Update params: results of this specification are saved under '<prefix>_e<e>'
    experiment_params['name_to_save'] = experiment_params['name_to_save_prefix']+'_e'+str(e).replace('.', '')

    # For each look-ahead tau=0,...,4
    for tau in taus:

        # Print:
        print('...look-ahead tau='+str(tau)+'...')

        # Prepare data: load the stored samples and weights of this look-ahead ...
        samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
        weights = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')

        # ... and extract samples, actuals, weights, and epsilons (the loaded objects are replaced)
        samples, actuals, weights, epsilons = prep_samples_and_weights(samples, weights, e=e, SKUs=SKUs, ts=ts)

        # For each product (SKU) k=1,...,M: one parallel job per SKU. All jobs receive the
        # same samples; weights, epsilons, and actuals are SKU-specific.
        # NOTE: resultslog is overwritten in every iteration, so only the log of the last
        # (e, tau) combination remains (the results themselves are written to csv by run_experiment)
        with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
            resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(tau=tau, SKU=SKU, wsaamodel=RobustWeightedSAA(), 
                                                                     samples=samples, weights=weights[SKU], epsilons=epsilons[SKU],
                                                                     actuals=actuals[SKU], e=e, **experiment_params) for SKU in SKUs)

Look-ahead tau=0...


Progress: 100%|██████████| 460/460 [01:44<00:00,  4.41it/s]


Look-ahead tau=1...


Progress: 100%|██████████| 460/460 [18:33<00:00,  2.42s/it]


Look-ahead tau=2...


Progress: 100%|██████████| 460/460 [52:49<00:00,  6.89s/it]  


Look-ahead tau=3...


Progress: 100%|██████████| 460/460 [1:25:25<00:00, 11.14s/it]


Look-ahead tau=4...


Progress: 100%|██████████| 460/460 [2:29:51<00:00, 19.55s/it]  


Look-ahead tau=0...


Progress: 100%|██████████| 460/460 [01:44<00:00,  4.38it/s]


Look-ahead tau=1...


Progress: 100%|██████████| 460/460 [18:55<00:00,  2.47s/it]


Look-ahead tau=2...


Progress: 100%|██████████| 460/460 [52:41<00:00,  6.87s/it]  


Look-ahead tau=3...


Progress: 100%|██████████| 460/460 [1:24:59<00:00, 11.09s/it]


Look-ahead tau=4...


Progress: 100%|██████████| 460/460 [2:29:59<00:00, 19.56s/it]  


Look-ahead tau=0...


Progress: 100%|██████████| 460/460 [01:45<00:00,  4.37it/s]


Look-ahead tau=1...


Progress:  36%|███▋      | 167/460 [06:43<13:43,  2.81s/it]

In [521]:
#### RESHAPED WITH OLD HYPER PARAMS AND SCALING
# File-name stem of the stored samples and weights loaded for this experiment
weightsmodel_name = 'rfwm_global_r_z_old_hyper_params'

In [522]:
# Define experiment parameters (unpacked as keyword arguments into run_experiment)
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,

    # Gurobi meta params
    # (no named parameters of run_experiment: they end up in its **kwargs, are forwarded
    # to wsaamodel.set_params, and are added as constant columns to the results)
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params
    # NOTE: 'name_to_save_prefix' is not a named parameter of run_experiment, so it is
    # treated like the Gurobi params above; the experiment loop derives 'name_to_save' from it
    'path_to_save': PATH_RESULTS+'/GwSAAR_r_z_old_params',
    'name_to_save_prefix': 'GwSAAR_r_z_old_params',
    'print_progress': False,
    'return_results': False

}

In [524]:
# Set path: make sure the results folder exists (also creates missing parent folders and
# does not fail if the folder already exists). The folder depends neither on e nor on tau,
# so this is done once up front.
os.makedirs(experiment_params['path_to_save'], exist_ok=True)

# For each uncertainty set specification
for e in [1,3,6,9,12]:
    
    # Print:
    print('Uncertainty set parameter e='+str(e)+'...')
    
    # Update params: results of this specification are saved under '<prefix>_e<e>'
    experiment_params['name_to_save'] = experiment_params['name_to_save_prefix']+'_e'+str(e).replace('.', '')

    # For each look-ahead tau=0,...,4
    for tau in taus:
        
        # Print:
        print('...look-ahead tau='+str(tau)+'...')

        # Prepare data: load the stored (scaled) samples and weights of this look-ahead ...
        samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
        weights = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')

        # ... and extract rescaled samples, actuals, and epsilons plus the weights per SKU
        # NOTE(review): y_scalers is not defined in this part of the notebook - it has to be
        # provided by an earlier cell
        samples, actuals, weights, epsilons = prep_samples_and_weights(samples, weights, e=e, scalers=y_scalers, SKUs=SKUs, ts=ts)
        
        # For each product (SKU) k=1,...,M: one parallel job per SKU with its own samples,
        # weights, epsilons, and actuals
        # NOTE: resultslog is overwritten in every iteration, so only the log of the last
        # (e, tau) combination remains (the results themselves are written to csv by run_experiment)
        with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
            resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(tau=tau, SKU=SKU, wsaamodel=RobustWeightedSAA(), 
                                                                     samples=samples[SKU], weights=weights[SKU], epsilons=epsilons[SKU],
                                                                     actuals=actuals[SKU], e=e, **experiment_params) for SKU in SKUs)

Uncertainty set parameter e=1...
...look-ahead tau=0...


Progress: 100%|██████████| 460/460 [06:35<00:00,  1.16it/s]


...look-ahead tau=1...


Progress: 100%|██████████| 460/460 [23:09<00:00,  3.02s/it] 


...look-ahead tau=2...


Progress: 100%|██████████| 460/460 [42:07<00:00,  5.49s/it]  


...look-ahead tau=3...


Progress: 100%|██████████| 460/460 [59:11<00:00,  7.72s/it]  


...look-ahead tau=4...


Progress: 100%|██████████| 460/460 [1:21:06<00:00, 10.58s/it]


Uncertainty set parameter e=3...
...look-ahead tau=0...


Progress: 100%|██████████| 460/460 [06:30<00:00,  1.18it/s]


...look-ahead tau=1...


Progress: 100%|██████████| 460/460 [22:40<00:00,  2.96s/it] 


...look-ahead tau=2...


Progress: 100%|██████████| 460/460 [41:56<00:00,  5.47s/it]  


...look-ahead tau=3...


Progress: 100%|██████████| 460/460 [58:51<00:00,  7.68s/it]  


...look-ahead tau=4...


Progress: 100%|██████████| 460/460 [1:21:30<00:00, 10.63s/it]


Uncertainty set parameter e=6...
...look-ahead tau=0...


Progress: 100%|██████████| 460/460 [06:35<00:00,  1.16it/s]


...look-ahead tau=1...


Progress: 100%|██████████| 460/460 [22:20<00:00,  2.91s/it] 


...look-ahead tau=2...


Progress: 100%|██████████| 460/460 [41:17<00:00,  5.39s/it]  


...look-ahead tau=3...


Progress: 100%|██████████| 460/460 [58:42<00:00,  7.66s/it]  


...look-ahead tau=4...


Progress: 100%|██████████| 460/460 [1:20:34<00:00, 10.51s/it]


Uncertainty set parameter e=9...
...look-ahead tau=0...


Progress: 100%|██████████| 460/460 [06:38<00:00,  1.15it/s]


...look-ahead tau=1...


Progress: 100%|██████████| 460/460 [22:14<00:00,  2.90s/it] 


...look-ahead tau=2...


Progress: 100%|██████████| 460/460 [41:32<00:00,  5.42s/it]  


...look-ahead tau=3...


Progress: 100%|██████████| 460/460 [59:03<00:00,  7.70s/it]  


...look-ahead tau=4...


Progress: 100%|██████████| 460/460 [1:21:17<00:00, 10.60s/it]


Uncertainty set parameter e=12...
...look-ahead tau=0...


Progress: 100%|██████████| 460/460 [06:35<00:00,  1.16it/s]


...look-ahead tau=1...


Progress: 100%|██████████| 460/460 [22:11<00:00,  2.89s/it]


...look-ahead tau=2...


Progress: 100%|██████████| 460/460 [41:13<00:00,  5.38s/it]  


...look-ahead tau=3...


Progress: 100%|██████████| 460/460 [58:15<00:00,  7.60s/it]  


...look-ahead tau=4...


Progress: 100%|██████████| 460/460 [1:21:18<00:00, 10.61s/it]  


## (c) Rolling Horizon Local Weighted SAA (wSAA)

...

In [None]:
# File-name stems of the local weights model artifacts.
# `weightsmodel_name` is the stem of the stored samples/weights files;
# `weightsmodel_cv_name` is the same stem with a 'cv_' prefix
# (presumably the cross-validation artifact - confirm where it is used).
weightsmodel_name = 'rfwm_local_not_reshaped'
weightsmodel_cv_name = 'cv_rfwm_local_not_reshaped'

In [None]:
# Experiment parameters for the wSAA runs, assembled group by group
# (insertion order of the keys is the same as in a single literal).

# Cost param settings
experiment_params = {'cost_params': cost_params}

# Gurobi meta params
experiment_params.update({
    'LogToConsole': 0,
    'Threads': 1,
    'NonConvex': 2,
    'PSDTol': 1e-3,                 # 0.1%
    'MIPGap': 1e-3,                 # 0.1%
    'NumericFocus': 0,
    'obj_improvement': 1e-3,        # 0.1%
    'obj_timeout_sec': 3*60,        # 3 min
    'obj_timeout_max_sec': 10*60,   # 10 min
})

# Program meta params: where and under which name results are saved
experiment_params.update({
    'path_to_save': PATH_RESULTS+'/wSAA',
    'name_to_save': 'wSAA',
    'print_progress': False,
    'return_results': False,
})

In [None]:
# Run the wSAA experiment for every look-ahead tau and every product (SKU).

# Make sure the results folder exists. makedirs also creates missing parent
# folders and, with exist_ok=True, does not fail if the folder is already there.
os.makedirs(experiment_params['path_to_save'], exist_ok=True)

# For each look-ahead tau=0,...,4
for tau in taus:

    # Print:
    print('Look-ahead tau='+str(tau)+'...')

    # Load the samples and weights stored for this look-ahead
    samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
    weights = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')

    # Prepare data (results are indexed by SKU below)
    samples, actuals, weights = prep_samples_and_weights(samples, weights, SKUs=SKUs, ts=ts)

    # For each product (SKU) k=1,...,M: one task per SKU, spread over 32 workers,
    # with a progress bar that advances as tasks finish
    with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
        resultslog = Parallel(n_jobs=32)(
            delayed(run_experiment)(
                tau=tau, SKU=SKU, wsaamodel=WeightedSAA(),
                samples=samples[SKU], weights=weights[SKU], actuals=actuals[SKU],
                **experiment_params
            )
            for SKU in SKUs
        )

## (d) Rolling Horizon Local Robust Weighted SAA (wSAA-R)

...

In [None]:
# File-name stems of the local weights model artifacts.
# `weightsmodel_name` is the stem of the stored samples/weights files;
# `weightsmodel_cv_name` is the same stem with a 'cv_' prefix
# (presumably the cross-validation artifact - confirm where it is used).
weightsmodel_name = 'rfwm_local_not_reshaped'
weightsmodel_cv_name = 'cv_rfwm_local_not_reshaped'

In [None]:
# Experiment parameters for the wSAA-R runs, assembled group by group
# (insertion order of the keys is the same as in a single literal).

# Cost param settings
experiment_params = {'cost_params': cost_params}

# Gurobi meta params
experiment_params.update({
    'LogToConsole': 0,
    'Threads': 1,
    'NonConvex': 2,
    'PSDTol': 1e-3,                 # 0.1%
    'MIPGap': 1e-3,                 # 0.1%
    'NumericFocus': 0,
    'obj_improvement': 1e-3,        # 0.1%
    'obj_timeout_sec': 3*60,        # 3 min
    'obj_timeout_max_sec': 10*60,   # 10 min
})

# Program meta params: 'name_to_save_prefix' is the base from which the
# per-e 'name_to_save' is derived in the run loop
experiment_params.update({
    'path_to_save': PATH_RESULTS+'/wSAAR',
    'name_to_save_prefix': 'wSAAR',
    'print_progress': False,
    'return_results': False,
})

In [None]:
# Run the wSAA-R experiment for every uncertainty set parameter e, every
# look-ahead tau and every product (SKU).

# Make sure the results folder exists. The folder does not depend on e, so it is
# created once up front; makedirs also creates missing parent folders and, with
# exist_ok=True, does not fail if the folder is already there.
os.makedirs(experiment_params['path_to_save'], exist_ok=True)

# For each uncertainty set specification
# NOTE(review): same values as the global `es` from the parameters cell - consider iterating over `es`.
for e in [1,3,6,9,12]:

    # Print:
    print('Uncertainty set parameter e='+str(e)+'...')

    # Update params: results of this e are saved under '<prefix>_e<e>'
    # (any '.' in str(e) is dropped so the name stays free of decimal points)
    experiment_params['name_to_save'] = experiment_params['name_to_save_prefix']+'_e'+str(e).replace('.', '')

    # For each look-ahead tau=0,...,4
    for tau in taus:

        # Print:
        print('...look-ahead tau='+str(tau)+'...')

        # Load the samples and weights stored for this look-ahead
        samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
        weights = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')

        # Prepare data; passing e additionally yields the epsilons (indexed by SKU below)
        samples, actuals, weights, epsilons = prep_samples_and_weights(samples, weights, e=e, SKUs=SKUs, ts=ts)

        # For each product (SKU) k=1,...,M: one task per SKU, spread over 32 workers,
        # with a progress bar that advances as tasks finish
        with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
            resultslog = Parallel(n_jobs=32)(
                delayed(run_experiment)(
                    tau=tau, SKU=SKU, wsaamodel=RobustWeightedSAA(),
                    samples=samples[SKU], weights=weights[SKU], epsilons=epsilons[SKU],
                    actuals=actuals[SKU], e=e, **experiment_params
                )
                for SKU in SKUs
            )

## (e) Baseline model: Rolling Horizon Local SAA (SAA)

...

In [None]:
# Experiment parameters for the SAA baseline runs, assembled group by group
# (insertion order of the keys is the same as in a single literal).

# Cost param settings
experiment_params = {'cost_params': cost_params}

# Gurobi meta params
experiment_params.update({
    'LogToConsole': 0,
    'Threads': 1,
    'NonConvex': 2,
    'PSDTol': 1e-3,                 # 0.1%
    'MIPGap': 1e-3,                 # 0.1%
    'NumericFocus': 0,
    'obj_improvement': 1e-3,        # 0.1%
    'obj_timeout_sec': 3*60,        # 3 min
    'obj_timeout_max_sec': 10*60,   # 10 min
})

# Program meta params: where and under which name results are saved
experiment_params.update({
    'path_to_save': PATH_RESULTS+'/SAA',
    'name_to_save': 'SAA',
    'print_progress': False,
    'return_results': False,
})

In [None]:
# Run the SAA baseline experiment for every look-ahead tau and every product (SKU).

# Make sure the results folder exists. makedirs also creates missing parent
# folders and, with exist_ok=True, does not fail if the folder is already there.
os.makedirs(experiment_params['path_to_save'], exist_ok=True)

# For each look-ahead tau=0,...,4
for tau in taus:

    # Print:
    print('Look-ahead tau='+str(tau)+'...')

    # Load the samples stored for this look-ahead (no weights are used here)
    # NOTE(review): this file name ('rfwm_local_samples_not_reshaped_tau...') orders its
    # parts differently from the wSAA cells, which load
    # 'rfwm_local_not_reshaped' + '_samples_tau...' - confirm which files exist on disk.
    samples = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_local_samples_not_reshaped_tau'+str(tau)+'.joblib')

    # Prepare data (results are indexed by SKU below)
    samples, actuals = prep_samples_and_weights(samples, SKUs=SKUs, ts=ts)

    # For each product (SKU) k=1,...,M: one task per SKU, spread over 32 workers,
    # with a progress bar that advances as tasks finish
    with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
        resultslog = Parallel(n_jobs=32)(
            delayed(run_experiment)(
                tau=tau, SKU=SKU, wsaamodel=WeightedSAA(),
                samples=samples[SKU], actuals=actuals[SKU],
                **experiment_params
            )
            for SKU in SKUs
        )

## (f) Ex-post optimal model with deterministic demand

...

In [None]:
# Experiment parameters for the ex-post runs, assembled group by group
# (insertion order of the keys is the same as in a single literal).

# Cost param settings
experiment_params = {'cost_params': cost_params}

# Gurobi meta params
experiment_params.update({
    'LogToConsole': 0,
    'Threads': 1,
    'NonConvex': 2,
    'PSDTol': 1e-3,                 # 0.1%
    'MIPGap': 1e-3,                 # 0.1%
    'NumericFocus': 0,
    'obj_improvement': 1e-3,        # 0.1%
    'obj_timeout_sec': 3*60,        # 3 min
    'obj_timeout_max_sec': 10*60,   # 10 min
})

# Program meta params: where and under which name results are saved
experiment_params.update({
    'path_to_save': PATH_RESULTS+'/ExPost',
    'name_to_save': 'ExPost',
    'print_progress': False,
    'return_results': False,
})

In [None]:
# Prepare data: load the tau=0 samples and collect, per SKU, the test value of
# every period t into one row vector of shape (1, number of periods).
samples = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_local_samples_not_reshaped_tau'+str(0)+'.joblib')

# Each 'y_test' entry holds a single value, which .item() extracts as a scalar
actuals = {
    SKU: np.array([samples[SKU][t]['y_test'].item() for t in ts]).reshape(1, len(ts))
    for SKU in SKUs
}

In [None]:
# Run the ex-post experiment (actual demands only, no samples) for every product (SKU).

# Make sure the results folder exists. makedirs also creates missing parent
# folders and, with exist_ok=True, does not fail if the folder is already there.
os.makedirs(experiment_params['path_to_save'], exist_ok=True)

# For each product (SKU) k=1,...,M: one task per SKU, spread over 32 workers,
# with a progress bar that advances as tasks finish
with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
    resultslog = Parallel(n_jobs=32)(
        delayed(run_experiment)(
            SKU=SKU, wsaamodel=WeightedSAA(), actuals=actuals[SKU],
            **experiment_params
        )
        for SKU in SKUs
    )