# Imports

In [1]:
# Import utils
import numpy as np
import pandas as pd
import copy
import time
import datetime as dt
import pickle
import json
from pathlib import Path
import joblib
from joblib import dump, load, Parallel, delayed
import os
import itertools
import contextlib
from tqdm import tqdm

from sklearn.preprocessing import MinMaxScaler

# Import Weights Model
import WeightsModel3
from WeightsModel3 import PreProcessing
from WeightsModel3 import RandomForestWeightsModel
from WeightsModel3 import RollingHorizonGlobalTrainingAndSampling

# Import (Rolling Horizon) Weighted SAA models
from WeightedSAA3 import WeightedSAA
from WeightedSAA3 import RobustWeightedSAA
from WeightedSAA3 import RobustWeightedSAA2
from WeightedSAA3 import RollingHorizonOptimization

# General paths and parameters

In [203]:
# Set folder names as global variables
os.chdir('/home/fesc/MM/')
global PATH_DATA, PATH_PARAMS, PATH_KERNELS, PATH_SAMPLES, PATH_RESULTS

PATH_DATA = '/home/fesc/MM/Data'
PATH_PARAMS  = '/home/fesc/MM/Data/Params'
PATH_WEIGHTSMODEL = '/home/fesc/MM/Data/WeightsModel'
PATH_SAMPLES = '/home/fesc/MM/Data/Samples'
PATH_RESULTS = '/home/fesc/MM/Data/Results'

In [204]:
# Time period and SKU ranges
T = 13                  # Planning horizon T
ts = range(1,13+1)      # Periods t=1,...,T of the planning horizon
taus = range(0,4+1)     # Look-aheads tau=0,...,4 to use
SKUs = range(1,460+1)   # Products (SKUs) k=1,...,M
es = [1,3,6,9,12]
# Train/test split (first timePeriods of testing horizon)
test_start = 114

# Cost param settings
cost_params = [

    {'CR': 0.50, 'K': 100, 'u': 0.5, 'h': 1, 'b': 1},
    {'CR': 0.75, 'K': 100, 'u': 0.5, 'h': 1, 'b': 3},
    {'CR': 0.90, 'K': 100, 'u': 0.5, 'h': 1, 'b': 9}

]

pp = PreProcessing()

# Training and Sampling

## Global Training and Samping

The two global models (using 'Global Training and Sampling') are **Rolling Horizon Global Weighted SAA (GwSAA)**, which is our model, and **Rolling Horizon Global Robust Weighted SAA (GwSAA-R)**, which is the analogous model with robust extension.

Given product $k$, period $t$, and look-ahead $\tau$, both models apply Weighted SAA over the 'global' distribution $\{\{w_{j,t,\tau}^{\,i}(x_{k,t}^{\,i}),(d_{j,t}^{\,i},...,d_{j,t+\tau}^{\,i})\}_{i=1}^{N_{j,t,\tau}}\}_{j=1}^{M}$, with weight functions $w_{j,t,\tau}(\,\cdot\,)$ trained (once for all products) on data $S_{t,\tau}^{\,\text{Global}}=\{\{(x_{j,t}^{\,i},d_{j,t}^{\,i},...,d_{j,t+\tau}^{\,i})\}_{i=1}^{N_{j,t,\tau}}\}_{j=1}^{M}$.

### Data Preprocessing

We first load and pre-process the data. This includes reshaping demand time series into $(\tau+1)$-periods rolling look-ahead horizon sequences.

- **ID_Data** (pd.DataFrame) stores identifiers (in particular the product (SKU) identifier and the timePeriod (sale_yearweek) identifier)
- **X_Data** (pd.DataFrame) is the 'feature matrix', i.e., each row is a feature vector $x_{j,n}$ where n is the number of training observations (rows) in the data
- **Y_Data** (pd.DataFrame) is the demand data $d_{j,n}$ (a times series per product)
- **X_Data_Columns** (pd.DataFrame) provides 'selectors' for local vs. global feature sets

In [205]:
# Weights model names
weightsmodel_cv_name = 'cv_rfwm_global_z'
weightsmodel_name = 'rfwm_global_z'

In [206]:
# Read data
ID_Data = pd.read_csv(PATH_DATA+'/ID_Data.csv')
X_Data = pd.read_csv(PATH_DATA+'/X_Data.csv')
X_Data_Columns = pd.read_csv(PATH_DATA+'/X_Data_Columns2.csv')
Y_Data = pd.read_csv(PATH_DATA+'/Y_Data.csv')

In [207]:
# Select features
X_Data_Columns = X_Data_Columns.loc[X_Data_Columns.Global == 'YES']
X_Data = X_Data[X_Data_Columns.Feature.values]

In [208]:
# Select training data
ID_Data_train = ID_Data.loc[ID_Data.sale_yearweek < test_start]
X_Data_train = X_Data.loc[ID_Data.sale_yearweek < test_start]

# Prepare
vars_to_scale_names = X_Data_Columns.loc[X_Data_Columns.Scale == 'YES', 'Feature'].values
vars_to_scale_with_names = X_Data_Columns.loc[X_Data_Columns.Scale == 'YES', 'ScaleWith'].values

vars_to_scale = np.array(X_Data[vars_to_scale_names])
vars_to_scale_with = np.array(X_Data_train[vars_to_scale_with_names])

vars_to_scale_groups = np.array(ID_Data.SKU)
vars_to_scale_with_groups = np.array(ID_Data_train.SKU)

# Fit and transform
scaler = MinMaxScaler()
vars_scaled, scaler_fitted = pp.scale_variables(vars_to_scale, vars_to_scale_with, vars_to_scale_groups, vars_to_scale_with_groups, scaler)

# Save fitted feature scaler
_ = joblib.dump(scaler_fitted, PATH_RESULTS+'/'+weightsmodel_name+'_feature_scaler.joblib')

# Reshape to original data
vars_scaled = pd.concat([pd.DataFrame(vars_scaled[i], columns=vars_to_scale_names) for i in vars_scaled]).reset_index(drop=True)
X_Data_z = copy.deepcopy(X_Data)
for col in vars_scaled.columns:
    X_Data_z[col] = vars_scaled[col]

In [209]:
## Scale demand

In [210]:
# Select training data
ID_Data_train = ID_Data.loc[ID_Data.sale_yearweek < test_start]
Y_Data_train = Y_Data.loc[ID_Data.sale_yearweek < test_start]

# Prepare
vars_to_scale = np.array(Y_Data)
vars_to_scale_with = np.array(Y_Data_train)

vars_to_scale_groups = np.array(ID_Data.SKU)
vars_to_scale_with_groups = np.array(ID_Data_train.SKU)

# Fit and transform
scaler = MinMaxScaler()
vars_scaled, scaler_fitted = pp.scale_variables(vars_to_scale, vars_to_scale_with, vars_to_scale_groups, vars_to_scale_with_groups, scaler)

# Save fitted feature scaler
_ = joblib.dump(scaler_fitted, PATH_RESULTS+'/'+weightsmodel_name+'_demand_scaler.joblib')

# Reshape to original data
Y_Data_z = pd.concat([pd.DataFrame(vars_scaled[i], columns=['Y']) for i in vars_scaled]).reset_index(drop=True)

In [211]:
## Reshape

In [212]:
# Create multi-period demand vectors
data = pd.concat([ID_Data, Y_Data_z], axis=1)
Y = {}
for tau in taus:
    Y['Y'+str(tau)] = data.groupby(['SKU']).shift(-tau)['Y']
    
Y_Data_z = pd.DataFrame(Y)

data = pd.concat([ID_Data, Y_Data], axis=1)
Y = {}
for tau in taus:
    Y['Y'+str(tau)] = data.groupby(['SKU']).shift(-tau)['Y']
    
Y_Data = pd.DataFrame(Y)

### Weights model

The weights models - and thus the data used, weight functions, and weights per sample - are the same for the two global models **GwSAA** and **GwSAA-R**. First, we tune the hyper parameters of the random forest weights model for each given look-ahead $\tau$ (as for each look-ahead $\tau$ we have a different response for the multi-output random forest regressor). Second, we fit all weight functions (for each look-ahead $\tau=0,...,4$ and over periods $t=1,...,T$) and generate all weights (for each look-ahead $\tau=0,...,4$, over periods $t=1,...,T$, and for each product (SKU) $k=1,...,M$).

#### Tune weights model

To tune the hyper parameters of the global random forest weights model, we use 3-fold rolling timeseries cross-validation on the training data and perform random search with 100 iterations over the specified hyper parameter search grid.

In [None]:
# Set parameters
model_params = {
    'oob_score': True,
    'random_state': 12345,
    'n_jobs': 4,
    'verbose': 0
}

hyper_params_grid = {
    'n_estimators': [1000],
    'max_depth': [None],
    'min_samples_split': [x for x in range(20, 1000, 20)],  
    'min_samples_leaf': [x for x in range(10, 1000, 10)],  
    'max_features': [x for x in range(8, 256, 8)],   
    'max_leaf_nodes': [None],
    'min_impurity_decrease': [0.0],
    'bootstrap': [True],
    'max_samples': [0.75, 0.80, 0.85, 0.90, 0.95, 1.00]
}    


tuning_params = {     
    'n_iter': 100,
    'scoring': {'MSE': 'neg_mean_squared_error'},
    'return_train_score': True,
    'refit': 'MSE',
    'random_state': 12345,
    'n_jobs': 8,
    'verbose': 2
}    

random_search = True
print_status = True

In [None]:
# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Initialize preprocessing module
    pp = PreProcessing()
        
    # Select and reshape training and test data
    args = {'train': (ID_Data.sale_yearweek < test_start), 
            'timePeriods': ID_Data.loc[(ID_Data.sale_yearweek < test_start)].sale_yearweek, 
            'maxTimePeriod': test_start-1, 'tau': tau}
    
    id_train = pp.train_test_split(ID_Data, **args)
    X_train = pp.train_test_split(X_Data_z, **args, to_array=True)
    y_train = pp.train_test_split(Y_Data_z, **args, rolling_horizon=[l for l in range(0,tau+1)], to_array=True)
   
    # Initialize
    weightsmodel = RandomForestWeightsModel(model_params)

    # CV search
    cv_folds = pp.split_timeseries_cv(n_splits=3, timePeriods=id_train.sale_yearweek)
    cv_results = weightsmodel.tune(X_train, y_train, cv_folds, hyper_params_grid, tuning_params, random_search, print_status)
    weightsmodel.save_cv_result(path=PATH_WEIGHTSMODEL+'/'+weightsmodel_cv_name+'_tau'+str(tau)+'.joblib')

#### Fit weight functions and generate weights

We now fit the global random forest weights model (i.e., the weight functions) for each $\tau=0,...,4$ and over periods $t=1,...,T$. This is done across all products at once (global training). Then, for each $\tau=0,...,4$ and over periods $t=1,...,T$, we generate for each product (SKU) $k=1,...,M$ the weights given the test feature $x_{k,t}$. This is done *jointly* across products (by using $x_{t}=(x_{1,t},...,x_{M,t})^{\top}$) for computational efficiency - the weights for each individual product can be extracted afterwards.

In [213]:
# Set parameters
model_params = {
    'n_jobs': 32,
    'verbose': 0
}

print_status = True

In [None]:
# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Initialize
    samples, weightfunctions, weightfunctions_times, weights, weights_times = {}, {}, {}, {}, {}
        
    # For each period t=1,...,T
    for t in ts:

        # Adjust look-ahead tau to account for end of horizon
        tau_ = min(tau,T-t)
        
        # Status
        print('#### Look-ahead tau='+str(tau)+' (tau\'='+str(tau_)+'), period t='+str(t)+'...')
        start_time = dt.datetime.now().replace(microsecond=0)
                
        # Generate samples, fit weight functions, and generate weights (based on tuned weights model)
        weightsmodel = RandomForestWeightsModel()
        weightsmodel.load_cv_result(path=PATH_WEIGHTSMODEL+'/'+weightsmodel_cv_name+'_tau'+str(tau_)+'.joblib')
        res = weightsmodel.training_and_sampling(ID_Data, X_Data_z, Y_Data_z, tau=tau_, timePeriods=ID_Data.sale_yearweek,
                                                 timePeriodsTestStart=test_start+t-1, model_params=model_params)
        samples[t], weightfunctions[t], weightfunctions_times[t], weights[t], weights_times[t] = res
        
        # Status
        print('...done in', dt.datetime.now().replace(microsecond=0) - start_time)    
        
    # Save
    _ = joblib.dump(samples, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')  
    _ = joblib.dump(weightfunctions, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_tau'+str(tau)+'.joblib')    
    _ = joblib.dump(weightfunctions_times, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_times_tau'+str(tau)+'.joblib')    
    _ = joblib.dump(weights, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')    
    _ = joblib.dump(weights_times, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_times_tau'+str(tau)+'.joblib')    

In [None]:
###### TESTING

In [232]:
tau=0
t=1


model_params = {
    'oob_score': True,
    'random_state': 12345,
    'n_jobs': 32,
    'verbose': 0
}

hyper_params = {
    'n_estimators': 500,
    'min_samples_split': 20,
    'min_samples_leaf': 10,
    'max_features': 3,
    'min_impurity_decrease': 0.0,
    'bootstrap': True,
    'max_samples': 0.90
}

weightsmodel = RandomForestWeightsModel(model_params=model_params, hyper_params=hyper_params)
X = np.array(X_Data_z.loc[ID_Data.sale_yearweek < 114][['SKU_wqs_lag1','SKU_wqs_lag2','SKU_wqs_lag3','SKU_wqs_lag4','SKU_wqs_lag5']])
y = np.array(Y_Data_z.loc[ID_Data.sale_yearweek < 114]['Y0'])
wm_z = weightsmodel.fit(X, y)


weightsmodel = RandomForestWeightsModel(model_params=model_params, hyper_params=hyper_params)
X = np.array(X_Data.loc[ID_Data.sale_yearweek < 114][['SKU_wqs_lag1','SKU_wqs_lag2','SKU_wqs_lag3','SKU_wqs_lag4','SKU_wqs_lag5']])
y = np.array(Y_Data.loc[ID_Data.sale_yearweek < 114]['Y0'])
wm = weightsmodel.fit(X, y)

In [233]:
wm_z.weightsmodel.oob_score_

0.24990111206413546

In [234]:
wm.weightsmodel.oob_score_

0.9082096963101499

In [260]:
### Also when applied for one SKU, i.e., a local model with scaled values, the performance is very bad ...
weightsmodel = RandomForestWeightsModel(model_params=model_params, hyper_params=hyper_params)
X = np.array(X_Data_z.loc[(ID_Data.sale_yearweek < 114) & (ID_Data.SKU == 5)][['SKU_wqs_lag1','SKU_wqs_lag2','SKU_wqs_lag3','SKU_wqs_lag4','SKU_wqs_lag5']])
y = np.array(Y_Data_z.loc[(ID_Data.sale_yearweek < 114) & (ID_Data.SKU == 5)]['Y0'])
wm_z = weightsmodel.fit(X, y)
y_pred_z = weightsmodel.weightsmodel.predict(X)
mse_z = np.mean((scaler_fitted[5].inverse_transform(y_pred_z.reshape(-1,1)).flatten() - scaler_fitted[5].inverse_transform(y.reshape(-1,1)).flatten())**2)

weightsmodel = RandomForestWeightsModel(model_params=model_params, hyper_params=hyper_params)
X = np.array(X_Data.loc[(ID_Data.sale_yearweek < 114) & (ID_Data.SKU == 5)][['SKU_wqs_lag1','SKU_wqs_lag2','SKU_wqs_lag3','SKU_wqs_lag4','SKU_wqs_lag5']])
y = np.array(Y_Data.loc[(ID_Data.sale_yearweek < 114) & (ID_Data.SKU == 5)]['Y0'])
wm = weightsmodel.fit(X, y)
y_pred = weightsmodel.weightsmodel.predict(X)
mse = np.mean((y_pred-y)**2)

In [261]:
wm_z.weightsmodel.oob_score_

0.18939986330527836

In [262]:
wm.weightsmodel.oob_score_

0.7209491946930937

In [263]:
mse_z, mse

(372970.9181515987, 147819.47787353792)

In [304]:
#### Test if scaaling is wrong by performin manual scaling
X = X_Data.loc[(ID_Data.sale_yearweek < 114) & (ID_Data.SKU == 5)][['SKU_wqs_lag1','SKU_wqs_lag2','SKU_wqs_lag3','SKU_wqs_lag4','SKU_wqs_lag5']]
y = Y_Data.loc[(ID_Data.sale_yearweek < 114) & (ID_Data.SKU == 5)]['Y0']

In [305]:
scaler = MinMaxScaler()

In [306]:
fitted_scaler_X = scaler.fit(np.array(pd.DataFrame({'y1': y, 'y2': y, 'y3': y, 'y4': y, 'y5': y})))

In [307]:
X_z = fitted_scaler_X.transform(np.array(X))

In [308]:
fitted_scaler_y = scaler.fit(np.array(y).reshape(-1,1))

In [311]:
y_z = fitted_scaler_y.transform(np.array(y).reshape(-1,1)).flatten()

In [315]:
weightsmodel = RandomForestWeightsModel(model_params=model_params, hyper_params=hyper_params)
wm_z = weightsmodel.fit(X_z, y_z)
y_pred_z = weightsmodel.weightsmodel.predict(X_z)
mse_z = np.mean((fitted_scaler_y.inverse_transform(y_pred_z.reshape(-1,1)).flatten() - np.array(y))**2)

In [316]:
mse_z

148001.45258743598

In [317]:
weightsmodel = RandomForestWeightsModel(model_params=model_params, hyper_params=hyper_params)
wm = weightsmodel.fit(np.array(X), np.array(y))
y_pred = weightsmodel.weightsmodel.predict(np.array(X))
mse = np.mean((y_pred-np.array(y))**2)

In [318]:
mse

147819.4778735379

In [319]:
wm_z.weightsmodel.oob_score_

0.7202067750194299

In [320]:
wm.weightsmodel.oob_score_

0.7209491946930937

In [321]:
# ... this seems to work now...

In [322]:
#### Let's try for a global model

In [330]:
#### Test if scaling is wrong by performin manual scaling
X_Data_z = pd.DataFrame()
Y_Data_z = pd.DataFrame()
fitted_scalers_X = {}
fitted_scalers_y = {}
for SKU in SKUs:
    X = X_Data.loc[(ID_Data.sale_yearweek<114) & (ID_Data.SKU==SKU)][['SKU_wqs_lag1','SKU_wqs_lag2','SKU_wqs_lag3','SKU_wqs_lag4','SKU_wqs_lag5']]
    y = Y_Data.loc[(ID_Data.sale_yearweek<114) & (ID_Data.SKU==SKU)]['Y0']
    
    scaler = MinMaxScaler()
    fitted_scaler_X = scaler.fit(np.array(pd.DataFrame({'y1': y, 'y2': y, 'y3': y, 'y4': y, 'y5': y})))
    X_z = fitted_scaler_X.transform(np.array(X))
    fitted_scalers_X[SKU] = copy.deepcopy(fitted_scaler_X)
    X_Data_z = pd.concat([X_Data_z, pd.DataFrame(X_z, columns = ['SKU_wqs_lag1','SKU_wqs_lag2','SKU_wqs_lag3','SKU_wqs_lag4','SKU_wqs_lag5'])])
    
    scaler = MinMaxScaler()
    fitted_scaler_y = scaler.fit(np.array(y).reshape(-1,1))
    y_z = fitted_scaler_y.transform(np.array(y).reshape(-1,1))
    fitted_scalers_y[SKU] = copy.deepcopy(fitted_scaler_y)
    Y_Data_z = pd.concat([Y_Data_z, pd.DataFrame(y_z, columns = ['Y0'])])   
    
X_Data_z = X_Data_z.reset_index(drop=True)
Y_Data_z = Y_Data_z.reset_index(drop=True)

In [331]:
weightsmodel = RandomForestWeightsModel(model_params=model_params, hyper_params=hyper_params)
wm_z = weightsmodel.fit(np.array(X_Data_z), np.array(Y_Data_z).flatten())
y_pred_z = weightsmodel.weightsmodel.predict(np.array(X_Data_z))

In [358]:
weightsmodel = RandomForestWeightsModel(model_params=model_params, hyper_params=hyper_params)
wm = weightsmodel.fit(np.array(X_Data.loc[ID_Data.sale_yearweek<114][['SKU_wqs_lag1','SKU_wqs_lag2','SKU_wqs_lag3','SKU_wqs_lag4','SKU_wqs_lag5']]), 
                      np.array(Y_Data.loc[ID_Data.sale_yearweek<114]['Y0']).flatten())
y_pred = weightsmodel.weightsmodel.predict(np.array(X_Data.loc[ID_Data.sale_yearweek<114][['SKU_wqs_lag1','SKU_wqs_lag2','SKU_wqs_lag3','SKU_wqs_lag4','SKU_wqs_lag5']]))

In [None]:
Y_Data_pred_z = pd.DataFrame()
for SKU in SKUs:
    y_pred_zz = fitted_scalers_y[SKU].inverse_transform(np.array(y_pred_z[ID_Data.loc[ID_Data.sale_yearweek<114].SKU==SKU]).reshape(-1,1))
    Y_Data_pred_z = pd.concat([Y_Data_pred_z, pd.DataFrame(y_pred_zz, columns = ['Y0'])])   
    
Y_Data_pred_z = Y_Data_pred_z.reset_index(drop=True)

In [None]:
Y_Data_pred = pd.DataFrame(y_pred, columns = ['Y0'])

In [342]:
Y_Data_true = pd.DataFrame()
for SKU in SKUs:
    Y_Data_true = pd.concat([Y_Data_true, pd.DataFrame(Y_Data.loc[(ID_Data.sale_yearweek<114) & (ID_Data.SKU==SKU)]['Y0'], columns = ['Y0'])])    
    
Y_Data_true = Y_Data_true.reset_index(drop=True)

In [361]:
mse_z = []
mse = []
for SKU in SKUs:
    y_pred_zz_SKU = fitted_scalers_y[SKU].inverse_transform(np.array(y_pred_z[ID_Data.loc[ID_Data.sale_yearweek<114].SKU==SKU]).reshape(-1,1)).flatten()
    y_pred_SKU = y_pred[ID_Data.loc[ID_Data.sale_yearweek<114].SKU==SKU].flatten()
    y_true_SKU = np.array(Y_Data.loc[(ID_Data.sale_yearweek<114) & (ID_Data.SKU==SKU)]['Y0']).flatten()
    
    mse_z += [np.mean((y_pred_zz_SKU-y_true_SKU)**2)]
    mse += [np.mean((y_pred_SKU-y_true_SKU)**2)]

In [367]:
results = pd.DataFrame({'SKU': SKUs, 'mse_z': mse_z, 'mse': mse})

In [369]:
results['diffs'] = results.mse_z / results.mse

In [370]:
results.describe()

Unnamed: 0,SKU,mse_z,mse,diffs
count,460.0,460.0,460.0,460.0
mean,230.5,20582560.0,1823364.0,5.838786
std,132.934821,276498000.0,20477530.0,6.716479
min,1.0,0.0682324,0.7274181,0.083095
25%,115.75,74.06972,27.94942,1.808917
50%,230.5,1267.851,377.5251,3.132171
75%,345.25,48135.0,6458.503,7.291291
max,460.0,5567112000.0,340136400.0,46.511272


In [371]:
### Seems that on global level, still MSE is really poor ... At least with these limited features ...

In [None]:
weightsmodel_name

In [None]:
samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')    

In [None]:
weightfunctions = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_tau'+str(tau)+'.joblib')

In [None]:
samples_ = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_samples_tau'+str(tau)+'.joblib') 
weightfunctions_ = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_weightfunctions_tau'+str(tau)+'.joblib')

In [None]:
y_pred_z = weightfunctions[1].weightsmodel.predict(samples[1]['X_test'])

In [None]:
y_pred = weightfunctions_[1].weightsmodel.predict(samples_[1]['X_test'])

In [None]:
y_pred_zz = []
for SKU in SKUs:
    y_pred_zz += [scaler_fitted[SKU].inverse_transform(y_pred_z[samples_[1]['id_test'].SKU == SKU].reshape(1,1)).item()]

In [None]:
res = pd.DataFrame({'SKU': samples_[1]['id_test'].SKU, 'y': samples_[1]['y_test'], 'y_pred_z': np.array(y_pred_zz), 'y_pred': y_pred}) 

In [None]:
res

In [None]:
res['error_z'] = abs(res.y-res.y_pred_z)
res['error'] =  abs(res.y-res.y_pred)

In [None]:
res['delta'] = res.error_z / res.error

In [None]:
res

In [None]:
5.665111e+04

In [None]:
np.argmax(weightfunctions_[1].weightsmodel.feature_importances_)

In [None]:
importances = pd.DataFrame({'feature': X_Data.columns, 'importance': weightfunctions[1].weightsmodel.feature_importances_})

In [None]:
importances.sort_values(['importance'], ascending=False)

In [None]:
np.log(Y_Data.Y0)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
SKU=5

In [None]:
p = plt.hist(Y_Data_z.Y0.loc[ID_Data.SKU==SKU])

In [None]:
p = plt.hist(Y_Data.Y0.loc[ID_Data.SKU==SKU])

In [None]:
# Weights model names
weightsmodel_cv_name = 'cv_rfwm_global'
weightsmodel_name = 'rfwm_global'

In [None]:
tau=0

In [None]:
t=1
SKU=35

In [None]:
pp = PreProcessing()


timePeriods = ID_Data.sale_yearweek
timePeriodsTestStart = test_start+t-1

        
# Select and reshape training and test data
args = {'train': (timePeriods < timePeriodsTestStart), 'test': (timePeriods == timePeriodsTestStart), 
        'timePeriods': timePeriods[(timePeriods < timePeriodsTestStart)], 'maxTimePeriod': timePeriodsTestStart-1, 'tau': tau}

id_train, id_test = pp.train_test_split(ID_Data, **args)
X_train, X_test = pp.train_test_split(X_Data, **args, to_array=True)
y_train, y_test = pp.train_test_split(Y_Data, **args, rolling_horizon=[l for l in range(0,tau+1)], to_array=True)

In [None]:
X_train[id_train.SKU==SKU].shape

In [None]:
Y_Data[(ID_Data.SKU==SKU) & (ID_Data.sale_yearweek < test_start+t-1)]

In [None]:
ID_Data[(ID_Data.SKU==SKU) & (ID_Data.sale_yearweek < test_start+t-1)]

In [None]:
id_train.loc[id_train.SKU==SKU]

In [None]:
X_Data.SKU_dqs_min_lag1.loc[(ID_Data.SKU==SKU) & (ID_Data.sale_yearweek==99)]

In [None]:
X_train[id_train.SKU==SKU,7]

In [None]:
tau=0

In [None]:
weightsmodel_name = 'rfwm_global'

In [None]:
rfmodel_global = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_tau'+str(tau)+'.joblib') 

In [None]:
rfmodel_global[1].weightsmodel.oob_score_

In [None]:
weightsmodel_name = 'rfwm_global_z'

In [None]:
rfmodel_global_z = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_tau'+str(tau)+'.joblib') 

In [None]:
rfmodel_global_z[1].weightsmodel.oob_score_

In [None]:
y_pred = rfmodel_global[1].weightsmodel.predict(X_train)

In [None]:
y_pred.shape

In [None]:
tau=0

In [None]:
pp = PreProcessing()


timePeriods = ID_Data.sale_yearweek
timePeriodsTestStart = test_start+t-1

        
# Select and reshape training and test data
args = {'train': (timePeriods < timePeriodsTestStart), 'test': (timePeriods == timePeriodsTestStart), 
        'timePeriods': timePeriods[(timePeriods < timePeriodsTestStart)], 'maxTimePeriod': timePeriodsTestStart-1, 'tau': tau}

id_train, id_test = pp.train_test_split(ID_Data, **args)
X_train_z, X_test_z = pp.train_test_split(X_Data_z, **args, to_array=True)
y_train_z, y_test_z = pp.train_test_split(Y_Data_z, **args, rolling_horizon=[l for l in range(0,tau+1)], to_array=True)

In [None]:
y_pred_z = rfmodel_global_z[1].weightsmodel.predict(X_train_z)

In [None]:
y_pred_z.shape[0]

In [None]:
# rescale 

In [None]:
scaler = scaler_fitted[SKU]

In [None]:
y_pred_zz = scaler.inverse_transform(y_pred_z.reshape(y_pred_z.shape[0],1))

In [None]:
y_train.shape, y_pred_zz.flatten().shape, y_pred.shape

In [None]:
mse = []
mse_z = []

for SKU in SKUs:
    sel = id_train.SKU == SKU
    mse += [np.mean((y_train[sel] - y_pred[sel])**2)]
    mse_z += [np.mean((y_train[sel] - y_pred_zz[sel])**2)]

In [None]:
res = pd.DataFrame({'SKU': np.array(SKUs), 'mse': np.array(mse), 'mse_z': np.array(mse_z)})
res['diff'] = res.mse_z / res.mse

In [None]:
res.describe()

In [None]:
rf_mv0 = joblib.load(PATH_WEIGHTSMODEL+'/rf_mv_t1.joblib')  
rf_mv1 = joblib.load(PATH_WEIGHTSMODEL+'/rf_mv_t2.joblib')  
rf_mv2 = joblib.load(PATH_WEIGHTSMODEL+'/rf_mv_t3.joblib')  
rf_mv3 = joblib.load(PATH_WEIGHTSMODEL+'/rf_mv_t4.joblib')  
rf_mv4 = joblib.load(PATH_WEIGHTSMODEL+'/rf_mv_t5.joblib')  

In [None]:
rfmodel_global[1].weightsmodel

In [None]:
rf_mv0

In [None]:
pp = PreProcessing()

In [None]:
# Read data
ID_Data = pd.read_csv(PATH_DATA+'/ID_Data.csv')
X_Data = pd.read_csv(PATH_DATA+'/X_Data.csv')
X_Data_Columns = pd.read_csv(PATH_DATA+'/X_Data_Columns3.csv')
Y_Data = pd.read_csv(PATH_DATA+'/Y_Data.csv')

In [None]:
# Select features
X_Data_Columns = X_Data_Columns.loc[X_Data_Columns.Global == 'YES']
X_Data = X_Data[X_Data_Columns.Feature.values]

In [None]:
# Select training data
ID_Data_train = ID_Data.loc[ID_Data.sale_yearweek < test_start]
X_Data_train = X_Data.loc[ID_Data.sale_yearweek < test_start]

# Prepare
vars_to_scale_names = X_Data_Columns.loc[X_Data_Columns.Scale == 'YES', 'Feature'].values
vars_to_scale = np.array(X_Data[vars_to_scale_names])
vars_to_scale_with = np.array(Y_Data_train[['Y','Y','Y','Y','Y']])

vars_to_scale_groups = np.array(ID_Data.SKU)
vars_to_scale_with_groups = np.array(ID_Data_train.SKU)

# Fit and transform
scaler = MinMaxScaler()
vars_scaled, scaler_fitted = pp.scale_variables(vars_to_scale, vars_to_scale_with, vars_to_scale_groups, vars_to_scale_with_groups, scaler)

# Reshape to original data
vars_scaled = pd.concat([pd.DataFrame(vars_scaled[i], columns=vars_to_scale_names) for i in vars_scaled]).reset_index(drop=True)
X_Data_z = copy.deepcopy(X_Data)
for col in vars_scaled.columns:
    X_Data_z[col] = vars_scaled[col]

In [None]:
# Select training data
ID_Data_train = ID_Data.loc[ID_Data.sale_yearweek < test_start]
Y_Data_train = Y_Data.loc[ID_Data.sale_yearweek < test_start]

# Prepare
vars_to_scale = np.array(Y_Data)
vars_to_scale_with = np.array(Y_Data_train)

vars_to_scale_groups = np.array(ID_Data.SKU)
vars_to_scale_with_groups = np.array(ID_Data_train.SKU)

# Fit and transform
scaler = MinMaxScaler()
vars_scaled, scaler_fitted = pp.scale_variables(vars_to_scale, vars_to_scale_with, vars_to_scale_groups, vars_to_scale_with_groups, scaler)

# Reshape to original data
Y_Data_z = pd.concat([pd.DataFrame(vars_scaled[i], columns=['Y']) for i in vars_scaled]).reset_index(drop=True)

In [None]:
# Create multi-period demand vectors
data = pd.concat([ID_Data, Y_Data_z], axis=1)
Y = {}
for tau in taus:
    Y['Y'+str(tau)] = data.groupby(['SKU']).shift(-tau)['Y']
    
Y_Data_z = pd.DataFrame(Y)

data = pd.concat([ID_Data, Y_Data], axis=1)
Y = {}
for tau in taus:
    Y['Y'+str(tau)] = data.groupby(['SKU']).shift(-tau)['Y']
    
Y_Data = pd.DataFrame(Y)

In [None]:
pp = PreProcessing()

### Function to generate samples, fit weight functions, and generate weights
def training_and_sampling(ID_Data, X_Data, Y_Data, tau, timePeriods, timePeriodsTestStart):

    """
    ...

    """           
    # Select and reshape training and test data
    args = {'train': (timePeriods < timePeriodsTestStart), 'test': (timePeriods == timePeriodsTestStart), 
            'timePeriods': timePeriods[(timePeriods < timePeriodsTestStart)], 'maxTimePeriod': timePeriodsTestStart-1, 'tau': tau}

    id_train, id_test = pp.train_test_split(ID_Data, **args)
    X_train, X_test = pp.train_test_split(X_Data, **args, to_array=True)
    y_train, y_test = pp.train_test_split(Y_Data, **args, rolling_horizon=[l for l in range(0,tau+1)], to_array=True)

    # Store samples of historical demands
    samples = {'y_train': y_train, 'y_test': y_test, 
               'X_train': X_train, 'X_test': X_test, 
               'id_train': id_train, 'id_test': id_test}
    
    return samples

## Local Training and Sampling

The two local models (using 'Local Training and Sampling') are **Rolling Horizon Local Weighted SAA (wSAA)**, and **Rolling Horizon Local Robust Weighted SAA (wSAA-R)**, which is the analogous model with robust extension.

Given product $k$, period $t$, and look-ahead $\tau$, both models apply Weighted SAA over the 'local' distribution $\{w_{k,t,\tau}^{\,i}(x_{k,t}^{\,i}),(d_{k,t}^{\,i},...,d_{k,t+\tau}^{\,i})\}_{i=1}^{N_{k,t,\tau}}$, with weight functions $w_{k,t,\tau}(\,\cdot\,)$ trained on data $S_{k,t,\tau}^{\,\text{Local}}=\{(x_{k,t}^{\,i},d_{k,t}^{\,i},...,d_{k,t+\tau}^{\,i})\}_{i=1}^{N_{k,t,\tau}}$ for each product $k=1,...,M$ separately.

### Data Preprocessing

We first load and pre-process the data. This includes reshaping demand time series into $(\tau+1)$-periods rolling look-ahead horizon sequences.

- **ID_Data** (pd.DataFrame) stores identifiers (in particular the product (SKU) identifier and the timePeriod (sale_yearweek) identifier)
- **X_Data** (pd.DataFrame) is the 'feature matrix', i.e., each row is a feature vector $x_{j,n}$ where n is the number of training observations (rows) in the data
- **Y_Data** (pd.DataFrame) is the demand data $d_{j,n}$ (a times series per product)
- **X_Data_Columns** (pd.DataFrame) provides 'selectors' for local vs. global feature sets

In [None]:
# Weights model names
weightsmodel_cv_name = 'cv_rfwm_local'
weightsmodel_name = 'rfwm_local'

In [None]:
# Read data
ID_Data = pd.read_csv(PATH_DATA+'/ID_Data.csv')
X_Data = pd.read_csv(PATH_DATA+'/X_Data.csv')
X_Data_Columns = pd.read_csv(PATH_DATA+'/X_Data_Columns.csv')
Y_Data = pd.read_csv(PATH_DATA+'/Y_Data.csv')

In [None]:
# Select features
X_Data_Columns = X_Data_Columns.loc[X_Data_Columns.Local == 'YES']
X_Data = X_Data[X_Data_Columns.Feature.values]

In [None]:
# Ensure data is sorted by SKU and sale_yearweek for preprocessing
data = pd.concat([ID_Data, X_Data, Y_Data], axis=1).sort_values(by=['SKU', 'sale_yearweek']).reset_index(drop=True)

ID_Data = data[ID_Data.columns]
X_Data = data[X_Data.columns]
Y_Data = data[Y_Data.columns]

In [None]:
# Create multi-period demand vectors
data = pd.concat([ID_Data, Y_Data], axis=1)
Y = {}
for tau in taus:
    Y['Y'+str(tau)] = data.groupby(['SKU']).shift(-tau)['Y']
Y_Data = pd.DataFrame(Y)

### Weights model

The weights model - and thus the data used, weight functions, and weights per sample - are the same for the two local models **wSAA** and **wSAA-R**. First, we tune the hyper parameters of the random forest weights model for each given look-ahead $\tau$ (as for each look-ahead $\tau$ we have a different response for the multi-output random forest regressor) and for each product (SKU) $k=1,...,M$ separately. Second, we fit all weight functions (for each look-ahead $\tau=0,...,4$ and over periods $t=1,...,T$) for each product (SKU) $k=1,...,M$ separately and generate all weights (for each look-ahead $\tau=0,...,4$, over periods $t=1,...,T$, and for each product (SKU) $k=1,...,M$ separatey).

#### Tune weights model

To tune the hyper parameters of the local random forest weights model for each product (SKU) $k=1,...,M$, we use 3-fold rolling timeseries cross-validation on the training data and perform random search with 100 iterations over the specified hyper parameter search grid.

In [None]:
# Set parameters to tune random forest weights kernels
model_params = {
    'oob_score': True,
    'random_state': 12345,
    'n_jobs': 1,
    'verbose': 0
}

hyper_params_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None],
    'min_samples_split': [x for x in range(2, 20, 1)],  
    'min_samples_leaf': [x for x in range(2, 10, 1)],  
    'max_features': [x for x in range(8, 256, 8)],   
    'max_leaf_nodes': [None],
    'min_impurity_decrease': [0.0],
    'bootstrap': [True],
    'max_samples': [0.75, 0.80, 0.85, 0.90, 0.95, 1.00]
}    


tuning_params = {     
    'n_iter': 100,
    'scoring': {'MSE': 'neg_mean_squared_error'},
    'return_train_score': True,
    'refit': 'MSE',
    'random_state': 12345,
    'n_jobs': 32,
    'verbose': 0
}    

random_search = True
print_status = False

In [None]:
# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Status
    print('Look-ahead tau='+str(tau)+'...')
    start_time = dt.datetime.now().replace(microsecond=0)
    
    # Initialize
    cv_results = {}
    
    # For each product (SKU) k=1,...,M
    for SKU in SKUs:

        # Initialize preprocessing module
        pp = PreProcessing()

        # Select and reshape training and test data
        args = {'train': (ID_Data.SKU == SKU) & (ID_Data.sale_yearweek < test_start), 
                'timePeriods': ID_Data.loc[(ID_Data.SKU == SKU) & (ID_Data.sale_yearweek < test_start)].sale_yearweek, 
                'maxTimePeriod': test_start-1, 'tau': tau}

        id_train = pp.train_test_split(ID_Data, **args)
        X_train = pp.train_test_split(X_Data, **args, to_array=True)
        y_train = pp.train_test_split(Y_Data, **args, rolling_horizon=[l for l in range(0,tau+1)], to_array=True)

        # Initialize
        weightsmodel = RandomForestWeightsModel(model_params)

        # CV search
        cv_folds = pp.split_timeseries_cv(n_splits=3, timePeriods=id_train.sale_yearweek)
        cv_results[SKU] = weightsmodel.tune(X_train, y_train, cv_folds, hyper_params_grid, 
                                            tuning_params, random_search, print_status)
        
        # Status
        print('SKU '+str(SKU)+' of '+str(len(SKUs))+' in', dt.datetime.now().replace(microsecond=0) - start_time, end='\r', flush=True)

    # Save
    _ = joblib.dump(cv_results, PATH_WEIGHTSMODEL+'/'+weightsmodel_cv_name+'_tau'+str(tau)+'.joblib')
    print('')

#### Fit weight functions and generate weights

We now fit a local random forest weights model (i.e., the weight functions) for each $\tau=0,...,4$, period $t=1,...,T$, and product (SKU) $k=1,...,M$ separately (local training). Then, for each $\tau=0,...,4$, period $t=1,...,T$, and product (SKU) $k=1,...,M$ separately, we generate the weights given the test feature $x_{k,t}$. This is done *separately* for each product (SKU) $k=1,...,M$.

In [None]:
# Set parameters
model_params = {
    'n_jobs': 32,
    'verbose': 0
}

print_status = True

In [None]:
# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Status
    print('Look-ahead tau='+str(tau)+'...')
    start_time = dt.datetime.now().replace(microsecond=0)
    
    # Initialize
    samples, weightfunctions, weightfunctions_times, weights, weights_times = {}, {}, {}, {}, {}
    
    # For each product (SKU) k=1,...,M
    for SKU in SKUs:
        
        # Initialize
        samples[SKU], weightfunctions[SKU], weightfunctions_times[SKU], weights[SKU], weights_times[SKU] = {}, {}, {}, {}, {}
        
        # For each period t=1,...,T
        for t in ts:
        
            # Adjust look-ahead tau to account for end of horizon
            tau_ = min(tau,T-t)

            # Generate samples, fit weight functions, and generate weights (based on tuned weights model)
            weightsmodel = RandomForestWeightsModel()
            weightsmodel.load_cv_result(path=PATH_WEIGHTSMODEL+'/'+weightsmodel_cv_name+'_tau'+str(tau_)+'.joblib', SKU=SKU)
            res = weightsmodel.training_and_sampling(ID_Data.loc[ID_Data.SKU==SKU], X_Data.loc[ID_Data.SKU==SKU], Y_Data.loc[ID_Data.SKU==SKU], 
                                                     tau=tau_, timePeriods=ID_Data.loc[ID_Data.SKU==SKU].sale_yearweek, 
                                                     timePeriodsTestStart=test_start+t-1, model_params=model_params)
            samples[SKU][t], weightfunctions[SKU][t], weightfunctions_times[SKU][t], weights[SKU][t], weights_times[SKU][t] = res

        # Status
        print('SKU '+str(SKU)+' of '+str(len(SKUs))+' in', dt.datetime.now().replace(microsecond=0) - start_time, end='\r', flush=True)
        
    # Save
    _ = joblib.dump(samples, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')  
    _ = joblib.dump(weightfunctions, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_tau'+str(tau)+'.joblib')    
    _ = joblib.dump(weightfunctions_times, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weightfunctions_times_tau'+str(tau)+'.joblib')    
    _ = joblib.dump(weights, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')    
    _ = joblib.dump(weights_times, PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_times_tau'+str(tau)+'.joblib')
    print('')

# Rolling Horizon Optimization

The code below runs an experiment for all given products (SKUs) $k=1,...,M$ over a test planning horizon $t=1,...,T$ with $T=13$ for three different cost parameter settings $\{K, u, h, b\}$ that vary the critical ratio ($CR=\frac{b}{b+h}$) of holding and backlogging yielding
- $CR=0.50$: $\{K=100, u=0.5, h=1, b=1\}$
- $CR=0.75$: $\{K=100, u=0.5, h=1, b=3\}$
- $CR=0.90$: $\{K=100, u=0.5, h=1, b=9\}$

We run experiments for different choices of the look-ahead $\tau=0,...,4$.

## Experiment functions

We first define several functions for experiments over different choices for the look-ahead $\tau=0,...,4$, cost parameter settings $\{K,u,h,b\}$, products (SKUs) $k=1,...,M$, and periods $t=1,...,T$ of the planning horizon.

In [12]:
### Function to extract global historical demand samples, weights, and actuals for each product and test period
def prep_samples_and_weights(samples, weights=None, e=None, **kwargs):

    """
    ...
    
    """
    
    SKUs = kwargs.get('SKUs', range(1,460+1))
    ts = kwargs.get('ts', range(1,13+1))
    
    # Local
    if len(samples) == len(SKUs):
        
        # Samples
        samples_ = {}
        for SKU in SKUs:
            samples_[SKU] = {}
            for t in ts:
                samples_[SKU][t] = samples[SKU][t]['y_train']
                
        # Actuals
        actuals_ = {}
        for SKU in SKUs:
            actuals_[SKU] = {}
            for t in ts:
                actuals_[SKU][t] = samples[SKU][t]['y_test'].flatten()
                
        # Weights   
        if not weights is None:
            weights_ = {}
            for SKU in SKUs:
                weights_[SKU] = {}
                for t in ts:
                    weights_[SKU][t] = weights[SKU][t].flatten()
                    
        # Epsilons
        if not e is None:
            epsilons_ = {}
            for SKU in SKUs:
                epsilons_[SKU] = {}
                for t in ts:
                    epsilons_[SKU][t] = e*np.std(samples[SKU][t]['y_train'], axis=0).flatten()[0]
                 
    # Global
    else:
        
        # Samples
        samples_ = {}
        for t in ts:
            samples_[t] = samples[t]['y_train']

        # Actuals
        actuals_ = {}
        for SKU in SKUs:
            actuals_[SKU] = {}
            for t in ts:
                actuals_[SKU][t] = samples[t]['y_test'][samples[t]['id_test'].SKU==SKU].flatten()

        # Weights   
        if not weights is None:
            weights_ = {}
            for SKU in SKUs:
                weights_[SKU] = {}
                for t in ts:
                    weights_[SKU][t] = weights[t][samples[t]['id_test'].SKU==SKU].flatten()
                    
        # Epsilons
        if not e is None:
            epsilons_ = {}
            for SKU in SKUs:
                epsilons_[SKU] = {}
                for t in ts:
                    epsilons_[SKU][t] = e*np.std(samples[t]['y_train'][samples[t]['id_train'].SKU == SKU], axis=0).flatten()[0]

    # Return
    if not weights is None:
        if not e is None:
            return samples_, actuals_, weights_, epsilons_
        else:
            return samples_, actuals_, weights_
    else:
        if not e is None:
            return samples_, actuals_, epsilons_
        else:
            return samples_, actuals_

In [13]:
### Function to run an experiment over a list of given cost parameter settings and the specified model
def run_experiment(wsaamodel, cost_params, samples, actuals, weights=None, epsilons=None, print_progress=False,
                   path_to_save=None, name_to_save=None, return_results=True, **kwargs):
    
    """
    ...
    
    """
    
    # Raise error if cost_params is not a list of dict(s)
    if not type(cost_params)==list:
        raise ValueError('Argument cost_params has to be a list of at least one dict with keys {K, u, h, b}')  
            
    # Timer
    st_exec, st_cpu = time.time(), time.process_time()

    # Status
    if print_progress and 'SKU' in kwargs: print('SKU:', kwargs['SKU'])
    
    # Initialize
    ropt, results = RollingHorizonOptimization(), pd.DataFrame()

    # For each cost param setting
    for cost_params_ in cost_params:

        # Print progress
        if print_progress: print('...cost param setting:', cost_params_)

        # Apply (Weighted) SAA  model
        wsaamodel.set_params(**{**kwargs, **cost_params_})
        result = ropt.run(wsaamodel, samples, actuals, weights, epsilons)

        # Store results
        meta = pd.DataFrame({'CR': cost_params_['CR'], **kwargs}, index=list(range(len(samples))))
        results = pd.concat([results, pd.concat([meta, result], axis=1)], axis=0)

    # Save result as csv file
    if not path_to_save is None and not name_to_save is None:
        results.to_csv(path_or_buf=(path_to_save+'/'+name_to_save+'_SKU'+str(kwargs.get('SKU', None))+
                                    '_tau'+str(kwargs.get('tau', None))+'.csv'), sep=',', index=False)

    # Timer
    exec_time_sec, cpu_time_sec = time.time() - st_exec, time.process_time() - st_cpu
    
    # Status
    if print_progress: print('>>>> Done:', str(np.around(exec_time_sec/60,1)), 'minutes')

    # Return  
    return results if return_results else {'SKU': kwargs.get('SKU', None), 'exec_time_sec': exec_time_sec, 'cpu_time_sec': cpu_time_sec}

In [14]:
### Function to run an experiment over a list of given cost parameter settings and the specified model
def run_experiment(wsaamodel, cost_params, actuals, samples=None, weights=None, epsilons=None, print_progress=False,
                   path_to_save=None, name_to_save=None, return_results=True, **kwargs):
    
    """
    ...
    
    """
    
    # Raise error if cost_params is not a list of dict(s)
    if not type(cost_params)==list:
        raise ValueError('Argument cost_params has to be a list of at least one dict with keys {K, u, h, b}')  
    
    # Timer
    st_exec, st_cpu = time.time(), time.process_time()

    # Status
    if print_progress and 'SKU' in kwargs: print('SKU:', kwargs['SKU'])
    
    # Initialize
    ropt, results = RollingHorizonOptimization(), pd.DataFrame()

    # For each cost param setting
    for cost_params_ in cost_params:

        # Print progress
        if print_progress: print('...cost param setting:', cost_params_)
        
        # Check if samples provided
        if not samples is None:
            
            # Apply (Weighted) SAA  model
            wsaamodel.set_params(**{**kwargs, **cost_params_})
            result = ropt.run(wsaamodel, samples, actuals, weights, epsilons, q_ub=False)
             
            # Get T
            T = len(samples)
            
        else:
            
            # Apply ex-post clairvoyant model
            wsaamodel.set_params(**{**kwargs, **cost_params_})
            result = ropt.run_expost(wsaamodel, actuals)
            
            # Get T
            T = actuals.shape[1]

        # Store results
        meta = pd.DataFrame({'CR': cost_params_['CR'], **kwargs}, index=list(range(T)))
        results = pd.concat([results, pd.concat([meta, result], axis=1)], axis=0)

    # Save result as csv file
    if not path_to_save is None and not name_to_save is None:
        results.to_csv(path_or_buf=(path_to_save+'/'+name_to_save+'_SKU'+str(kwargs.get('SKU', None))+
                                    '_tau'+str(kwargs.get('tau', None))+'.csv'), sep=',', index=False)

    # Timer
    exec_time_sec, cpu_time_sec = time.time() - st_exec, time.process_time() - st_cpu
    
    # Status
    if print_progress: print('>>>> Done:', str(np.around(exec_time_sec/60,1)), 'minutes')

    # Return  
    return results if return_results else {'SKU': kwargs.get('SKU', None), 'exec_time_sec': exec_time_sec, 'cpu_time_sec': cpu_time_sec}

In [15]:
### Function to run ex-post clairvoyant experiment over a list of given cost parameter settings
def run_experiment_expost(wsaamodel, cost_params, actuals, print_progress=False, path_to_save=None, name_to_save=None, return_results=True, **kwargs):
    
    """
    ...
    
    """
    
    # Raise error if cost_params is not a list of dict(s)
    if not type(cost_params)==list:
        raise ValueError('Argument cost_params has to be a list of at least one dict with keys {K, u, h, b}')  
            
    # Timer
    st_exec, st_cpu = time.time(), time.process_time()

    # Status
    if print_progress and 'SKU' in kwargs: print('SKU:', kwargs['SKU'])
    
    # Initialize
    ropt, results = RollingHorizonOptimization(), pd.DataFrame()

    # For each cost param setting
    for cost_params_ in cost_params:

        # Print progress
        if print_progress: print('...cost param setting:', cost_params_)

        # Apply (Weighted) SAA  model
        wsaamodel.set_params(**{**kwargs, **cost_params_})
        result = ropt.run_expost(wsaamodel, actuals, q_ub=False)

        # Store results
        meta = pd.DataFrame({'CR': cost_params_['CR'], **kwargs}, index=list(range(actuals.shape[1])))
        results = pd.concat([results, pd.concat([meta, result], axis=1)], axis=0)

    # Save result as csv file
    if not path_to_save is None and not name_to_save is None:
        results.to_csv(path_or_buf=(path_to_save+'/'+name_to_save+'_SKU'+str(kwargs.get('SKU', None))+'.csv'), sep=',', index=False)

    # Timer
    exec_time_sec, cpu_time_sec = time.time() - st_exec, time.process_time() - st_cpu
    
    # Status
    if print_progress: print('>>>> Done:', str(np.around(exec_time_sec/60,1)), 'minutes')

    # Return  
    return results if return_results else {'SKU': kwargs.get('SKU', None), 'exec_time_sec': exec_time_sec, 'cpu_time_sec': cpu_time_sec}

In [16]:
### Function to aggregate all results
def aggregateResults(taus, SKUs, path_to_save, name_to_save):
    
    results = pd.DataFrame()
    
    # For each look-ahead tau=0,...,4
    for tau in taus:
        # For each product (SKU) k=1,...,M
        for SKU in SKUs:
            file_name = path_to_save+'/'+name_to_save+'_SKU'+str(SKU)+'_tau'+str(tau)+'.csv'
            # Check if results exist   
            if os.path.exists(file_name):
                results = pd.concat([results, pd.read_csv(file_name)])

    return(results)

In [17]:
### Context manager (Credits: 'https://stackoverflow.com/questions/24983493/tracking-progress-of-joblib-parallel-execution')
@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """Context manager to patch joblib to report into tqdm progress bar given as argument"""
    class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
        def __call__(self, *args, **kwargs):
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    old_batch_callback = joblib.parallel.BatchCompletionCallBack
    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        joblib.parallel.BatchCompletionCallBack = old_batch_callback
        tqdm_object.close()

## (a) Rolling Horizon Global Weighted SAA (GwSAA)

...

In [86]:
# Weights model names
weightsmodel_cv_name = 'cv_rfwm_global_z'
weightsmodel_name = 'rfwm_global_z'

In [87]:
# Define experiment paramaters
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,

    # Gurobi meta params
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params
    'path_to_save': PATH_RESULTS+'/GwSAA_NEW_z',
    'name_to_save': 'GwSAA_NEW_z',
    'print_progress': True,
    'return_results': True

}

In [91]:
# Set path
if not os.path.exists(experiment_params['path_to_save']): os.mkdir(experiment_params['path_to_save'])

# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Print:
    print('Look-ahead tau='+str(tau)+'...')
    
    
    # Prepare data
    samples_z = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
    weights_z = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')
    samples_z, _, weights_z = prep_samples_and_weights(samples_z, weights_z, SKUs=SKUs, ts=ts)

    # Prepare data - unscaled
    samples = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_samples_tau'+str(tau)+'.joblib')
    _, actuals = prep_samples_and_weights(samples, SKUs=SKUs, ts=ts)

    samples_ = {}
    for SKU in SKUs:
        samples_[SKU] = {}
        for t in ts:
            d = samples_z[t].reshape(-1,1) if samples_z[t].ndim == 1 else copy.deepcopy(samples_z[t])
            d_z = np.around(scaler_fitted[SKU].inverse_transform(d))
            samples_[SKU][t] = d_z.flatten() if samples_z[t].ndim == 1 else copy.deepcopy(d_z)
    
    # For each product (SKU) k=1,...,M
    with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
        resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(tau=tau, SKU=SKU, wsaamodel=WeightedSAA(), 
                                                                 samples=samples_[SKU], weights=weights_z[SKU], actuals=actuals[SKU], 
                                                                 **experiment_params) for SKU in SKUs)

In [None]:
#### Test predictive power

In [None]:
# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Print:
    print('Look-ahead tau='+str(tau)+'...')
    
    
    # Prepare data
    samples_z = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
    samples_z, actuals_z = prep_samples_and_weights(samples_z, SKUs=SKUs, ts=ts)

    # Prepare data - unscaled
    samples = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_samples_tau'+str(tau)+'.joblib')
    samples, actuals = prep_samples_and_weights(samples, SKUs=SKUs, ts=ts)

    samples_ = {}
    for SKU in SKUs:
        samples_[SKU] = {}
        for t in ts:
            d = samples_z[t].reshape(-1,1) if samples_z[t].ndim == 1 else copy.deepcopy(samples_z[t])
            d_z = np.around(scaler_fitted[SKU].inverse_transform(d))
            samples_[SKU][t] = d_z.flatten() if samples_z[t].ndim == 1 else copy.deepcopy(d_z)
            
            weightfunctions_z[t].weightsmodel.predict()

In [106]:
samples_z = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_z_samples_tau'+str(tau)+'.joblib')
weightfunctions_z = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_z_weightfunctions_tau'+str(tau)+'.joblib')
samples = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_samples_tau'+str(tau)+'.joblib')
weightfunctions = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_global_weightfunctions_tau'+str(tau)+'.joblib')

In [124]:
t=10

In [144]:
y_pred_z = weightfunctions_z[t].weightsmodel.predict(samples_z[t]['X_test'])
y_pred = weightfunctions[t].weightsmodel.predict(samples[t]['X_test'])
y = copy.deepcopy(samples[t]['y_test'])

In [149]:
y_pred_z = weightfunctions_z[t].weightsmodel.predict(samples_z[t]['X_test'])
y_pred = weightfunctions[t].weightsmodel.predict(samples[t]['X_test'])
y = copy.deepcopy(samples[t]['y_test'])

y_pred_zz = copy.deepcopy(y_pred_z)*(-999)
error_zz = copy.deepcopy(y_pred_z)*0
error = copy.deepcopy(y_pred)*0
for SKU in SKUs:
    y_pred_zz[samples_z[t]['id_test'].SKU==SKU] = (
        scaler_fitted[SKU].inverse_transform(y_pred_z[samples_z[t]['id_test'].SKU==SKU].reshape(-1,1)).flatten()
    )
    
    error_zz[samples_z[t]['id_test'].SKU==SKU] = y_pred_zz[samples_z[t]['id_test'].SKU==SKU] - y[samples_z[t]['id_test'].SKU==SKU]
    error[samples[t]['id_test'].SKU==SKU] = y_pred[samples[t]['id_test'].SKU==SKU] - y[samples[t]['id_test'].SKU==SKU]

In [190]:
ts=[1]

In [191]:
res = {}
for t in ts:
    
    y_pred_z = weightfunctions_z[t].weightsmodel.predict(samples_z[t]['X_test'])
    y_pred = weightfunctions[t].weightsmodel.predict(samples[t]['X_test'])
    y = copy.deepcopy(samples[t]['y_test'])

    y_pred_zz = copy.deepcopy(y_pred_z)*(-999)
    error_zz = copy.deepcopy(y_pred_z)*0
    error = copy.deepcopy(y_pred)*0
    for SKU in SKUs:
        y_pred_zz[samples_z[t]['id_test'].SKU==SKU] = (
            scaler_fitted[SKU].inverse_transform(y_pred_z[samples_z[t]['id_test'].SKU==SKU].reshape(-1,1)).flatten()
        )

        error_zz[samples_z[t]['id_test'].SKU==SKU] = y_pred_zz[samples_z[t]['id_test'].SKU==SKU] - y[samples_z[t]['id_test'].SKU==SKU]
        error[samples[t]['id_test'].SKU==SKU] = y_pred[samples[t]['id_test'].SKU==SKU] - y[samples[t]['id_test'].SKU==SKU]
        
        
    res[t] = {'error_zz': error_zz, 'error': error, 'y_pred_zz': y_pred_zz, 'y_pred': y_pred, 'y': y}

In [192]:
test_zz = {}
test = {}
for SKU in SKUs:
    test_zz[SKU] = np.array([])
    test[SKU] = np.array([])
    for t in ts:
        test_zz[SKU] = np.append(test_zz[SKU], res[t]['error_zz'][samples_z[t]['id_test'].SKU==SKU].flatten())
        test[SKU] = np.append(test[SKU], res[t]['error'][samples[t]['id_test'].SKU==SKU].flatten())
    test_zz[SKU] = np.mean(test_zz[SKU]**2)
    test[SKU] = np.mean(test[SKU]**2)

In [193]:
results = pd.DataFrame({'mse_zz': test_zz, 'mse': test})

In [194]:
results['diffs'] = results.mse_zz / results.mse

In [197]:
results.describe()

Unnamed: 0,mse_zz,mse,diffs
count,460.0,460.0,460.0
mean,158122100.0,10462700.0,1057154.0
std,1661554000.0,191113600.0,17688080.0
min,0.0193706,0.05364695,3.467115e-05
25%,940.2088,27.58633,1.750013
50%,23608.77,695.1627,15.17072
75%,464655.1,10201.89,386.3075
max,31156370000.0,4091510000.0,377065500.0


In [196]:
ts=range(1,13+1)

In [198]:
1.517072e+01

15.17072

In [202]:
scaler_fitted[1].data_max_, scaler_fitted[33].data_max_

(array([1258.]), array([8.]))

## (b) Rolling Horizon Global Robust Weighted SAA (GwSAA-R)

...

In [None]:
# Weights model names
weightsmodel_cv_name = 'cv_rfwm_global'
weightsmodel_name = 'rfwm_global'

In [None]:
# Define experiment paramaters
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,

    # Gurobi meta params
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params
    'path_to_save': PATH_RESULTS+'/GwSAAR_NEW',
    'name_to_save_prefix': 'GwSAAR_NEW',
    'print_progress': False,
    'return_results': False

}

In [None]:
## TODO:
    # CHECK IF DUE TO INFINITY NORM I SHOULD CHOOSE EPSILON SIGNIFICANTLY HIGHER!
        # Check convergence...
        # Maybe use rather:
            # e = 1,32,64,128,256,512,1024 ?
            
    # delete uptter bound in all functions - this was wrong or did at least not help... also for Weighted SAA remove

In [None]:
# For each uncertainty set specification
for e in [1,3,6,9,12]:
    
    # Update params
    experiment_params['name_to_save'] = experiment_params['name_to_save_prefix']+'_e'+str(e).replace('.', '')
    
    # Set path
    if not os.path.exists(experiment_params['path_to_save']): os.mkdir(experiment_params['path_to_save'])

    # For each look-ahead tau=0,...,4
    for tau in taus:

        # Print:
        print('Look-ahead tau='+str(tau)+'...')

        # Prepare data
        samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
        weights = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')

        samples, actuals, weights, epsilons = prep_samples_and_weights(samples, weights, e=e, SKUs=SKUs, ts=ts)

        # For each product (SKU) k=1,...,M
        with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
            resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(tau=tau, SKU=SKU, wsaamodel=RobustWeightedSAA2(), 
                                                                     samples=samples, weights=weights[SKU], epsilons=epsilons[SKU],
                                                                     actuals=actuals[SKU], e=e, **experiment_params) for SKU in SKUs)

## (c) Rolling Horizon Local Weighted SAA (wSAA)

...

In [None]:
# Weights model names
weightsmodel_cv_name = 'cv_rfwm_local'
weightsmodel_name = 'rfwm_local'

In [None]:
# Define experiment paramaters
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,
    
    # Gurobi meta params
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params
    'path_to_save': PATH_RESULTS+'/wSAA_NEW',
    'name_to_save': 'wSAA_NEW',
    'print_progress': False,
    'return_results': False

}

In [None]:
# Set path
if not os.path.exists(experiment_params['path_to_save']): os.mkdir(experiment_params['path_to_save'])

# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Print:
    print('Look-ahead tau='+str(tau)+'...')
    
    # Prepare data
    samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
    weights = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')

    samples, actuals, weights = prep_samples_and_weights(samples, weights, SKUs=SKUs, ts=ts)
    
    # For each product (SKU) k=1,...,M
    with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
        resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(tau=tau, SKU=SKU, wsaamodel=WeightedSAA(), 
                                                                 samples=samples[SKU], weights=weights[SKU], actuals=actuals[SKU], 
                                                                 **experiment_params) for SKU in SKUs)

In [None]:
# Aggregate results
results = aggregateResults(taus, SKUs, experiment_params['path_to_save'], experiment_params['name_to_save'])
results

In [None]:
# Save aggregate results
file_name = experiment_params['path_to_save']+'/'+experiment_params['name_to_save']+'_results.csv'
results.to_csv(file_name, sep=',', index=False)

## (d) Rolling Horizon Local Robust Weighted SAA (wSAA-R)

...

In [None]:
# Weights model names
weightsmodel_cv_name = 'cv_rfwm_local'
weightsmodel_name = 'rfwm_local'

In [None]:
# Define experiment paramaters
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,
    
    # Gurobi meta params
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params
    'path_to_save': PATH_RESULTS+'/wSAAR_NEW',
    'name_to_save_prefix': 'wSAAR_NEW',
    'print_progress': False,
    'return_results': False

}

In [None]:
# For each uncertainty set specification
for e in [1,3,6,9,12]:
    
    # Print:
    print('Uncertainty set parameter e='+str(e)+'...')
        
    # Update params
    experiment_params['name_to_save'] = experiment_params['name_to_save_prefix']+'_e'+str(e).replace('.', '')
    
    # Set path
    if not os.path.exists(experiment_params['path_to_save']): os.mkdir(experiment_params['path_to_save'])

    # For each look-ahead tau=0,...,4
    for tau in taus:

        # Print:
        print('...look-ahead tau='+str(tau)+'...')

        # Prepare data
        samples = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_samples_tau'+str(tau)+'.joblib')
        weights = joblib.load(PATH_WEIGHTSMODEL+'/'+weightsmodel_name+'_weights_tau'+str(tau)+'.joblib')

        samples, actuals, weights, epsilons = prep_samples_and_weights(samples, weights, e=e, SKUs=SKUs, ts=ts)

        # For each product (SKU) k=1,...,M
        with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
            resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(tau=tau, SKU=SKU, wsaamodel=RobustWeightedSAA2(), 
                                                                     samples=samples[SKU], weights=weights[SKU], epsilons=epsilons[SKU],
                                                                     actuals=actuals[SKU], e=e, **experiment_params) for SKU in SKUs)

## (e) Baseline model: Rolling Horizon Local Weighted SAA (SAA)

...

In [None]:
# Define experiment paramaters
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,
    
    # Gurobi meta params
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params
    'path_to_save': PATH_RESULTS+'/SAA_NEW',
    'name_to_save': 'SAA_NEW',
    'print_progress': False,
    'return_results': False

}

In [None]:
# Set path
if not os.path.exists(experiment_params['path_to_save']): os.mkdir(experiment_params['path_to_save'])

# For each look-ahead tau=0,...,4
for tau in taus:
    
    # Print:
    print('Look-ahead tau='+str(tau)+'...')
    
    # Prepare data
    samples = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_local_samples_tau'+str(tau)+'.joblib')
    
    samples, actuals = prep_samples_and_weights(samples, SKUs=SKUs, ts=ts)
    
    # For each product (SKU) k=1,...,M
    with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
        resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(tau=tau, SKU=SKU, wsaamodel=WeightedSAA(), 
                                                                 samples=samples[SKU], actuals=actuals[SKU], 
                                                                 **experiment_params) for SKU in SKUs)

## (f) Ex-post optimal model with deterministic demand

...

In [None]:
# Define experiment paramaters
experiment_params = {
            
    # Cost param settings
    'cost_params': cost_params,
    
    # Gurobi meta params
    'LogToConsole': 0, 
    'Threads': 1, 
    'NonConvex': 2, 
    'PSDTol': 1e-3, # 0.1%
    'MIPGap': 1e-3, # 0.1%
    'NumericFocus': 0, 
    'obj_improvement': 1e-3, # 0.1%
    'obj_timeout_sec': 3*60, # 3 min
    'obj_timeout_max_sec': 10*60, # 10 min

    # Program meta params
    'path_to_save': PATH_RESULTS+'/ExPost_NEW',
    'name_to_save': 'ExPost_NEW',
    'print_progress': False,
    'return_results': False

}

In [None]:
# Prepare data
samples = joblib.load(PATH_WEIGHTSMODEL+'/rfwm_local_samples_tau'+str(0)+'.joblib')
actuals = {}
for SKU in SKUs:
    d = []
    for t in ts:
        d = d + [samples[SKU][t]['y_test'].item()]
    actuals[SKU] = np.array(d).reshape(1,len(d))

In [None]:
# Set path
if not os.path.exists(experiment_params['path_to_save']): os.mkdir(experiment_params['path_to_save'])

# For each product (SKU) k=1,...,M
with tqdm_joblib(tqdm(desc='Progress', total=len(SKUs))) as progress_bar:
    resultslog = Parallel(n_jobs=32)(delayed(run_experiment)(SKU=SKU, wsaamodel=WeightedSAA(), actuals=actuals[SKU], **experiment_params) for SKU in SKUs)