# Imports

In [None]:
# Import utils
import numpy as np
import pandas as pd
import copy
import time
import datetime as dt
import pickle
import json
from pathlib import Path
import joblib
from joblib import dump, load, Parallel, delayed
import os
import itertools
import contextlib
from tqdm import tqdm

from Experiment import Experiment
from Experiment import Evaluation

In [4]:
# Setup the experiment
#
# The base data directory and the planning horizon are defined once and every
# dependent setting (sub-folders, period range) is derived from them, so that
# moving the data or changing the horizon is a single-line edit.
path_data = '/home/fesc/DDDInventoryControl/Data'
path_results = path_data + '/Results'
T = 13  # Planning horizon T

experiment_setup = dict(

    # Set paths
    path_data = path_data,
    path_weightsmodel = path_data + '/WeightsModel',
    path_results = path_results,
    path_publish = path_results + '/Publish',

    # Weights models
    global_weightsmodel = 'rfwm_global',
    local_weightsmodel = 'rfwm_local',

    # Optimization models
    GwSAA = 'GwSAA',
    GwSAAR = 'GwSAAR',
    wSAA = 'wSAA',
    wSAAR = 'wSAAR',
    SAA = 'SAA',
    ExPost = 'ExPost',

    # Set product identifiers
    products = range(1,460+1),   # Products (SKUs) k=1,...,M

    # Set problem params
    T = T,                # Planning horizon T
    ts = range(1,T+1),    # Periods t=1,...,T of the planning horizon
    taus = [0,1,2,3,4],   # Look-aheads tau=0,...,4
    es = [0.33, 0.67, 1.00, 1.33, 1.67, 2.00, 2.33, 2.67, 3.00], # Uncertainty set specifications e

    # Set cost params
    cost_params = [
        {'CR': 0.50, 'K': 100, 'u': 0.5, 'h': 1, 'b': 1},
        {'CR': 0.75, 'K': 100, 'u': 0.5, 'h': 1, 'b': 3},
        {'CR': 0.90, 'K': 100, 'u': 0.5, 'h': 1, 'b': 9}
    ]
)

# Make all experiment variables visible as notebook-level names.
# globals() is used instead of locals(): writing to the mapping returned by
# locals() is only guaranteed to take effect at module scope, whereas
# globals() always refers to the module namespace.
globals().update(experiment_setup)

# Initialize Experiment and Evaluation with the same setup
experiment = Experiment(**experiment_setup)
evaluation = Evaluation(**experiment_setup)

# I. Empirical evaluation based on best ex-post out-of-sample results

In [5]:
# Load results: the "best" out-of-sample results summary, read from the
# OutOfSample sub-folder of path_results
Results_summary = pd.read_csv(path_results+'/OutOfSample/Results_summary_best.csv')

In [6]:
# Load predictive performance files (rmsse and spl, each for the global and
# the local variant).
# NOTE(review): this cell only reads the four files; nothing here appends them
# to Results_summary — confirm where (or whether) they are joined.
rmsse_global = pd.read_csv(path_results+'/OutOfSample/Predictive_performance_rmsse_global.csv')
rmsse_local = pd.read_csv(path_results+'/OutOfSample/Predictive_performance_rmsse_local.csv')
spl_global = pd.read_csv(path_results+'/OutOfSample/Predictive_performance_spl_global.csv')
spl_local = pd.read_csv(path_results+'/OutOfSample/Predictive_performance_spl_local.csv')

## A. Comparison of Model Performance

In [7]:
# Prescriptive performance per product and in total (by CR and model).
# The call receives the model cost and the SAA cost column and returns the
# per-row result (pq) together with the result per (CR, model) group.
evaluation = Evaluation(**experiment_setup)
pq, pq_bygroup = evaluation.prescriptive_performance(
    Results_summary['cost'],
    Results_summary['cost_SAA'],
    ids_groups=Results_summary[['CR', 'model']],
    groupby=['CR', 'model'],
)

In [8]:
# Store: attach the per-row prescriptive performance as column 'pq' and keep
# an independent (deep) copy of the grouped totals under its own name
Results_summary['pq'] = copy.deepcopy(pq)
Results_summary_total = copy.deepcopy(pq_bygroup)

In [9]:
# Save: write the per-product table and the grouped totals as CSV files
# (default comma separator, no index column) into the publish folder
Results_summary.to_csv(
    path_publish+'/PrescriptivePerformance_perProduct.csv',
    index=False,
)
Results_summary_total.to_csv(
    path_publish+'/PrescriptivePerformance_total.csv',
    index=False,
)

### Total prescriptive performance (cost relative to cost of SAA)

In [10]:
# Display the grouped prescriptive performance (one row per CR / model)
Results_summary_total

Unnamed: 0,CR,model,cost,cost_saa,pq
0,0.5,GwSAA,7128693.5,9510984.0,0.749522
1,0.5,GwSAAR,7351727.0,9510984.0,0.772972
2,0.5,wSAA,7274623.5,9510984.0,0.764865
3,0.5,wSAAR,7376036.5,9510984.0,0.775528
4,0.75,GwSAA,9243822.0,13342178.5,0.692827
5,0.75,GwSAAR,9453320.5,13342178.5,0.708529
6,0.75,wSAA,9790016.5,13342178.5,0.733764
7,0.75,wSAAR,9604056.5,13342178.5,0.719827
8,0.9,GwSAA,11998488.5,20553657.0,0.583764
9,0.9,GwSAAR,11686764.0,20553657.0,0.568598


### Per product prescriptive performance (cost relative to cost of SAA)

In [11]:
# Median per-product prescriptive performance for every (CR, model) group.
# The aggregation is given by name ('median') rather than as np.median:
# pandas >= 2.1 warns when a NumPy callable is passed to groupby .agg and will
# stop mapping it to the built-in group-wise median in a future version.
Results_summary.groupby(['CR', 'model']).agg({'pq': 'median'}).reset_index()

Unnamed: 0,CR,model,pq
0,0.5,GwSAA,0.884712
1,0.5,GwSAAR,0.856468
2,0.5,wSAA,0.915127
3,0.5,wSAAR,0.857017
4,0.75,GwSAA,0.851111
5,0.75,GwSAAR,0.827303
6,0.75,wSAA,0.892095
7,0.75,wSAAR,0.832024
8,0.9,GwSAA,0.831216
9,0.9,GwSAAR,0.816604


### Differences of per product prescriptive performance (cost relative to cost of SAA) incl. significance test

In [12]:
# Differences in per-product prescriptive performance between models,
# computed with test='paired'.
# NOTE(review): Evaluation is re-instantiated with the same setup used
# earlier — confirm whether a fresh instance is actually needed here.
evaluation = Evaluation(**experiment_setup)
Results_differences = evaluation.differences(Results_summary, test='paired')

In [13]:
# Display the model-vs-benchmark differences together with test statistic and p-value
Results_differences

Unnamed: 0,CR,model,benchmark,mean_of_differences,median_of_differences,share_model_is_better,share_cost_model_is_better,statistic,pvalue
0,0.5,GwSAA,wSAA,-0.017564,-0.009518,0.63895,0.823546,17207.0,1.341942e-23
1,0.5,GwSAA,GwSAAR,-0.058317,0.01359,0.218818,0.348219,67605.0,1.0
2,0.5,GwSAA,wSAAR,-0.057982,0.01373,0.225383,0.401985,65999.0,1.0
3,0.5,wSAA,GwSAA,0.017564,0.009518,0.238512,0.175258,63394.0,1.0
4,0.5,wSAA,GwSAAR,-0.040752,0.029172,0.157549,0.161877,70735.0,1.0
5,0.5,wSAA,wSAAR,-0.040418,0.027914,0.172867,0.37566,71710.0,1.0
6,0.5,GwSAAR,GwSAA,0.058317,-0.01359,0.676149,0.650996,16240.0,3.443907e-27
7,0.5,GwSAAR,wSAA,0.040752,-0.029172,0.726477,0.836259,11075.0,2.888017e-37
8,0.5,GwSAAR,wSAAR,0.000335,0.0,0.435449,0.619848,39775.5,0.5130269
9,0.5,wSAAR,GwSAA,0.057982,-0.01373,0.66302,0.596682,16622.0,8.575527999999999e-26


### Achieved service level

In [14]:
# Compute the achieved service levels from the results summary
# (reuses the `evaluation` instance created in an earlier cell)
service_levels = evaluation.service_level(Results_summary)

In [15]:
# Display stock-out counts and service levels per CR / model
service_levels

Unnamed: 0,CR,model,n_stockouts,n_stockouts_SAA,n_periods,sl,sl_SAA
0,0.5,GwSAA,3842,5087,5980,0.357525,0.149331
1,0.5,GwSAAR,3352,5087,5980,0.439465,0.149331
2,0.5,wSAA,4181,5087,5980,0.300836,0.149331
3,0.5,wSAAR,3416,5087,5980,0.428763,0.149331
4,0.75,GwSAA,2147,3695,5980,0.64097,0.382107
5,0.75,GwSAAR,1484,3695,5980,0.751839,0.382107
6,0.75,wSAA,2621,3695,5980,0.561706,0.382107
7,0.75,wSAAR,1535,3695,5980,0.743311,0.382107
8,0.9,GwSAA,895,2110,5980,0.850334,0.647157
9,0.9,GwSAAR,360,2110,5980,0.939799,0.647157


### Ex-post model selection

#### Frequency of selected model

#### Differences to model selection

## B. Structural Insights: From Predictive To Prescriptive Performance

## C. Practical Insights: Choosing the Rolling Horizon

# II. Empirical evaluation based on ex-ante model parameter selection

In [44]:
# Load best in-sample results: one "<model>_results_summary_best.csv" file per
# optimization model, all located in the InSample sub-folder of path_results
results_GwSAA_best = pd.read_csv(f'{path_results}/InSample/{GwSAA}_results_summary_best.csv')
results_wSAA_best = pd.read_csv(f'{path_results}/InSample/{wSAA}_results_summary_best.csv')
results_GwSAAR_best = pd.read_csv(f'{path_results}/InSample/{GwSAAR}_results_summary_best.csv')
results_wSAAR_best = pd.read_csv(f'{path_results}/InSample/{wSAAR}_results_summary_best.csv')
results_SAA_best = pd.read_csv(f'{path_results}/InSample/{SAA}_results_summary_best.csv')

In [45]:
# Load out-of-sample results: one "<model>_results_summary.csv" file per
# optimization model (including ExPost) from the OutOfSample sub-folder
results_GwSAA = pd.read_csv(f'{path_results}/OutOfSample/{GwSAA}_results_summary.csv')
results_wSAA = pd.read_csv(f'{path_results}/OutOfSample/{wSAA}_results_summary.csv')
results_GwSAAR = pd.read_csv(f'{path_results}/OutOfSample/{GwSAAR}_results_summary.csv')
results_wSAAR = pd.read_csv(f'{path_results}/OutOfSample/{wSAAR}_results_summary.csv')
results_SAA = pd.read_csv(f'{path_results}/OutOfSample/{SAA}_results_summary.csv')
results_ExPost = pd.read_csv(f'{path_results}/OutOfSample/{ExPost}_results_summary.csv')

In [46]:
# Apply best in-sample model parameters to out-of-sample results.
# Only the parameter keys of the in-sample selection are kept and inner-joined
# with the out-of-sample results, so each frame ends up holding the
# out-of-sample rows for the selected parameters. The models with an
# uncertainty set (GwSAAR, wSAAR) additionally match on 'e'.
results_GwSAA_best = results_GwSAA_best[['CR', 'tau', 'product']].merge(
    results_GwSAA, on=['CR', 'tau', 'product'])

results_wSAA_best = results_wSAA_best[['CR', 'tau', 'product']].merge(
    results_wSAA, on=['CR', 'tau', 'product'])

results_GwSAAR_best = results_GwSAAR_best[['CR', 'tau', 'e', 'product']].merge(
    results_GwSAAR, on=['CR', 'tau', 'e', 'product'])

results_wSAAR_best = results_wSAAR_best[['CR', 'tau', 'e', 'product']].merge(
    results_wSAAR, on=['CR', 'tau', 'e', 'product'])

results_SAA_best = results_SAA_best[['CR', 'tau', 'product']].merge(
    results_SAA, on=['CR', 'tau', 'product'])

In [47]:
# Combine to one results data set
cols = ['model', 'CR', 'tau', 'e', 'product', 'K', 'u', 'h', 'b',
        'I', 'q', 'I_q', 'y', 'I_q_y', 'n_stockouts', 'n_periods', 'c_o', 'c_s', 'cost',
        'defaulted', 'solutions', 'gap', 'exec_time_sec', 'cpu_time_sec']

# GwSAA and wSAA get 'e' filled with NaN rather than None.
# A None-filled column is an all-NA object column; concatenating it with the
# numeric 'e' of the other models relies on pandas ignoring all-NA columns
# when picking the result dtype, which is deprecated. NaN keeps 'e' numeric.
# NOTE(review): assumes 'e' is read as a float column for GwSAAR / wSAAR — confirm.
results_GwSAA_best['model'] = GwSAA
results_GwSAA_best['e'] = np.nan
results_GwSAA_best = results_GwSAA_best[cols]

results_wSAA_best['model'] = wSAA
results_wSAA_best['e'] = np.nan
results_wSAA_best = results_wSAA_best[cols]

# GwSAAR and wSAAR already carry 'e' from the merge on their parameter keys
results_GwSAAR_best['model'] = GwSAAR
results_GwSAAR_best = results_GwSAAR_best[cols]

results_wSAAR_best['model'] = wSAAR
results_wSAAR_best = results_wSAAR_best[cols]

# ignore_index gives the stacked frame a fresh 0..n-1 index instead of
# repeating each input frame's row labels
results = pd.concat(
    [results_GwSAA_best, results_wSAA_best, results_GwSAAR_best, results_wSAAR_best],
    ignore_index=True,
)

In [48]:
# Add SAA and ExPost: join their cost and stock-out columns onto every model
# row by (CR, product). The overlapping column names coming from the right-hand
# frame receive the suffix '_SAA' / '_ExPost'; the model's own columns keep
# their names.
results = results.merge(
    results_SAA_best[['CR', 'product', 'cost', 'n_stockouts']],
    on=['CR', 'product'],
    suffixes=('', '_SAA'),
)
results = results.merge(
    results_ExPost[['CR', 'product', 'cost', 'n_stockouts']],
    on=['CR', 'product'],
    suffixes=('', '_ExPost'),
)

In [49]:
# Prescriptive performance per product and in total (by CR and model).
# The call receives the model cost and the SAA cost column and returns the
# per-row result (pq) together with the result per (CR, model) group.
evaluation = Evaluation(**experiment_setup)
pq, pq_bygroup = evaluation.prescriptive_performance(
    results['cost'],
    results['cost_SAA'],
    ids_groups=results[['CR', 'model']],
    groupby=['CR', 'model'],
)

In [50]:
# Store: attach the per-row prescriptive performance as column 'pq' and keep
# an independent (deep) copy of the grouped totals under its own name
results['pq'] = copy.deepcopy(pq)
results_total = copy.deepcopy(pq_bygroup)

In [53]:
# Display the grouped prescriptive performance (one row per CR / model)
results_total

Unnamed: 0,CR,model,cost,cost_saa,pq
0,0.5,GwSAA,7190934.0,9610582.5,0.748231
1,0.5,GwSAAR,7624199.0,9610582.5,0.793313
2,0.5,wSAA,7380000.5,9610582.5,0.767904
3,0.5,wSAAR,7682620.5,9610582.5,0.799392
4,0.75,GwSAA,9500080.0,13609385.5,0.698054
5,0.75,GwSAAR,10192336.5,13609385.5,0.74892
6,0.75,wSAA,10047891.5,13609385.5,0.738306
7,0.75,wSAAR,10221775.0,13609385.5,0.751083
8,0.9,GwSAA,13177714.0,21554345.5,0.611372
9,0.9,GwSAAR,13395011.0,21554345.5,0.621453


In [54]:
# Median per-product prescriptive performance for every (CR, model) group.
# The aggregation is given by name ('median') rather than as np.median:
# pandas >= 2.1 warns when a NumPy callable is passed to groupby .agg and will
# stop mapping it to the built-in group-wise median in a future version.
results.groupby(['CR', 'model']).agg({'pq': 'median'}).reset_index()

Unnamed: 0,CR,model,pq
0,0.5,GwSAA,0.895422
1,0.5,GwSAAR,0.902711
2,0.5,wSAA,0.922889
3,0.5,wSAAR,0.906058
4,0.75,GwSAA,0.852891
5,0.75,GwSAAR,0.892164
6,0.75,wSAA,0.889117
7,0.75,wSAAR,0.876183
8,0.9,GwSAA,0.808148
9,0.9,GwSAAR,0.843193


In [69]:
# Differences in per-product prescriptive performance between models for the
# ex-ante results, computed with test='paired'.
# NOTE(review): this rebinds Results_differences, replacing any table stored
# under that name by an earlier cell.
evaluation = Evaluation(**experiment_setup)
Results_differences = evaluation.differences(results, test='paired')

In [70]:
# Translate p-values into significance markers:
#   p < 0.001          -> '***'
#   0.001 <= p < 0.01  -> '**'
#   0.01 <= p < 0.05   -> '*'
#   p >= 0.05          -> '-'
# pd.cut with left-closed bins (right=False) reproduces exactly these
# half-open intervals. Unlike summing boolean masks, a missing p-value stays
# missing instead of becoming a separate category 0, and the result always
# carries all four levels regardless of which ones occur in the data.
Results_differences['sig'] = pd.cut(
    Results_differences['pvalue'],
    bins=[-np.inf, 0.001, 0.01, 0.05, np.inf],
    right=False,
    labels=['***', '**', '*', '-'],
    ordered=False,
)

In [71]:
# Display the paired-test differences including the significance marker column
Results_differences

Unnamed: 0,CR,model,benchmark,mean_of_differences,median_of_differences,share_model_is_better,share_cost_model_is_better,statistic,pvalue,sig
0,0.5,GwSAA,wSAA,-0.01772,-0.009588,0.575492,0.794998,17866.0,5.9158570000000004e-18,***
1,0.5,GwSAA,GwSAAR,-0.11762,0.0,0.426696,0.544875,35114.5,0.05526053,-
2,0.5,GwSAA,wSAAR,-0.115187,0.0,0.442013,0.556355,33227.0,0.01140354,*
3,0.5,wSAA,GwSAA,0.01772,0.009588,0.256018,0.202105,54524.0,1.0,-
4,0.5,wSAA,GwSAAR,-0.0999,0.005719,0.323851,0.324208,47067.0,0.9998453,-
5,0.5,wSAA,wSAAR,-0.097467,0.002868,0.33698,0.509828,46479.0,0.9997172,-
6,0.5,GwSAAR,GwSAA,0.11762,0.0,0.43326,0.453431,42306.5,0.9447395,-
7,0.5,GwSAAR,wSAA,0.0999,-0.005719,0.538293,0.67423,30748.0,0.0001546913,***
8,0.5,GwSAAR,wSAAR,0.002433,0.0,0.466083,0.720091,35298.0,0.118017,-
9,0.5,wSAAR,GwSAA,0.115187,0.0,0.413567,0.420365,43409.0,0.9885965,-


In [72]:
# Same comparison as the previous differences call, but with test='unpaired'.
# NOTE(review): this rebinds Results_differences, so the paired table computed
# earlier is no longer available under that name afterwards.
evaluation = Evaluation(**experiment_setup)
Results_differences = evaluation.differences(results, test='unpaired')

In [73]:
# Translate p-values into significance markers:
#   p < 0.001          -> '***'
#   0.001 <= p < 0.01  -> '**'
#   0.01 <= p < 0.05   -> '*'
#   p >= 0.05          -> '-'
# pd.cut with left-closed bins (right=False) reproduces exactly these
# half-open intervals. Unlike summing boolean masks, a missing p-value stays
# missing instead of becoming a separate category 0, and the result always
# carries all four levels regardless of which ones occur in the data.
Results_differences['sig'] = pd.cut(
    Results_differences['pvalue'],
    bins=[-np.inf, 0.001, 0.01, 0.05, np.inf],
    right=False,
    labels=['***', '**', '*', '-'],
    ordered=False,
)

In [74]:
# Display the unpaired-test differences including the significance marker column
Results_differences

Unnamed: 0,CR,model,benchmark,difference_of_means,difference_of_medians,share_model_is_better,share_cost_model_is_better,statistic,pvalue,sig
0,0.5,GwSAA,wSAA,-0.01772,-0.027467,0.575492,0.794998,94332.0,0.005628,**
1,0.5,GwSAA,GwSAAR,-0.11762,-0.00729,0.426696,0.544875,100805.0,0.181855,-
2,0.5,GwSAA,wSAAR,-0.115187,-0.010636,0.442013,0.556355,99523.5,0.109332,-
3,0.5,wSAA,GwSAA,0.01772,0.027467,0.256018,0.202105,114517.0,0.994376,-
4,0.5,wSAA,GwSAAR,-0.0999,0.020177,0.323851,0.324208,110153.5,0.924839,-
5,0.5,wSAA,wSAAR,-0.097467,0.016831,0.33698,0.509828,108832.5,0.86583,-
6,0.5,GwSAAR,GwSAA,0.11762,0.00729,0.43326,0.453431,108044.0,0.818211,-
7,0.5,GwSAAR,wSAA,0.0999,-0.020177,0.538293,0.67423,98695.5,0.075196,-
8,0.5,GwSAAR,wSAAR,0.002433,-0.003347,0.466083,0.720091,103279.0,0.386937,-
9,0.5,wSAAR,GwSAA,0.115187,0.010636,0.413567,0.420365,109325.5,0.890715,-
