# Import required libraries

In [1]:
# Import utils
import numpy as np
import pandas as pd
import time
import pickle
import pyreadr
import json
import copy
from pathlib import Path
import joblib as joblib
from joblib import dump, load, Parallel, delayed
import os
import itertools
import contextlib
from tqdm import tqdm


# Import (Rolling Horizon) Weighted SAA models
from WeightedSAA2 import WeightedSAA
from WeightedSAA2 import RobustWeightedSAA
from WeightedSAA2 import RollingHorizonOptimization

# Minimum example

In [None]:
# Minimal example: instantiate both model classes imported from WeightedSAA2
# without passing any constructor arguments.
gwsaa = WeightedSAA()
rgwsaa = RobustWeightedSAA()

# Functions

## Data preparation

We first define a function that prepares the data needed for an experiment (depending on the model/approach). 

If no sampling strategy is provided via the optional argument 'sampling', no weights are retrieved, else 'global' or 'local' weights are retrieved and historical demands are prepared for 'global' or 'local' sampling, respectively. 
    
If the optional argument 'e' is provided, the function additionally outputs 'epsilon' which is the uncertainty set threshold for robust optimization.

In [None]:
## Prepare the data to run an experiment over the full planning horizon
def prep_data(SKU, tau, T, sale_yearweek, path_data, path_samples, **kwargs):

    """

    Prepare the data needed for (weighted, robust) optimization of one product (SKU).

    If no sampling strategy is provided via the optional argument 'sampling', no weights are
    retrieved and the SKU's own (local) demand samples are used. Otherwise 'global' or 'local'
    weights are loaded and the demand samples are prepared for 'global' or 'local' sampling,
    respectively. If the optional argument 'e' is provided, the function additionally outputs
    'epsilons', the uncertainty set thresholds for robust optimization.

    Arguments:

        SKU: product (SKU) identifier
        tau: length of rolling look-ahead horizon
        T: Length T of the test horizon
        sale_yearweek: Last sale_yearweek of training data
        path_data: path of data
        path_samples: path of samples

    Optional arguments:

        sampling: Sampling strategy (either 'global', 'local'), with
            - 'global': uses weights generated with global training
            - 'local': uses weights generated with local training
        e: Robust uncertainty set threshold multiplier; epsilon for period t is e times the
           standard deviation of column 0 of the SKU's demand samples observed before period t

    Output:

        y: demand data - np.array of shape (n_samples, n_periods)
        ids_train: list of selector series (True/False of length n_samples) - list with lengths of the test horizon
        ids_test: list of selector series (True/False of length n_samples) - list with lengths of the test horizon

        weights: list of weights (one entry per test period, taken from the weights file at
        key t+1), or None if 'sampling' is not provided
        epsilons: list of epsilons (one per test period), or None if 'e' is not provided

    Raises:

        ValueError: if 'sampling' is provided but is neither 'global' nor 'local'

    """

    use_sampling = 'sampling' in kwargs
    use_robust = 'e' in kwargs

    # Fail early with a clear message; previously an unknown strategy left 'y' unassigned
    # and only surfaced as an UnboundLocalError at the return statement.
    if use_sampling and kwargs['sampling'] not in ('global', 'local'):
        raise ValueError("sampling must be either 'global' or 'local', got "
                         + repr(kwargs['sampling']))

    use_global = use_sampling and kwargs['sampling'] == 'global'

    # Demand samples (all products)
    robj = pyreadr.read_r(path_data+'/Y_Data_mv_NEW.RData')
    y_samples = np.array(robj['Y_Data_mv'])

    # IDs of all demand samples
    robj = pyreadr.read_r(path_data+'/ID_Data_NEW.RData')
    ID_samples = robj['ID_Data']

    # IDs of the demand samples of the current SKU
    robj = pyreadr.read_r(path_samples+'/SKU'+str(SKU)+'/Static/TmpFiles'+
                          str(tau)+'/ID_samples_k.RDS')
    ID_samples_SKU = robj[None]

    # Selector of the rows that belong to the current SKU (invariant over t, so computed once)
    is_sku = ID_samples.SKU_API == ID_samples_SKU.SKU_API[0]

    # Weights (only if a sampling strategy is provided)
    weighty_ij = None
    if use_sampling:
        # NOTE: pickle.load can execute arbitrary code - only load weight files from trusted sources
        with open(path_samples+'/SKU'+str(SKU)+'/Static/Weights'+
                  str(tau)+'/weights_'+kwargs['sampling']+'_ij.p', 'rb') as f:
            weighty_ij = pickle.load(f)

    # Demand samples: all products for global sampling, else only the current SKU
    y = y_samples if use_global else y_samples[is_sku]

    # Reshape data for each t=1...T (i.e., each period of the test horizon)
    ids_train = []
    ids_test = []

    weights = [] if use_sampling else None
    epsilons = [] if use_robust else None

    # Iterate over t
    for t in range(T):

        # Samples observed strictly before test period t (all products / current SKU only)
        in_history = ID_samples.sale_yearweek < sale_yearweek+t
        is_sku_history = is_sku & in_history

        # IDs of demand samples for training (global: all products, else: current SKU only)
        ids_train.append(in_history if use_global else is_sku_history)

        # IDs of demand samples for testing (always the current SKU in period t)
        ids_test.append(is_sku & (ID_samples.sale_yearweek == sale_yearweek+t))

        # Weights for global/local (the weights container is indexed by t+1)
        if use_sampling:
            weights.append(weighty_ij[t+1])

        # If e is provided, calculate robust optimization parameter epsilon
        if use_robust:
            epsilons.append(kwargs['e']*np.std(y_samples[is_sku_history, 0]))

    # Return
    return y, ids_train, ids_test, weights, epsilons

## Experiment wrapper

We now define a 'wrapper' function that iterates the experiment for a given SKU over different cost parameter settings and lengths of the rolling look-ahead horizon tau.

If the parameter 'sampling' is provided (either 'global' or 'local'), the function uses the specified sampling strategy. Otherwise, SAA is performed. If the multiplier 'e' for the uncertainty set threshold epsilon is provided, the function performs the robust extension.
 
Where: epsilon[t] = e *  in-sample standard deviation of the current product (SKU).

The function prepares and calls the experiment over t=1...T for each cost parameter setting and look-ahead horizon tau and then summarises the results, including performance and performance meta information. It also saves the results in CSV format to the specified path. The function can also be used in parallel processing environments.

In [None]:
def run_experiments(SKU, **kwargs):

    """

    Run the rolling horizon experiment for one product (SKU) over all cost parameter settings
    and all look-ahead horizons tau, and save the combined results as a CSV file.

    The model is chosen from the optional arguments:
        - 'sampling' and 'e' provided: Weighted Robust SAA (RobustWeightedSAA)
        - only 'sampling' provided: Weighted SAA (WeightedSAA with weights)
        - 'sampling' not provided: SAA (WeightedSAA without weights); 'e' is then not used
          for the optimization and is recorded as 0 in the results

    The data is read via prep_data from the module-level PATH_DATA and PATH_SAMPLES.


    Arguments (all passed as keyword arguments):

        SKU: product (SKU) identifier
        sale_yearweek: Last sale_yearweek of training data
        T: Length T of the test horizon
        tau: List of lengths of rolling look-ahead horizons
        cost_params: dictionary of dictionaries of cost parameters {'CR', 'K', 'u', 'h', 'b'};
                     each inner dictionary is passed as keyword arguments to the rolling
                     horizon run
        gurobi_params: dictionary of gurobi meta params {'LogToConsole', 'Threads', 'NonConvex'
                                                         'PSDTol', 'MIPGap', 'NumericFocus',
                                                         'obj_improvement', 'obj_timeout_sec'},
                       passed as keyword arguments to the model constructor
        path: directory where results should be saved
        model_name: model name for the file to save results

    Optional arguments:

        sampling: sampling strategy (either 'global' or 'local'); performs SAA if not provided
        e: robust uncertainty set threshold multiplier; performs no robust extension if not provided
        print_progress: if True, prints progress information (default: False)
        return_results: if True, returns the results data frame instead of the log (default: False)

    Output:

        results (pd.DataFrame) if return_results is True, else a log dictionary
        {'SKU', 'exec_time_sec', 'cpu_time_sec'}

    """

    st_exec = time.time()
    st_cpu = time.process_time()

    print_progress = kwargs.get('print_progress', False)
    T = kwargs['T']
    gurobi_params = kwargs['gurobi_params']

    # Model selection: the robust extension requires weights, i.e. a sampling strategy
    use_weights = 'sampling' in kwargs
    use_robust = use_weights and 'e' in kwargs

    # Optional arguments forwarded to prep_data (identical for every setting)
    prep_kwargs = {}
    if use_weights:
        prep_kwargs['sampling'] = kwargs['sampling']
    if use_robust:
        prep_kwargs['e'] = kwargs['e']

    # Gurobi meta params that are stored alongside the results
    gurobi_meta_keys = ('LogToConsole', 'Threads', 'NonConvex', 'PSDTol', 'MIPGap',
                        'NumericFocus', 'obj_improvement', 'obj_timeout_sec')

    # Print progress
    if print_progress:
        print('SKU:', SKU)

    # Initialize
    rhopt = RollingHorizonOptimization()
    frames = []

    # For each cost param setting
    for cost_params in kwargs['cost_params'].values():

        # Print progress
        if print_progress:
            print('... cost param setting:', cost_params)

        # For each rolling look-ahead horizon
        for tau in kwargs['tau']:

            # Print progress
            if print_progress:
                print('...... look-ahead horizon:', tau)

            # Prepare data (weights/epsilons are None when not requested)
            y, ids_train, ids_test, weights, epsilons = prep_data(
                SKU, tau, T, kwargs['sale_yearweek'], PATH_DATA, PATH_SAMPLES, **prep_kwargs
            )

            # Create empty model
            if use_robust:
                model = RobustWeightedSAA(**gurobi_params)
            else:
                model = WeightedSAA(**gurobi_params)

            # Run rolling horizon model over t=1...T
            run_kwargs = dict(cost_params)
            if use_weights:
                run_kwargs['weights'] = weights
            if use_robust:
                run_kwargs['epsilons'] = epsilons
            result = rhopt.run(y, ids_train, ids_test, tau, model, **run_kwargs)

            ## ToDo: ExPost

            # Store result: one meta row per test period, same column order as before
            meta_values = {'SKU': SKU, 'n_periods': T, 'tau': tau, 'CR': cost_params['CR']}
            for key in gurobi_meta_keys:
                meta_values[key] = gurobi_params[key]
            meta_values['e'] = kwargs['e'] if use_robust else 0

            meta = pd.DataFrame({key: np.repeat(value, T) for key, value in meta_values.items()})
            meta['epsilon'] = list(epsilons) if use_robust else np.repeat(0, T)

            frames.append(pd.concat([meta, result], axis=1))

    # Combine all settings (DataFrame.append is not available in pandas >= 2.0)
    results = pd.concat(frames) if frames else pd.DataFrame()

    # Save result
    e_suffix = ('_e'+str(kwargs['e'])) if 'e' in kwargs else ''
    results.to_csv(
        path_or_buf=kwargs['path']+'/'+kwargs['model_name']+'_SKU'+str(SKU)+e_suffix+'.csv',
        sep=',', index=False
    )

    # Time
    exec_time_sec = time.time() - st_exec
    cpu_time_sec = time.process_time() - st_cpu

    # Print progress
    if print_progress:
        print('>>>> Done:',str(np.around(exec_time_sec/60,1)), 'minutes')

    # Returns results
    if kwargs.get('return_results', False):
        return results

    # Returns a log
    return {'SKU': SKU, 'exec_time_sec': exec_time_sec, 'cpu_time_sec': cpu_time_sec}

**Context Manager**

This is a context manager for parallel execution with the purpose of reporting progress.

Credits: https://stackoverflow.com/questions/24983493/tracking-progress-of-joblib-parallel-execution

In [None]:
@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """
    Temporarily patch joblib so that completed batches advance the given tqdm progress bar.

    While the context is active, joblib.parallel.BatchCompletionCallBack is replaced by a
    subclass that first updates tqdm_object by the batch size and then delegates to the
    original callback. On exit the original callback class is restored and the progress
    bar is closed.

    Arguments:

        tqdm_object: tqdm progress bar to report into

    Yields:

        tqdm_object (the same progress bar that was passed in)
    """
    # Remember the callback class that is currently installed so it can be restored
    original_callback = joblib.parallel.BatchCompletionCallBack

    class _ProgressReportingCallback(original_callback):
        def __call__(self, *args, **kwargs):
            # Advance the bar first, then let joblib do its regular bookkeeping
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    joblib.parallel.BatchCompletionCallBack = _ProgressReportingCallback
    try:
        yield tqdm_object
    finally:
        # Always undo the patch and close the bar, even if the body raised
        joblib.parallel.BatchCompletionCallBack = original_callback
        tqdm_object.close()

# Experiments

In [None]:
# Set folder names as global variables
# NOTE: these are absolute, machine-specific paths; adjust them when running elsewhere.
os.chdir('/home/fesc/')
# At the top level of a module/cell this 'global' statement has no effect;
# the names assigned below are module-level variables either way.
global PATH_DATA, PATH_PARAMS, PATH_SAMPLES, PATH_RESULTS

# PATH_DATA and PATH_SAMPLES are read by run_experiments when it calls prep_data;
# PATH_RESULTS is used to build the 'path' entry of the params dictionaries.
PATH_DATA = '/home/fesc/MM/Data'
PATH_PARAMS  = '/home/fesc/MM/Data/Params'
PATH_SAMPLES = '/home/fesc/MM/Data/Samples'
PATH_RESULTS = '/home/fesc/MM/Data/Results'

For the models specified, the code below runs the experiment for all given products (SKUs) and over parameter settings (e.g., cost parameters, horizon parameters, etc.). In total, we have 460 products (SKUs), each with 3 different cost parameter settings varying the critical ratio (CR) of holding and backlogging cost, {CR=0.50, CR=0.75, CR=0.90}, and each with 5 different lengths of the rolling look-ahead horizon tau, {1,2,3,4,5}.

## (a) Rolling Horizon Global Weighted SAA

In [None]:
# Define parameters for (a) Rolling Horizon Global Weighted SAA.
# The dictionary is expanded into keyword arguments of run_experiments (**params).
params = {
            
    # Sampling strategy ('global' or 'local'); no 'e' entry, so no robust extension is run
    'sampling': 'global',

    # Last sale_yearweek of training data
    'sale_yearweek': 114,

    # Length T of the test horizon
    'T': 13,

    # Lengths of rolling look-ahead horizons
    'tau': [1,2,3,4,5],

    # Cost param settings (one experiment per setting; each inner dictionary is passed
    # as keyword arguments to the rolling horizon run). For the values below, CR equals b/(b+h).
    'cost_params': {

        1: {'CR': 0.50, 'K': 100, 'u': 0.5, 'h': 1, 'b': 1},
        2: {'CR': 0.75, 'K': 100, 'u': 0.5, 'h': 1, 'b': 3},
        3: {'CR': 0.90, 'K': 100, 'u': 0.5, 'h': 1, 'b': 9}

    },

    # Gurobi meta params (passed as keyword arguments to the model constructor).
    # Note: 'obj_timeout_max_sec' is passed to the model but is not written to the results file.
    'gurobi_params': {

        'LogToConsole': 1, 
        'Threads': 1, 
        'NonConvex': 2, 
        'PSDTol': 1e-3, # 0.1%
        'MIPGap': 1e-3, # 0.1%
        'NumericFocus': 0, 
        'obj_improvement': 1e-3, # 0.1%
        'obj_timeout_sec': 3*60, # 3 min
        'obj_timeout_max_sec': 10*60, # 10 min

    },
    
    # Directory and file name prefix of the per-SKU result CSV files
    'path': PATH_RESULTS+'/GwSAA',
    'model_name': 'GwSAA',
    
    # No progress printing; run_experiments returns a timing log instead of the results
    'print_progress': False,
    'return_results': False
    
}

In [None]:
# Set path: make sure the results directory exists before the workers try to write
# their CSV files into it (no error if the directory is already there)
os.makedirs(params['path'], exist_ok=True)

# Specify number of cores to use for parallel execution
n_jobs = 32

# Specify range of products (SKUs) to iterate over
SKU_range = range(1,460+1)

# Run for each product (SKU) in parallel; resultslog holds one timing log per SKU
with tqdm_joblib(tqdm(desc='Progress', total=len(SKU_range))) as progress_bar:
    resultslog = Parallel(n_jobs=n_jobs)(delayed(run_experiments)(SKU, **params)
                                         for SKU in SKU_range)

## (b) Rolling Horizon Global Robust Weighted SAA

In [None]:
# Define parameters for (b) Rolling Horizon Global Robust Weighted SAA.
# The dictionary is expanded into keyword arguments of run_experiments (**params).
params = {
            
    # Sampling strategy ('global' or 'local')
    'sampling': 'global',

    # Robust uncertainty set threshold multiplier.
    # None is only a placeholder: params['e'] is set to a number in each run cell before execution.
    'e': None,

    # Last sale_yearweek of training data
    'sale_yearweek': 114,

    # Length T of the test horizon
    'T': 13,

    # Lengths of rolling look-ahead horizons
    'tau': [1,2,3,4,5],

    # Cost param settings (one experiment per setting; each inner dictionary is passed
    # as keyword arguments to the rolling horizon run). For the values below, CR equals b/(b+h).
    'cost_params': {

        1: {'CR': 0.50, 'K': 100, 'u': 0.5, 'h': 1, 'b': 1},
        2: {'CR': 0.75, 'K': 100, 'u': 0.5, 'h': 1, 'b': 3},
        3: {'CR': 0.90, 'K': 100, 'u': 0.5, 'h': 1, 'b': 9}

    },

    # Gurobi meta params (passed as keyword arguments to the model constructor).
    # 'NumericFocus' is 3 here, whereas the non-robust setting (a) uses 0.
    # Note: 'obj_timeout_max_sec' is passed to the model but is not written to the results file.
    'gurobi_params': {

        'LogToConsole': 1, 
        'Threads': 1, 
        'NonConvex': 2, 
        'PSDTol': 1e-3, # 0.1%
        'MIPGap': 1e-3, # 0.1%
        'NumericFocus': 3, 
        'obj_improvement': 1e-3, # 0.1%
        'obj_timeout_sec': 3*60, # 3 min
        'obj_timeout_max_sec': 10*60, # 10 min

    },
    
    # Directory and file name prefix of the per-SKU result CSV files
    # (run_experiments appends '_e<e>' to the file name when 'e' is provided)
    'path': PATH_RESULTS+'/GwSAAR',
    'model_name': 'GwSAAR',
    
    # No progress printing; run_experiments returns a timing log instead of the results
    'print_progress': False,
    'return_results': False
    
}

In [None]:
# Set path: make sure the results directory exists before the workers try to write
# their CSV files into it (no error if the directory is already there)
os.makedirs(params['path'], exist_ok=True)

# Specify number of cores to use for parallel execution
n_jobs = 32

# Specify range of products (SKUs) to iterate over
SKU_range = range(1,460+1)

# Uncertainty set: multiplier e for the uncertainty set threshold epsilon
params['e'] = 1

# Run for each product (SKU) in parallel; resultslog holds one timing log per SKU
with tqdm_joblib(tqdm(desc='Progress', total=len(SKU_range))) as progress_bar:
    resultslog = Parallel(n_jobs=n_jobs)(delayed(run_experiments)(SKU, **params)
                                         for SKU in SKU_range)

In [None]:
# Set path: make sure the results directory exists before the workers try to write
# their CSV files into it (no error if the directory is already there)
os.makedirs(params['path'], exist_ok=True)

# Specify number of cores to use for parallel execution
n_jobs = 32

# Specify range of products (SKUs) to iterate over
SKU_range = range(1,460+1)

# Uncertainty set: multiplier e for the uncertainty set threshold epsilon
params['e'] = 3

# Run for each product (SKU) in parallel; resultslog holds one timing log per SKU
with tqdm_joblib(tqdm(desc='Progress', total=len(SKU_range))) as progress_bar:
    resultslog = Parallel(n_jobs=n_jobs)(delayed(run_experiments)(SKU, **params)
                                         for SKU in SKU_range)

In [None]:
# Set path: make sure the results directory exists before the workers try to write
# their CSV files into it (no error if the directory is already there)
os.makedirs(params['path'], exist_ok=True)

# Specify number of cores to use for parallel execution
n_jobs = 32

# Specify range of products (SKUs) to iterate over
SKU_range = range(1,460+1)

# Uncertainty set: multiplier e for the uncertainty set threshold epsilon
params['e'] = 6

# Run for each product (SKU) in parallel; resultslog holds one timing log per SKU
with tqdm_joblib(tqdm(desc='Progress', total=len(SKU_range))) as progress_bar:
    resultslog = Parallel(n_jobs=n_jobs)(delayed(run_experiments)(SKU, **params)
                                         for SKU in SKU_range)

In [None]:
# Set path: make sure the results directory exists before the workers try to write
# their CSV files into it (no error if the directory is already there)
os.makedirs(params['path'], exist_ok=True)

# Specify number of cores to use for parallel execution
n_jobs = 32

# Specify range of products (SKUs) to iterate over
SKU_range = range(1,460+1)

# Uncertainty set: multiplier e for the uncertainty set threshold epsilon
params['e'] = 12

# Run for each product (SKU) in parallel; resultslog holds one timing log per SKU
with tqdm_joblib(tqdm(desc='Progress', total=len(SKU_range))) as progress_bar:
    resultslog = Parallel(n_jobs=n_jobs)(delayed(run_experiments)(SKU, **params)
                                         for SKU in SKU_range)

## (c) Rolling Horizon Local Weighted SAA

## (d) Rolling Horizon Local Robust Weighted SAA

## (e) Rolling Horizon SAA

## (f) Ex-post optimal, deterministic model

# Aggregate all results