In [2]:
import numpy as np
import pandas as pd
import datetime as dt
from dateutil.relativedelta import relativedelta
from tezcatli_scripts import load_data, utils, pre_process , write_to_database as w2d #, get_ts_features , fit_models,
from tezcatli_scripts.fit_models import  Darts, Orbit

### For tracking execution times
from os import path
import time
#import random
from random import getrandbits, seed, sample
import pickle

#For reading options from command line
import sys, getopt

import warnings
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = [20,6]
pd.set_option('display.max_columns',12)
pd.set_option('display.width', 1000)

In [3]:
from darts.models import Prophet
from darts.models import (
    NaiveSeasonal,
    NaiveDrift,
    NaiveMean,
    ExponentialSmoothing,
    AutoARIMA,
    #StandardRegressionModel,
    Theta,
    FFT,
    Croston,
    LightGBMModel,
    RandomForest,
    RegressionEnsembleModel,
    TBATS,
    BATS,
    RegressionModel)
from darts.metrics import mape, smape, mase
from orbit.models import DLT


In [4]:
#%% Models to run ###
model_frames = ['Darts','Orbit']
dart_models = [ExponentialSmoothing(), NaiveSeasonal(), NaiveDrift(), NaiveMean(), AutoARIMA(), Theta(), FFT(), Prophet(),Croston(),LightGBMModel(lags=1),RandomForest(lags= 1,random_state=2309),RegressionEnsembleModel(forecasting_models=[ExponentialSmoothing(), NaiveSeasonal(),AutoARIMA(),TBATS()],regression_train_n_points=24),TBATS(),BATS(),RegressionModel(lags=1),RegressionModel(lags=10)]#, StandardRegressionModel()]
dart_models_names = ['ExponentialSmoothing', 'NaiveSeasonal','NaiveDrift','NaiveMean','AutoARIMA','Theta', 'FFT','Prophet','Croston','LightGBMModel','RandomForest','RegressionEnsembleModel','TBATS','BATS','RegressionModelL1','RegressionModelL10']#, 'StandardRegression']
models = [DLT(),ExponentialSmoothing(), NaiveSeasonal(), NaiveDrift(), NaiveMean(), AutoARIMA()]#, Theta(), FFT(), Prophet(),Croston(),LightGBMModel(lags=1),RandomForest(lags= 1,random_state=2309),TBATS(),BATS(),RegressionModel(lags=1),RegressionModel(lags=10)]#, StandardRegressionModel(),RegressionEnsembleModel(forecasting_models=[ExponentialSmoothing(), NaiveSeasonal(),AutoARIMA(),TBATS()],regression_train_n_points=24)]
models_names = ['Orbit','ExponentialSmoothing', 'NaiveSeasonal','NaiveDrift','NaiveMean','AutoARIMA']#,'Theta', 'FFT','Prophet','Croston','LightGBMModel','RandomForest','TBATS','BATS','RegressionModelL1','RegressionModelL10']#, 'StandardRegression','RegressionEnsembleModel']
orbit_models = [DLT()]
orbit_models_names = ['DampedLinearTrend']

In [5]:
run_config = utils.read_params_in_from_json('run_config.json')
group_key = run_config['dimensions'].split('-')
#seed(2309)
run_id = getrandbits(32)
run_date = dt.datetime.today().date()
run_datascientist = run_config['data_scientist']
run_scope = run_config['scope']
run_response = run_config['response']
run_timegrain = run_config['timegrain']
run_dimensions = run_config['dimensions']
run_type = run_config['type']
holdout_horizon = run_config['train_horizon']
forecast_horizon = run_config['forecast_horizon']

test_run = True

write_file = 'yes'

In [6]:
prep_comp_prod = pd.read_pickle('prep_init.pkl')
prep_comp_prod.head()

Unnamed: 0,Order_Create_Date,Order_Volume_(STD),group_key,run_id
0,2014-04-01,1940915.25,Carolinas_C+ St Plk,546085281
1,2014-05-01,2353236.0,Carolinas_C+ St Plk,546085281
2,2014-06-01,2296094.5,Carolinas_C+ St Plk,546085281
3,2014-07-01,2512645.0,Carolinas_C+ St Plk,546085281
4,2014-08-01,2194348.25,Carolinas_C+ St Plk,546085281


In [7]:
prep_comp_prod['group_key'].unique()

array(['Carolinas_C+ St Plk', 'Canada East_C+ St NT3',
       'Canada East_C+ St Plk', 'Canada East_C+ St Pnl',
       'Canada East_Int 1/2 Inch', 'Canada East_Int 1/4 Inch',
       'Canada East_Pr Pnl', 'Canada West_C+ St NT3',
       'Canada West_C+ St Plk', 'Canada West_C+ St Pnl',
       'Canada West_Int 1/2 Inch', 'Canada West_Int 1/4 Inch',
       'Canada West_Pr Plk', 'Canada West_Pr Pnl', 'Carolinas_C+ St HLD',
       'Carolinas_C+ St NT3', 'Carolinas_C+ St Pnl',
       'Carolinas_Int 1/2 Inch', 'Carolinas_Int 1/4 Inch',
       'Carolinas_Pr CemPre', 'Carolinas_Pr HLD', 'Carolinas_Pr Plk',
       'Carolinas_Pr Pnl', "Carolinas_Pr Soff 12'",
       "Carolinas_Pr Soff 8'", 'Mid Atlantic_C+ St NT3',
       'Mid Atlantic_C+ St Plk', 'Mid Atlantic_C+ St Pnl',
       'Mid Atlantic_Int 1/2 Inch', 'Mid Atlantic_Int 1/4 Inch',
       'Mid Atlantic_Pr Plk', 'Mid Atlantic_Pr Pnl',
       "Mid Atlantic_Pr Soff 12'", "Mid Atlantic_Pr Soff 8'",
       'Mid South_C+ St HLD', 'Mid South_C+ St 

In [8]:
t_train_start = time.time()
prod_dfs, prod_accs,failed_keys = [],[],[]
#ts_feats = []
## Certain stat models have constraints on length of time series , see later checks##
time_models = ['ExponentialSmoothing']
time_models2 = ['AutoARIMA']
time_models3 = ['RegressionEnsembleModel','TBATS','BATS']
cnt = 0
keys = prep_comp_prod['group_key'].unique()

In [9]:
#key = np.where(keys=='Midwest Central_C+ St Plk')
#keys[key].item()
key = 'Midwest Central_C+ St Plk'

In [10]:
prod_df = prep_comp_prod[prep_comp_prod['group_key']==key]
prod_df

Unnamed: 0,Order_Create_Date,Order_Volume_(STD),group_key,run_id
5076,2014-04-01,1597544.75,Midwest Central_C+ St Plk,546085281
5077,2014-05-01,1657481.75,Midwest Central_C+ St Plk,546085281
5078,2014-06-01,1952362.00,Midwest Central_C+ St Plk,546085281
5079,2014-07-01,1967361.25,Midwest Central_C+ St Plk,546085281
5080,2014-08-01,1916091.00,Midwest Central_C+ St Plk,546085281
...,...,...,...,...
5179,2022-11-01,1996800.25,Midwest Central_C+ St Plk,546085281
5180,2022-12-01,2259604.25,Midwest Central_C+ St Plk,546085281
5181,2023-01-01,1872753.25,Midwest Central_C+ St Plk,546085281
5182,2023-02-01,1313500.75,Midwest Central_C+ St Plk,546085281


In [11]:
models_list = list(zip(models,models_names))

In [12]:
models_list

[(<orbit.forecaster.full_bayes.FullBayesianForecaster at 0x2622adf3a48>,
  'Orbit'),
 (ExponentialSmoothing(trend=ModelMode.ADDITIVE, damped=False, seasonal=SeasonalityMode.ADDITIVE, seasonal_periods=None, random_state=0),
  'ExponentialSmoothing'),
 (NaiveSeasonal(K=1), 'NaiveSeasonal'),
 (NaiveDrift(), 'NaiveDrift'),
 (NaiveMean(), 'NaiveMean'),
 (AutoARIMA(add_encoders=None), 'AutoARIMA')]

In [13]:
run_mofcst = dt.datetime(run_config['current_year'],run_config['current_month'],1)
train_date = run_mofcst + relativedelta(months=-holdout_horizon)

In [14]:
def fit_models_parallel(models_list_item,prod_df, run_mofcst,train_date):#,model_dfs, model_accs):
    name = models_list_item[1]
    model = models_list_item[0]
    if (name == 'Orbit'):
        model_frame = Orbit(model,prod_df,run_mofcst,train_date=train_date,forecast_horizon=None)
    else:
        model_frame = Darts(model,prod_df, run_mofcst, train_date,forecast_horizon=None)
    #print(model_frame)

    #### Create time series
    #prod_ts = pre_process.create_ts(prod_df,run_mofcst)
    model_frame.prep_data()
    #### Split train and test sets for holdout accuracy
    #train,val = prod_ts.split_before(pd.Timestamp(train_date))
    model_frame.split_data()
    #TODO create fit function (returns model params)
    # check for length , Expo can't handle less than 24, autoarima needs 30
#     if ( (len(model_frame.train)<24) & (name in time_models) ):
#         failed_keys.append((key,name))
#         print('error')
#     elif ( (len(model_frame.train)<30) & (name in time_models2)) :
#         failed_keys.append((key,name))
#         print('error')
#     elif ( (name in time_models3) & (model_frame.get_train_df_ordervol().tail(12).sum()<12) ):
#         failed_keys.append((key,name))
#         print('error')
    #### Fit and pred ##
    t_modelfitpred_start = time.time()
    try:
        model_frame.train_model()
    except ZeroDivisionError:
        jhds_logger.error(f'A zero division error occurred with key {key} in training model {name}')

    try:
        pred = model_frame.pred_model()
    except ValueError:
        jhds_logger.error(f'Training model {name} with key {key} failed due to NaN, infinity or too large number')

    pred_df = pred.pd_dataframe()
    pred_df['model'] = name
    pred_df.rename(columns={'0':'fcst'},inplace=True)
    #model_dfs.append(pred_df)
    #### Accuracies ##
    accuracies = pd.DataFrame()
    accuracies['mape'] = pd.Series(mape(model_frame.val,pred))
    accuracies['smape'] = pd.Series(smape(model_frame.val,pred))
    accuracies['mase'] = pd.Series(mase(model_frame.val,pred,insample=model_frame.train))
    accuracies['model'] = name
    #model_accs.append(accuracies)
    return pred_df#, accuracies
    #return model_frame

In [15]:
log_file_path = path.join(path.dirname(path.abspath('params/log.conf')), 'log.conf')
jhds_logger = utils.setup_logger(log_file_path)
jhds_logger.info('Finished setup')

In [16]:
import time
from joblib import Parallel, delayed, parallel_backend

In [17]:
import os
os.environ['MKL_NUM_THREADS'] = '1'
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['MKL_DYNAMIC'] = 'FALSE'

In [18]:
model_dfs, model_accs = [],[]

In [19]:
start_time = time.perf_counter()
with parallel_backend('threading',n_jobs=4):
    model_dfs.append(Parallel()(delayed(fit_models_parallel)(i,prod_df, run_mofcst,train_date) for i in models_list))#,model_dfs, model_accs) for i in models_list))
finish_time = time.perf_counter()
print(f"Program finished in {finish_time-start_time} seconds")

Program finished in 5.515809900000001 seconds


In [20]:
model_dfs

[[component          Order_Volume_(STD)  model
  Order_Create_Date                           
  2022-10-01               2.211990e+06  Orbit
  2022-11-01               1.982950e+06  Orbit
  2022-12-01               2.174170e+06  Orbit
  2023-01-01               1.604974e+06  Orbit
  2023-02-01               1.665276e+06  Orbit
  2023-03-01               2.390136e+06  Orbit,
  component          Order_Volume_(STD)                 model
  Order_Create_Date                                          
  2022-10-01               2.306495e+06  ExponentialSmoothing
  2022-11-01               2.011083e+06  ExponentialSmoothing
  2022-12-01               1.882445e+06  ExponentialSmoothing
  2023-01-01               1.683485e+06  ExponentialSmoothing
  2023-02-01               1.892165e+06  ExponentialSmoothing
  2023-03-01               2.720816e+06  ExponentialSmoothing,
  component          Order_Volume_(STD)          model
  Order_Create_Date                                   
  2022-10-01    

In [21]:
#from multiprocessing import Pool

In [22]:
# model_dfs, model_accs = [],[]
# model_args = [(models_list,prod_df, run_mofcst,train_date,model_dfs, model_accs)]
# model_args

In [23]:
# def PoolHandler():
#     p = Pool(6)
#     result = p.starmap(fit_models_parallel,iterable=model_args)
#     return result

In [24]:
# PoolHandler()

In [25]:
models_list

[(<orbit.forecaster.full_bayes.FullBayesianForecaster at 0x2622adf3a48>,
  'Orbit'),
 (ExponentialSmoothing(trend=ModelMode.ADDITIVE, damped=False, seasonal=SeasonalityMode.ADDITIVE, seasonal_periods=None, random_state=0),
  'ExponentialSmoothing'),
 (NaiveSeasonal(K=1), 'NaiveSeasonal'),
 (NaiveDrift(), 'NaiveDrift'),
 (NaiveMean(), 'NaiveMean'),
 (AutoARIMA(add_encoders=None), 'AutoARIMA')]

In [26]:
def fit_models_parallel_keys(item,prod_df, run_mofcst,train_date):#,model_dfs, model_accs):
    key = item[0]
    models_list_item= item[1]
    t_model_start = time.time()
    name = models_list_item[1]
    model = models_list_item[0]
    data_df = prod_df[prod_df['group_key']==key]
    if (name == 'Orbit'):
        model_frame = Orbit(model,data_df,run_mofcst,train_date=train_date,forecast_horizon=None)
    else:
        model_frame = Darts(model,data_df, run_mofcst, train_date,forecast_horizon=None)
    #print(model_frame)

    #### Create time series
    #prod_ts = pre_process.create_ts(prod_df,run_mofcst)
    model_frame.prep_data()
    #### Split train and test sets for holdout accuracy
    #train,val = prod_ts.split_before(pd.Timestamp(train_date))
    model_frame.split_data()
    #TODO create fit function (returns model params)
    # check for length , Expo can't handle less than 24, autoarima needs 30
#     if ( (len(model_frame.train)<24) & (name in time_models) ):
#         failed_keys.append((key,name))
#         print('error')
#     elif ( (len(model_frame.train)<30) & (name in time_models2)) :
#         failed_keys.append((key,name))
#         print('error')
#     elif ( (name in time_models3) & (model_frame.get_train_df_ordervol().tail(12).sum()<12) ):
#         failed_keys.append((key,name))
#         print('error')
    #### Fit and pred ##
    t_modelfitpred_start = time.time()
    try:
        model_frame.train_model()
    except ZeroDivisionError:
        jhds_logger.error(f'A zero division error occurred with key {key} in training model {name}')
    except AttributeError: 
        jhds_logger.error(f"'NoneType' object has no attribute 'forecast', with key {key} in training model {name}")

    try:
        pred = model_frame.pred_model()
        pred_df = pred.pd_dataframe()
        #### Accuracies ##
        accuracies = pd.DataFrame()
        accuracies['mape'] = pd.Series(mape(model_frame.val,pred))
        accuracies['smape'] = pd.Series(smape(model_frame.val,pred))
        accuracies['mase'] = pd.Series(mase(model_frame.val,pred,insample=model_frame.train))
    except ValueError:
        pred_df = pd.DataFrame()
        accuracies = pd.DataFrame()
        jhds_logger.error(f'Training model {name} with key {key} failed due to NaN, infinity or too large number')
    except AttributeError: 
        jhds_logger.error(f"'NoneType' object has no attribute 'forecast', with key {key} in training model {name}")

    pred_df['model'] = name
    pred_df.rename(columns={'0':'fcst'},inplace=True)
    pred_df['group_key'] = key
    #model_dfs.append(pred_df)
    accuracies['model'] = name
    accuracies['group_key'] = key
    #model_accs.append(accuracies)
    #return pred_df#, accuracies
    #return model_frame
    # accs_df = pd.concat(model_accs)
    # accs_df['group_key'] = key
    # models_df = pd.concat(model_dfs)
    # models_df['group_key'] = key
    # prod_dfs.append(models_df)
    # prod_accs.append(accs_df)
    # #t_inner.toc('Product Model fit and preds took ')
    t_model_end = time.time()
    jhds_logger.info(f'Finished cycling through models for key: {key}, cycle took {"{:.2f}".format((t_model_end-t_model_start)/60)} minutes')
    #return pred_df
    ##return accuracies
    return [pred_df, accuracies]

In [27]:
pred_test = pd.DataFrame()
pred_test

In [28]:
import random

In [29]:
#keys = ['Midwest Central_C+ St Plk', 'Midwest Central_Pr Plk'] 
#keys = random.sample(list(prep_comp_prod['group_key'].unique()),10)
with open('keys.pkl', 'rb') as f:
    keys = pickle.load(f)
keys

['Midwest East_C+ St Plk',
 'Southeast_Int 1/4 Inch',
 'Mid Atlantic_Int 1/2 Inch',
 'Mountain_Pr Plk',
 "Mid Atlantic_Pr Soff 8'",
 'Mid South_C+ St Pnl',
 'Pacific Northwest_Pr HLD',
 'North Atlantic_C+ St Pnl',
 'Pacific Northwest_Pr CemPre',
 'Southeast_C+ St HLD']

In [30]:
keys_models = [(key,model) for key in keys for model in models_list]
keys_models    

[('Midwest East_C+ St Plk',
  (<orbit.forecaster.full_bayes.FullBayesianForecaster at 0x2622adf3a48>,
   'Orbit')),
 ('Midwest East_C+ St Plk',
  (ExponentialSmoothing(trend=ModelMode.ADDITIVE, damped=False, seasonal=SeasonalityMode.ADDITIVE, seasonal_periods=None, random_state=0),
   'ExponentialSmoothing')),
 ('Midwest East_C+ St Plk', (NaiveSeasonal(K=1), 'NaiveSeasonal')),
 ('Midwest East_C+ St Plk', (NaiveDrift(), 'NaiveDrift')),
 ('Midwest East_C+ St Plk', (NaiveMean(), 'NaiveMean')),
 ('Midwest East_C+ St Plk', (AutoARIMA(add_encoders=None), 'AutoARIMA')),
 ('Southeast_Int 1/4 Inch',
  (<orbit.forecaster.full_bayes.FullBayesianForecaster at 0x2622adf3a48>,
   'Orbit')),
 ('Southeast_Int 1/4 Inch',
  (ExponentialSmoothing(trend=ModelMode.ADDITIVE, damped=False, seasonal=SeasonalityMode.ADDITIVE, seasonal_periods=None, random_state=0),
   'ExponentialSmoothing')),
 ('Southeast_Int 1/4 Inch', (NaiveSeasonal(K=1), 'NaiveSeasonal')),
 ('Southeast_Int 1/4 Inch', (NaiveDrift(), 'NaiveD

In [31]:
prod_df = prep_comp_prod[prep_comp_prod['group_key'].isin(keys)]
prod_df

Unnamed: 0,Order_Create_Date,Order_Volume_(STD),group_key,run_id
3024,2014-04-01,1.588058e+06,Mid Atlantic_Int 1/2 Inch,546085281
3025,2014-05-01,1.645750e+06,Mid Atlantic_Int 1/2 Inch,546085281
3026,2014-06-01,1.259618e+06,Mid Atlantic_Int 1/2 Inch,546085281
3027,2014-07-01,1.333630e+06,Mid Atlantic_Int 1/2 Inch,546085281
3028,2014-08-01,1.468310e+06,Mid Atlantic_Int 1/2 Inch,546085281
...,...,...,...,...
15547,2022-11-01,1.000000e-06,Pacific Northwest_Pr HLD,546085281
15548,2022-12-01,1.000000e-06,Pacific Northwest_Pr HLD,546085281
15549,2023-01-01,1.942232e+05,Pacific Northwest_Pr HLD,546085281
15550,2023-02-01,7.492120e+04,Pacific Northwest_Pr HLD,546085281


In [32]:
#model_dfs = []
model_accs_dfs = []
start_time = time.perf_counter()
with parallel_backend('threading',n_jobs=4):
    #model_dfs.append(Parallel()(delayed(fit_models_parallel_keys)(item,prod_df, run_mofcst,train_date) for item in keys_models if item is not None)) #,model_dfs, model_accs) for i in models_list))
    model_accs_dfs.append(Parallel()(delayed(fit_models_parallel_keys)(item,prod_df, run_mofcst,train_date) for item in keys_models if item is not None)) #,model_dfs, model_accs) for i in models_list))
    #model_dfs.append(Parallel()(delayed(fit_models_parallel)(key, i,prod_df, run_mofcst,train_date) for key in keys for i in models_list)) #,model_dfs, model_accs) for i in models_list))
finish_time = time.perf_counter()
print(f"Program finished in {finish_time-start_time} seconds")

Program finished in 26.632363899999994 seconds


In [33]:
model_accs_dfs

[[[component          Order_Volume_(STD)  model               group_key
   Order_Create_Date                                                   
   2022-10-01               2.506626e+06  Orbit  Midwest East_C+ St Plk
   2022-11-01               2.289201e+06  Orbit  Midwest East_C+ St Plk
   2022-12-01               2.460775e+06  Orbit  Midwest East_C+ St Plk
   2023-01-01               2.246493e+06  Orbit  Midwest East_C+ St Plk
   2023-02-01               2.092613e+06  Orbit  Midwest East_C+ St Plk
   2023-03-01               2.932242e+06  Orbit  Midwest East_C+ St Plk,
           mape      smape      mase  model               group_key
   0  23.494822  22.596591  1.098094  Orbit  Midwest East_C+ St Plk],
  [component          Order_Volume_(STD)                 model               group_key
   Order_Create_Date                                                                  
   2022-10-01               2.678437e+06  ExponentialSmoothing  Midwest East_C+ St Plk
   2022-11-01           

In [34]:
model_dfs, accuracies = zip(*model_accs_dfs[0])

In [35]:
accuracies

(        mape      smape      mase  model               group_key
 0  23.494822  22.596591  1.098094  Orbit  Midwest East_C+ St Plk,
         mape      smape      mase                 model               group_key
 0  26.357833  23.518288  1.135007  ExponentialSmoothing  Midwest East_C+ St Plk,
         mape      smape      mase          model               group_key
 0  29.504585  28.768203  1.410524  NaiveSeasonal  Midwest East_C+ St Plk,
         mape      smape      mase       model               group_key
 0  30.548345  29.308868  1.436131  NaiveDrift  Midwest East_C+ St Plk,
         mape      smape      mase      model               group_key
 0  25.001681  28.800809  1.417184  NaiveMean  Midwest East_C+ St Plk,
         mape      smape      mase      model               group_key
 0  42.567302  34.830384  1.730556  AutoARIMA  Midwest East_C+ St Plk,
        mape      smape      mase  model               group_key
 0  9.947918  10.309247  0.709101  Orbit  Southeast_Int 1/4 Inch,

In [36]:
model_dfs

(component          Order_Volume_(STD)  model               group_key
 Order_Create_Date                                                   
 2022-10-01               2.506626e+06  Orbit  Midwest East_C+ St Plk
 2022-11-01               2.289201e+06  Orbit  Midwest East_C+ St Plk
 2022-12-01               2.460775e+06  Orbit  Midwest East_C+ St Plk
 2023-01-01               2.246493e+06  Orbit  Midwest East_C+ St Plk
 2023-02-01               2.092613e+06  Orbit  Midwest East_C+ St Plk
 2023-03-01               2.932242e+06  Orbit  Midwest East_C+ St Plk,
 component          Order_Volume_(STD)                 model               group_key
 Order_Create_Date                                                                  
 2022-10-01               2.678437e+06  ExponentialSmoothing  Midwest East_C+ St Plk
 2022-11-01               2.414745e+06  ExponentialSmoothing  Midwest East_C+ St Plk
 2022-12-01               2.239311e+06  ExponentialSmoothing  Midwest East_C+ St Plk
 2023-01-01   

In [37]:
results = pd.concat(model_dfs)
results

component,Order_Volume_(STD),model,group_key
Order_Create_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-10-01,2.506626e+06,Orbit,Midwest East_C+ St Plk
2022-11-01,2.289201e+06,Orbit,Midwest East_C+ St Plk
2022-12-01,2.460775e+06,Orbit,Midwest East_C+ St Plk
2023-01-01,2.246493e+06,Orbit,Midwest East_C+ St Plk
2023-02-01,2.092613e+06,Orbit,Midwest East_C+ St Plk
...,...,...,...
2022-11-01,1.715424e+05,AutoARIMA,Southeast_C+ St HLD
2022-12-01,8.381141e+05,AutoARIMA,Southeast_C+ St HLD
2023-01-01,-3.950926e+05,AutoARIMA,Southeast_C+ St HLD
2023-02-01,3.493049e+05,AutoARIMA,Southeast_C+ St HLD


In [38]:
results.reset_index(inplace=True)

In [39]:
results[results['Order_Create_Date']=='2022-10-01'].head(32)

component,Order_Create_Date,Order_Volume_(STD),model,group_key
0,2022-10-01,2506626.0,Orbit,Midwest East_C+ St Plk
6,2022-10-01,2678437.0,ExponentialSmoothing,Midwest East_C+ St Plk
12,2022-10-01,2326735.0,NaiveSeasonal,Midwest East_C+ St Plk
18,2022-10-01,2337796.0,NaiveDrift,Midwest East_C+ St Plk
24,2022-10-01,1940590.0,NaiveMean,Midwest East_C+ St Plk
30,2022-10-01,2754510.0,AutoARIMA,Midwest East_C+ St Plk
36,2022-10-01,942986.5,Orbit,Southeast_Int 1/4 Inch
42,2022-10-01,906620.6,ExponentialSmoothing,Southeast_Int 1/4 Inch
48,2022-10-01,859680.0,NaiveSeasonal,Southeast_Int 1/4 Inch
54,2022-10-01,861390.9,NaiveDrift,Southeast_Int 1/4 Inch


In [40]:
res_accuracies = pd.concat(accuracies)
res_accuracies.head(32)

Unnamed: 0,mape,smape,mase,model,group_key
0,23.494822,22.596591,1.098094,Orbit,Midwest East_C+ St Plk
0,26.357833,23.518288,1.135007,ExponentialSmoothing,Midwest East_C+ St Plk
0,29.504585,28.768203,1.410524,NaiveSeasonal,Midwest East_C+ St Plk
0,30.548345,29.308868,1.436131,NaiveDrift,Midwest East_C+ St Plk
0,25.001681,28.800809,1.417184,NaiveMean,Midwest East_C+ St Plk
0,42.567302,34.830384,1.730556,AutoARIMA,Midwest East_C+ St Plk
0,9.947918,10.309247,0.709101,Orbit,Southeast_Int 1/4 Inch
0,12.672956,13.379891,0.894491,ExponentialSmoothing,Southeast_Int 1/4 Inch
0,11.588366,12.177806,0.837067,NaiveSeasonal,Southeast_Int 1/4 Inch
0,11.318634,11.817141,0.813465,NaiveDrift,Southeast_Int 1/4 Inch


In [41]:
res_accuracies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 60 entries, 0 to 0
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   mape       60 non-null     float64
 1   smape      60 non-null     float64
 2   mase       60 non-null     float64
 3   model      60 non-null     object 
 4   group_key  60 non-null     object 
dtypes: float64(3), object(2)
memory usage: 2.8+ KB


In [46]:
res_accuracies.reset_index(inplace=True)

In [47]:
holdout_winners = res_accuracies.loc[res_accuracies.groupby(['group_key'])['mase'].idxmin()]
winners = dict(zip(holdout_winners['group_key'],holdout_winners['model']))

In [49]:
holdout_winners

Unnamed: 0,index,mape,smape,mase,model,group_key
17,0,4.739034,4.833573,0.254579,AutoARIMA,Mid Atlantic_Int 1/2 Inch
25,0,355.3824,62.056184,0.530623,ExponentialSmoothing,Mid Atlantic_Pr Soff 8'
34,0,54.7766,79.764395,4.799647,NaiveMean,Mid South_C+ St Pnl
0,0,23.49482,22.596591,1.098094,Orbit,Midwest East_C+ St Plk
22,0,48.96438,50.046661,2.59282,NaiveMean,Mountain_Pr Plk
42,0,22.26771,17.166734,1.265286,Orbit,North Atlantic_C+ St Pnl
48,0,99.46672,60.226888,0.659342,Orbit,Pacific Northwest_Pr CemPre
37,0,250634900000.0,157.598231,1.473333,ExponentialSmoothing,Pacific Northwest_Pr HLD
58,0,61.08281,69.929913,3.663023,NaiveMean,Southeast_C+ St HLD
6,0,9.947918,10.309247,0.709101,Orbit,Southeast_Int 1/4 Inch


In [48]:
winners

{'Mid Atlantic_Int 1/2 Inch': 'AutoARIMA',
 "Mid Atlantic_Pr Soff 8'": 'ExponentialSmoothing',
 'Mid South_C+ St Pnl': 'NaiveMean',
 'Midwest East_C+ St Plk': 'Orbit',
 'Mountain_Pr Plk': 'NaiveMean',
 'North Atlantic_C+ St Pnl': 'Orbit',
 'Pacific Northwest_Pr CemPre': 'Orbit',
 'Pacific Northwest_Pr HLD': 'ExponentialSmoothing',
 'Southeast_C+ St HLD': 'NaiveMean',
 'Southeast_Int 1/4 Inch': 'Orbit'}

In [54]:
def fcst_models_parallel_keys(item,prod_df, run_mofcst,train_date,winners):#,model_dfs, model_accs):
    key = item[0]
    models_list_item= item[1]
    t_modelfcst_start = time.time()
    name = models_list_item[1]
    model = models_list_item[0]
    data_df = prod_df[prod_df['group_key']==key]
    prod_df = prep_comp_prod[prep_comp_prod['group_key']==key]

    wmodel_name = winners.get(key)
    #wmodel = models[models_names.index(wmodel_name)]

    #### Instantiate model framework
    if (name == 'Orbit'):
        wmodel_frame = Orbit(model,prod_df,run_mofcst,train_date=train_date, forecast_horizon=forecast_horizon)
    else:
        wmodel_frame = Darts(model,prod_df, run_mofcst, train_date,forecast_horizon=forecast_horizon)

    #### Create time series ##
    #prod_ts = pre_process.create_ts(prod_df,run_mofcst)
    wmodel_frame.prep_data()

    #fcst_horizon = forecast_horizon

    #### Fit and predict future
    try:
    #wmodel_frame.fit(prod_ts)
        wmodel_frame.train_model(train_mode=False)
    except Exception as e:
        jhds_logger.exception(f'Exception {e} occurred in key {key} and model {wmodel_name}')

    try:
        fcst = wmodel_frame.pred_model(train_mode=False)
    except ValueError:
        jhds_logger.error(f'Forecasting key {key} failed due to NaN, infinity or too large number')

    fcst_df = fcst.pd_dataframe()
    fcst_df['model'] = name
    fcst_df['group_key'] = key
    # Check if the model used was a winner
    fcst_df['winner'] = np.where((name==wmodel_name),True,False)
    fcst_df.rename(columns={'0':'fcst'},inplace=True)
    #forecasts.append(fcst_df)
    #t_inner.toc(f'Product Model fit and preds for {key} took ')
    t_modelfcst_end = time.time()
    jhds_logger.info(f'Finished forecasting for {key}, it took {"{:.2f}".format((t_modelfcst_end-t_modelfcst_start)/60)} minutes')

    return fcst_df

In [50]:
winners.get('Midwest East_C+ St Plk')

'Orbit'

In [55]:
model_fcsts_dfs = []
start_time = time.perf_counter()
with parallel_backend('threading',n_jobs=4):
    model_fcsts_dfs.append(Parallel()(delayed(fcst_models_parallel_keys)(item,prod_df, run_mofcst,train_date,winners) for item in keys_models if item is not None)) 
finish_time = time.perf_counter()
print(f"Program finished in {finish_time-start_time} seconds")

Program finished in 25.100535300000047 seconds


In [56]:
model_fcsts_dfs

[[component          Order_Volume_(STD)  model               group_key  winner
  Order_Create_Date                                                           
  2023-04-01               2.815866e+06  Orbit  Midwest East_C+ St Plk    True
  2023-05-01               2.298322e+06  Orbit  Midwest East_C+ St Plk    True
  2023-06-01               2.738838e+06  Orbit  Midwest East_C+ St Plk    True
  2023-07-01               2.524353e+06  Orbit  Midwest East_C+ St Plk    True
  2023-08-01               2.828043e+06  Orbit  Midwest East_C+ St Plk    True
  2023-09-01               2.509730e+06  Orbit  Midwest East_C+ St Plk    True
  2023-10-01               3.014950e+06  Orbit  Midwest East_C+ St Plk    True
  2023-11-01               2.140365e+06  Orbit  Midwest East_C+ St Plk    True
  2023-12-01               2.264904e+06  Orbit  Midwest East_C+ St Plk    True
  2024-01-01               2.323024e+06  Orbit  Midwest East_C+ St Plk    True
  2024-02-01               1.988701e+06  Orbit  Midw

In [58]:
forecasts = pd.concat(model_fcsts_dfs[0])

In [60]:
forecasts.reset_index()

component,Order_Create_Date,Order_Volume_(STD),model,group_key,winner
0,2023-04-01,2.815866e+06,Orbit,Midwest East_C+ St Plk,True
1,2023-05-01,2.298322e+06,Orbit,Midwest East_C+ St Plk,True
2,2023-06-01,2.738838e+06,Orbit,Midwest East_C+ St Plk,True
3,2023-07-01,2.524353e+06,Orbit,Midwest East_C+ St Plk,True
4,2023-08-01,2.828043e+06,Orbit,Midwest East_C+ St Plk,True
...,...,...,...,...,...
1435,2024-11-01,1.491611e+05,AutoARIMA,Southeast_C+ St HLD,False
1436,2024-12-01,1.499498e+05,AutoARIMA,Southeast_C+ St HLD,False
1437,2025-01-01,1.505699e+05,AutoARIMA,Southeast_C+ St HLD,False
1438,2025-02-01,1.510573e+05,AutoARIMA,Southeast_C+ St HLD,False


In [61]:
forecasts[forecasts['group_key']=='Midwest East_C+ St Plk']

component,Order_Volume_(STD),model,group_key,winner
Order_Create_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-04-01,2.815866e+06,Orbit,Midwest East_C+ St Plk,True
2023-05-01,2.298322e+06,Orbit,Midwest East_C+ St Plk,True
2023-06-01,2.738838e+06,Orbit,Midwest East_C+ St Plk,True
2023-07-01,2.524353e+06,Orbit,Midwest East_C+ St Plk,True
2023-08-01,2.828043e+06,Orbit,Midwest East_C+ St Plk,True
...,...,...,...,...
2024-11-01,2.976067e+06,AutoARIMA,Midwest East_C+ St Plk,False
2024-12-01,2.990515e+06,AutoARIMA,Midwest East_C+ St Plk,False
2025-01-01,3.004963e+06,AutoARIMA,Midwest East_C+ St Plk,False
2025-02-01,3.019411e+06,AutoARIMA,Midwest East_C+ St Plk,False
