**Script for collecting Python-based run results**

In [None]:
import pandas as pd
pd.set_option('display.precision', 2)

import numpy as np
import datetime as dt
import copy
import glob
import os

**Valid values for `setting_id`:**
* state-space models: ss00_1001, ss01_2002, ss02_3003
* regression models: regr01_4004, regr02_5005

In [None]:
## experiment setting to collect; also used below as the directory that holds
## the per-model result folders and as the prefix of the report file name
setting_id = "ss00_1001"

In [None]:
## every (model, sample_size) pair below is expected to have its own
## '<model>_<sample_size>' result folder under the setting directory
sample_sizes = [500, 1000, 3000]
models = [
    'MTMFSeq2Seq',
    'MTMFSeq2One',
    'transformer',
    'MLP',
    'GBM',
    'DeepAR',
    'NHiTS',
    'ARIMA',
    'SES',
    'Naive',
]

**collect raw error metrics that are saved during model training and forecasting**

In [None]:
## `tags` lists the value columns ('F','N1','N2','N3') of the error sheets;
## it is kept here because the original cell defined it
x_err_list, y_err_list, tags = [], [], ['F','N1','N2','N3']


def _load_error_sheets(workbook, model, sample_size):
    """Read the x- and y-error summary sheets of one run's workbook.

    Both sheets are parsed in a single ``pd.read_excel`` call, so the workbook
    is opened only once. For each sheet, the first column is used as the index,
    exposed as a regular ``step`` column, and the run is tagged with
    ``sample_size`` and ``model`` columns (appended in that order).

    Parameters
    ----------
    workbook : str
        Path to the run's ``forecast_err.xlsx`` file.
    model : str
        Model name written to the ``model`` column.
    sample_size : int
        Sample size written to the ``sample_size`` column.

    Returns
    -------
    tuple of (pd.DataFrame, pd.DataFrame)
        The ``summary_x_err`` frame followed by the ``summary_y_err`` frame.

    Raises
    ------
    Whatever ``pd.read_excel`` raises when the file or a sheet is missing.
    """
    sheet_names = ['summary_x_err', 'summary_y_err']
    ## passing a list of sheet names returns a dict keyed by sheet name
    sheets = pd.read_excel(workbook, sheet_name=sheet_names, index_col=0)
    return tuple(
        sheets[name]
        .rename_axis('step')
        .reset_index()
        .assign(sample_size=sample_size, model=model)
        for name in sheet_names
    )


for model in models:
    for sample_size in sample_sizes:
        ## gather the raw error
        filename = f'{setting_id}/{model}_{sample_size}/forecast_err.xlsx'
        xerr, yerr = _load_error_sheets(filename, model, sample_size)

        x_err_list.append(xerr)
        y_err_list.append(yerr)

## stack all runs; the per-run row index is kept as-is, as before
x_err = pd.concat(x_err_list)
y_err = pd.concat(y_err_list)

In [None]:
## reshape to long format: the four value columns are stacked into a single
## 'value' column, with the originating column name recorded as 'vintage'
x_err = x_err.melt(
    id_vars=['step', 'metric', 'model', 'sample_size'],
    value_vars=['F', 'N1', 'N2', 'N3'],
    var_name='vintage',
    value_name='value',
)

## each step label is split on '_' and its second piece is parsed as an integer
x_step_lookup = {}
for step_label in x_err['step'].unique():
    x_step_lookup[step_label] = int(step_label.split('_')[1])
x_err['step'] = x_err['step'].map(x_step_lookup)

In [None]:
## reshape to long format: the four value columns are stacked into a single
## 'value' column, with the originating column name recorded as 'vintage'
y_err = y_err.melt(
    id_vars=['step', 'metric', 'model', 'sample_size'],
    value_vars=['F', 'N1', 'N2', 'N3'],
    var_name='vintage',
    value_name='value',
)

## each step label is split on '_' and its second piece is parsed as an integer
y_step_lookup = {}
for step_label in y_err['step'].unique():
    y_step_lookup[step_label] = int(step_label.split('_')[1])
y_err['step'] = y_err['step'].map(y_step_lookup)

In [None]:
## models present in each error table, in order of first appearance
hf_model_set = x_err['model'].unique().tolist()
lf_model_set = y_err['model'].unique().tolist()

**normalize each model's errors by those of the simple exponential smoother (SES)**

In [None]:
## wide layout: one row per (metric, vintage, sample_size, step) and one
## column per model, with the model columns put back in hf_model_set order
x_err = (
    x_err
    .pivot_table(
        values='value',
        index=['metric', 'vintage', 'sample_size', 'step'],
        columns=['model'],
    )[hf_model_set]
    .reset_index()
)

## divide every model column by the SES column; assign() works on a copy,
## so x_err itself keeps the un-normalized errors
x_err_ratio = x_err.assign(
    **{model_col: x_err[model_col] / x_err['SES'] for model_col in hf_model_set}
)

In [None]:
## wide layout: one row per (metric, vintage, sample_size, step) and one
## column per model, with the model columns put back in lf_model_set order
y_err = (
    y_err
    .pivot_table(
        values='value',
        index=['metric', 'vintage', 'sample_size', 'step'],
        columns=['model'],
    )[lf_model_set]
    .reset_index()
)

## divide every model column by the SES column; assign() works on a copy,
## so y_err itself keeps the un-normalized errors
y_err_ratio = y_err.assign(
    **{model_col: y_err[model_col] / y_err['SES'] for model_col in lf_model_set}
)

**keep only the results reported in the paper and rename the model columns**

In [None]:
## map each model's run-folder name to the name that was used in the paper
name_map = dict([
    ('MTMFSeq2Seq', 'seq2seq'),
    ('MTMFSeq2One', 'seq2one'),
    ('transformer', 'transf'),
    ('MLP', 'mlp'),
    ('GBM', 'gbm'),
    ('DeepAR', 'deepvar'),
    ('NHiTS', 'nhits'),
    ('ARIMA', 'arima'),
    ('SES', 'ses'),
    ('Naive', 'naive'),
])

## x-side table: median ratios at step 4, model columns renamed via name_map
x_rows = x_err_ratio['metric'].eq('median') & x_err_ratio['step'].eq(4)
df_x = (
    x_err_ratio.loc[x_rows]
    .drop(columns=['metric', 'step'])
    .reset_index(drop=True)
    .rename(columns=name_map)
)

## y-side table: median ratios at step 1, model columns renamed via name_map
y_rows = y_err_ratio['metric'].eq('median') & y_err_ratio['step'].eq(1)
df_y = (
    y_err_ratio.loc[y_rows]
    .drop(columns=['metric', 'step'])
    .reset_index(drop=True)
    .rename(columns=name_map)
)

In [None]:
## ensure they are displayed in a particular order
## NOTE(review): 'arima' is in name_map but not listed here - presumably it is
## not reported in the paper; confirm
ordered_models = ['seq2seq','transf','seq2one','gbm','mlp','deepvar','nhits','naive','ses']

## Sorting on a single column uses quicksort by default, which is not
## guaranteed to be stable. kind='stable' makes sure that rows sharing a
## sample_size keep their incoming (vintage) order on every run.
df_xT = (
    df_x.sort_values(by=['sample_size'], kind='stable')
    .set_index(['sample_size', 'vintage'])[ordered_models]
    .T
)
df_xT.index.name = None  ## drop the leftover columns-axis name from the row index

df_yT = (
    df_y.sort_values(by=['sample_size'], kind='stable')
    .set_index(['sample_size', 'vintage'])[ordered_models]
    .T
)
df_yT.index.name = None  ## drop the leftover columns-axis name from the row index

**export results to excel**

In [None]:
## report file is named after the part of setting_id before the first '_'
report_path = f'{setting_id.split("_")[0]}_report.xlsx'

## NOTE(review): df_xT is built from step == 4 rows and df_yT from step == 1
## rows, yet the sheets are called 'x1_...' and 'y4_...'; confirm the sheet
## names are intended before relying on them
sheet_tables = (('x1_err_ratio', df_xT), ('y4_err_ratio', df_yT))

with pd.ExcelWriter(report_path) as writer:
    for sheet_name, table in sheet_tables:
        table.to_excel(writer, sheet_name=sheet_name, index=True)

print(f'{report_path} on {dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')