## Setup

In [40]:
import pandas as pd
from darts.models import (Prophet, LinearRegressionModel, ARIMA,  ExponentialSmoothing, XGBModel,  NBEATSModel, GlobalNaiveAggregate, NaiveDrift)
from darts.dataprocessing.transformers import MinTReconciliator, BottomUpReconciliator, TopDownReconciliator
from utils import (get_winners,get_best_per_series, load_data, apply_hierarchy, compare_models_multivariate, compare_models_reconciliated, compare_models_univariate)
import matplotlib.pyplot as plt
import warnings
import logging

In [41]:
# Quieten the notebook output: raise the level of the two chattiest loggers,
# hide every Python warning, then disable log records of severity CRITICAL
# and below for the whole process.
for _noisy_logger in ('prophet', 'cmdstanpy'):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

warnings.filterwarnings("ignore")
logging.disable(logging.CRITICAL)

# `hvar` selects which hierarchy is analysed (a subset of the data or all of
# it); it is passed to apply_hierarchy(..., hvar=hvar) further down.
#   'var0' -> the whole hierarchy
#   path 1 and path 2 are the other two options.
# NOTE(review): the original notes spelled the option keys inconsistently
# ('v1', 'var2', 'v0'..'v2') -- confirm the accepted values in
# utils.apply_hierarchy before switching.
hvar = 'var0'

## Model Comparison

In [42]:
# Load the workbook and let apply_hierarchy build the series for the
# hierarchy chosen through `hvar`.
df = load_data(file_path='data/SampleHierForecastingBASF_share.xlsx')
series, target, covariates, hierarchy = apply_hierarchy(df, hvar=hvar)

# Hold-out layout of the target:
#   train -> everything except the last 24 points
#   val   -> the 12 points that follow train
# The final 12 points are not used in this cell.
train = target[:-24]
val = target[-24:-12]

# Covariates are cut at the end of the validation window (last 12 dropped).
past_cov = covariates[:-12]

In [43]:
# Reconciliation setup: split the full hierarchical series with the same
# offsets used for the target (train = all but the last 24 points,
# validation = the following 12).
hierarchical_train = series[:-24]
hierarchical_val = series[-24:-12]

# One reconciliator per strategy; order must match `names` below.
reconciliator0 = MinTReconciliator(method="ols")
reconciliator1 = TopDownReconciliator()
reconciliator2 = BottomUpReconciliator()  # used as constructed; never fitted here

# MinT and top-down are fitted on the hierarchical training window.
for _fittable in (reconciliator0, reconciliator1):
    _fittable.fit(hierarchical_train)

reconciliators = [reconciliator0, reconciliator1, reconciliator2]
names = ['MiNT', 'Top Down', 'Bottom Up']

In [44]:
# NBEATS hyper-parameters derived from Optuna (source code found in hpo.py).
# Kept in one place so both NBEATS instances below are configured identically.
nbeats_params = dict(
    input_chunk_length=36,
    output_chunk_length=24,
    dropout=0.11891699976631348,
    n_epochs=27,
    batch_size=128,
)

# Candidates for the univariate baseline comparison.
uni_models_to_test = [
    ARIMA(q=1),
    ExponentialSmoothing(),
    Prophet(),
    NBEATSModel(**nbeats_params),
    LinearRegressionModel(lags=12),
    XGBModel(lags=12),
]

# Candidates for the multivariate comparison.
multi_models_to_test = [
    NBEATSModel(**nbeats_params),
    LinearRegressionModel(lags=12),
    XGBModel(lags=12),
]

# Candidates for the per-series "best model" search. NBEATS and XGBoost are
# deliberately left out of this list, as are the variants that lag the past
# covariates.
simple_models_to_test = [
    ARIMA(q=1),
    ExponentialSmoothing(),
    Prophet(),
    LinearRegressionModel(lags=12),
    NaiveDrift(),
    GlobalNaiveAggregate(input_chunk_length=3, output_chunk_length=3),
    GlobalNaiveAggregate(input_chunk_length=1, output_chunk_length=1),
    GlobalNaiveAggregate(input_chunk_length=12, output_chunk_length=12),
]

In [45]:
# Multi-model approach: for every series pick the best candidate from
# `simple_models_to_test`, then attach the hierarchy to the combined result.
best_per_series = get_best_per_series(
    hierarchical_train,
    hierarchical_val,
    models=simple_models_to_test,
).with_hierarchy(hierarchy)

Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 77.88it/s]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 216.92it/s]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 229.23it/s]
EBITDA: ExponentialSmoothing
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 188.14it/s]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 220.81it/s]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 259.42it/s]
-DepreciationAmortization: ARIMA
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 270.97it/s]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 250.74it/s]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 284.05it/s]
ContributionMargin1: Prophet
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 290.65it/s]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 254.22it/s]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 253.36it/s]
-FixCosts: Prophet
Predicting DataLoader 0: 10

In [46]:
# Three families of forecasts are compared:
#   1. univariate baseline models,
#   2. unreconciled multivariate models,
#   3. reconciled multivariate models.
# The per-series "Multi Model" is counted among the multivariate models.
fittedbaselinemodels, univariate_predictions = compare_models_univariate(
    train, val, uni_models_to_test, past_cov
)
unreconciled_models, unreconciled_predictions = compare_models_multivariate(
    hierarchical_train, hierarchical_val, multi_models_to_test
)

# Added before reconciliation so it is handed to compare_models_reconciliated
# together with the other unreconciled forecasts.
unreconciled_predictions['Multi Model'] = best_per_series

reconciliatedpredictions = compare_models_reconciliated(
    data=hierarchical_train,
    val=val['EBIT'],
    models=unreconciled_predictions,
    reconciliators=reconciliators,
    reconciliator_names=names,
)

Epoch 26: 100%|██████████| 1/1 [00:00<00:00,  5.08it/s, train_loss=9e+7]   
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 56.09it/s]
MAE for ARIMA, Uni: 10593.30
MAPE for ARIMA, Uni: 220.89
RMSE for ARIMA, Uni: 12526.19
R^2 for ARIMA, Uni: 0.80
SMAPE for ARIMA, Uni: 81.98

MAE for ExponentialSmoothing, Uni: 9987.80
MAPE for ExponentialSmoothing, Uni: 188.91
RMSE for ExponentialSmoothing, Uni: 11312.62
R^2 for ExponentialSmoothing, Uni: 0.83
SMAPE for ExponentialSmoothing, Uni: 84.67

MAE for Prophet, Uni: 12980.65
MAPE for Prophet, Uni: 103.01
RMSE for Prophet, Uni: 16242.64
R^2 for Prophet, Uni: 0.66
SMAPE for Prophet, Uni: 117.49

MAE for NBEATSModel, Uni: 8014.69
MAPE for NBEATSModel, Uni: 108.79
RMSE for NBEATSModel, Uni: 9773.38
R^2 for NBEATSModel, Uni: 0.88
SMAPE for NBEATSModel, Uni: 90.73

MAE for LinearRegression, Uni: 10010.87
MAPE for LinearRegression, Uni: 225.42
RMSE for LinearRegression, Uni: 12464.43
R^2 for LinearRegression, Uni: 0.80
SMAPE for LinearRegr

## Export Performance

In [47]:
# Collect every forecast under one mapping of model name -> prediction.
# Hierarchical forecasts (unreconciled first, then reconciled) are reduced to
# their 'EBIT' component; univariate forecasts are taken as they are and
# placed first. On a name clash the later mapping wins.
hierarchical_forecasts = {**unreconciled_predictions, **reconciliatedpredictions}
ebit_forecasts = {
    model_name: forecast['EBIT']
    for model_name, forecast in hierarchical_forecasts.items()
}
merged = {**univariate_predictions, **ebit_forecasts}

In [48]:
# Stack all forecasts into one long-format frame with the columns
# Date | Name | Predictions (one row per model and time step).
dfs = []
for model_name, ts in merged.items():
    # Deliberately not called `df`: that name holds the raw data returned by
    # load_data, and overwriting it here would leave stale state behind for
    # any cell that is re-run afterwards.
    model_frame = ts.to_dataframe().reset_index()
    # After reset_index the frame has exactly two columns: the time index and
    # the single forecast component.
    model_frame.columns = ["Date", "Predictions"]
    model_frame["Name"] = model_name
    dfs.append(model_frame)

merged_df = pd.concat(dfs, ignore_index=True)

# Fix the column order and make sure Date is a proper datetime column.
merged_df = merged_df[["Date", "Name", "Predictions"]]
merged_df['Date'] = pd.to_datetime(merged_df['Date'])

In [49]:
# Score the long-format predictions against the validation values of 'EBIT'.
ebit_actuals = val['EBIT']
quarterly, summary = get_winners(merged_df, ebit_actuals)

In [51]:
# Persist the summary and the raw predictions, one pair of files per
# hierarchy variant (the variant key is appended to the file name).
# NOTE(review): the paths carry no '.csv' extension and the 'output'
# directory is assumed to exist -- confirm this is intended.
summary.to_csv(f'output/results_{hvar}')
merged_df.to_csv(f'output/predictions_{hvar}')