#### **Overview**

In [16]:
import numpy as np
import pandas as pd

from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo
from statsforecast.core import StatsForecast
from statsforecast.models import AutoARIMA, Naive
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut
from hierarchicalforecast.evaluation import HierarchicalEvaluation

In [3]:
# Select the benchmark dataset; `group` carries the metadata used further down
# (forecast horizon, seasonality and frequency).
group_name = 'TourismSmall'
group = HierarchicalInfo.get_group(group_name)

# load() downloads into ./data on first use and returns the series frame,
# the aggregation matrix passed later as `S`, and the level tags.
Y_df, S_df, tags = HierarchicalData.load('./data', group_name)

# Make sure the timestamp column is a proper datetime.
Y_df = Y_df.assign(ds=pd.to_datetime(Y_df['ds']))

100%|██████████| 1.30M/1.30M [00:00<00:00, 1.50MiB/s]
INFO:datasetsforecast.utils:Successfully downloaded datasets.zip, 1297274, bytes.
INFO:datasetsforecast.utils:Decompressing zip file...
INFO:datasetsforecast.utils:Successfully decompressed data\hierarchical\datasets.zip


In [6]:
# List every distinct series identifier present in the loaded data.
Y_df['unique_id'].unique()

array(['total', 'hol', 'vfr', 'bus', 'oth', 'nsw-hol', 'vic-hol',
       'qld-hol', 'sa-hol', 'wa-hol', 'tas-hol', 'nt-hol', 'nsw-vfr',
       'vic-vfr', 'qld-vfr', 'sa-vfr', 'wa-vfr', 'tas-vfr', 'nt-vfr',
       'nsw-bus', 'vic-bus', 'qld-bus', 'sa-bus', 'wa-bus', 'tas-bus',
       'nt-bus', 'nsw-oth', 'vic-oth', 'qld-oth', 'sa-oth', 'wa-oth',
       'tas-oth', 'nt-oth', 'nsw-hol-city', 'nsw-hol-noncity',
       'vic-hol-city', 'vic-hol-noncity', 'qld-hol-city',
       'qld-hol-noncity', 'sa-hol-city', 'sa-hol-noncity', 'wa-hol-city',
       'wa-hol-noncity', 'tas-hol-city', 'tas-hol-noncity', 'nt-hol-city',
       'nt-hol-noncity', 'nsw-vfr-city', 'nsw-vfr-noncity',
       'vic-vfr-city', 'vic-vfr-noncity', 'qld-vfr-city',
       'qld-vfr-noncity', 'sa-vfr-city', 'sa-vfr-noncity', 'wa-vfr-city',
       'wa-vfr-noncity', 'tas-vfr-city', 'tas-vfr-noncity', 'nt-vfr-city',
       'nt-vfr-noncity', 'nsw-bus-city', 'nsw-bus-noncity',
       'vic-bus-city', 'vic-bus-noncity', 'qld-bus-city'

In [7]:
# Display the 0/1 matrix returned by HierarchicalData.load; it is passed as `S`
# to the reconciliation step further down.
S_df

Unnamed: 0,nsw-hol-city,nsw-hol-noncity,vic-hol-city,vic-hol-noncity,qld-hol-city,qld-hol-noncity,sa-hol-city,sa-hol-noncity,wa-hol-city,wa-hol-noncity,...,qld-oth-city,qld-oth-noncity,sa-oth-city,sa-oth-noncity,wa-oth-city,wa-oth-noncity,tas-oth-city,tas-oth-noncity,nt-oth-city,nt-oth-noncity
total,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
hol,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
vfr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
bus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
oth,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wa-oth-noncity,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
tas-oth-city,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
tas-oth-noncity,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
nt-oth-city,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [9]:
# Hold out the last `group.horizon` rows of every series as the test set.
Y_test_df = Y_df.groupby('unique_id').tail(group.horizon)

# Everything whose index label is not in the test set is training data.
is_test_row = Y_df.index.isin(Y_test_df.index)
Y_train_df = Y_df.loc[~is_test_row]

In [12]:
# Base (unreconciled) forecasts: AutoARIMA with the dataset's seasonality, plus
# a Naive model that is used as the benchmark in the evaluation cell.
fcst = StatsForecast(
    df=Y_train_df,
    models=[
        AutoARIMA(season_length=group.seasonality),
        Naive(),
    ],
    freq=group.freq,
    n_jobs=-1,
)

# Forecast `group.horizon` steps ahead, matching the size of the held-out test set.
Y_hat_df = fcst.forecast(h=group.horizon)

  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)


In [20]:
# Inspect the distinct index labels of the base forecasts (series ids, per the output).
Y_hat_df.index.unique()

Index(['bus', 'hol', 'nsw-bus', 'nsw-bus-city', 'nsw-bus-noncity', 'nsw-hol',
       'nsw-hol-city', 'nsw-hol-noncity', 'nsw-oth', 'nsw-oth-city',
       'nsw-oth-noncity', 'nsw-vfr', 'nsw-vfr-city', 'nsw-vfr-noncity',
       'nt-bus', 'nt-bus-city', 'nt-bus-noncity', 'nt-hol', 'nt-hol-city',
       'nt-hol-noncity', 'nt-oth', 'nt-oth-city', 'nt-oth-noncity', 'nt-vfr',
       'nt-vfr-city', 'nt-vfr-noncity', 'oth', 'qld-bus', 'qld-bus-city',
       'qld-bus-noncity', 'qld-hol', 'qld-hol-city', 'qld-hol-noncity',
       'qld-oth', 'qld-oth-city', 'qld-oth-noncity', 'qld-vfr', 'qld-vfr-city',
       'qld-vfr-noncity', 'sa-bus', 'sa-bus-city', 'sa-bus-noncity', 'sa-hol',
       'sa-hol-city', 'sa-hol-noncity', 'sa-oth', 'sa-oth-city',
       'sa-oth-noncity', 'sa-vfr', 'sa-vfr-city', 'sa-vfr-noncity', 'tas-bus',
       'tas-bus-city', 'tas-bus-noncity', 'tas-hol', 'tas-hol-city',
       'tas-hol-noncity', 'tas-oth', 'tas-oth-city', 'tas-oth-noncity',
       'tas-vfr', 'tas-vfr-city', 'tas

In [15]:
# Reconciliation strategies to compare: bottom-up, top-down and middle-out,
# the latter two splitting totals by forecast proportions.
reconcilers = [
    BottomUp(),
    TopDown(method='forecast_proportions'),
    MiddleOut(
        middle_level='Country/Purpose/State',
        top_down_method='forecast_proportions',
    ),
]
hrec = HierarchicalReconciliation(reconcilers=reconcilers)

# Reconcile the base forecasts using the training data, the matrix `S_df`
# and the level tags loaded with the dataset.
Y_rec_df = hrec.reconcile(
    Y_hat_df=Y_hat_df,
    Y_df=Y_train_df,
    S=S_df,
    tags=tags,
)

In [17]:
def mse(y, y_hat):
    """Return the mean squared error between `y` and `y_hat`.

    The difference is squared element-wise and averaged over all elements
    with `np.mean`.
    """
    squared_errors = (y - y_hat) ** 2
    return np.mean(squared_errors)

# Score the reconciled forecasts against the held-out test set with `mse`,
# passing the Naive model as benchmark (the output labels the metric `mse-scaled`).
evaluator = HierarchicalEvaluation(evaluators=[mse])
evaluation = evaluator.evaluate(
    Y_hat_df=Y_rec_df,
    Y_test_df=Y_test_df.set_index('unique_id'),
    tags=tags,
    benchmark='Naive',
)

# Keep only the ARIMA-based columns and transpose so each method is a row.
evaluation.filter(like='ARIMA', axis='columns').T

level,Overall,Country,Country/Purpose,Country/Purpose/State,Country/Purpose/State/CityNonCity
metric,mse-scaled,mse-scaled,mse-scaled,mse-scaled,mse-scaled
AutoARIMA,0.311714,0.317897,0.323207,0.266118,0.305245
AutoARIMA/BottomUp,0.234665,0.22638,0.19925,0.305681,0.305245
AutoARIMA/TopDown_method-forecast_proportions,0.289413,0.317897,0.251368,0.308241,0.305982
AutoARIMA/MiddleOut_middle_level-Country/Purpose/State_top_down_method-forecast_proportions,0.252679,0.306968,0.191454,0.266118,0.270909
