In [1]:
#default_exp hierarchical?

In [2]:
#hide
%load_ext autoreload
%autoreload 2

# Hierarchical Time Series Forecasting

In [3]:
#hide
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [4]:
#hide
from fastcore.test import test_eq

In [5]:
#export
from typing import Callable, List, Optional, Tuple

import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

from statsforecast.core import StatsForecast, _as_tuple, _build_forecast_name

In [6]:
#export
def _to_summing_matrix(df: pd.DataFrame):
    """Transforms the bottom DataFrame `df` to a summing matrix S."""
    categories = [df[col].unique() for col in df.columns]
    S = OneHotEncoder(categories=categories, sparse=False, dtype=np.float32).fit_transform(df).T
    return S

In [7]:
#export
def _aggregate_key(df: pd.DataFrame, keys: List[List[str]], agg_fn: Callable = np.sum):
    """Aggregates `df` according to `keys` using `agg_fn`."""
    max_len_idx = np.argmax([len(key) for key in keys])
    bottom_comb = keys[max_len_idx]
    orig_cols = df.drop(labels=['ds', 'y'], axis=1).columns.to_list()
    df_keys = []
    for key in keys:
        df_key = df.groupby(key + ['ds'])['y'].apply(agg_fn).reset_index()
        df_key['unique_id'] = df_key[key].agg('_'.join, axis=1)
        if key == bottom_comb:
            bottom_keys = df_key['unique_id'].unique()
        df_keys.append(df_key)
    df_keys = pd.concat(df_keys)
    S_df = df_keys[['unique_id'] + bottom_comb].drop_duplicates().reset_index(drop=True)
    S_df = S_df.set_index('unique_id')
    S_df = S_df.fillna('agg')
    keys_cols = []
    for key in keys:
        key_col = '_'.join(key) 
        S_df[key_col] = S_df[key].agg('_'.join, axis=1)
        keys_cols.append(key_col)
    y_df = df_keys[['unique_id', 'ds', 'y']].set_index('unique_id')
    #S definition
    S = _to_summing_matrix(S_df.loc[bottom_keys, keys_cols])
    return S_df[keys_cols], S, bottom_keys, y_df

In [8]:
#hide
df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')
df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1)
df.insert(0, 'Country', 'Australia')
hierarchies = [
    ['Country'],
    ['Country', 'State'], 
    ['Country', 'Purpose'], 
    ['Country', 'State', 'Region'], 
    ['Country', 'State', 'Purpose'], 
    ['Country', 'State', 'Region', 'Purpose']
]
S_df, S, bottom_keys, y_df = _aggregate_key(df, hierarchies)
test_eq(len(y_df), 34_000)
test_eq(y_df.index.nunique(), 425)
test_eq(S.shape, (425, 304))

In [9]:
#export
def bottom_up(S: np.ndarray, y_hat: np.ndarray):
    n_hiers, n_bottom = S.shape
    P = np.eye(n_bottom, n_hiers, k=(n_hiers - n_bottom), dtype=np.float32)
    W = np.eye(n_hiers, dtype=np.float32)
    return P, W

In [14]:
#export
def min_trace(S: np.ndarray, y_hat: np.ndarray, method: str):
    n_hiers, n_bottom = S.shape
    if method == 'ols':
        # this should be modified once we have residuals
        W = np.eye(n_hiers)
    elif method == 'wls_struct':
        W = np.diag(S @ np.ones((n_bottom,)))
    else:
        raise ValueError(f'Unkown reconciliation method {method}')
    
    eigenvalues, _ = np.linalg.eig(W)
    if any(eigenvalues < 1e-8):
        raise Exception('min_trace needs covariance matrix to be positive definite.')
    R = S.T @ np.linalg.inv(W)
    P = np.linalg.inv(R @ S) @ R
    return P, W

In [15]:
#export
def _reconcile(S: np.ndarray, P: np.ndarray, W: np.ndarray, 
               y_hat: np.ndarray, SP: np.ndarray = None):
    if SP is None:
        SP = S @ P
    return np.matmul(SP, y_hat)

In [16]:
#export
class HierarchicalStatsForecast:
    
    def __init__(self, df: pd.DataFrame, keys: List[List[str]], 
                 models: List, 
                 reconcile_fns: List[Callable],
                 freq: str,
                 n_jobs: int = 1, ray_address: Optional[str] = None):
        self.reconcile_fns = reconcile_fns
        self.S_df, self.S, _,  y_df = _aggregate_key(df, keys=keys)
        self.fcst = StatsForecast(df=y_df, models=models, freq=freq, 
                                  n_jobs=n_jobs, ray_address=ray_address,
                                  sort_df=False)
    
    def forecast(self, h: int,
                 xreg: Optional[pd.DataFrame] = None, 
                 level: Optional[Tuple] = None):
        fcsts = self.fcst.forecast(h=h, xreg=xreg, level=level)
        model_names = fcsts.drop(columns=['ds'], axis=1).columns.to_list()
        for model_name in model_names:
            fcsts_model = fcsts[model_name].values.reshape(-1, h)
            for reconcile_fn_args in self.reconcile_fns:
                reconcile_fn, *args = _as_tuple(reconcile_fn_args)
                reconcile_fn_name = _build_forecast_name(reconcile_fn, *args, idx_remove=2)
                P, W = reconcile_fn(self.S, fcsts_model, *args)
                fcsts_model = _reconcile(S=self.S, P=P, W=W, y_hat=fcsts_model)
                fcsts[f'{reconcile_fn_name}_{model_name}'] = fcsts_model.flatten()
        return fcsts
    
    def cross_validation(self, h: int, test_size: int, 
                         input_size: Optional[int] = None):
        fcsts = self.fcst.cross_validation(h=h, test_size=test_size, input_size=input_size)
        model_names = fcsts.drop(columns=['ds', 'cutoff', 'y'], axis=1).columns.to_list()
        cutoffs = fcsts['cutoff'].unique()
        for model_name in model_names:
            for cutoff in cutoffs:
                cutoff_idx = fcsts['cutoff'] == cutoff
                fcsts_model = fcsts.loc[cutoff_idx, model_name].values.reshape(-1, h)
                for reconcile_fn_args in self.reconcile_fns:
                    reconcile_fn, *args = _as_tuple(reconcile_fn_args)
                    reconcile_fn_name = _build_forecast_name(reconcile_fn, *args, idx_remove=2)
                    print(reconcile_fn_name)
                    P, W = reconcile_fn(self.S, fcsts_model, *args)
                    fcsts_model = _reconcile(S=self.S, P=P, W=W, y_hat=fcsts_model)
                    fcsts.loc[cutoff_idx, f'{reconcile_fn_name}_{model_name}'] = fcsts_model.flatten()
        return fcsts

In [17]:
#hide
from statsforecast.models import naive
# transform ds to int
ds_int = df[['ds']].drop_duplicates().assign(ds_int = lambda df: np.arange(len(df)) + 1)
df = df.merge(ds_int, how='left', on=['ds']).drop('ds', axis=1)
df = df.rename(columns={'ds_int': 'ds'})
# hierarchical
hier_fcst = HierarchicalStatsForecast(df, 
                                      keys=hierarchies,
                                      models=[naive],
                                      reconcile_fns=[bottom_up, (min_trace, 'ols'), (min_trace, 'wls_struct')],
                                      freq='D', 
                                      n_jobs=-1)
hier_fcsts = hier_fcst.cross_validation(12, test_size=12)
# bottom_up with naive model should be return same forecasts
pd.testing.assert_series_equal(hier_fcsts['naive'], hier_fcsts['bottom_up_naive'], check_names=False)

bottom_up
min_trace_method-ols
min_trace_method-wls_struct


In [18]:
hier_fcsts

Unnamed: 0_level_0,ds,cutoff,y,naive,bottom_up_naive,min_trace_method-ols_naive,min_trace_method-wls_struct_naive
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Australia,69,68,25023.736328,23950.408203,23950.416016,23950.414026,23950.414026
Australia,70,68,23798.914062,23950.408203,23950.416016,23950.414026,23950.414026
Australia,71,68,23485.746094,23950.408203,23950.416016,23950.414026,23950.414026
Australia,72,68,25140.162109,23950.408203,23950.416016,23950.414026,23950.414026
Australia,73,68,26660.636719,23950.408203,23950.416016,23950.414026,23950.414026
...,...,...,...,...,...,...,...
Australia_Western Australia_Experience Perth_Visiting,76,68,439.699463,381.616241,381.616241,381.616266,381.616266
Australia_Western Australia_Experience Perth_Visiting,77,68,356.867035,381.616241,381.616241,381.616266,381.616266
Australia_Western Australia_Experience Perth_Visiting,78,68,302.296112,381.616241,381.616241,381.616266,381.616266
Australia_Western Australia_Experience Perth_Visiting,79,68,373.442078,381.616241,381.616241,381.616266,381.616266


In [21]:
for model in hier_fcsts.drop(columns=['ds', 'cutoff', 'y']):
    print(model, np.mean((hier_fcsts['y'] - hier_fcsts[model]) ** 2))

naive 27016.84
bottom_up_naive 27016.777
min_trace_method-ols_naive 27016.775500323183
min_trace_method-wls_struct_naive 27016.775500323154
