In [None]:
#default_exp hierarchical

In [None]:
#hide
%load_ext autoreload
%autoreload 2

# Hierarchical Time Series Forecasting

In [None]:
#hide
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [None]:
#hide
from fastcore.test import test_eq

In [109]:
#export
from typing import Callable, List, Optional, Tuple

import numpy as np
import pandas as pd

from statsforecast.core import StatsForecast

In [141]:
#export
def _aggregate_key(df: pd.DataFrame, keys: List[List[str]], agg_fn: Callable = np.sum):
    """Aggregates `df` according to `keys` using `agg_fn`."""
    df = df.copy()
    max_len_idx = np.argmax([len(key) for key in keys])
    bottom_comb = keys[max_len_idx]
    orig_cols = df.drop(labels=['ds', 'y'], axis=1).columns.to_list()
    df_keys = []
    for key in keys:
        if key == ['total']:
            df = df.assign(total='total')
        df_key = df.groupby(key + ['ds'])['y'].apply(agg_fn).reset_index()
        df_key['unique_id'] = df_key[key].agg('_'.join, axis=1)
        if key == bottom_comb:
            bottom_keys = df_key['unique_id'].unique()
        df_keys.append(df_key)
    df_keys = pd.concat(df_keys)
    s_df = df_keys[['unique_id'] + orig_cols].drop_duplicates().reset_index(drop=True)
    y_df = df_keys[['unique_id', 'ds', 'y']].set_index('unique_id')
    #s_mat definition
    s_mat = np.zeros((len(s_df), len(bottom_keys)))
    for idx, label in enumerate(s_df['unique_id'], start=0):
        if label == 'total':
            s_mat[idx] = 1
        else:
            s_mat[idx, [label in bt for bt in bottom_keys]] = 1
    return s_df, s_mat, y_df

In [150]:
#hide
# Smoke test of `_aggregate_key` on the tourism dataset (downloaded from GitHub).
df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')
# `_aggregate_key` reads the target from `y` and the timestamp from `ds`.
df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1)
# Levels go from the single `total` series down to State/Region/Purpose,
# which is the longest key and therefore the bottom level.
s_df, s_mat, y_df = _aggregate_key(df, [['total'],
                                         ['State'], 
                                         ['Purpose'], 
                                         ['State', 'Region'], 
                                         ['State', 'Purpose'], 
                                         ['State', 'Region', 'Purpose']])
# Expected sizes: 34,000 aggregated observations, 425 series across all
# levels, and a summing matrix with one column per bottom series (304).
test_eq(len(y_df), 34_000)
test_eq(y_df.index.nunique(), 425)
test_eq(s_mat.shape, (425, 304))

In [114]:
#export
def bottom_up(y_hat: np.ndarray, s_mat: np.ndarray):
    """Reconciliation function taking forecasts `y_hat` and a matrix `s_mat`."""
    # NOTE(review): the body is only a placeholder in this view. Presumably
    # this performs bottom-up reconciliation with the summing matrix returned
    # by `_aggregate_key` -- confirm against the full implementation.
    ...

In [115]:
#export
class HierarchicalStatsForecast:
    
    def __init__(self, df: pd.DataFrame, keys: List[List[str]], 
                 models: List, freq: str, 
                 n_jobs: int = 1, ray_address: Optional[str] = None):
        """Aggregates `df` over `keys` and prepares a `StatsForecast` on the result.

        Parameters
        ----------
        df : pd.DataFrame
            Input frame passed to `_aggregate_key` together with `keys`.
        keys : List[List[str]]
            Column combinations defining the aggregation levels.
        models : List
            Forwarded to `StatsForecast`.
        freq : str
            Forwarded to `StatsForecast`.
        n_jobs : int
            Forwarded to `StatsForecast`.
        ray_address : Optional[str]
            Forwarded to `StatsForecast`.
        """
        # `_aggregate_key` returns three values (s_df, s_mat, y_df); unpacking
        # them into two names raised `ValueError` on every construction.
        # `s_mat` is kept on the instance next to `s_df`; presumably the
        # reconciliation functions (which take an `s_mat` argument) need it.
        self.s_df, self.s_mat, y_df = _aggregate_key(df, keys=keys)
        self.fcst = StatsForecast(df=y_df, models=models, freq=freq, 
                                  n_jobs=n_jobs, ray_address=ray_address)
    
    def forecast(self, h: int, reconcile_fns: List[Callable] = [bottom_up],
                 xreg: Optional[pd.DataFrame] = None, 
                 level: Optional[Tuple] = None):
        fcsts = self.fcst.forecast(h=h, xreg=xreg, level=level)