In [None]:
#default_exp hierarchical

In [None]:
#hide
%load_ext autoreload
%autoreload 2

# Hierarchical Time Series Forecasting

In [None]:
#hide
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [None]:
#hide
from fastcore.test import test_eq

In [None]:
#export
from functools import partial
from inspect import signature
from typing import Callable, List, Optional, Tuple

import numpy as np
import pandas as pd
from statsmodels.stats.moment_helpers import cov2corr
from sklearn.preprocessing import OneHotEncoder

from statsforecast.core import StatsForecast, _as_tuple, _build_forecast_name

In [None]:
#export
def _to_summing_matrix(df: pd.DataFrame):
    """Transforms the bottom DataFrame `df` to a summing matrix S."""
    categories = [df[col].unique() for col in df.columns]
    cat_sizes = [len(cats) for cats in categories]
    idx_max_cat_size = np.argmax(cat_sizes)
    cat_sizes = np.cumsum(cat_sizes)
    idx_bottom = np.arange(cat_sizes[idx_max_cat_size - 1], cat_sizes[idx_max_cat_size])
    encoder = OneHotEncoder(categories=categories, sparse=False, dtype=np.float32)
    S = encoder.fit_transform(df).T
    return S, idx_bottom

In [None]:
#export
def _aggregate_key(df: pd.DataFrame, keys: List[List[str]], agg_fn: Callable = np.sum):
    """Aggregates `df` according to `keys` using `agg_fn`."""
    max_len_idx = np.argmax([len(key) for key in keys])
    bottom_comb = keys[max_len_idx]
    orig_cols = df.drop(labels=['ds', 'y'], axis=1).columns.to_list()
    df_keys = []
    for key in keys:
        df_key = df.groupby(key + ['ds'])['y'].apply(agg_fn).reset_index()
        df_key['unique_id'] = df_key[key].agg('_'.join, axis=1)
        if key == bottom_comb:
            bottom_keys = df_key['unique_id'].unique()
        df_keys.append(df_key)
    df_keys = pd.concat(df_keys)
    S_df = df_keys[['unique_id'] + bottom_comb].drop_duplicates().reset_index(drop=True)
    S_df = S_df.set_index('unique_id')
    S_df = S_df.fillna('agg')
    keys_cols = []
    for key in keys:
        key_col = '_'.join(key) 
        S_df[key_col] = S_df[key].agg('_'.join, axis=1)
        keys_cols.append(key_col)
    y_df = df_keys[['unique_id', 'ds', 'y']].set_index('unique_id')
    #S definition
    S, idx_bottom = _to_summing_matrix(S_df.loc[bottom_keys, keys_cols])
    return S_df[keys_cols], S, idx_bottom, bottom_keys, y_df

In [None]:
#hide
# Download the tourism dataset and rename its columns to the `ds`/`y` names
# that `_aggregate_key` expects.
df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')
df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1)
# Constant top-level column so that every key below starts from the same root.
df.insert(0, 'Country', 'Australia')
# Grouped structure: `Purpose` is crossed with the Country/State/Region levels.
hiers_grouped = [
    ['Country'],
    ['Country', 'State'], 
    ['Country', 'Purpose'], 
    ['Country', 'State', 'Region'], 
    ['Country', 'State', 'Purpose'], 
    ['Country', 'State', 'Region', 'Purpose']
]
S_df, S, idx_bottom, bottom_keys, y_df = _aggregate_key(df, hiers_grouped)
# Expected sizes: 425 aggregated series in total, 304 of them at the bottom level.
test_eq(len(y_df), 34_000)
test_eq(y_df.index.nunique(), 425)
test_eq(S.shape, (425, 304))
test_eq(idx_bottom.size, 304)

In [None]:
#hide
# Strictly nested structure (Country > State > Region); reuses `df` from the
# previous cell. `hiers` is also used by the cross-validation cell further down.
hiers = [
    ['Country'],
    ['Country', 'State'], 
    ['Country', 'State', 'Region']
]
S_df, S, idx_bottom, bottom_keys, y_df = _aggregate_key(df, hiers)
# Expected sizes: 85 aggregated series in total, 76 of them at the bottom level.
test_eq(len(y_df), 6_800)
test_eq(y_df.index.nunique(), 85)
test_eq(S.shape, (85, 76))
test_eq(idx_bottom.size, 76)

In [None]:
#export
def _reconcile(S: np.ndarray, P: np.ndarray, W: np.ndarray, 
               y_hat: np.ndarray, SP: np.ndarray = None):
    if SP is None:
        SP = S @ P
    return np.matmul(SP, y_hat)

In [None]:
#export
def bottom_up(hfcst: 'HierarchicalStatsForecast', y_hat: np.ndarray):
    """Bottom-up reconciliation: rebuild every level from the bottom forecasts.

    Parameters
    ----------
    hfcst : HierarchicalStatsForecast
        Only `hfcst.S` (summing matrix) is read. The annotation is a string
        because the class is defined after this function.
    y_hat : np.ndarray
        Base forecasts with one row per row of `hfcst.S`.

    Returns
    -------
    np.ndarray
        `S @ P @ y_hat`, where `P` selects the last `n_bottom` rows of `y_hat`.
    """
    n_hiers, n_bottom = hfcst.S.shape
    # The offset diagonal picks the trailing `n_bottom` rows of `y_hat`.
    # NOTE(review): this assumes the bottom series are the last rows of S;
    # `hfcst.idx_bottom` is not consulted here.
    P = np.eye(n_bottom, n_hiers, k=(n_hiers - n_bottom), dtype=np.float32)
    W = np.eye(n_hiers, dtype=np.float32)
    return _reconcile(hfcst.S, P, W, y_hat)

In [None]:
#export
def top_down(hfcst: 'HierarchicalStatsForecast', 
             y_hat: np.ndarray,
             method: str):
    """Top-down reconciliation: split the top-level forecast with fixed proportions.

    Parameters
    ----------
    hfcst : HierarchicalStatsForecast
        Reads `hfcst.S`, `hfcst.idx_bottom` and the grouped series in
        `hfcst.fcst.ga`. The annotation is a string because the class is
        defined after this function.
    y_hat : np.ndarray
        Base forecasts with one row per row of `hfcst.S`.
    method : str
        `'average_proportions'` (mean of the per-observation bottom/top ratios)
        or `'proportion_averages'` (ratio of the bottom means to the top mean).

    Returns
    -------
    np.ndarray
        `S @ P @ y_hat`, where `P` is zero except for the proportions placed
        in the column of the top series.

    Raises
    ------
    NotImplementedError
        For `'forecast_proportions'`.
    ValueError
        For any other unknown `method`.
    """
    # Validate first so that a bad `method` fails before any data is touched.
    if method == 'forecast_proportions':
        raise NotImplementedError(f'Method {method} not implemented yet')
    if method not in ('average_proportions', 'proportion_averages'):
        raise ValueError(f'Unknown method {method}')
    n_hiers, n_bottom = hfcst.S.shape
    # The top series is the row of S that sums the most bottom series.
    idx_top = int(hfcst.S.sum(axis=1).argmax())
    #add strictly hierarchical assert

    y_top = hfcst.fcst.ga[idx_top]
    # np.hstack needs a real sequence; passing a generator is rejected by NumPy.
    # NOTE(review): assumes each `ga[idx]` is a column-shaped array so that the
    # stack has one column per bottom series -- confirm against GroupedArray.
    y_btm = np.hstack([hfcst.fcst.ga[int(idx)] for idx in hfcst.idx_bottom])
    if method == 'average_proportions':
        prop = np.mean(y_btm / y_top, axis=0)
    else:
        prop = np.mean(y_btm, axis=0) / np.mean(y_top)
    P = np.zeros_like(hfcst.S).T
    P[:, idx_top] = prop
    W = np.eye(n_hiers, dtype=np.float32)
    return _reconcile(hfcst.S, P, W, y_hat)

In [None]:
#export
def crossprod(x):
    """Return the product of the transpose of `x` with `x` itself."""
    x_transposed = x.T
    return x_transposed @ x

In [None]:
#export
def min_trace(hfcst: 'HierarchicalStatsForecast', y_hat: np.ndarray, method: str,
              residuals: Optional[np.ndarray] = None):
    """Minimum-trace reconciliation with a selectable weight matrix `W`.

    Parameters
    ----------
    hfcst : HierarchicalStatsForecast
        Only `hfcst.S` (summing matrix) is read. The annotation is a string
        because the class is defined after this function.
    y_hat : np.ndarray
        Base forecasts with one row per row of `hfcst.S`.
    method : str
        How `W` is built:
        `'ols'` identity; `'wls_struct'` diagonal of the row sums of S;
        `'wls_var'` diagonal of the residual covariance; `'mint_cov'` full
        residual covariance; `'mint_shrink'` covariance shrunk towards its
        diagonal.
    residuals : np.ndarray, optional
        Shape (obs, n_hiers); may contain NaN. Required for `'wls_var'`,
        `'mint_cov'` and `'mint_shrink'`.

    Returns
    -------
    np.ndarray
        `S @ P @ y_hat` with `P = (S' W^-1 S)^-1 S' W^-1`.

    Raises
    ------
    ValueError
        Unknown `method`, residuals missing for a residual-based method, or
        fewer than two complete residual rows for `'mint_shrink'`.
    Exception
        If `W` has an eigenvalue below 1e-8.
    """
    # shape residuals (obs, n_hiers)
    res_methods = ['wls_var', 'mint_cov', 'mint_shrink']
    if method in res_methods and residuals is None:
        raise ValueError(f"For methods {', '.join(res_methods)} you need to pass residuals")
    n_hiers, n_bottom = hfcst.S.shape
    if method == 'ols':
        W = np.eye(n_hiers)
    elif method == 'wls_struct':
        W = np.diag(hfcst.S @ np.ones((n_bottom,)))
    elif method in res_methods:
        # Masking lets the covariance be computed despite NaN entries.
        masked_res = np.ma.array(residuals, mask=np.isnan(residuals))
        covm = np.ma.cov(masked_res, rowvar=False, allow_masked=True).data
        if method == 'wls_var':
            W = np.diag(np.diag(covm))
        elif method == 'mint_cov':
            W = covm
        elif method == 'mint_shrink':
            tar = np.diag(np.diag(covm))
            corm = cov2corr(covm)
            xs = np.divide(residuals, np.sqrt(np.diag(covm)))
            xs = xs[~np.isnan(xs).any(axis=1), :]
            # The sums below run over the complete rows only, so the sample
            # size must be counted after the NaN rows have been dropped.
            n = xs.shape[0]
            if n < 2:
                raise ValueError('mint_shrink needs at least two residual rows without NaN')
            v = (1 / (n * (n - 1))) * (crossprod(xs ** 2) - (1 / n) * (crossprod(xs) ** 2))
            np.fill_diagonal(v, 0)
            corapn = cov2corr(tar)
            d = (corm - corapn) ** 2
            lmd = v.sum() / d.sum()
            # Shrinkage intensity is clipped to [0, 1].
            lmd = max(min(lmd, 1), 0)
            W = lmd * tar + (1 - lmd) * covm
    else:
        raise ValueError(f'Unkown reconciliation method {method}')

    # Every W built above is symmetric, so the symmetric solver applies; it
    # returns real eigenvalues and skips the eigenvectors.
    eigenvalues = np.linalg.eigvalsh(W)
    if np.any(eigenvalues < 1e-8):
        raise Exception('min_trace needs covariance matrix to be positive definite.')

    R = hfcst.S.T @ np.linalg.inv(W)
    P = np.linalg.inv(R @ hfcst.S) @ R

    return _reconcile(hfcst.S, P, W, y_hat)

In [None]:
#export
def empirical_risk_minimization(hfcst: 'HierarchicalStatsForecast',
                                y_hat: np.ndarray, 
                                method: str,
                                lambda_reg: float = 1e-3):
    """Reconciliation with a projection fitted by regularised least squares.

    Parameters
    ----------
    hfcst : HierarchicalStatsForecast
        Only `hfcst.S` (summing matrix) is read. The annotation is a string
        because the class is defined after this function.
    y_hat : np.ndarray
        Base forecasts with one row per row of `hfcst.S`.
    method : str
        `'exact'` is the only implemented option.
    lambda_reg : float
        Ridge term added to the diagonal of `y_hat @ y_hat.T` before inversion.

    Returns
    -------
    np.ndarray
        `S @ P @ y_hat` with
        `B = y_hat' S (S' S)^-T` and
        `P = B' y_hat' (y_hat y_hat' + lambda_reg * I)^-1`.

    Raises
    ------
    NotImplementedError
        For `'svd'`, which has no implementation yet.
    ValueError
        For any other unknown `method`.
    """
    # 'svd' used to fall through with `P` unbound; fail explicitly instead.
    if method == 'svd':
        raise NotImplementedError(f'Method {method} not implemented yet')
    if method != 'exact':
        raise ValueError(f'Unkown reconciliation method {method}')
    n_hiers, n_bottom = hfcst.S.shape
    B = y_hat.T @ hfcst.S @ np.linalg.inv(hfcst.S.T @ hfcst.S).T
    P = B.T @ y_hat.T @ np.linalg.inv(y_hat @ y_hat.T + lambda_reg * np.eye(n_hiers))

    W = np.eye(n_hiers, dtype=np.float32)

    return _reconcile(hfcst.S, P, W, y_hat)

In [None]:
#export
class HierarchicalStatsForecast:
    """Fit base forecasts for every aggregation level and reconcile them.

    The input frame is aggregated with `_aggregate_key`; the resulting series
    are handed to a `StatsForecast` instance, and each entry of
    `reconcile_fns` is applied to the forecasts that instance produces.
    """

    def __init__(self, df: pd.DataFrame, keys: List[List[str]], 
                 models: List, 
                 reconcile_fns: List[Callable],
                 freq: str,
                 n_jobs: int = 1, ray_address: Optional[str] = None):
        """Aggregate `df` by `keys` and build the underlying `StatsForecast`.

        Parameters
        ----------
        df : pd.DataFrame
            Long frame with the key columns plus `ds` and `y`.
        keys : List[List[str]]
            Column combinations that define the aggregation levels.
        models : List
            Forwarded to `StatsForecast`.
        reconcile_fns : List[Callable]
            Reconciliation functions, either bare or as a
            `(function, *extra_args)` tuple. Each is called as
            `fn(self, y_hat, *extra_args)`.
        freq, n_jobs, ray_address
            Forwarded to `StatsForecast`.
        """
        self.reconcile_fns = reconcile_fns
        self.S_df, self.S, self.idx_bottom, _,  y_df = _aggregate_key(df, keys=keys)
        # sort_df=False keeps the series in the order `_aggregate_key` produced.
        self.fcst = StatsForecast(df=y_df, models=models, freq=freq, 
                                  n_jobs=n_jobs, ray_address=ray_address,
                                  sort_df=False)

    def forecast(self, h: int,
                 xreg: Optional[pd.DataFrame] = None, 
                 level: Optional[Tuple] = None):
        """Forecast `h` steps and add one reconciled column per (method, model).

        Reconciled columns are named `{reconcile_fn_name}_{model_name}`.
        Every reconciliation function receives the base forecasts of the
        model, never the output of a previously applied function.
        """
        fcsts = self.fcst.forecast(h=h, xreg=xreg, level=level)
        # Snapshot the base columns before reconciled ones are appended.
        model_names = fcsts.drop(columns=['ds']).columns.to_list()
        for model_name in model_names:
            # One row per series, one column per horizon step.
            fcsts_model = fcsts[model_name].values.reshape(-1, h)
            for reconcile_fn_args in self.reconcile_fns:
                reconcile_fn, *args = _as_tuple(reconcile_fn_args)
                reconcile_fn_name = _build_forecast_name(reconcile_fn, *args, idx_remove=2)
                # Keep `fcsts_model` untouched so the next function also
                # starts from the base forecasts.
                fcsts_reconciled = reconcile_fn(self, fcsts_model, *args)
                fcsts[f'{reconcile_fn_name}_{model_name}'] = fcsts_reconciled.flatten()
        return fcsts

    def cross_validation(self, h: int, test_size: int, 
                         input_size: Optional[int] = None):
        """Cross-validate the base models and reconcile each cutoff window.

        Reconciled columns are named `{model_name}/{reconcile_fn_name}`.
        Functions whose signature has a `residuals` parameter receive the
        cross-validation residuals of the matching cutoff and model.
        """
        fcsts = self.fcst.cross_validation(h=h, test_size=test_size, input_size=input_size, residuals=True)
        res = self.fcst.cross_validation_residuals()
        model_names = fcsts.drop(columns=['ds', 'cutoff', 'y'], axis=1).columns.to_list()
        cutoffs = fcsts['cutoff'].unique()
        for reconcile_fn_args in self.reconcile_fns:
            reconcile_fn, *args = _as_tuple(reconcile_fn_args)
            reconcile_fn_name = _build_forecast_name(reconcile_fn, *args, idx_remove=2)
            has_res = 'residuals' in signature(reconcile_fn).parameters
            for cutoff in cutoffs:
                cutoff_idx = fcsts['cutoff'] == cutoff
                for model_name in model_names:
                    fcsts_model = fcsts.loc[cutoff_idx, model_name].values.reshape(-1, h)
                    if has_res:
                        res_cutoff_idx = res['cutoff'] == cutoff
                        # Pivot to one column per `ds`, then transpose so rows
                        # are observations and columns are series.
                        res_model = res.loc[res_cutoff_idx].pivot(columns='ds', values=model_name).values.T
                        fcsts_reconciled = reconcile_fn(self, fcsts_model, *args, residuals=res_model)
                    else:
                        fcsts_reconciled = reconcile_fn(self, fcsts_model, *args)
                    fcsts.loc[cutoff_idx, f'{model_name}/{reconcile_fn_name}'] = fcsts_reconciled.flatten()
        return fcsts

In [None]:
#hide
from statsforecast.models import naive, auto_arima
# Replace `ds` with consecutive integers (1, 2, ...) in order of first appearance.
ds_int = df[['ds']].drop_duplicates().assign(ds_int = lambda df: np.arange(len(df)) + 1)
df = df.merge(ds_int, how='left', on=['ds']).drop('ds', axis=1)
df = df.rename(columns={'ds_int': 'ds'})
# Hierarchical forecaster on the strictly nested `hiers` structure.
# NOTE(review): the 4 in (auto_arima, 4) is presumably the season length -- confirm.
hier_fcst = HierarchicalStatsForecast(df, 
                                      keys=hiers,
                                      models=[naive, (auto_arima, 4)],
                                      reconcile_fns=[
                                          bottom_up, 
                                          #(min_trace, 'ols'), 
                                          #(min_trace, 'wls_struct'),
                                          #(min_trace, 'wls_var'),
                                          #(min_trace, 'mint_cov'),
                                          #(min_trace, 'mint_shrink'),
                                          (empirical_risk_minimization, 'exact')
                                          #(top_down, 'average_proportions'), 
                                          #(top_down, 'proportion_averages'),

                                      ],
                                      freq='D', 
                                      n_jobs=-1)
# Silence NumPy's invalid-value floating-point warnings during cross-validation.
with np.errstate(invalid='ignore'):
    hier_fcsts = hier_fcst.cross_validation(12, test_size=12)
# bottom_up applied to the naive model should return the same forecasts
pd.testing.assert_series_equal(hier_fcsts['naive'], hier_fcsts['naive/bottom_up'], check_names=False)

In [None]:
# Mean squared error of every base and reconciled forecast column, computed in
# a single cell so that re-running it always starts from `hier_fcsts`.
id_cols = ['ds', 'cutoff', 'y']
squared_errors = hier_fcsts.drop(columns=id_cols).apply(
    lambda fcst: (hier_fcsts['y'] - fcst) ** 2
)
eval_ = (
    squared_errors.mean()
    .rename('StatsForecast')
    .rename_axis('method')
    .reset_index()
)
# Column names look like '<model>/<reconciliation>'; base model columns have no
# '/', so their `reconciliation` entry is left missing.
eval_[['model', 'reconciliation']] = eval_['method'].str.split('/', expand=True)
eval_ = (
    eval_[['model', 'reconciliation', 'StatsForecast']]
    .sort_values(['model', 'reconciliation'])
)
# NOTE(review): disabled comparison column, presumably reference errors from an
# R implementation -- the list length must match the rows of `eval_` to re-enable.
#eval_['R'] = [194000, 74055, 135950, 175946, 80133, 76289, 76289, 76289, 76289, 76289]
eval_