In [None]:
#| default_exp core

# Core Hierarchical 

> Hierarchical Reconciliation using time series in long format and `Pandas`.

In [None]:
#| export
from functools import partial
from inspect import signature
from typing import Callable, Dict, List, Optional

import numpy as np
import pandas as pd

In [None]:
#| hide
from fastcore.test import test_close, test_fail
from nbdev.showdoc import add_docs, show_doc

In [None]:
#| exporti
def _build_fn_name(fn) -> str:
    fn_name = type(fn).__name__
    func_params = fn.__dict__
    func_params = [f'{name}-{value}' for name, value in func_params.items()]
    if func_params:
        fn_name += '_' + '_'.join(func_params)
    return fn_name

In [None]:
#| export
class HierarchicalReconciliation:
    """Hierarchical Reconciliation Class.
    [Source code](https://github.com/dluuo/hierarchicalforecast/blob/main/hierarchicalforecast/core.py).

    **Parameters:**<br>
    `reconcilers`: Reconciliation classes of the `methods` module.<br>
    """
    def __init__(
            self,
            reconcilers: List[Callable] # Reconciliation classes of the `methods` module
        ):
        self.reconcilers = reconcilers

    def reconcile(
            self,
            Y_h: pd.DataFrame, # Base forecasts with columns `ds` and models to reconcile indexed by `unique_id`.
            Y_df: pd.DataFrame, # Training set of base time series with columns `['ds', 'y']` indexed by `unique_id`
                                # If a class of `self.reconcilers` receives
                                # `y_hat_insample`, `Y_df` must include them as columns.
            S: pd.DataFrame,    #  Summing matrix of size `(base, bottom)`.
            tags: Dict[str, np.ndarray] # Each key is a level and its value contains tags associated to that level.
        ):
        """Hierarchical Reconciliation Method.
        [Source code](https://github.com/dluuo/hierarchicalforecast/blob/main/hierarchicalforecast/core.py).

        **Parameters:**<br>
        `Y_h`: Base forecasts with columns `ds` and models to reconcile indexed by `unique_id`.
        Every series must have a forecast for every date in `Y_h`; rows may come in any order.<br>
        `Y_df`: Training set of base time series with columns `['ds', 'y']` indexed by `unique_id`.
        If a class of `self.reconcilers` receives `y_hat_insample`, `Y_df` must include them as columns.<br>
        `S`: Summing matrix of size `(base, bottom)`.<br>
        `tags`: Each key is a level and its value contains tags associated to that level.<br>

        **Returns:**<br>
        A copy of `Y_h` with one extra column named `{model}/{reconciler name}`
        for every combination of model column and reconciler.<br>

        **Raises:**<br>
        `ValueError`: if some series in `Y_h` lacks a forecast for some date, or if a
        reconciler returns a number of values different from the number of rows of `Y_h`.<br>
        """
        drop_cols = ['ds', 'y'] if 'y' in Y_h.columns else ['ds']
        model_names = Y_h.drop(columns=drop_cols).columns.to_list()
        uids = Y_h.index.unique()
        # same order of Y_h to prevent errors
        S_ = S.loc[uids]
        common_vals = dict(
            y_insample=Y_df.pivot(columns='ds', values='y').loc[uids].values.astype(np.float32),
            S=S_.values.astype(np.float32),
            idx_bottom=S_.index.get_indexer(S.columns),
            levels={key: S_.index.get_indexer(val) for key, val in tags.items()},
        )
        # Row of the (series x date) forecast matrix that each row of `Y_h` belongs to.
        row_pos = uids.get_indexer(Y_h.index)
        ds_values = pd.Index(Y_h['ds'])
        fcsts = Y_h.copy()
        for reconcile_fn in self.reconcilers:
            reconcile_fn_name = _build_fn_name(reconcile_fn)
            # The signature does not depend on the model: inspect it once per reconciler.
            fn_params = signature(reconcile_fn).parameters
            has_fitted = 'y_hat_insample' in fn_params
            shared_kwargs = {key: common_vals[key] for key in fn_params if key in common_vals}
            for model_name in model_names:
                # Remember: pivot sorts uid (and the `ds` columns), hence the `.loc[uids]`
                y_hat_wide = Y_h.pivot(columns='ds', values=model_name).loc[uids]
                if y_hat_wide.size != len(Y_h):
                    raise ValueError(
                        'Every series in `Y_h` must have a forecast for every date: expected '
                        f'{y_hat_wide.size} rows ({y_hat_wide.shape[0]} series x '
                        f'{y_hat_wide.shape[1]} dates) but got {len(Y_h)}.'
                    )
                kwargs = dict(shared_kwargs)
                if has_fitted:
                    if model_name in Y_df:
                        y_hat_insample = Y_df.pivot(columns='ds', values=model_name).loc[uids].values
                        kwargs['y_hat_insample'] = y_hat_insample.astype(np.float32)
                    else:
                        # some methods have the residuals argument
                        # but they don't need them
                        # e.g. MinTrace(method='ols')
                        kwargs['y_hat_insample'] = None
                fcsts_model = reconcile_fn(y_hat=y_hat_wide.values, **kwargs)
                fcsts_flat = np.asarray(fcsts_model).flatten()
                if fcsts_flat.size != len(Y_h):
                    raise ValueError(
                        f'`{reconcile_fn_name}` returned {fcsts_flat.size} values '
                        f'for {len(Y_h)} rows of `Y_h`.'
                    )
                # Map every row of `Y_h` to its (series, date) cell of the row-major
                # flattened matrix, so the result is aligned whatever the row order of
                # `Y_h`. For rows grouped by series and sorted by date this is the
                # identity mapping.
                col_pos = y_hat_wide.columns.get_indexer(ds_values)
                flat_pos = row_pos * y_hat_wide.shape[1] + col_pos
                fcsts[f'{model_name}/{reconcile_fn_name}'] = fcsts_flat[flat_pos]
        return fcsts

In [None]:
#| hide
# Register short docstrings for the class and for its `reconcile` method.
# NOTE(review): presumably these replace the longer docstrings written in the class body — confirm against `add_docs`.
add_docs(HierarchicalReconciliation, "Apply distinct reconciliation methods to a pandas dataframe.",
         reconcile="Reconcile using distinct approaches.")

In [None]:
# Render the documentation of the class.
show_doc(HierarchicalReconciliation)

In [None]:
# Render the documentation of the `reconcile` method.
show_doc(HierarchicalReconciliation.reconcile)

In [None]:
#| hide
from hierarchicalforecast.methods import (
    BottomUp, TopDown, MiddleOut, MinTrace, ERM,
)
from hierarchicalforecast.utils import aggregate

In [None]:
#| hide
# Tourism dataset: rename its columns to the `y` / `ds` names used in this notebook
# and add a constant `Country` column to act as the top level.
df = (
    pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')
    .rename(columns={'Trips': 'y', 'Quarter': 'ds'})
)
df.insert(0, 'Country', 'Australia')

# Grouped (non strictly hierarchical) structure: `Purpose` cuts across the
# geographical levels.
hiers_grouped = [
    ['Country'],
    ['Country', 'State'],
    ['Country', 'Purpose'],
    ['Country', 'State', 'Region'],
    ['Country', 'State', 'Purpose'],
    ['Country', 'State', 'Region', 'Purpose'],
]
# Strictly hierarchical structure: each level refines the previous one.
hiers_strictly = [
    ['Country'],
    ['Country', 'State'],
    ['Country', 'State', 'Region'],
]

# Aggregated long-format frame, summing matrix and level tags for each structure.
hier_grouped_df, S_grouped, tags_grouped = aggregate(df, hiers_grouped)
hier_strict_df, S_strict, tags_strict = aggregate(df, hiers_strictly)

In [None]:
#| hide
hier_grouped_df['y_model'] = hier_grouped_df['y']
# we should be able to recover y using the methods
# hold out the last 12 observations of every series as the forecast horizon
hier_grouped_df_h = hier_grouped_df.groupby('unique_id').tail(12)
ds_h = hier_grouped_df_h['ds'].unique()
# `.copy()` so the filtered in-sample frame can be modified below
# without triggering pandas' SettingWithCopyWarning
hier_grouped_df = hier_grouped_df.query('~(ds in @ds_h)').copy()
# adding noise to `y_model` to avoid perfectly fitted values;
# the generator is seeded so the test is reproducible
rng = np.random.default_rng(0)
hier_grouped_df['y_model'] += rng.uniform(-1, 1, len(hier_grouped_df))

# hierarchical reconciliation
hrec = HierarchicalReconciliation(reconcilers=[
    # these methods should reconstruct the original y
    BottomUp(),
    MinTrace(method='ols'),
    MinTrace(method='wls_struct'),
    MinTrace(method='wls_var'),
    MinTrace(method='mint_shrink'),
    # ERM recovers but needs bigger eps
    #ERM(method='reg_bu', lambda_reg=None),
])
reconciled = hrec.reconcile(hier_grouped_df_h, hier_grouped_df, S_grouped, tags_grouped)
for model in reconciled.drop(columns=['ds', 'y']).columns:
    # looser tolerance kept for ERM in case it is enabled above
    eps = 3 if 'ERM' in model else 1e-5
    test_close(reconciled['y'], reconciled[model], eps=eps)

In [None]:
#| hide
# TopDown is expected to raise when the structure is not strictly
# hierarchical, which is the case of the grouped data.
hrec = HierarchicalReconciliation(reconcilers=[TopDown(method='average_proportions')])
test_fail(
    hrec.reconcile,
    args=(hier_grouped_df_h, hier_grouped_df, S_grouped, tags_grouped),
    contains='requires strictly hierarchical structures',
)

In [None]:
#| hide
# methods should work with
# strictly hierarchical structures
hier_strict_df['y_model'] = hier_strict_df['y']
# we should be able to recover y using the methods
# hold out the last 12 observations of every series as the forecast horizon
hier_strict_df_h = hier_strict_df.groupby('unique_id').tail(12)
ds_h = hier_strict_df_h['ds'].unique()
# `.copy()` so the filtered in-sample frame can be modified below
# without triggering pandas' SettingWithCopyWarning
hier_strict_df = hier_strict_df.query('~(ds in @ds_h)').copy()
# adding noise to `y_model` to avoid perfectly fitted values;
# the generator is seeded so the test is reproducible
rng = np.random.default_rng(0)
hier_strict_df['y_model'] += rng.uniform(-1, 1, len(hier_strict_df))

middle_out_level = 'Country/State'
# hierarchical reconciliation
hrec = HierarchicalReconciliation(reconcilers=[
    # these methods should reconstruct the original y
    BottomUp(),
    MinTrace(method='ols'),
    MinTrace(method='wls_struct'),
    MinTrace(method='wls_var'),
    MinTrace(method='mint_shrink'),
    # top down doesnt recover the original y
    # but it should recover the total level
    TopDown(method='forecast_proportions'),
    TopDown(method='average_proportions'),
    TopDown(method='proportion_averages'),
    # middle out doesnt recover the original y
    # but it should recover the middle level
    MiddleOut(level=middle_out_level, top_down_method='forecast_proportions'),
    MiddleOut(level=middle_out_level, top_down_method='average_proportions'),
    MiddleOut(level=middle_out_level, top_down_method='proportion_averages'),
    # ERM recovers but needs bigger eps
    #ERM(method='reg_bu', lambda_reg=None),
])
reconciled = hrec.reconcile(hier_strict_df_h, hier_strict_df, S_strict, tags_strict)
for model in reconciled.drop(columns=['ds', 'y']).columns:
    # looser tolerance kept for ERM in case it is enabled above
    eps = 3 if 'ERM' in model else 1e-5
    # level whose forecasts the method starts from, and therefore must reproduce
    if 'TopDown' in model:
        anchor_level = 'Country'
    elif 'MiddleOut' in model:
        anchor_level = middle_out_level
    else:
        # every other method should recover the original y
        test_close(reconciled['y'], reconciled[model], eps)
        continue
    if 'forecast_proportions' in model:
        test_close(reconciled['y'], reconciled[model], eps)
    else:
        # the other proportion methods dont recover the original y
        test_fail(
            test_close,
            args=(reconciled['y'], reconciled[model], eps),
        )
    # but they should recover the level they start from
    anchor_tags = tags_strict[anchor_level]
    test_close(reconciled['y'].loc[anchor_tags],
               reconciled[model].loc[anchor_tags], 1e-2)

In [None]:
#| hide
# Methods whose signature includes the residuals argument but that do not
# need it must still work when `Y_df` has no fitted values for the model.
hrec = HierarchicalReconciliation(reconcilers=[MinTrace(method='ols')])
reconciled = hrec.reconcile(
    Y_h=hier_grouped_df_h,
    Y_df=hier_grouped_df.drop(columns='y_model'),
    S=S_grouped,
    tags=tags_grouped,
)
for model in reconciled.columns.drop(['ds', 'y']):
    test_close(reconciled['y'], reconciled[model])

In [None]:
#| hide
# Inspect the reconciled rows whose `unique_id` belongs to the `Country/State` level.
reconciled.loc[tags_grouped['Country/State']]