# HierE2E Favorita Baseline

This notebook runs and evaluates HierE2E's baseline method predictions for the Favorita dataset.

- It reads a preprocessed Favorita dataset.
- It fits a HierE2E model.
- It evaluates the HierE2E forecasts with sCRPS and MSSE.

## References
- [GluonTS, DeepVARHierarchicalEstimator](https://ts.gluon.ai/stable/api/gluonts/gluonts.mx.model.deepvar_hierarchical.html?highlight=deepvarhierarchicalestimator#gluonts.mx.model.deepvar_hierarchical.DeepVARHierarchicalEstimator)
- [Syama Sundar Rangapuram, Lucien D Werner, Konstantinos Benidis, Pedro Mercado, Jan Gasthaus, Tim Januschowski. (2021). End-to-End Learning of Coherent Probabilistic Forecasts for Hierarchical Time Series. Proceedings of the 38th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v139/rangapuram21a.html)


<br>
You can run these experiments on a GPU with Google Colab.

<a href="https://colab.research.google.com/github/Nixtla/hierarchicalforecast/blob/main/experiments/hierarchical_baselines/nbs/run_favorita_hiere2e.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install mxnet-cu112

In [2]:
import mxnet as mx

# The estimator below is trained on `mx.context.gpu()`, so fail early with a
# readable message when MXNet cannot see any GPU.
assert mx.context.num_gpus() > 0, \
    "No GPU visible to MXNet; select a GPU runtime before running this notebook."

In [3]:
%%capture
!pip install gluonts
!pip install "gluonts[mxnet,pro]"
!pip install git+https://github.com/Nixtla/hierarchicalforecast.git
!pip install git+https://github.com/Nixtla/datasetsforecast.git@feat/favorita_dataset

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from gluonts.mx.trainer import Trainer
from gluonts.dataset.hierarchical import HierarchicalTimeSeries
from gluonts.dataset.common import Dataset, ListDataset
from gluonts.mx.model.deepvar_hierarchical import DeepVARHierarchicalEstimator

from hierarchicalforecast.evaluation import scaled_crps, rel_mse, msse
from datasetsforecast.favorita import FavoritaData, FavoritaInfo

import warnings
# Avoid pandas fragmentation warning and positive definite warning
warnings.filterwarnings("ignore")

In [5]:
class FavoritaHierarchicalDataset(object):
    """Loading, processing and evaluation helpers for hierarchical Favorita data.

    Every member is a static method; the class only acts as a namespace.
    """

    available_datasets = ['Favorita200', 'Favorita500', 'FavoritaComplete']

    @staticmethod
    def _get_hierarchical_scrps(hier_idxs, Y, Yq_hat, q_to_pred):
        """Compute the scaled CRPS once per group of series.

        Parameters
        ----------
        hier_idxs : iterable of index arrays, one per hierarchy level, used to
            select positions along the second axis of `Y` and `Yq_hat`.
        Y : array indexed as `Y[:, idxs, :]`.
        Yq_hat : array indexed as `Yq_hat[:, idxs, :, :]`
            (presumably [n_items, n_series, n_time, n_quants] -- TODO confirm).
        q_to_pred : quantiles forwarded unchanged to `scaled_crps`.

        Returns
        -------
        list with one `scaled_crps` result per entry of `hier_idxs`.
        """
        scrps_list = []
        for idxs in hier_idxs:
            y      = Y[:, idxs, :]
            yq_hat = Yq_hat[:, idxs, :, :]
            scrps_list.append(scaled_crps(y, yq_hat, q_to_pred))
        return scrps_list

    @staticmethod
    def _get_hierarchical_msse(hier_idxs, Y, Y_hat, Y_train):
        """Compute the MSSE once per group of series.

        `Y`, `Y_hat` and `Y_train` are all sliced along their second axis with
        each entry of `hier_idxs` before being handed to `msse`.

        Returns
        -------
        list with one `msse` result per entry of `hier_idxs`.
        """
        msse_list = []
        for idxs in hier_idxs:
            y       = Y[:, idxs, :]
            y_hat   = Y_hat[:, idxs, :]
            y_train = Y_train[:, idxs, :]
            msse_list.append(msse(y, y_hat, y_train))
        return msse_list

    @staticmethod
    def _sort_hier_df(Y_df, S_df):
        """Return `Y_df` sorted so that `unique_id` follows the order of `S_df.index`.

        `unique_id` is converted to a categorical whose categories are
        `S_df.index`, then rows are sorted by ['unique_id', 'ds'].  Ids that do
        not appear in `S_df.index` become missing values in the result.

        The input frame is left untouched: the categorical column is attached
        to a new frame instead of being written back into the caller's object.
        """
        ordered_ids = Y_df['unique_id'].astype('category').cat.set_categories(S_df.index)
        return Y_df.assign(unique_id=ordered_ids).sort_values(by=['unique_id', 'ds'])

    @staticmethod
    def _nonzero_indexes_by_row(M):
        """Return, for every row of the 2-D array `M`, the column positions of its nonzero entries."""
        return [np.nonzero(M[row,:])[0] for row in range(len(M))]

    @staticmethod
    def load_item_data(item_id, dataset='Favorita200', directory='./data', seed=None):
        """Load the hierarchy of a single item and the index helpers used for evaluation.

        Parameters
        ----------
        item_id : value matched against the `item_id` column of the loaded frame.
        dataset : key into `FavoritaInfo`, also passed as `group` to `FavoritaData.load`.
        directory : directory passed to `FavoritaData.load`.
        seed : optional seed for the noise added to `y`.  With `None` (default)
            the global NumPy random state is used, as before; otherwise a
            dedicated generator seeded with `seed` makes the noise reproducible.

        Returns
        -------
        dict with keys `Y_df`, `S_df`, `tags`, `hier_idxs`, `hier_levels`,
        `hier_linked_idxs`, `horizon`, `freq` and `seasonality` (the last three
        are read from `FavoritaInfo[dataset]`).
        """
        # Load data
        data_info = FavoritaInfo[dataset]
        Y_df, S_df, tags = FavoritaData.load(directory=directory,
                                             group=dataset)

        # Parse and augment data
        # + hack geographic hier_id to treat it as unique_id
        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
        Y_df = Y_df[Y_df.item_id==item_id]
        Y_df = Y_df.rename(columns={'hier_id': 'unique_id'})
        Y_df = FavoritaHierarchicalDataset._sort_hier_df(Y_df=Y_df, S_df=S_df)

        # Obtain indexes for plots and evaluation:
        # 'Overall' covers every row of S_df, then one index array per tag level.
        hier_levels = ['Overall'] + list(tags.keys())
        hier_idxs = [np.arange(len(S_df))] +\
            [S_df.index.get_indexer(tags[level]) for level in tags.keys()]
        # For every column of S_df: the row positions holding a nonzero entry.
        hier_linked_idxs = FavoritaHierarchicalDataset._nonzero_indexes_by_row(S_df.values.T)

        # MinT along other methods require a positive definite covariance matrix
        # for the residuals, when dealing with 0s as residuals the methods break
        # data is augmented with minimal normal noise to avoid this error.
        if seed is None:
            noise = np.random.normal(loc=0.0, scale=0.01, size=len(Y_df))
        else:
            noise = np.random.default_rng(seed).normal(loc=0.0, scale=0.01, size=len(Y_df))
        Y_df['y'] = Y_df['y'] + noise

        # Final output
        data = dict(Y_df=Y_df, S_df=S_df, tags=tags,
                    # Hierarchical idxs
                    hier_idxs=hier_idxs,
                    hier_levels=hier_levels,
                    hier_linked_idxs=hier_linked_idxs,
                    # Dataset Properties
                    horizon=data_info.horizon,
                    freq=data_info.freq,
                    seasonality=data_info.seasonality)
        return data

    @staticmethod
    def load_all_data(dataset='Favorita200', directory='./data'):
        """Placeholder: prints 'Hello' and returns None.

        TODO: not implemented yet -- neither `dataset` nor `directory` is used.
        """
        print('Hello')

In [6]:
# Load the hierarchy of one example item from the Favorita200 subset
data = FavoritaHierarchicalDataset.load_item_data(
    item_id=1916577,
    dataset='Favorita200',
    directory='./data/favorita',
)

100%|██████████| 930M/930M [00:31<00:00, 29.5MiB/s]


saved train.csv to train.feather for fast access


In [7]:
# This assertion always fails (1 < 0 is False), so running the cell raises AssertionError.
# NOTE(review): presumably a deliberate stop so "Run All" halts before the cells
# below -- confirm, and delete this cell if the following cells are meant to run.
assert 1<0

AssertionError: ignored

In [8]:
# Dataset selection used by this cell and the ones that follow
dataset = 'Favorita200'
directory = './data/favorita'
data_info = FavoritaInfo[dataset]

# Load the series of every item together with S_df and the level tags.
# (`FavoritaHierarchicalDataset.load_all_data` is still a placeholder that
# returns None, so the data is loaded directly and `data` is not rebound.)
Y_df, S_df, tags = FavoritaData.load(directory=directory, group=dataset)

# One identifier per (hier_id, item_id) pair
Y_df['unique_id'] = Y_df['hier_id'] + '_' + Y_df['item_id'].astype(str)

Hello


In [9]:
is_validation = False
freq, horizon = data_info.freq, data_info.horizon
dates = Y_df.ds.unique() # already sorted

n_items = len(Y_df.item_id.unique())
n_hier = len(Y_df.hier_id.unique())
n_dates = len(dates)

# One [n_hier, n_dates] matrix per item.
# NOTE(review): the reshape relies on Y_df rows being ordered by item, then
# hier_id, then date, and on hier_id order matching the rows of S_df -- confirm.
Y = np.reshape(Y_df.y.values, (n_items, n_hier, n_dates))
S = S_df.values

# Number of trailing time steps withheld from the train / test targets:
# validation holds out two horizons from train and one from test,
# the test setting holds out one horizon from train and nothing from test.
if is_validation:
    valid_plus_test_length, valid_length = 2 * horizon, horizon
else:
    valid_plus_test_length, valid_length = horizon, 0

target_train = Y[:, :, :n_dates - valid_plus_test_length]
target_test = Y[:, :, :n_dates - valid_length]


def _build_list_dataset(target):
    # Wrap each item's multivariate target into a GluonTS entry
    entries = []
    for item_target in target:
        entries.append({"start": dates[0], "item_id": "all_items",
                        "target": item_target})
    return ListDataset(entries, freq=freq, one_dim_target=False)


train_dataset = _build_list_dataset(target_train)
test_dataset = _build_list_dataset(target_test)

In [10]:
# Hyper-parameters, described by the original author as the optimal ones
# reported in the ICML 2021 code
config = dict(
    # training loop
    epochs=1,
    num_batches_per_epoch=50,
    scaling=True,
    pick_incomplete=False,
    batch_size=4,
    num_parallel_samples=200,
    hybridize=False,
    learning_rate=0.001,
    context_length=36,
    # model
    rank=0,
    assert_reconciliation=False,
    num_deep_models=1,
    num_layers=2,
    num_cells=40,
    # coherence and loss
    coherent_train_samples=True,
    coherent_pred_samples=True,
    likelihood_weight=1.0,
    CRPS_weight=0.0,
    num_samples_for_loss=50,
    sample_LH=True,
    seq_axis=[1],
    warmstart_epoch_frac=0.0,
)

In [12]:
# Training-loop options come from `config`; training runs on the GPU context
trainer = Trainer(
    ctx=mx.context.gpu(),
    **{key: config[key] for key in
       ('epochs', 'num_batches_per_epoch', 'hybridize', 'learning_rate')},
)

# Estimator hyper-parameters forwarded from `config` under the same names
estimator_keys = (
    'scaling', 'pick_incomplete', 'batch_size', 'num_parallel_samples',
    'context_length', 'num_layers', 'num_cells',
    'coherent_train_samples', 'coherent_pred_samples',
    'likelihood_weight', 'CRPS_weight', 'num_samples_for_loss',
    'sample_LH', 'seq_axis', 'warmstart_epoch_frac',
)

estimator = DeepVARHierarchicalEstimator(
    freq=freq,  # original author's note: set TourismSmall freq='M', 'Q' freq fails
    prediction_length=horizon,
    target_dim=n_hier,
    S=S,
    trainer=trainer,
    **{key: config[key] for key in estimator_keys},
)

# Fit on the training targets, then request forecasts from the same dataset
predictor = estimator.train(train_dataset)
forecast_it = predictor.predict(train_dataset)

100%|██████████| 50/50 [00:16<00:00,  2.98it/s, epoch=1/1, avg_epoch_loss=301]


In [None]:
# This assertion always fails (1 < 0 is False), so running the cell raises AssertionError.
# NOTE(review): presumably a deliberate stop that keeps "Run All" away from the
# cells below -- confirm before removing it.
assert 1<0

In [None]:
# Inputs of this scratch cell: the same directory and example item that the
# earlier `load_item_data` call uses. `dataset` is expected from a previous cell.
directory = './data/favorita'
item_id = 1916577

# Load data
data_info = FavoritaInfo[dataset]
Y_df, S_df, tags = FavoritaData.load(directory=directory,
                                      group=dataset)

# Parse and augment data
# + hack geographic hier_id to treat it as unique_id
Y_df['ds'] = pd.to_datetime(Y_df['ds'])
Y_df = Y_df[Y_df.item_id==item_id]
Y_df = Y_df.rename(columns={'hier_id': 'unique_id'})
Y_df = FavoritaHierarchicalDataset._sort_hier_df(Y_df=Y_df, S_df=S_df)

In [None]:
def run_baselines(intervals_method, item_id=1916577, dataset='Favorita200', verbose=False, seed=0):
    """Fit base forecasts for one item, reconcile them and return the results.

    Steps: load the item's hierarchy, hold out the last `horizon` observations
    of every series as test data, fit (or read cached) AutoARIMA forecasts,
    reconcile them with `HierarchicalReconciliation.bootstrap_reconcile` and
    write the reconciled forecasts to `./data/{dataset}/{item_id}/`.

    Parameters
    ----------
    intervals_method : forwarded to `bootstrap_reconcile`; also names the output csv.
    item_id : item whose hierarchy is loaded.
    dataset : Favorita subset name.
    verbose : forwarded to every `CodeTimer`.
    seed : seeds NumPy's global random state before the data is loaded, which
        makes the noise added by `load_item_data` reproducible.

    Returns
    -------
    (Y_rec_df, Y_test_df, Y_train_df)

    NOTE(review): `CodeTimer`, `StatsForecast`, `AutoARIMA`, `Naive`,
    `is_strictly_hierarchical`, the reconcilers, `HierarchicalReconciliation`
    and the global `LEVEL` must already be in scope; the notebook's import cell
    does not provide them.
    """
    # Local import: `os` is not imported by the notebook's import cell
    import os

    np.random.seed(seed)

    with CodeTimer('Read and Parse data   ', verbose):
        data = FavoritaHierarchicalDataset.load_item_data(item_id=item_id,
                                                     dataset=dataset, directory = './data/favorita')
        Y_df = data['Y_df'][["unique_id", 'ds', 'y']]
        S_df, tags = data['S_df'], data['tags']
        horizon = data['horizon']
        seasonality = data['seasonality']
        freq = data['freq']

        # Train/Test Splits: the last `horizon` rows of each series are the test set
        Y_test_df  = Y_df.groupby('unique_id').tail(horizon)
        Y_train_df = Y_df.drop(Y_test_df.index)
        Y_test_df  = Y_test_df.set_index('unique_id')
        Y_train_df = Y_train_df.set_index('unique_id')

        dataset_str = f'{dataset} item_id={item_id}, h={horizon} '
        dataset_str += f'n_series={len(S_df)}, n_bottom={len(S_df.columns)} \n'
        dataset_str += f'test ds=[{min(Y_test_df.ds), max(Y_test_df.ds)}] '
        print(dataset_str)

    with CodeTimer('Fit/Predict Model\t  ', verbose):
        # Read to avoid unnecessary AutoARIMA computation
        item_path = f'./data/{dataset}/{item_id}'
        # Creates the item folder and every missing parent folder
        os.makedirs(item_path, exist_ok=True)
        yhat_file = f'{item_path}/Y_hat.csv'
        yfitted_file = f'{item_path}/Y_fitted.csv'
        yrec_file = f'{item_path}/{intervals_method}_rec.csv'

        # Both cached files are read below, so both must exist to use the cache
        if os.path.exists(yhat_file) and os.path.exists(yfitted_file):
            Y_hat_df = pd.read_csv(yhat_file)
            Y_fitted_df = pd.read_csv(yfitted_file)

        else:
            # NOTE(review): `fallback_model` is given a list here; check against
            # the StatsForecast documentation whether a single model is expected.
            fcst = StatsForecast(
                df=Y_train_df,
                models=[AutoARIMA(season_length=seasonality)],
                fallback_model=[Naive()],
                freq=freq,
                n_jobs=-1
            )
            Y_hat_df = fcst.forecast(h=horizon, fitted=True, level=LEVEL)
            Y_fitted_df = fcst.forecast_fitted_values()

            Y_hat_df = Y_hat_df.reset_index()
            Y_fitted_df = Y_fitted_df.reset_index()
            Y_hat_df.to_csv(yhat_file, index=False)
            Y_fitted_df.to_csv(yfitted_file, index=False)

        Y_hat_df = Y_hat_df.set_index('unique_id')
        Y_fitted_df = Y_fitted_df.set_index('unique_id')

    with CodeTimer('Reconcile Predictions ', verbose):
        # The TopDown reconcilers are only included for strictly hierarchical structures
        if is_strictly_hierarchical(S=S_df.values.astype(np.float32),
            tags={key: S_df.index.get_indexer(val) for key, val in tags.items()}):
            reconcilers = [
                BottomUp(),
                TopDown(method='average_proportions'),
                TopDown(method='proportion_averages'),
                MinTrace(method='ols'),
                MinTrace(method='mint_shrink', mint_shr_ridge=1e-6),
                #ERM(method='reg_bu', lambda_reg=100) # Extremely inefficient
                ERM(method='closed')
            ]
        else:
            reconcilers = [
                BottomUp(),
                MinTrace(method='ols'),
                MinTrace(method='mint_shrink', mint_shr_ridge=1e-6),
                #ERM(method='reg_bu', lambda_reg=100) # Extremely inefficient
                ERM(method='closed')
            ]

        hrec = HierarchicalReconciliation(reconcilers=reconcilers)
        Y_rec_df = hrec.bootstrap_reconcile(Y_hat_df=Y_hat_df,
                                            Y_df=Y_fitted_df,
                                            S_df=S_df, tags=tags,
                                            level=LEVEL,
                                            intervals_method=intervals_method,
                                            num_samples=10, num_seeds=10)

        # Matching Y_test/Y_rec/S index ordering
        Y_test_df = Y_test_df.reset_index()
        Y_test_df.unique_id = Y_test_df.unique_id.astype('category')
        Y_test_df.unique_id = Y_test_df.unique_id.cat.set_categories(S_df.index)
        Y_test_df = Y_test_df.sort_values(by=['unique_id', 'ds'])

        Y_rec_df = Y_rec_df.reset_index()
        Y_rec_df.unique_id = Y_rec_df.unique_id.astype('category')
        Y_rec_df.unique_id = Y_rec_df.unique_id.cat.set_categories(S_df.index)
        Y_rec_df = Y_rec_df.sort_values(by=['seed', 'unique_id', 'ds'])

        Y_rec_df.to_csv(yrec_file, index=False)

        return Y_rec_df, Y_test_df, Y_train_df

In [None]:
# Parse execution parameters
verbose = True
intervals_method = 'bootstrap'
dataset = 'Favorita200'

assert intervals_method in ['bootstrap', 'normality', 'permbu'], \
    "Select `--intervals_method` from ['bootstrap', 'normality', 'permbu']"

available_datasets = ['Favorita200', 'Favorita500', 'FavoritaComplete']
assert dataset in available_datasets, \
    "Select `--dataset` from ['Favorita200', 'Favorita500', 'FavoritaComplete']"

# Prediction-interval levels 0, 2, ..., 98 and the matching sorted quantiles:
# each level lv contributes the pair (50 - lv/2, 50 + lv/2) percent.
LEVEL = np.arange(0, 100, 2)
qs = [[50-lv/2, 50+lv/2] for lv in LEVEL]
QUANTILES = np.sort(np.concatenate(qs)/100)

# Run experiments
# Load the subset selected (and validated) above rather than a fixed group name
Y_all_df, S_df, tags = FavoritaData.load(directory='./data/favorita/', group=dataset)
items = Y_all_df.item_id.unique()

print('\n')
print(f'{intervals_method.upper()} {dataset} statistical baselines evaluation \n')
# Only two example items are evaluated here; loop over `items` to run them all.
for item_id in [112830, 1916577]:
    Y_rec_df, Y_test_df, Y_train_df = run_baselines(item_id=item_id, dataset=dataset,
                                                    intervals_method=intervals_method,
                                                    verbose=verbose)
    print('\n')