hierarchicalforecast/methods.py

# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/methods.ipynb.

# %% auto 0
__all__ = ['BottomUp', 'BottomUpSparse', 'TopDown', 'MiddleOut', 'MinTrace', 'MinTraceSparse', 'OptimalCombination', 'ERM']

# %% ../nbs/methods.ipynb 3
import warnings
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from typing import Callable, Dict, List, Optional, Union

import numpy as np
from numba import njit
from quadprog import solve_qp
from scipy import sparse

# %% ../nbs/methods.ipynb 4
from .utils import is_strictly_hierarchical, cov2corr
from .probabilistic_methods import Normality, Bootstrap, PERMBU

# %% ../nbs/methods.ipynb 6
class HReconciler:
    """Base class with the machinery shared by the reconciliation methods.

    Subclasses store the projection matrix in `self.P` and the (optional)
    probabilistic sampler in `self.sampler` during `fit`, then mark the
    instance as fitted. `predict` and `sample` rely on those attributes.
    """
    # Set to True by a subclass' `fit` once `P` and `sampler` are available.
    fitted = False
    # Overridden with True by reconcilers that operate on sparse matrices.
    is_sparse_method = False

    def _get_sampler(self,
                     intervals_method,
                     S, P, y_hat,
                     y_insample, y_hat_insample,
                     W, sigmah, num_samples, seed, tags):
        """Instantiate the coherent sampler selected by `intervals_method`.

        Recognised values are `normality`, `permbu` and `bootstrap`; any other
        value (including `None`) yields `None`, i.e. no probabilistic output.
        """
        if intervals_method == 'normality':
            return Normality(S=S, P=P,
                             y_hat=y_hat,
                             W=W, sigmah=sigmah,
                             seed=seed)

        if intervals_method == 'permbu':
            # PERMBU is handed the reconciled mean instead of the base forecasts.
            reconciled_mean = (S @ (P @ y_hat))
            return PERMBU(S=S, P=P,
                          y_hat=reconciled_mean,
                          tags=tags,
                          y_insample=y_insample,
                          y_hat_insample=y_hat_insample,
                          sigmah=sigmah,
                          num_samples=num_samples,
                          seed=seed)

        if intervals_method == 'bootstrap':
            return Bootstrap(S=S, P=P,
                             y_hat=y_hat,
                             y_insample=y_insample,
                             y_hat_insample=y_hat_insample,
                             num_samples=num_samples,
                             seed=seed)

        return None

    def _reconcile(self,
                   S: np.ndarray,
                   P: np.ndarray,
                   y_hat: np.ndarray,
                   SP: np.ndarray = None,
                   level: Optional[List[int]] = None,
                   sampler: Optional[Callable] = None):
        """Compute the reconciled mean and, when possible, its quantiles.

        The mean is `S @ (P @ y_hat)`. Quantiles are only added when both
        `level` and `sampler` are given; `SP` is accepted but not used here.
        """
        reconciled = {'mean': (S @ (P @ y_hat))}

        # Mean-only reconciliation: nothing probabilistic was requested or is available.
        if level is None or sampler is None:
            return reconciled

        # Every confidence level contributes its lower and upper quantile.
        bounds = []
        for lv in level:
            lower = (100 - lv) / 200
            bounds.append([lower, lower + lv / 100])
        quantiles = np.sort(np.concatenate(bounds))

        # The sampler extends the results dictionary with the quantile entries.
        return sampler.get_prediction_quantiles(reconciled, quantiles)

    def predict(self,
                S: np.ndarray,
                y_hat: np.ndarray,
                level: Optional[List[int]] = None):
        """Predict using reconciler.

        Reconciles `y_hat` with the projection matrix and sampler stored by a
        previous call to `fit`.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `level`: float list 0-100, confidence levels for prediction intervals.<br>

        **Returns:**<br>
        `y_tilde`: Reconciliated predictions.

        **Raises:**<br>
        `Exception`: if the instance has not been fitted.
        """
        if not self.fitted:
            raise Exception("This model instance is not fitted yet, Call fit method.")

        return self._reconcile(S=S, P=self.P, y_hat=y_hat,
                               sampler=self.sampler, level=level)

    def sample(self,
               num_samples: int):
        """Sample probabilistic coherent distribution.

        Draws samples from the sampler created during `fit`, which is chosen
        through `intervals_method`. Currently available: `normality`,
        `bootstrap`, `permbu`.

        **Parameters:**<br>
        `num_samples`: int, number of samples generated from coherent distribution.<br>

        **Returns:**<br>
        `samples`: Coherent samples of size (`num_series`, `horizon`, `num_samples`).

        **Raises:**<br>
        `Exception`: if the instance is not fitted or was fitted without a sampler.
        """
        if not self.fitted:
            raise Exception("This model instance is not fitted yet, Call fit method.")
        if self.sampler is None:
            raise Exception("This model instance does not have sampler. Call fit with `intervals_method`.")

        return self.sampler.get_samples(num_samples=num_samples)

# %% ../nbs/methods.ipynb 8
class BottomUp(HReconciler):
    r"""Bottom Up Reconciliation Class.

    The most basic hierarchical reconciliation strategy: the bottom-level
    forecasts are kept and every aggregate is rebuilt by summing them. It was
    proposed for the first time by Orcutt in 1968.
    The corresponding hierarchical "projection" matrix is defined as:
    $$\mathbf{P}_{\text{BU}} = [\mathbf{0}_{\mathrm{[b],[a]}}\;|\;\mathbf{I}_{\mathrm{[b][b]}}]$$

    **Parameters:**<br>
    None

    **References:**<br>
    - [Orcutt, G.H., Watts, H.W., & Edwards, J.B.(1968). "Data aggregation and information loss". The American
    Economic Review, 58 , 773(787)](http://www.jstor.org/stable/1815532).
    """
    # Bottom-up does not need insample values to build its projection matrix.
    insample = False

    def _get_PW_matrices(self, S, idx_bottom):
        """Return the bottom-up projection `P` and an identity weight matrix `W`."""
        n_series, _ = S.shape
        # Copy only the bottom rows of S; every aggregate row stays at zero.
        selector = np.zeros_like(S, dtype=np.float32)
        selector[idx_bottom] = S[idx_bottom]
        return selector.T, np.eye(n_series, dtype=np.float32)

    def fit(self,
            S: np.ndarray,
            y_hat: np.ndarray,
            idx_bottom: np.ndarray,
            y_insample: Optional[np.ndarray] = None,
            y_hat_insample: Optional[np.ndarray] = None,
            sigmah: Optional[np.ndarray] = None,
            intervals_method: Optional[str] = None,
            num_samples: Optional[int] = None,
            seed: Optional[int] = None,            
            tags: Dict[str, np.ndarray] = None):
        """Bottom Up Fit Method.

        Stores the projection matrix, the weight matrix and the sampler on the
        instance and marks it as fitted.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).<br>
        `y_insample`, `y_hat_insample`, `sigmah`, `num_samples`, `seed`, `tags`: forwarded
        unchanged to the sampler constructor.<br>
        `intervals_method`: Sampler for prediction intervals, one of `normality`, `bootstrap`, `permbu`.<br>

        **Returns:**<br>
        `self`: object, fitted reconciler.
        """
        self.P, self.W = self._get_PW_matrices(S=S, idx_bottom=idx_bottom)
        sampler_kwargs = dict(S=S,
                              P=self.P,
                              W=self.W,
                              y_hat=y_hat,
                              y_insample=y_insample,
                              y_hat_insample=y_hat_insample,
                              sigmah=sigmah,
                              intervals_method=intervals_method,
                              num_samples=num_samples,
                              seed=seed,
                              tags=tags)
        self.sampler = self._get_sampler(**sampler_kwargs)
        self.fitted = True
        return self

    def fit_predict(self,
                    S: np.ndarray,
                    y_hat: np.ndarray,
                    idx_bottom: np.ndarray,
                    y_insample: Optional[np.ndarray] = None,
                    y_hat_insample: Optional[np.ndarray] = None,
                    sigmah: Optional[np.ndarray] = None,
                    level: Optional[List[int]] = None,
                    intervals_method: Optional[str] = None,
                    num_samples: Optional[int] = None,
                    seed: Optional[int] = None,
                    tags: Dict[str, np.ndarray] = None):
        """BottomUp Reconciliation Method.

        Fits the reconciler and immediately reconciles `y_hat`.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).<br>
        `y_insample`, `y_hat_insample`, `sigmah`, `num_samples`, `seed`, `tags`: forwarded
        unchanged to the sampler constructor.<br>
        `level`: float list 0-100, confidence levels for prediction intervals.<br>
        `intervals_method`: Sampler for prediction intervals, one of `normality`, `bootstrap`, `permbu`.<br>

        **Returns:**<br>
        `y_tilde`: Reconciliated y_hat using the Bottom Up approach.
        """
        # `fit` populates the P, W and sampler attributes used right below.
        fitted = self.fit(S=S,
                          y_hat=y_hat,
                          idx_bottom=idx_bottom,
                          y_insample=y_insample,
                          y_hat_insample=y_hat_insample,
                          sigmah=sigmah,
                          intervals_method=intervals_method,
                          num_samples=num_samples,
                          seed=seed,
                          tags=tags)

        return fitted._reconcile(S=S, P=fitted.P, y_hat=y_hat,
                                 sampler=fitted.sampler, level=level)

    __call__ = fit_predict

# %% ../nbs/methods.ipynb 9
class BottomUpSparse(BottomUp):
    """BottomUpSparse Reconciliation Class.

    Bottom Up reconciliation whose projection and weight matrices are built as
    `scipy.sparse` matrices, intended for datasets with many time series.

    Author's note (makoren): the expected efficiency gain was only checked up
    to ~20k time series, where no real improvement was observed; it remains to
    be verified on something like 1M series, where the dense `S` matrix stops
    fitting in memory.

    See the parent class for more details.
    """
    is_sparse_method = True

    def _get_PW_matrices(self, S, idx_bottom):
        """Return sparse versions of the bottom-up `P` (CSR) and identity `W`."""
        n_series, _ = S.shape
        # LIL format is used while the bottom rows are being assigned.
        selector = sparse.lil_matrix(S.shape, dtype=np.float32)
        selector[idx_bottom] = S[idx_bottom]
        projection = sparse.csr_matrix(selector.T)
        weights = sparse.identity(n_series, dtype=np.float32)
        return projection, weights

# %% ../nbs/methods.ipynb 22
def _get_child_nodes(S: np.ndarray, tags: Dict[str, np.ndarray]):
    level_names = list(tags.keys())
    nodes = OrderedDict()
    for i_level, level in enumerate(level_names[:-1]):
        parent = tags[level]
        child = np.zeros_like(S)
        idx_child = tags[level_names[i_level+1]] 
        child[idx_child] = S[idx_child]
        nodes_level = {}
        for idx_parent_node in parent:
            parent_node = S[idx_parent_node]
            idx_node = child * parent_node.astype(bool)
            idx_node, = np.where(idx_node.sum(axis=1) > 0)
            nodes_level[idx_parent_node] = [idx for idx in idx_child if idx in idx_node]
        nodes[level] = nodes_level
    return nodes

# %% ../nbs/methods.ipynb 23
def _reconcile_fcst_proportions(S: np.ndarray, y_hat: np.ndarray,
                                tags: Dict[str, np.ndarray],
                                nodes: Dict[str, Dict[int, np.ndarray]],
                                idx_top: int):
    reconciled = np.zeros_like(y_hat)
    reconciled[idx_top] = y_hat[idx_top]
    level_names = list(tags.keys())
    for i_level, level in enumerate(level_names[:-1]):
        nodes_level = nodes[level]
        for idx_parent, idx_childs in nodes_level.items():
            fcst_parent = reconciled[idx_parent]
            childs_sum = y_hat[idx_childs].sum()
            for idx_child in idx_childs:
                reconciled[idx_child] = y_hat[idx_child] * fcst_parent / childs_sum
    return reconciled

# %% ../nbs/methods.ipynb 24
class TopDown(HReconciler):
    r"""Top Down Reconciliation Class.

    The Top Down hierarchical reconciliation method, distributes the total aggregate predictions and decomposes
    it down the hierarchy using proportions $\mathbf{p}_{\mathrm{[b]}}$ that can be actual historical values
    or estimated.

    $$\mathbf{P}=[\mathbf{p}_{\mathrm{[b]}}\;|\;\mathbf{0}_{\mathrm{[b][a,b\;-1]}}]$$
    **Parameters:**<br>
    `method`: One of `forecast_proportions`, `average_proportions` and `proportion_averages`.<br>

    **References:**<br>
    - [CW. Gross (1990). "Disaggregation methods to expedite product line forecasting". Journal of Forecasting, 9 , 233–254.
    doi:10.1002/for.3980090304](https://onlinelibrary.wiley.com/doi/abs/10.1002/for.3980090304).<br>
    - [G. Fliedner (1999). "An investigation of aggregate variable time series forecast strategies with specific subaggregate
    time series statistical correlation". Computers and Operations Research, 26 , 1133–1149.
    doi:10.1016/S0305-0548(99)00017-9](https://doi.org/10.1016/S0305-0548(99)00017-9).
    """
    def __init__(self, 
                 method: str):
        self.method = method
        # Only the historical-proportion variants need insample values.
        self.insample = method in ['average_proportions', 'proportion_averages']

    def _get_PW_matrices(self,
                         S: np.ndarray,
                         y_hat: np.ndarray,
                         tags: Dict[str, np.ndarray],
                         y_insample: Optional[np.ndarray] = None):
        """Build the top-down projection `P` and an identity weight matrix `W`.

        The bottom-level proportions are estimated from `y_insample` and placed
        in the column of `P` that corresponds to the top node.

        **Raises:**<br>
        `ValueError`: if the structure is not strictly hierarchical, or if
        `y_insample` is missing for a method that needs it.<br>
        `Exception`: for `forecast_proportions` (no fit available) and for
        unknown methods.
        """
        if not is_strictly_hierarchical(S, tags):
            raise ValueError('Top down reconciliation requires strictly hierarchical structures.')

        # Validate the method before touching `y_insample`: indexing a missing
        # `y_insample` first would hide these messages behind a TypeError.
        if self.method == 'forecast_proportions':
            raise Exception(f'Fit method not implemented for {self.method} yet')
        if self.method not in ('average_proportions', 'proportion_averages'):
            raise Exception(f'Unknown method {self.method}')
        if y_insample is None:
            raise ValueError(f'Top down method {self.method} requires `y_insample`.')

        n_hiers, n_bottom = S.shape
        # The top node is the row of S that aggregates the most bottom series.
        idx_top = int(S.sum(axis=1).argmax())
        # Sorting levels by size puts the bottom level (most nodes) last.
        levels_ = dict(sorted(tags.items(), key=lambda x: len(x[1])))
        idx_bottom = levels_[list(levels_)[-1]]

        y_top = y_insample[idx_top]
        y_btm = y_insample[idx_bottom]
        if self.method == 'average_proportions':
            prop = np.mean(y_btm / y_top, axis=1)
        else:
            # proportion_averages
            prop = np.mean(y_btm, axis=1) / np.mean(y_top)

        P = np.zeros_like(S, np.float64).T #float 64 if prop is too small, happens with wiki2
        P[:, idx_top] = prop
        W = np.eye(n_hiers, dtype=np.float32)
        return P, W

    def fit(self, 
            S,
            y_hat,
            y_insample: Optional[np.ndarray] = None,
            y_hat_insample: Optional[np.ndarray] = None,
            sigmah: Optional[np.ndarray] = None,
            intervals_method: Optional[str] = None,
            num_samples: Optional[int] = None,
            seed: Optional[int] = None,            
            tags: Dict[str, np.ndarray] = None,
            idx_bottom: Optional[np.ndarray] = None):
        """TopDown Fit Method.

        Stores the projection matrix, the weight matrix and the sampler on the
        instance and marks it as fitted. Not available for the
        `forecast_proportions` method (use `fit_predict` instead).

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `tags`: Each key is a level and each value its `S` indices.<br>
        `y_insample`: Insample values of size (`base`, `insample_size`). Required by
        `average_proportions` and `proportion_averages`.<br>
        `y_hat_insample`, `sigmah`, `num_samples`, `seed`: forwarded unchanged to the
        sampler constructor.<br>
        `idx_bottom`: Accepted for interface compatibility; the bottom level is derived from `tags`.<br>
        `intervals_method`: Sampler for prediction intervals, one of `normality`, `bootstrap`, `permbu`.<br>

        **Returns:**<br>
        `self`: object, fitted reconciler.
        """
        self.P, self.W = self._get_PW_matrices(S=S, y_hat=y_hat, 
                                               tags=tags, y_insample=y_insample)
        self.sampler = self._get_sampler(S=S,
                                         P=self.P,
                                         W=self.W,
                                         y_hat=y_hat,
                                         y_insample=y_insample,
                                         y_hat_insample=y_hat_insample,
                                         sigmah=sigmah, 
                                         intervals_method=intervals_method,
                                         num_samples=num_samples,
                                         seed=seed,
                                         tags=tags)
        self.fitted = True
        return self

    def fit_predict(self,
                    S: np.ndarray,
                    y_hat: np.ndarray,
                    tags: Dict[str, np.ndarray],
                    idx_bottom: np.ndarray = None,
                    y_insample: Optional[np.ndarray] = None,
                    y_hat_insample: Optional[np.ndarray] = None,
                    sigmah: Optional[np.ndarray] = None,
                    level: Optional[List[int]] = None,
                    intervals_method: Optional[str] = None,
                    num_samples: Optional[int] = None,
                    seed: Optional[int] = None):
        """Top Down Reconciliation Method.

        For `forecast_proportions` the forecasts are reconciled directly, one
        horizon step at a time, and only the mean is returned. The other
        methods fit the projection matrix first and then reconcile with it.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `tags`: Each key is a level and each value its `S` indices.<br>
        `y_insample`: Insample values of size (`base`, `insample_size`). Optional for `forecast_proportions` method.<br>
        `y_hat_insample`, `sigmah`, `num_samples`, `seed`: forwarded unchanged to the
        sampler constructor.<br>
        `idx_bottom`: Accepted for interface compatibility; the bottom level is derived from `tags`.<br>
        `level`: float list 0-100, confidence levels for prediction intervals.<br>
        `intervals_method`: Sampler for prediction intervals, one of `normality`, `bootstrap`, `permbu`.<br>

        **Returns:**<br>
        `y_tilde`: Reconciliated y_hat using the Top Down approach.
        """
        if self.method == 'forecast_proportions':
            idx_top = int(S.sum(axis=1).argmax())
            levels_ = dict(sorted(tags.items(), key=lambda x: len(x[1])))
            if level is not None:
                warnings.warn('Prediction intervals not implement for `forecast_proportions`')
            nodes = _get_child_nodes(S=S, tags=levels_)
            # Each horizon step is reconciled on its own, as a column vector.
            reconciled = [_reconcile_fcst_proportions(S=S, y_hat=y_hat_[:, None], 
                                                      tags=levels_, 
                                                      nodes=nodes,
                                                      idx_top=idx_top) \
                          for y_hat_ in y_hat.T]
            reconciled = np.hstack(reconciled)
            return {'mean': reconciled}
        else:
            # Fit creates P, W and sampler attributes
            self.fit(S=S,
                     y_hat=y_hat,
                     y_insample=y_insample,
                     y_hat_insample=y_hat_insample,
                     sigmah=sigmah,
                     intervals_method=intervals_method,
                     num_samples=num_samples,
                     seed=seed,
                     tags=tags, idx_bottom=idx_bottom)
            return self._reconcile(S=S, P=self.P, y_hat=y_hat,
                                   level=level, sampler=self.sampler)

    __call__ = fit_predict

# %% ../nbs/methods.ipynb 32
class MiddleOut(HReconciler):
    """Middle Out Reconciliation Class.

    This method is only available for **strictly hierarchical structures**. It anchors the base predictions
    in a middle level. The levels above the base predictions use the Bottom-Up approach, while the levels
    below use a Top-Down.

    **Parameters:**<br>
    `middle_level`: Middle level.<br>
    `top_down_method`: One of `forecast_proportions`, `average_proportions` and `proportion_averages`.<br>

    **References:**<br>
    - [Hyndman, R.J., & Athanasopoulos, G. (2021). "Forecasting: principles and practice, 3rd edition:
    Chapter 11: Forecasting hierarchical and grouped series.". OTexts: Melbourne, Australia. OTexts.com/fpp3
    Accessed on July 2022.](https://otexts.com/fpp3/hierarchical.html)

    """
    def __init__(self, 
                 middle_level: str,
                 top_down_method: str):
        self.middle_level = middle_level
        self.top_down_method = top_down_method 
        # Insample values are only needed by the historical-proportion variants.
        self.insample = top_down_method in ['average_proportions', 'proportion_averages']

    def _get_PW_matrices(self, **kwargs):
        raise Exception('Not implemented')

    def fit(self, **kwargs):
        raise Exception('Not implemented')

    def predict(self, **kwargs):
        raise Exception('Not implemented')

    def fit_predict(self, 
                    S: np.ndarray,
                    y_hat: np.ndarray,
                    tags: Dict[str, np.ndarray],
                    y_insample: Optional[np.ndarray] = None,
                    level: Optional[List[int]] = None,
                    intervals_method: Optional[str] = None):
        """Middle Out Reconciliation Method.

        Levels up to and including `middle_level` are reconciled bottom-up from
        the middle level; each middle node then acts as the top of its own
        sub-hierarchy, which is reconciled top-down with `top_down_method`.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `tags`: Each key is a level and each value its `S` indices.<br>
        `y_insample`: Insample values of size (`base`, `insample_size`). Passed on to the
        top-down step, which needs it for `average_proportions` and `proportion_averages`.<br>
        `level`, `intervals_method`: accepted but not used by this method.<br>

        **Returns:**<br>
        `y_tilde`: Reconciliated y_hat using the Middle Out approach.
        """
        if not is_strictly_hierarchical(S, tags):
            raise ValueError('Middle out reconciliation requires strictly hierarchical structures.')
        if self.middle_level not in tags:
            raise ValueError('You have to provide a `middle_level` in `tags`.')

        # Levels ordered from the smallest (top) to the largest (bottom).
        levels_ = dict(sorted(tags.items(), key=lambda x: len(x[1])))
        level_names = list(levels_)
        reconciled = np.full_like(y_hat, fill_value=np.nan)
        cut_nodes = levels_[self.middle_level]

        # --- Bottom-up part: every level from the top down to the middle level.
        n_upper = level_names.index(self.middle_level) + 1
        upper_idxs = np.hstack([levels_[name] for name in level_names[:n_upper]])
        # Within the upper block the middle nodes are the last rows and play
        # the role of the bottom level.
        upper_fit = BottomUp().fit_predict(
            S=np.unique(S[upper_idxs], axis=1),
            y_hat=y_hat[upper_idxs],
            idx_bottom=np.arange(len(upper_idxs))[-len(cut_nodes):]
        )
        reconciled[upper_idxs] = upper_fit['mean']

        # --- Top-down part.
        child_nodes = _get_child_nodes(S, levels_)
        # `subtrees` has one entry per middle node; each value maps a level
        # name to the indices of that level connected to the middle node.
        subtrees = {node: {self.middle_level: np.array([node])} for node in cut_nodes}
        for lv, lv_child in zip(level_names[:-1], level_names[1:]):
            # Levels above the middle level are not part of any sub-hierarchy.
            if lv not in list(subtrees.values())[0].keys():
                continue
            for subtree in subtrees.values():
                known_idxs = subtree.values()
                gathered = [
                    idxs_child
                    for idx_parent, idxs_child in child_nodes[lv].items()
                    if any(idx_parent in val for val in known_idxs)
                ]
                subtree[lv_child] = np.hstack(gathered)

        for subtree in subtrees.values():
            idxs_node = np.hstack(list(subtree.values()))
            S_node = S[idxs_node]
            # Keep only the bottom columns that belong to this sub-hierarchy.
            S_node = S_node[:, np.any(S_node != 0, axis=0)]
            # Re-index the levels so they point at rows of `S_node`.
            local_tags = {}
            offset = 0
            for lv_name, idxs_level in subtree.items():
                size = len(idxs_level)
                local_tags[lv_name] = np.arange(offset, offset + size)
                offset += size
            insample_node = None if y_insample is None else y_insample[idxs_node]
            lower_fit = TopDown(self.top_down_method).fit_predict(
                S=S_node,
                y_hat=y_hat[idxs_node],
                y_insample=insample_node,
                tags=local_tags,
            )
            reconciled[idxs_node] = lower_fit['mean']
        return {'mean': reconciled}

    __call__ = fit_predict

# %% ../nbs/methods.ipynb 37
def crossprod(x):
    """Return the cross-product `x.T @ x` of `x` with itself."""
    transposed = x.T
    return transposed @ x

# %% ../nbs/methods.ipynb 38
class MinTrace(HReconciler):
    """MinTrace Reconciliation Class.

    This reconciliation algorithm proposed by Wickramasuriya et al. depends on a generalized least squares estimator 
    and an estimator of the covariance matrix of the coherency errors $\mathbf{W}_{h}$. The Min Trace algorithm 
    minimizes the squared errors for the coherent forecasts under an unbiasedness assumption; the solution has a 
    closed form.<br>

    $$
    \mathbf{P}_{\\text{MinT}}=\\left(\mathbf{S}^{\intercal}\mathbf{W}^{-1}_{h}\mathbf{S}\\right)^{-1}
    \mathbf{S}^{\intercal}\mathbf{W}^{-1}_{h}
    $$

    **Parameters:**<br>
    `method`: str, one of `ols`, `wls_struct`, `wls_var`, `mint_shrink`, `mint_cov`.<br>
    `nonnegative`: bool, reconciled forecasts should be nonnegative?<br>
    `mint_shr_ridge`: float=2e-8, ridge numeric protection to MinTrace-shr covariance estimator.<br>
    `num_threads`: int=1, number of threads to use for solving the optimization problems.

    **References:**<br>
    - [Wickramasuriya, S. L., Athanasopoulos, G., & Hyndman, R. J. (2019). \"Optimal forecast reconciliation for
    hierarchical and grouped time series through trace minimization\". Journal of the American Statistical Association, 
    114 , 804–819. doi:10.1080/01621459.2018.1448825.](https://robjhyndman.com/publications/mint/).
    - [Wickramasuriya, S.L., Turlach, B.A. & Hyndman, R.J. (2020). \"Optimal non-negative
    forecast reconciliation". Stat Comput 30, 1167–1182,
    https://doi.org/10.1007/s11222-020-09930-0](https://robjhyndman.com/publications/nnmint/).
    """
    def __init__(self, 
                 method: str,
                 nonnegative: bool = False,
                 mint_shr_ridge: Optional[float] = 2e-8,
                 num_threads: int = 1):
        self.method = method
        self.nonnegative = nonnegative
        self.insample = method in ['wls_var', 'mint_cov', 'mint_shrink']
        if method == 'mint_shrink':
            self.mint_shr_ridge = mint_shr_ridge
        self.num_threads = num_threads

    def _get_PW_matrices(self, 
                  S: np.ndarray,
                  y_hat: np.ndarray,
                  y_insample: Optional[np.ndarray] = None,
                  y_hat_insample: Optional[np.ndarray] = None,
                  idx_bottom: Optional[List[int]] = None,):
        # shape residuals_insample (n_hiers, obs)
        res_methods = ['wls_var', 'mint_cov', 'mint_shrink']
        diag_only_methods = ['ols', 'wls_struct', 'wls_var']
        if self.method in res_methods and y_insample is None and y_hat_insample is None:
            raise ValueError(f"For methods {', '.join(res_methods)} you need to pass residuals")
        n_hiers, n_bottom = S.shape
        if self.method == 'ols':
            W = np.eye(n_hiers)
        elif self.method == 'wls_struct':
            W = np.diag(S @ np.ones((n_bottom,)))
        elif self.method in res_methods:
            # Residuals with shape (obs, n_hiers)
            residuals = (y_insample - y_hat_insample).T
            n, _ = residuals.shape

            # Protection: against overfitted model
            residuals_sum = np.sum(residuals, axis=0)
            zero_residual_prc = np.abs(residuals_sum) < 1e-4
            zero_residual_prc = np.mean(zero_residual_prc)
            if zero_residual_prc > .98:
                raise Exception(f'Insample residuals close to 0, zero_residual_prc={zero_residual_prc}. Check `Y_df`')

            # Protection: cases where data is unavailable/nan
            masked_res = np.ma.array(residuals, mask=np.isnan(residuals))
            covm = np.ma.cov(masked_res, rowvar=False, allow_masked=True).data

            if self.method == 'wls_var':
                W = np.diag(np.diag(covm))
            elif self.method == 'mint_cov':
                W = covm
            elif self.method == 'mint_shrink':
                # Schäfer and Strimmer 2005, scale invariant shrinkage
                # lasso or ridge might improve numerical stability but
                # this version follows https://robjhyndman.com/papers/MinT.pdf
                tar = np.diag(np.diag(covm))

                # Protections: constant's correlation set to 0
                # standardized residuals 0 where residual_std=0
                corm, residual_std = cov2corr(covm, return_std=True)
                corm = np.nan_to_num(corm, nan=0.0)
                xs = np.divide(residuals, residual_std, 
                               out=np.zeros_like(residuals), where=residual_std!=0)

                xs = xs[~np.isnan(xs).any(axis=1), :]
                v = (1 / (n * (n - 1))) * (crossprod(xs ** 2) - (1 / n) * (crossprod(xs) ** 2))
                np.fill_diagonal(v, 0)

                # Protection: constant's correlation set to 0
                corapn = cov2corr(tar)
                corapn = np.nan_to_num(corapn, nan=0.0)
                d = (corm - corapn) ** 2
                lmd = v.sum() / d.sum()
                lmd = max(min(lmd, 1), 0)

                # Protection: final ridge diagonal protection
                W = (lmd * tar + (1 - lmd) * covm) + self.mint_shr_ridge
        else:
            raise ValueError(f'Unknown reconciliation method {self.method}')

        if self.method not in diag_only_methods:
            eigenvalues, _ = np.linalg.eig(W)
        else:
            eigenvalues = np.diag(W)

        if any(eigenvalues < 1e-8):
            raise Exception(f'min_trace ({self.method}) needs covariance matrix to be positive definite.')

        else:
            # compute P for free reconciliation
            if self.method not in diag_only_methods:
                R = S.T @ np.linalg.pinv(W)
            else:
                R = S.T * np.reciprocal(np.diag(W))
            P = np.linalg.pinv(R @ S) @ R

        return P, W

    def fit(self,
            S,
            y_hat,
            y_insample: Optional[np.ndarray] = None,
            y_hat_insample: Optional[np.ndarray] = None,
            sigmah: Optional[np.ndarray] = None,
            intervals_method: Optional[str] = None,
            num_samples: Optional[int] = None,
            seed: Optional[int] = None,
            tags: Dict[str, np.ndarray] = None,
            idx_bottom: Optional[np.ndarray] = None):
        """MinTrace Fit Method.

        Computes the `P` and `W` matrices and, when `nonnegative=True`, solves one
        quadratic program per forecast horizon to obtain nonnegative bottom-level
        forecasts. Results are stored in `self.P`, `self.W`, `self.y_hat` and
        `self.sampler`.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `y_insample`: Insample values of size (`base`, `insample_size`). Only used by `wls_var`, `mint_cov`, `mint_shrink`.<br>
        `y_hat_insample`: Insample fitted values of size (`base`, `insample_size`). Only used by `wls_var`, `mint_cov`, `mint_shrink`.<br>
        `sigmah`: Optional array forwarded to the prediction intervals sampler.<br>
        `intervals_method`: Sampler for prediction intervals, one of `normality`, `bootstrap`, `permbu`.<br>
        `num_samples`: Optional number of samples, forwarded to the sampler.<br>
        `seed`: Optional random seed, forwarded to the sampler.<br>
        `tags`: Each key is a level and each value its `S` indices.<br>
        `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`). Required when `nonnegative=True`.<br>

        **Returns:**<br>
        `self`: object, fitted reconciler.

        **Raises:**<br>
        `Exception`: if `nonnegative=True` and `idx_bottom` is missing, or if the
        nonnegative optimization returns bottom forecasts below `-1e-8`.
        """
        # Same guard as `fit_predict`: the nonnegative branch hands `idx_bottom`
        # to BottomUp, so fail early with a clear message when `fit` is called
        # directly without it.
        if self.nonnegative and idx_bottom is None:
            raise Exception('idx_bottom needed for nonnegative reconciliation')

        self.y_hat = y_hat
        self.P, self.W = self._get_PW_matrices(S=S, y_hat=y_hat,
                                               y_insample=y_insample, y_hat_insample=y_hat_insample,
                                               idx_bottom=idx_bottom)

        if self.nonnegative:
            _, n_bottom = S.shape
            W_inv = np.linalg.pinv(self.W)
            negatives = y_hat < 0
            if negatives.any():
                warnings.warn('Replacing negative forecasts with zero.')
                # Work on a copy so the caller's array is left untouched.
                y_hat = np.copy(y_hat)
                y_hat[negatives] = 0.
            # Quadratic programming formulation
            # here we are solving the quadratic programming problem
            # formulated in the original paper
            # https://robjhyndman.com/publications/nnmint/
            # The library quadprog was chosen
            # based on these benchmarks:
            # https://scaron.info/blog/quadratic-programming-in-python.html
            a = S.T @ W_inv
            G = a @ S
            C = np.eye(n_bottom)
            b = np.zeros(n_bottom)
            # the quadratic programming problem
            # returns the forecasts of the bottom series;
            # one independent problem is solved per horizon (column of y_hat)
            if self.num_threads == 1:
                bottom_fcts = np.apply_along_axis(
                    lambda column: solve_qp(G=G, a=a @ column, C=C, b=b)[0],
                    axis=0, arr=y_hat)
            else:
                with ThreadPoolExecutor(self.num_threads) as executor:
                    futures = [
                        executor.submit(solve_qp, G=G, a=a @ y_hat[:, j], C=C, b=b)
                        for j in range(y_hat.shape[1])
                    ]
                    bottom_fcts = np.hstack([f.result()[0][:, None] for f in futures])
            if not np.all(bottom_fcts > -1e-8):
                raise Exception('nonnegative optimization failed')
            # remove negative values close to zero
            bottom_fcts = np.clip(np.float32(bottom_fcts), a_min=0, a_max=None)
            # Hack: store the already-reconciled forecasts so that `fit_predict`
            # can push them through BottomUp's P below.
            self.y_hat = S @ bottom_fcts

            # Overwrite P, W and sampler attributes with BottomUp's
            self.P, self.W = BottomUp()._get_PW_matrices(S=S, idx_bottom=idx_bottom)

        # NOTE(review): in the nonnegative branch the sampler receives the
        # zero-clipped base forecasts (`y_hat`), not `self.y_hat` — confirm
        # this is the intended input for the sampler.
        self.sampler = self._get_sampler(S=S,
                                         P=self.P,
                                         W=self.W,
                                         y_hat=y_hat,
                                         y_insample=y_insample,
                                         y_hat_insample=y_hat_insample,
                                         sigmah=sigmah,
                                         intervals_method=intervals_method,
                                         num_samples=num_samples,
                                         seed=seed,
                                         tags=tags)
        self.fitted = True
        return self

    def fit_predict(self,
                    S: np.ndarray,
                    y_hat: np.ndarray,
                    idx_bottom: np.ndarray = None,
                    y_insample: Optional[np.ndarray] = None,
                    y_hat_insample: Optional[np.ndarray] = None,
                    sigmah: Optional[np.ndarray] = None,
                    level: Optional[List[int]] = None,
                    intervals_method: Optional[str] = None,
                    num_samples: Optional[int] = None,
                    seed: Optional[int] = None,
                    tags: Dict[str, np.ndarray] = None):
        """MinTrace Reconciliation Method.

        Fits the reconciler and immediately reconciles `y_hat` with the fitted
        `P` matrix and sampler.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`). Required when `nonnegative=True`.<br>
        `y_insample`: Insample values of size (`base`, `insample_size`). Only used by `wls_var`, `mint_cov`, `mint_shrink`<br>
        `y_hat_insample`: Insample fitted values of size (`base`, `insample_size`). Only used by `wls_var`, `mint_cov`, `mint_shrink`<br>
        `sigmah`: Optional array forwarded to the prediction intervals sampler.<br>
        `level`: float list 0-100, confidence levels for prediction intervals.<br>
        `intervals_method`: Sampler for prediction intervals, one of `normality`, `bootstrap`, `permbu`.<br>
        `num_samples`: Optional number of samples, forwarded to the sampler.<br>
        `seed`: Optional random seed, forwarded to the sampler.<br>
        `tags`: Each key is a level and each value its `S` indices.<br>

        **Returns:**<br>
        `y_tilde`: Reconciliated y_hat using the MinTrace approach.
        """
        if self.nonnegative:
            # Sampling-based intervals are rejected in nonnegative mode.
            wants_intervals = level is not None
            if wants_intervals and intervals_method in ('bootstrap', 'permbu'):
                raise Exception('nonnegative reconciliation is not compatible with bootstrap forecasts')
            if idx_bottom is None:
                raise Exception('idx_bottom needed for nonnegative reconciliation')

        # Fit creates the P, W, y_hat and sampler attributes used below.
        fit_kwargs = dict(
            S=S,
            y_hat=y_hat,
            y_insample=y_insample,
            y_hat_insample=y_hat_insample,
            sigmah=sigmah,
            intervals_method=intervals_method,
            num_samples=num_samples,
            seed=seed,
            tags=tags,
            idx_bottom=idx_bottom,
        )
        self.fit(**fit_kwargs)

        # `self.y_hat` (not the argument) is reconciled: in nonnegative mode
        # `fit` replaces it with the output of the quadratic program.
        return self._reconcile(S=S, P=self.P, y_hat=self.y_hat,
                               level=level, sampler=self.sampler)

    # Calling the reconciler instance directly is an alias for `fit_predict`.
    __call__ = fit_predict

# %% ../nbs/methods.ipynb 39
class MinTraceSparse(MinTrace):
    """MinTraceSparse Reconciliation Class.

    This is the implementation of a subset of MinTrace features using the sparse
    matrix approach. It works much more efficiently on datasets with many time series.

    See the parent class for more details.

    Currently supported:
    * Methods using diagonal W matrix, i.e. "ols", "wls_struct", "wls_var",
    * The standard MinT version (non-negative is not supported).

    Note: due to the numerical instability of the matrix inversion when creating the
    P matrix, the method is NOT guaranteed to give identical results to the non-sparse
    version.
    """
    is_sparse_method = True

    def _get_PW_matrices(
        self,
        S: Union[np.ndarray, sparse.spmatrix],
        y_hat: np.ndarray,
        y_insample: Optional[np.ndarray] = None,
        y_hat_insample: Optional[np.ndarray] = None,
        idx_bottom: Optional[List[int]] = None,
    ):
        """Build the sparse projection operator `P` and diagonal weights `W`.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`), dense or sparse.<br>
        `y_hat`: Forecast values; only its first dimension (`base`) is used here.<br>
        `y_insample`: Insample values of size (`base`, `insample_size`), required by `wls_var`.<br>
        `y_hat_insample`: Insample fitted values of size (`base`, `insample_size`), required by `wls_var`.<br>
        `idx_bottom`: Unused by this method; kept for signature compatibility.<br>

        **Returns:**<br>
        `P`: `scipy.sparse.linalg.LinearOperator` of shape (`bottom`, `base`) whose
        action solves `(S' W^-1 S) x = S' W^-1 y` iteratively.<br>
        `W`: sparse diagonal matrix of shape (`base`, `base`).

        **Raises:**<br>
        `NotImplementedError`: for non-diagonal methods or `nonnegative=True`.<br>
        `ValueError`: if `wls_var` is requested without both insample arrays.<br>
        `Exception`: if the diagonal of `W` contains NaN or values below `1e-8`,
        or if almost all insample residuals are zero.
        """
        # shape residuals_insample (n_hiers, obs)
        res_methods = ["wls_var", "mint_cov", "mint_shrink"]
        diag_only_methods = ["ols", "wls_struct", "wls_var"]

        if self.method not in diag_only_methods:
            raise NotImplementedError(
                "Only the methods with diagonal W are supported as sparse operations"
            )

        if self.nonnegative:
            raise NotImplementedError(
                "Non-negative MinT is currently not implemented as sparse"
            )

        S = sparse.csr_matrix(S)

        # Residuals need BOTH arrays; a single missing one must be rejected too.
        if self.method in res_methods and (
            y_insample is None or y_hat_insample is None
        ):
            raise ValueError(
                f"For methods {', '.join(res_methods)} you need to pass residuals"
            )
        n_hiers, n_bottom = S.shape

        if self.method == "ols":
            W_diag = np.ones(n_hiers)
        elif self.method == "wls_struct":
            # Row sums of S: number of bottom series aggregated by each node.
            W_diag = S @ np.ones((n_bottom,))
        elif self.method == "wls_var":
            # Residuals with shape (obs, n_hiers)
            residuals = (y_insample - y_hat_insample).T

            # Protection: against overfitted model
            residuals_sum = np.sum(residuals, axis=0)
            zero_residual_prc = np.abs(residuals_sum) < 1e-4
            zero_residual_prc = np.mean(zero_residual_prc)
            if zero_residual_prc > 0.98:
                raise Exception(
                    f"Insample residuals close to 0, zero_residual_prc={zero_residual_prc}. Check `Y_df`"
                )

            # Unlike the dense version, NaNs are deliberately NOT masked here:
            # masking tends to produce unreliable results, so a NaN variance is
            # left in place and rejected by the NaN check below.
            W_diag = np.var(residuals, axis=0, ddof=1)
        else:
            raise ValueError(f"Unknown reconciliation method {self.method}")

        if np.any(W_diag < 1e-8):
            raise Exception(
                f"min_trace ({self.method}) needs covariance matrix to be positive definite."
            )

        if np.any(np.isnan(W_diag)):
            raise Exception(
                f"min_trace ({self.method}) needs covariance matrix to be positive definite (not nan)."
            )

        M = sparse.spdiags(np.reciprocal(W_diag), 0, W_diag.size, W_diag.size)
        R = sparse.csr_matrix(S.T @ M)

        # Normal-equations operator (S' W^-1 S); it does not depend on the
        # right-hand side, so it is built once instead of on every P action.
        A = sparse.linalg.LinearOperator(
            (n_bottom, n_bottom), matvec=lambda v: R @ (S @ v)
        )

        # The implementation of P acting on a vector:
        def get_P_action(y):
            b = R @ y

            # A numeric `atol` is used because current SciPy releases reject
            # the former "legacy" string; the relative tolerance keeps its
            # default.
            x_tilde, exit_code = sparse.linalg.bicgstab(A, b, atol=1e-5)
            if exit_code != 0:
                warnings.warn(
                    "MinTraceSparse: bicgstab did not converge "
                    f"(exit code {exit_code}); reconciled values may be inaccurate."
                )

            return x_tilde

        P = sparse.linalg.LinearOperator(
            (S.shape[1], y_hat.shape[0]), matvec=get_P_action
        )
        W = sparse.spdiags(W_diag, 0, W_diag.size, W_diag.size)

        return P, W

# %% ../nbs/methods.ipynb 49
class OptimalCombination(MinTrace):
    r"""Optimal Combination Reconciliation Class.

    This reconciliation algorithm was proposed by Hyndman et al. 2011, the method uses generalized least squares 
    estimator using the coherency errors covariance matrix. Consider the covariance of the base forecast 
    $\textrm{Var}(\epsilon_{h}) = \Sigma_{h}$, the $\mathbf{P}$ matrix of this method is defined by:
    $$ \mathbf{P} = \left(\mathbf{S}^{\intercal}\Sigma_{h}^{\dagger}\mathbf{S}\right)^{-1}\mathbf{S}^{\intercal}\Sigma^{\dagger}_{h}$$
    where $\Sigma_{h}^{\dagger}$ denotes the variance pseudo-inverse. The method was later proven equivalent to 
    `MinTrace` variants.

    **Parameters:**<br>
    `method`: str, allowed optimal combination methods: 'ols', 'wls_struct'.<br>
    `nonnegative`: bool, reconciled forecasts should be nonnegative?<br>
    `num_threads`: int, forwarded to `MinTrace`; number of threads used by the nonnegative solver.<br>

    **References:**<br>
    - [Rob J. Hyndman, Roman A. Ahmed, George Athanasopoulos, Han Lin Shang (2010). "Optimal Combination Forecasts for 
    Hierarchical Time Series".](https://robjhyndman.com/papers/Hierarchical6.pdf).<br>
    - [Shanika L. Wickramasuriya, George Athanasopoulos and Rob J. Hyndman (2019). "Optimal Forecast Reconciliation for 
    Hierarchical and Grouped Time Series Through Trace Minimization".](https://robjhyndman.com/papers/MinT.pdf).<br>
    - [Wickramasuriya, S.L., Turlach, B.A. & Hyndman, R.J. (2020). "Optimal non-negative
    forecast reconciliation". Stat Comput 30, 1167–1182, 
    https://doi.org/10.1007/s11222-020-09930-0](https://robjhyndman.com/publications/nnmint/).
    """
    def __init__(self,
                 method: str,
                 nonnegative: bool = False,
                 num_threads: int = 1):
        # Only the variants that need no insample residuals are exposed here.
        comb_methods = ['ols', 'wls_struct']
        if method not in comb_methods:
            raise ValueError(f"Optimal Combination class does not support method: \"{method}\"")
        super().__init__(method=method, nonnegative=nonnegative, num_threads=num_threads)
        # Set after the parent constructor so it overrides whatever MinTrace assigned.
        self.insample = False

# %% ../nbs/methods.ipynb 58
@njit
def lasso(X: np.ndarray, y: np.ndarray, 
          lambda_reg: float, max_iters: int = 1_000,
          tol: float = 1e-4):
    """Lasso regression solved with cyclic coordinate descent.

    Sweeps over the columns of `X`, updating one coefficient at a time with a
    soft-thresholding step, and stops when the largest coefficient change of a
    sweep is below `tol` or after `max_iters` sweeps.

    **Parameters:**<br>
    `X`: 2D design matrix of shape (observations, features).<br>
    `y`: 1D target vector with one entry per observation.<br>
    `lambda_reg`: float, l1 regularization strength.<br>
    `max_iters`: int, maximum number of sweeps over all features.<br>
    `tol`: float, convergence threshold on the largest coefficient change.<br>

    **Returns:**<br>
    `beta`: float32 array with one coefficient per feature.
    """
    n_obs, n_feats = X.shape
    # squared column norms, reused by every sweep
    sq_norms = (X ** 2).sum(axis=0)
    beta = np.zeros(n_feats, dtype=np.float32)
    deltas = np.zeros(n_feats, dtype=np.float32)
    # residuals start at y because all coefficients start at zero
    resid = y.copy()

    for _ in range(max_iters):
        for j in range(n_feats):
            sq_norm = sq_norms[j]
            # a (near) zero column cannot explain anything:
            # its coefficient stays at zero
            if abs(sq_norm) < 1e-8:
                continue
            previous = beta[j]
            column = X[:, j]
            # normalized partial derivative for coordinate j
            rho = previous + column.flatten().dot(resid) / sq_norm
            # soft threshold
            shrunk = max(np.abs(rho) - lambda_reg * n_obs / sq_norm, 0.)
            beta[j] = np.sign(rho) * shrunk
            deltas[j] = np.abs(previous - beta[j])
            # keep the residuals in sync with the updated coefficient
            if beta[j] != previous:
                resid += (previous - beta[j]) * column
        if max(deltas) < tol:
            break
    return beta

# %% ../nbs/methods.ipynb 59
class ERM(HReconciler):
    r"""ERM Reconciliation Class.

    The Empirical Risk Minimization reconciliation strategy relaxes the unbiasedness assumptions from
    previous reconciliation methods like MinT and optimizes square errors between the reconciled predictions
    and the validation data to obtain an optimal reconciliation matrix P.
    
    The exact solution for $\mathbf{P}$ (`method='closed'`) follows the expression:
    $$\mathbf{P}^{*} = \left(\mathbf{S}^{\intercal}\mathbf{S}\right)^{-1}\mathbf{Y}^{\intercal}\hat{\mathbf{Y}}\left(\hat{\mathbf{Y}}\hat{\mathbf{Y}}\right)^{-1}$$

    The alternative Lasso regularized $\mathbf{P}$ solution (`method='reg_bu'`) is useful when the observations 
    of validation data is limited or the exact solution has low numerical stability.
    $$\mathbf{P}^{*} = \text{argmin}_{\mathbf{P}} ||\mathbf{Y}-\mathbf{S} \mathbf{P} \hat{Y} ||^{2}_{2} + \lambda ||\mathbf{P}-\mathbf{P}_{\text{BU}}||_{1}$$

    **Parameters:**<br>
    `method`: str, one of `closed`, `reg` and `reg_bu`.<br>
    `lambda_reg`: float, l1 regularizer for `reg` and `reg_bu`. When `None`, it is set to the
    largest absolute correlation between the regressors and the target.<br>

    **References:**<br>
    - [Ben Taieb, S., & Koo, B. (2019). Regularized regression for hierarchical forecasting without 
    unbiasedness conditions. In Proceedings of the 25th ACM SIGKDD International Conference on Knowledge 
    Discovery & Data Mining KDD '19 (p. 1337-1347). New York, NY, USA: Association for Computing Machinery.](https://doi.org/10.1145/3292500.3330976).<br>
    """
    def __init__(self,
                 method: str,
                 lambda_reg: float = 1e-2):
        self.method = method
        self.lambda_reg = lambda_reg
        self.insample = True

    def _get_PW_matrices(self, 
                  S: np.ndarray,
                  y_hat: np.ndarray,
                  y_insample: np.ndarray,
                  y_hat_insample: np.ndarray,
                  idx_bottom: np.ndarray):
        """Estimate the reconciliation matrix `P` from insample data.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`); only its horizon is used.<br>
        `y_insample`: Train values of size (`base`, `insample_size`).<br>
        `y_hat_insample`: Insample train predictions of size (`base`, `insample_size`).<br>
        `idx_bottom`: Indices corresponding to the bottom level of `S`; required by `reg_bu`.<br>

        **Returns:**<br>
        `P`: array of size (`bottom`, `base`).<br>
        `W`: float32 identity matrix of size (`base`, `base`).

        **Raises:**<br>
        `ValueError`: if the insample arrays are missing, if `reg_bu` is used without
        `idx_bottom`, or if `method` is unknown.
        """
        # Both insample arrays are mandatory for ERM even though `fit_predict`
        # declares them optional; fail with a clear message instead of an
        # opaque TypeError from np.isnan.
        if y_insample is None or y_hat_insample is None:
            raise ValueError('ERM requires both `y_insample` and `y_hat_insample`')
        # Indexing with `None` would silently copy all of S into Pbu.
        if self.method == 'reg_bu' and idx_bottom is None:
            raise ValueError('idx_bottom needed for ERM with method "reg_bu"')

        n_hiers, n_bottom = S.shape
        # y_hat_insample shape (n_hiers, obs)
        # remove obs with nan values
        nan_idx = np.isnan(y_hat_insample).any(axis=0)
        y_insample = y_insample[:, ~nan_idx]
        y_hat_insample = y_hat_insample[:, ~nan_idx]
        # only using the last h validation steps to avoid
        # computational burden
        h = min(y_hat.shape[1], y_hat_insample.shape[1])
        y_hat_insample = y_hat_insample[:, -h:] # shape (n_hiers, h)
        y_insample = y_insample[:, -h:]
        if self.method == 'closed':
            # Project the actuals onto the bottom level, then regress them
            # on the insample predictions.
            B = np.linalg.inv(S.T @ S) @ S.T @ y_insample
            B = B.T
            P = np.linalg.pinv(y_hat_insample.T) @ B
            P = P.T
        elif self.method in ['reg', 'reg_bu']:
            X = np.kron(np.array(S, order='F'), np.array(y_hat_insample.T, order='F'))
            # Pbu is the point the lasso shrinks towards: zeros for `reg`,
            # the bottom-up rows of S for `reg_bu`.
            Pbu = np.zeros_like(S)
            if self.method == 'reg_bu':
                Pbu[idx_bottom] = S[idx_bottom]
            Pbu = Pbu.T
            Y = y_insample.T.flatten(order='F') - X @ Pbu.T.flatten(order='F')
            if self.lambda_reg is None:
                lambda_reg = np.max(np.abs(X.T.dot(Y)))
            else:
                lambda_reg = self.lambda_reg
            P = lasso(X, Y, lambda_reg)
            # The lasso fitted the deviation from Pbu; add it back.
            P = P + Pbu.T.flatten(order='F')
            P = P.reshape(-1, n_bottom, order='F').T
        else:
            raise ValueError(f'Unknown reconciliation method {self.method}')

        W = np.eye(n_hiers, dtype=np.float32)

        return P, W

    def fit(self,
            S,
            y_hat,
            y_insample,
            y_hat_insample,
            sigmah: Optional[np.ndarray] = None,
            intervals_method: Optional[str] = None,
            num_samples: Optional[int] = None,
            seed: Optional[int] = None,
            tags: Dict[str, np.ndarray] = None,
            idx_bottom: Optional[np.ndarray] = None):
        """ERM Fit Method.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `y_insample`: Train values of size (`base`, `insample_size`).<br>
        `y_hat_insample`: Insample train predictions of size (`base`, `insample_size`).<br>
        `sigmah`: Optional array forwarded to the prediction intervals sampler.<br>
        `intervals_method`: Sampler for prediction intervals, one of `normality`, `bootstrap`, `permbu`.<br>
        `num_samples`: Optional number of samples, forwarded to the sampler.<br>
        `seed`: Optional random seed, forwarded to the sampler.<br>
        `tags`: Each key is a level and each value its `S` indices.<br>
        `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).<br>

        **Returns:**<br>
        `self`: object, fitted reconciler.
        """
        self.P, self.W = self._get_PW_matrices(S=S,
                                               y_hat=y_hat,
                                               y_insample=y_insample,
                                               y_hat_insample=y_hat_insample,
                                               idx_bottom=idx_bottom)
        self.sampler = self._get_sampler(S=S,
                                         P=self.P,
                                         W=self.W,
                                         y_hat=y_hat,
                                         y_insample=y_insample,
                                         y_hat_insample=y_hat_insample,
                                         sigmah=sigmah,
                                         intervals_method=intervals_method,
                                         num_samples=num_samples,
                                         seed=seed,
                                         tags=tags)
        self.fitted = True
        return self

    def fit_predict(self,
                    S: np.ndarray,
                    y_hat: np.ndarray,
                    idx_bottom: np.ndarray = None,
                    y_insample: Optional[np.ndarray] = None,
                    y_hat_insample: Optional[np.ndarray] = None,
                    sigmah: Optional[np.ndarray] = None,
                    level: Optional[List[int]] = None,
                    intervals_method: Optional[str] = None,
                    num_samples: Optional[int] = None,
                    seed: Optional[int] = None,
                    tags: Dict[str, np.ndarray] = None):
        """ERM Reconciliation Method.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat`: Forecast values of size (`base`, `horizon`).<br>
        `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).<br>
        `y_insample`: Train values of size (`base`, `insample_size`). Required despite the `None` default.<br>
        `y_hat_insample`: Insample train predictions of size (`base`, `insample_size`). Required despite the `None` default.<br>
        `sigmah`: Optional array forwarded to the prediction intervals sampler.<br>
        `level`: float list 0-100, confidence levels for prediction intervals.<br>
        `intervals_method`: Sampler for prediction intervals, one of `normality`, `bootstrap`, `permbu`.<br>
        `num_samples`: Optional number of samples, forwarded to the sampler.<br>
        `seed`: Optional random seed, forwarded to the sampler.<br>
        `tags`: Each key is a level and each value its `S` indices.<br>

        **Returns:**<br>
        `y_tilde`: Reconciliated y_hat using the ERM approach.
        """
        # Fit creates P, W and sampler attributes
        self.fit(S=S,
                 y_hat=y_hat,
                 y_insample=y_insample,
                 y_hat_insample=y_hat_insample,
                 sigmah=sigmah,
                 intervals_method=intervals_method,
                 num_samples=num_samples,
                 seed=seed,
                 tags=tags, idx_bottom=idx_bottom)

        return self._reconcile(S=S, P=self.P, y_hat=y_hat,
                               level=level, sampler=self.sampler)

    # Calling the reconciler instance directly is an alias for `fit_predict`.
    __call__ = fit_predict