From eabe7cc6533914b5c265b00560fe35ec8a8f0019 Mon Sep 17 00:00:00 2001 From: FedericoGarza Date: Tue, 4 Oct 2022 21:22:23 -0500 Subject: [PATCH 1/3] feat: add insample attribute --- hierarchicalforecast/methods.py | 35 +++++---- nbs/methods.ipynb | 128 ++++++++++++++++++++------------ 2 files changed, 99 insertions(+), 64 deletions(-) diff --git a/hierarchicalforecast/methods.py b/hierarchicalforecast/methods.py index 0c2e4e2..e3164c6 100644 --- a/hierarchicalforecast/methods.py +++ b/hierarchicalforecast/methods.py @@ -88,6 +88,8 @@ class BottomUp: - [Orcutt, G.H., Watts, H.W., & Edwards, J.B.(1968). \"Data aggregation and information loss\". The American Economic Review, 58 , 773{787)](http://www.jstor.org/stable/1815532). """ + insample = False + def reconcile(self, S: np.ndarray, y_hat: np.ndarray, @@ -253,12 +255,13 @@ class TopDown: def __init__(self, method: str): self.method = method + self.insample = method in ['average_proportions', 'proportion_averages'] def reconcile(self, S: np.ndarray, y_hat: np.ndarray, - y_insample: np.ndarray, tags: Dict[str, np.ndarray], + y_insample: Optional[np.ndarray] = None, sigmah: Optional[np.ndarray] = None, level: Optional[List[int]] = None, bootstrap: bool = False, @@ -268,6 +271,8 @@ def reconcile(self, **Parameters:**
`S`: Summing matrix of size (`base`, `bottom`).
`y_hat`: Forecast values of size (`base`, `horizon`).
+ `tags`: Each key is a level and each value its `S` indices.
+ `y_insample`: Insample values of size (`base`, `insample_size`). Optional for `forecast_proportions` method.
`idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).
`sigmah`: float, estimate of the standard deviation of the h-step forecast of size (`base`, `horizon`)
`level`: float list 0-100, confidence levels for prediction intervals.
@@ -345,7 +350,7 @@ def middle_out(S: np.ndarray, counter += idxs_len td = top_down(S_node, y_hat[idxs_node], - y_insample[idxs_node], + y_insample[idxs_node] if y_insample is not None else None, levels_node_, method=top_down_method) reconciled[idxs_node] = td['mean'] @@ -375,19 +380,20 @@ def __init__(self, top_down_method: str): self.middle_level = middle_level self.top_down_method = top_down_method + self.insample = top_down_method in ['average_proportions', 'proportion_averages'] def reconcile(self, S: np.ndarray, y_hat: np.ndarray, - y_insample: np.ndarray, - tags: Dict[str, np.ndarray]): + tags: Dict[str, np.ndarray], + y_insample: Optional[np.ndarray] = None): """Middle Out Reconciliation Method. **Parameters:**
`S`: Summing matrix of size (`base`, `bottom`).
`y_hat`: Forecast values of size (`base`, `horizon`).
- `y_insample`: Insample values of size (`base`, `insample_size`).
- `levels`: Each key is a level and each value its `S` indices.
+ `tags`: Each key is a level and each value its `S` indices.
+ `y_insample`: Insample values of size (`base`, `insample_size`). Only used by `average_proportions` and `proportion_averages`
**Returns:**
`y_tilde`: Reconciliated y_hat using the Middle Out approach. @@ -471,7 +477,7 @@ class MinTrace: \mathbf{S}^{\intercal}\mathbf{W}^{-1}_{h}$$ **Parameters:**
- `method`: str, one of `ols`, `wls_struct`, `wls_var`, `mint_shrink`, `mint_co`.
+ `method`: str, one of `ols`, `wls_struct`, `wls_var`, `mint_shrink`, `mint_cov`.
**References:**
- [Wickramasuriya, S. L., Athanasopoulos, G., & Hyndman, R. J. (2019). \"Optimal forecast reconciliation for @@ -481,12 +487,13 @@ class MinTrace: def __init__(self, method: str): self.method = method + self.insample = method in ['wls_var', 'mint_cov', 'mint_shrink'] def reconcile(self, S: np.ndarray, y_hat: np.ndarray, - y_insample: np.ndarray, - y_hat_insample: np.ndarray, + y_insample: Optional[np.ndarray] = None, + y_hat_insample: Optional[np.ndarray] = None, sigmah: Optional[np.ndarray] = None, level: Optional[List[int]] = None, bootstrap: bool = False, @@ -496,7 +503,8 @@ def reconcile(self, **Parameters:**
`S`: Summing matrix of size (`base`, `bottom`).
`y_hat`: Forecast values of size (`base`, `horizon`).
- `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).
+ `y_insample`: Insample values of size (`base`, `insample_size`). Only used by `wls_var`, `mint_cov`, `mint_shrink`
+ `y_hat_insample`: Insample fitted values of size (`base`, `insample_size`). Only used by `wls_var`, `mint_cov`, `mint_shrink`
`sigmah`: float, estimate of the standard deviation of the h-step forecast of size (`base`, `horizon`)
`level`: float list 0-100, confidence levels for prediction intervals.
`bootstrap`: bool, whether or not to use bootstraped prediction intervals, alternative normality assumption.
@@ -560,12 +568,11 @@ def __init__(self, raise ValueError(f"Optimal Combination class does not support method: \"{method}\"") self.method = method + self.insample = False def reconcile(self, S: np.ndarray, y_hat: np.ndarray, - y_insample: np.ndarray = None, - y_hat_insample: np.ndarray = None, sigmah: Optional[np.ndarray] = None, level: Optional[List[int]] = None, bootstrap: bool = False, @@ -575,7 +582,6 @@ def reconcile(self, **Parameters:**
`S`: Summing matrix of size (`base`, `bottom`).
`y_hat`: Forecast values of size (`base`, `horizon`).
- `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).
`sigmah`: float, estimate of the standard deviation of the h-step forecast of size (`base`, `horizon`)
`level`: float list 0-100, confidence levels for prediction intervals.
`bootstrap`: bool, whether or not to use bootstraped prediction intervals, alternative normality assumption.
@@ -586,8 +592,6 @@ def reconcile(self, """ return optimal_combination(S=S, y_hat=y_hat, - y_insample=y_insample, - y_hat_insample=y_hat_insample, method=self.method, sigmah=sigmah, level=level, bootstrap=bootstrap, bootstrap_samples=bootstrap_samples) @@ -704,6 +708,7 @@ def __init__(self, lambda_reg: float = 1e-2): self.method = method self.lambda_reg = lambda_reg + self.insample = True def reconcile(self, S: np.ndarray, diff --git a/nbs/methods.ipynb b/nbs/methods.ipynb index b101ae8..7a490a2 100644 --- a/nbs/methods.ipynb +++ b/nbs/methods.ipynb @@ -149,6 +149,8 @@ " - [Orcutt, G.H., Watts, H.W., & Edwards, J.B.(1968). \\\"Data aggregation and information loss\\\". The American \n", " Economic Review, 58 , 773{787)](http://www.jstor.org/stable/1815532).\n", " \"\"\"\n", + " insample = False\n", + " \n", " def reconcile(self,\n", " S: np.ndarray,\n", " y_hat: np.ndarray,\n", @@ -516,12 +518,13 @@ " def __init__(self, \n", " method: str):\n", " self.method = method\n", + " self.insample = method in ['average_proportions', 'proportion_averages']\n", " \n", " def reconcile(self, \n", " S: np.ndarray,\n", " y_hat: np.ndarray,\n", - " y_insample: np.ndarray,\n", " tags: Dict[str, np.ndarray],\n", + " y_insample: Optional[np.ndarray] = None,\n", " sigmah: Optional[np.ndarray] = None,\n", " level: Optional[List[int]] = None,\n", " bootstrap: bool = False,\n", @@ -531,6 +534,8 @@ " **Parameters:**
\n", " `S`: Summing matrix of size (`base`, `bottom`).
\n", " `y_hat`: Forecast values of size (`base`, `horizon`).
\n", + " `tags`: Each key is a level and each value its `S` indices.
\n", + " `y_insample`: Insample values of size (`base`, `insample_size`). Optional for `forecast_proportions` method.
\n", " `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).
\n", " `sigmah`: float, estimate of the standard deviation of the h-step forecast of size (`base`, `horizon`)
\n", " `level`: float list 0-100, confidence levels for prediction intervals.
\n", @@ -583,15 +588,26 @@ "# but it is not a general case\n", "for method in ['forecast_proportions', 'average_proportions', 'proportion_averages']:\n", " cls_top_down = TopDown(method=method)\n", - " test_close(\n", - " cls_top_down(\n", - " S=S, \n", - " y_hat=S @ y_hat_bottom, \n", - " y_insample=S @ y_bottom, \n", - " tags=tags\n", - " )['mean'],\n", - " S @ y_hat_bottom\n", - " )" + " if cls_top_down.insample:\n", + " assert method in ['average_proportions', 'proportion_averages']\n", + " test_close(\n", + " cls_top_down(\n", + " S=S, \n", + " y_hat=S @ y_hat_bottom, \n", + " y_insample=S @ y_bottom, \n", + " tags=tags\n", + " )['mean'],\n", + " S @ y_hat_bottom\n", + " )\n", + " else:\n", + " test_close(\n", + " cls_top_down(\n", + " S=S, \n", + " y_hat=S @ y_hat_bottom, \n", + " tags=tags\n", + " )['mean'],\n", + " S @ y_hat_bottom\n", + " )" ] }, { @@ -689,7 +705,7 @@ " counter += idxs_len\n", " td = top_down(S_node, \n", " y_hat[idxs_node], \n", - " y_insample[idxs_node], \n", + " y_insample[idxs_node] if y_insample is not None else None, \n", " levels_node_, \n", " method=top_down_method)\n", " reconciled[idxs_node] = td['mean']\n", @@ -733,19 +749,20 @@ " top_down_method: str):\n", " self.middle_level = middle_level\n", " self.top_down_method = top_down_method \n", + " self.insample = top_down_method in ['average_proportions', 'proportion_averages']\n", " \n", " def reconcile(self, \n", " S: np.ndarray,\n", " y_hat: np.ndarray,\n", - " y_insample: np.ndarray,\n", - " tags: Dict[str, np.ndarray]):\n", + " tags: Dict[str, np.ndarray],\n", + " y_insample: Optional[np.ndarray] = None):\n", " \"\"\"Middle Out Reconciliation Method.\n", "\n", " **Parameters:**
\n", " `S`: Summing matrix of size (`base`, `bottom`).
\n", " `y_hat`: Forecast values of size (`base`, `horizon`).
\n", - " `y_insample`: Insample values of size (`base`, `insample_size`).
\n", - " `levels`: Each key is a level and each value its `S` indices.
\n", + " `tags`: Each key is a level and each value its `S` indices.
 \n",
" + `y_insample`: Insample values of size (`base`, `insample_size`). Only used by `average_proportions` and `proportion_averages`
\n", "\n", " **Returns:**
\n", " `y_tilde`: Reconciliated y_hat using the Middle Out approach.\n", @@ -792,15 +809,26 @@ "# but it is not a general case\n", "for method in ['forecast_proportions', 'average_proportions', 'proportion_averages']:\n", " cls_middle_out = MiddleOut(middle_level='level2', top_down_method=method)\n", - " test_close(\n", - " cls_middle_out(\n", - " S=S, \n", - " y_hat=S @ y_hat_bottom, \n", - " y_insample=S @ y_bottom, \n", - " tags=tags\n", - " )['mean'],\n", - " S @ y_hat_bottom\n", - " )" + " if cls_middle_out.insample:\n", + " assert method in ['average_proportions', 'proportion_averages']\n", + " test_close(\n", + " cls_middle_out(\n", + " S=S, \n", + " y_hat=S @ y_hat_bottom, \n", + " y_insample=S @ y_bottom, \n", + " tags=tags\n", + " )['mean'],\n", + " S @ y_hat_bottom\n", + " )\n", + " else:\n", + " test_close(\n", + " cls_middle_out(\n", + " S=S, \n", + " y_hat=S @ y_hat_bottom, \n", + " tags=tags\n", + " )['mean'],\n", + " S @ y_hat_bottom\n", + " )" ] }, { @@ -901,7 +929,7 @@ " \\mathbf{S}^{\\intercal}\\mathbf{W}^{-1}_{h}$$\n", " \n", " **Parameters:**
\n", - " `method`: str, one of `ols`, `wls_struct`, `wls_var`, `mint_shrink`, `mint_co`.
\n", + " `method`: str, one of `ols`, `wls_struct`, `wls_var`, `mint_shrink`, `mint_cov`.
\n", "\n", " **References:**
\n", " - [Wickramasuriya, S. L., Athanasopoulos, G., & Hyndman, R. J. (2019). \\\"Optimal forecast reconciliation for\n", @@ -911,12 +939,13 @@ " def __init__(self, \n", " method: str):\n", " self.method = method\n", + " self.insample = method in ['wls_var', 'mint_cov', 'mint_shrink']\n", "\n", " def reconcile(self, \n", " S: np.ndarray,\n", " y_hat: np.ndarray,\n", - " y_insample: np.ndarray,\n", - " y_hat_insample: np.ndarray,\n", + " y_insample: Optional[np.ndarray] = None,\n", + " y_hat_insample: Optional[np.ndarray] = None,\n", " sigmah: Optional[np.ndarray] = None,\n", " level: Optional[List[int]] = None,\n", " bootstrap: bool = False,\n", @@ -926,7 +955,8 @@ " **Parameters:**
\n", " `S`: Summing matrix of size (`base`, `bottom`).
\n", " `y_hat`: Forecast values of size (`base`, `horizon`).
\n", - " `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).
\n", + " `y_insample`: Insample values of size (`base`, `insample_size`). Only used by `wls_var`, `mint_cov`, `mint_shrink`
\n", + " `y_hat_insample`: Insample fitted values of size (`base`, `insample_size`). Only used by `wls_var`, `mint_cov`, `mint_shrink`
\n", " `sigmah`: float, estimate of the standard deviation of the h-step forecast of size (`base`, `horizon`)
\n", " `level`: float list 0-100, confidence levels for prediction intervals.
\n", " `bootstrap`: bool, whether or not to use bootstraped prediction intervals, alternative normality assumption.
\n", @@ -985,15 +1015,25 @@ "#| hide\n", "for method in ['ols', 'wls_struct', 'wls_var', 'mint_shrink']:\n", " cls_min_trace = MinTrace(method=method)\n", - " test_close(\n", - " cls_min_trace(\n", - " S=S, \n", - " y_hat=S @ y_hat_bottom, \n", - " y_insample=S @ y_bottom,\n", - " y_hat_insample=S @ y_hat_bottom_insample\n", - " )['mean'],\n", - " S @ y_hat_bottom\n", - " )\n", + " if cls_min_trace.insample:\n", + " assert method in ['wls_var', 'mint_cov', 'mint_shrink']\n", + " test_close(\n", + " cls_min_trace(\n", + " S=S, \n", + " y_hat=S @ y_hat_bottom, \n", + " y_insample=S @ y_bottom,\n", + " y_hat_insample=S @ y_hat_bottom_insample\n", + " )['mean'],\n", + " S @ y_hat_bottom\n", + " )\n", + " else:\n", + " test_close(\n", + " cls_min_trace(\n", + " S=S, \n", + " y_hat=S @ y_hat_bottom, \n", + " )['mean'],\n", + " S @ y_hat_bottom\n", + " )\n", "with ExceptionExpected(regex='min_trace (mint_cov)*'):\n", " cls_min_trace = MinTrace(method='mint_cov')\n", " cls_min_trace(\n", @@ -1101,12 +1141,11 @@ " raise ValueError(f\"Optimal Combination class does not support method: \\\"{method}\\\"\")\n", "\n", " self.method = method\n", + " self.insample = False\n", "\n", " def reconcile(self,\n", " S: np.ndarray,\n", " y_hat: np.ndarray,\n", - " y_insample: np.ndarray = None,\n", - " y_hat_insample: np.ndarray = None,\n", " sigmah: Optional[np.ndarray] = None,\n", " level: Optional[List[int]] = None,\n", " bootstrap: bool = False,\n", @@ -1116,7 +1155,6 @@ " **Parameters:**
\n", " `S`: Summing matrix of size (`base`, `bottom`).
\n", " `y_hat`: Forecast values of size (`base`, `horizon`).
\n", - " `idx_bottom`: Indices corresponding to the bottom level of `S`, size (`bottom`).
\n", " `sigmah`: float, estimate of the standard deviation of the h-step forecast of size (`base`, `horizon`)
\n", " `level`: float list 0-100, confidence levels for prediction intervals.
\n", " `bootstrap`: bool, whether or not to use bootstraped prediction intervals, alternative normality assumption.
\n", @@ -1127,8 +1165,6 @@ " \"\"\"\n", " return optimal_combination(S=S,\n", " y_hat=y_hat,\n", - " y_insample=y_insample,\n", - " y_hat_insample=y_hat_insample,\n", " method=self.method, sigmah=sigmah,\n", " level=level, bootstrap=bootstrap,\n", " bootstrap_samples=bootstrap_samples)\n", @@ -1338,6 +1374,7 @@ " lambda_reg: float = 1e-2):\n", " self.method = method\n", " self.lambda_reg = lambda_reg\n", + " self.insample = True\n", "\n", " def reconcile(self, \n", " S: np.ndarray,\n", @@ -1480,13 +1517,6 @@ "### Bootstraped Prediction Intervals\n", "- [Puwasala Gamakumara Ph. D. dissertation. Monash University, Econometrics and Business Statistics. \"Probabilistic Forecast Reconciliation\"](https://bridges.monash.edu/articles/thesis/Probabilistic_Forecast_Reconciliation_Theory_and_Applications/11869533)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 4d997beb19ca3f7a80c0313c48f4d908f0fe023e Mon Sep 17 00:00:00 2001 From: FedericoGarza Date: Tue, 4 Oct 2022 21:46:13 -0500 Subject: [PATCH 2/3] feat: add optional insample values reconcile method --- hierarchicalforecast/core.py | 19 ++-- nbs/core.ipynb | 196 +++++++++++++++++++++-------------- nbs/evaluation.ipynb | 3 +- 3 files changed, 134 insertions(+), 84 deletions(-) diff --git a/hierarchicalforecast/core.py b/hierarchicalforecast/core.py index 90482bd..089d6ca 100644 --- a/hierarchicalforecast/core.py +++ b/hierarchicalforecast/core.py @@ -42,12 +42,13 @@ class HierarchicalReconciliation: def __init__(self, reconcilers: List[Callable]): self.reconcilers = reconcilers + self.insample = any([method.insample for method in reconcilers]) def reconcile(self, Y_hat_df: pd.DataFrame, - Y_df: pd.DataFrame, S: pd.DataFrame, tags: Dict[str, np.ndarray], + Y_df: Optional[pd.DataFrame] = None, level: Optional[List[int]] = None, bootstrap: bool = False): """Hierarchical Reconciliation Method. 
@@ -87,11 +88,17 @@ def reconcile(self, # same order of Y_hat_df to prevent errors S_ = S.loc[uids] common_vals = dict( - y_insample = Y_df.pivot(columns='ds', values='y').loc[uids].values.astype(np.float32), - S = S_.values.astype(np.float32), - idx_bottom = S_.index.get_indexer(S.columns), + S=S_.values.astype(np.float32), + idx_bottom=S_.index.get_indexer(S.columns), tags={key: S_.index.get_indexer(val) for key, val in tags.items()} ) + # we need insample values if + # we are using a method that requires them + # or if we are performing boostrap + if self.insample or bootstrap: + if Y_df is None: + raise Exception('you need to pass `Y_df`') + common_vals['y_insample'] = Y_df.pivot(columns='ds', values='y').loc[uids].values.astype(np.float32) fcsts = Y_hat_df.copy() for reconcile_fn in self.reconcilers: reconcile_fn_name = _build_fn_name(reconcile_fn) @@ -117,7 +124,7 @@ def reconcile(self, sigmah = sign * (y_hat_model - sigmah) / z common_vals['sigmah'] = sigmah common_vals['level'] = level - if has_fitted or bootstrap: + if (self.insample and has_fitted) or bootstrap: if model_name in Y_df: y_hat_insample = Y_df.pivot(columns='ds', values=model_name).loc[uids].values y_hat_insample = y_hat_insample.astype(np.float32) @@ -151,6 +158,6 @@ def reconcile(self, else: del common_vals['bootstrap_samples'] del common_vals['bootstrap'] - if has_fitted: + if self.insample and has_fitted: del common_vals['y_hat_insample'] return fcsts diff --git a/nbs/core.ipynb b/nbs/core.ipynb index 4b7b381..71fe710 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -102,12 +102,13 @@ " def __init__(self,\n", " reconcilers: List[Callable]):\n", " self.reconcilers = reconcilers\n", + " self.insample = any([method.insample for method in reconcilers])\n", "\n", " def reconcile(self, \n", " Y_hat_df: pd.DataFrame,\n", - " Y_df: pd.DataFrame,\n", " S: pd.DataFrame,\n", " tags: Dict[str, np.ndarray],\n", + " Y_df: Optional[pd.DataFrame] = None,\n", " level: Optional[List[int]] = None,\n", " 
bootstrap: bool = False):\n", " \"\"\"Hierarchical Reconciliation Method.\n", @@ -147,11 +148,17 @@ " # same order of Y_hat_df to prevent errors\n", " S_ = S.loc[uids]\n", " common_vals = dict(\n", - " y_insample = Y_df.pivot(columns='ds', values='y').loc[uids].values.astype(np.float32),\n", - " S = S_.values.astype(np.float32),\n", - " idx_bottom = S_.index.get_indexer(S.columns),\n", + " S=S_.values.astype(np.float32),\n", + " idx_bottom=S_.index.get_indexer(S.columns),\n", " tags={key: S_.index.get_indexer(val) for key, val in tags.items()}\n", " )\n", + " # we need insample values if \n", + " # we are using a method that requires them\n", + " # or if we are performing boostrap\n", + " if self.insample or bootstrap:\n", + " if Y_df is None:\n", + " raise Exception('you need to pass `Y_df`')\n", + " common_vals['y_insample'] = Y_df.pivot(columns='ds', values='y').loc[uids].values.astype(np.float32)\n", " fcsts = Y_hat_df.copy()\n", " for reconcile_fn in self.reconcilers:\n", " reconcile_fn_name = _build_fn_name(reconcile_fn)\n", @@ -177,7 +184,7 @@ " sigmah = sign * (y_hat_model - sigmah) / z\n", " common_vals['sigmah'] = sigmah\n", " common_vals['level'] = level\n", - " if has_fitted or bootstrap:\n", + " if (self.insample and has_fitted) or bootstrap:\n", " if model_name in Y_df:\n", " y_hat_insample = Y_df.pivot(columns='ds', values=model_name).loc[uids].values\n", " y_hat_insample = y_hat_insample.astype(np.float32)\n", @@ -211,7 +218,7 @@ " else:\n", " del common_vals['bootstrap_samples']\n", " del common_vals['bootstrap']\n", - " if has_fitted:\n", + " if self.insample and has_fitted:\n", " del common_vals['y_hat_insample']\n", " return fcsts" ] @@ -245,72 +252,6 @@ " name='reconcile', title_level=3)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| eval: false\n", - "import numpy as np\n", - "import pandas 
as pd\n", - "\n", - "from statsforecast.core import StatsForecast\n", - "from statsforecast.models import ETS, Naive\n", - "\n", - "from hierarchicalforecast.utils import aggregate\n", - "from hierarchicalforecast.core import HierarchicalReconciliation\n", - "from hierarchicalforecast.methods import BottomUp, MinTrace\n", - "\n", - "# Load TourismSmall dataset\n", - "df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')\n", - "df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1)\n", - "df.insert(0, 'Country', 'Australia')\n", - "\n", - "# Create hierarchical seires based on geographic levels and purpose\n", - "# And Convert quarterly ds string to pd.datetime format\n", - "hierarchy_levels = [['Country'],\n", - " ['Country', 'State'], \n", - " ['Country', 'Purpose'], \n", - " ['Country', 'State', 'Region'], \n", - " ['Country', 'State', 'Purpose'], \n", - " ['Country', 'State', 'Region', 'Purpose']]\n", - "\n", - "Y_df, S, tags = aggregate(df=df, spec=hierarchy_levels)\n", - "qs = Y_df['ds'].str.replace(r'(\\d+) (Q\\d)', r'\\1-\\2', regex=True)\n", - "Y_df['ds'] = pd.PeriodIndex(qs, freq='Q').to_timestamp()\n", - "Y_df = Y_df.reset_index()\n", - "\n", - "# Split train/test sets\n", - "Y_test_df = Y_df.groupby('unique_id').tail(4)\n", - "Y_train_df = Y_df.drop(Y_test_df.index)\n", - "\n", - "# Compute base auto-ETS predictions\n", - "# Careful identifying correct data freq, this data quarterly 'Q'\n", - "fcst = StatsForecast(df=Y_train_df,\n", - " #models=[ETS(season_length=12), Naive()],\n", - " models=[Naive()],\n", - " freq='Q', n_jobs=-1) \n", - "Y_hat_df = fcst.forecast(h=4)\n", - "\n", - "# Reconcile the base predictions\n", - "Y_train_df = Y_train_df.reset_index().set_index('unique_id')\n", - "Y_hat_df = Y_hat_df.reset_index().set_index('unique_id')\n", - "reconcilers = [BottomUp(),\n", - " MinTrace(method='ols')]\n", - "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", - "Y_rec_df 
= hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df,\n", - " S=S, tags=tags)\n", - "Y_rec_df.groupby('unique_id').head(2)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -382,7 +323,32 @@ " # ERM recovers but needs bigger eps\n", " #ERM(method='reg_bu', lambda_reg=None),\n", "])\n", - "reconciled = hrec.reconcile(hier_grouped_df_h, hier_grouped_df, S_grouped, tags_grouped)\n", + "reconciled = hrec.reconcile(Y_hat_df=hier_grouped_df_h, Y_df=hier_grouped_df, \n", + " S=S_grouped, tags=tags_grouped)\n", + "for model in reconciled.drop(columns=['ds', 'y']).columns:\n", + " if 'ERM' in model:\n", + " eps = 3\n", + " else:\n", + " eps = 1e-5\n", + " test_close(reconciled['y'], reconciled[model], eps=eps)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# test reconcile method without insample\n", + "hrec = HierarchicalReconciliation(reconcilers=[\n", + " #these methods should reconstruct the original y\n", + " BottomUp(),\n", + " MinTrace(method='ols'),\n", + " MinTrace(method='wls_struct'),\n", + "])\n", + "reconciled = hrec.reconcile(Y_hat_df=hier_grouped_df_h,\n", + " S=S_grouped, tags=tags_grouped)\n", "for model in reconciled.drop(columns=['ds', 'y']).columns:\n", " if 'ERM' in model:\n", " eps = 3\n", @@ -404,7 +370,7 @@ "test_fail(\n", " hrec.reconcile,\n", " contains='requires strictly hierarchical structures',\n", - " args=(hier_grouped_df_h, hier_grouped_df, S_grouped, tags_grouped)\n", + " args=(hier_grouped_df_h, S_grouped, tags_grouped, hier_grouped_df,)\n", ")" ] }, @@ -448,7 +414,12 @@ " # ERM recovers but needs bigger eps\n", " #ERM(method='reg_bu', lambda_reg=None),\n", "])\n", - "reconciled = hrec.reconcile(hier_strict_df_h, hier_strict_df, S_strict, tags_strict)\n", + "reconciled = hrec.reconcile(\n", + " Y_hat_df=hier_strict_df_h, \n", + " Y_df=hier_strict_df, \n", + " S=S_strict, \n", + " tags=tags_strict\n", + ")\n", "for model in 
reconciled.drop(columns=['ds', 'y']).columns:\n", " if 'ERM' in model:\n", " eps = 3\n", @@ -495,7 +466,12 @@ "#even if their signature includes\n", "#that argument\n", "hrec = HierarchicalReconciliation([MinTrace(method='ols')])\n", - "reconciled = hrec.reconcile(hier_grouped_df_h, hier_grouped_df.drop(columns=['y_model']), S_grouped, tags_grouped)\n", + "reconciled = hrec.reconcile(\n", + " Y_hat_df=hier_grouped_df_h, \n", + " Y_df=hier_grouped_df.drop(columns=['y_model']), \n", + " S=S_grouped, \n", + " tags=tags_grouped\n", + ")\n", "for model in reconciled.drop(columns=['ds', 'y']).columns:\n", " test_close(reconciled['y'], reconciled[model])" ] @@ -521,7 +497,7 @@ "#intervals\n", "hrec = HierarchicalReconciliation([BottomUp()])\n", "reconciled = hrec.reconcile(hier_grouped_df_h, \n", - " hier_grouped_df, S_grouped, tags_grouped,\n", + " Y_df=hier_grouped_df, S=S_grouped, tags=tags_grouped,\n", " level=[80, 90], bootstrap=True)\n", "total = reconciled.loc[tags_grouped['Country/State/Region/Purpose']].groupby('ds').sum().reset_index()\n", "pd.testing.assert_frame_equal(\n", @@ -529,6 +505,72 @@ " reconciled.loc['Australia'][['ds', 'y_model/BottomUp']].reset_index(drop=True)\n", ")" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| eval: false\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from statsforecast.core import StatsForecast\n", + "from statsforecast.models import ETS, Naive\n", + "\n", + "from hierarchicalforecast.utils import aggregate\n", + "from hierarchicalforecast.core import HierarchicalReconciliation\n", + "from hierarchicalforecast.methods import BottomUp, MinTrace\n", + "\n", + "# Load TourismSmall dataset\n", + "df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')\n", + "df = df.rename({'Trips': 'y', 
'Quarter': 'ds'}, axis=1)\n", + "df.insert(0, 'Country', 'Australia')\n", + "\n", + "# Create hierarchical seires based on geographic levels and purpose\n", + "# And Convert quarterly ds string to pd.datetime format\n", + "hierarchy_levels = [['Country'],\n", + " ['Country', 'State'], \n", + " ['Country', 'Purpose'], \n", + " ['Country', 'State', 'Region'], \n", + " ['Country', 'State', 'Purpose'], \n", + " ['Country', 'State', 'Region', 'Purpose']]\n", + "\n", + "Y_df, S, tags = aggregate(df=df, spec=hierarchy_levels)\n", + "qs = Y_df['ds'].str.replace(r'(\\d+) (Q\\d)', r'\\1-\\2', regex=True)\n", + "Y_df['ds'] = pd.PeriodIndex(qs, freq='Q').to_timestamp()\n", + "Y_df = Y_df.reset_index()\n", + "\n", + "# Split train/test sets\n", + "Y_test_df = Y_df.groupby('unique_id').tail(4)\n", + "Y_train_df = Y_df.drop(Y_test_df.index)\n", + "\n", + "# Compute base auto-ETS predictions\n", + "# Careful identifying correct data freq, this data quarterly 'Q'\n", + "fcst = StatsForecast(df=Y_train_df,\n", + " #models=[ETS(season_length=12), Naive()],\n", + " models=[Naive()],\n", + " freq='Q', n_jobs=-1) \n", + "Y_hat_df = fcst.forecast(h=4)\n", + "\n", + "# Reconcile the base predictions\n", + "Y_train_df = Y_train_df.reset_index().set_index('unique_id')\n", + "Y_hat_df = Y_hat_df.reset_index().set_index('unique_id')\n", + "reconcilers = [BottomUp(),\n", + " MinTrace(method='ols')]\n", + "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", + "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df,\n", + " S=S, tags=tags)\n", + "Y_rec_df.groupby('unique_id').head(2)" + ] } ], "metadata": { diff --git a/nbs/evaluation.ipynb b/nbs/evaluation.ipynb index 06b666b..4ea722e 100644 --- a/nbs/evaluation.ipynb +++ b/nbs/evaluation.ipynb @@ -333,7 +333,8 @@ " # ERM recovers but needs bigger eps\n", " ERM(method='reg_bu', lambda_reg=None),\n", "])\n", - "reconciled = hrec.reconcile(hier_grouped_df_h, hier_grouped_df, S_grouped, tags_grouped)" + "reconciled = 
hrec.reconcile(Y_hat_df=hier_grouped_df_h, Y_df=hier_grouped_df, \n", + " S=S_grouped, tags=tags_grouped)" ] }, { From 7b1988416b4e489ede3fc5735e6db5aff86e23cd Mon Sep 17 00:00:00 2001 From: FedericoGarza Date: Tue, 4 Oct 2022 21:47:11 -0500 Subject: [PATCH 3/3] fix: examples arguments --- .../AustralianDomesticTourism-Bootstraped-Intervals.ipynb | 3 ++- nbs/examples/AustralianDomesticTourism-Intervals.ipynb | 2 +- nbs/examples/AustralianDomesticTourism.ipynb | 2 +- nbs/examples/AustralianPrisonPopulation.ipynb | 2 +- nbs/examples/TourismSmall.ipynb | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/nbs/examples/AustralianDomesticTourism-Bootstraped-Intervals.ipynb b/nbs/examples/AustralianDomesticTourism-Bootstraped-Intervals.ipynb index c23a36a..a9f75e4 100644 --- a/nbs/examples/AustralianDomesticTourism-Bootstraped-Intervals.ipynb +++ b/nbs/examples/AustralianDomesticTourism-Bootstraped-Intervals.ipynb @@ -606,7 +606,8 @@ " MinTrace(method='ols')\n", "]\n", "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", - "Y_rec_df = hrec.reconcile(Y_hat_df, Y_fitted_df, S, tags, level=[80, 90], bootstrap=True)" + "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_fitted_df, S=S, \n", + " tags=tags, level=[80, 90], bootstrap=True)" ] }, { diff --git a/nbs/examples/AustralianDomesticTourism-Intervals.ipynb b/nbs/examples/AustralianDomesticTourism-Intervals.ipynb index 6602771..216810b 100644 --- a/nbs/examples/AustralianDomesticTourism-Intervals.ipynb +++ b/nbs/examples/AustralianDomesticTourism-Intervals.ipynb @@ -606,7 +606,7 @@ " MinTrace(method='ols')\n", "]\n", "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", - "Y_rec_df = hrec.reconcile(Y_hat_df, Y_fitted_df, S, tags, level=[80, 90])" + "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_fitted_df, S=S, tags=tags, level=[80, 90])" ] }, { diff --git a/nbs/examples/AustralianDomesticTourism.ipynb b/nbs/examples/AustralianDomesticTourism.ipynb index 6d72cd9..5a07b60 100644 
--- a/nbs/examples/AustralianDomesticTourism.ipynb +++ b/nbs/examples/AustralianDomesticTourism.ipynb @@ -547,7 +547,7 @@ " MinTrace(method='ols')\n", "]\n", "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", - "Y_rec_df = hrec.reconcile(Y_hat_df, Y_fitted_df, S, tags)" + "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_fitted_df, S=S, tags=tags)" ] }, { diff --git a/nbs/examples/AustralianPrisonPopulation.ipynb b/nbs/examples/AustralianPrisonPopulation.ipynb index 3ac03d3..f615a68 100644 --- a/nbs/examples/AustralianPrisonPopulation.ipynb +++ b/nbs/examples/AustralianPrisonPopulation.ipynb @@ -525,7 +525,7 @@ " MinTrace(method='mint_shrink')\n", "]\n", "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", - "Y_rec_df = hrec.reconcile(Y_hat_df, Y_fitted_df, S, tags)" + "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_fitted_df, S=S, tags=tags)" ] }, { diff --git a/nbs/examples/TourismSmall.ipynb b/nbs/examples/TourismSmall.ipynb index 7f399b6..c58ed4e 100644 --- a/nbs/examples/TourismSmall.ipynb +++ b/nbs/examples/TourismSmall.ipynb @@ -392,7 +392,7 @@ " top_down_method='forecast_proportions')\n", "]\n", "hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", - "Y_rec_df = hrec.reconcile(Y_hat_df, Y_train_df, S, tags)" + "Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df, S=S, tags=tags)" ] }, {