## Example Case with sktime data

In [1]:
# %%
from sklearn.pipeline import make_pipeline
from sktime.forecasting.compose._reduce import (
    RecursiveReductionForecaster,
)
from xgboost import XGBRegressor
from sktime.forecasting.model_selection import (
    ForecastingGridSearchCV,
    ForecastingOptunaSearchCV,
)
from sktime.performance_metrics.forecasting import MeanAbsoluteError
from sktime.utils.estimator_checks import check_estimator

import optuna

# Parallel-backend settings: joblib with the "loky" backend and n_jobs=-1.
# NOTE(review): n_jobs=-1 presumably means "use all cores" rather than
# deactivating parallelism, and nothing in the visible cells passes this dict
# to an estimator — confirm before relying on it.
parallel_config = {
    "backend:parallel": "joblib",
    "backend:parallel:params": dict(backend="loky", n_jobs=-1),
}

# Window length handed to the reduction forecaster.
lags = 12

# XGBoost keyword arguments; only the seed is pinned.
params_xgb = dict(random_state=42)

# Single-step sklearn pipeline. The tuning cells address the regressor through
# the "estimator__xgbregressor__..." parameter path, so the pipeline wrapper
# must stay in place.
regressor = make_pipeline(XGBRegressor(**params_xgb))

model_xgb = RecursiveReductionForecaster(
    estimator=regressor,
    window_length=lags,
    pooling="global",
    impute_method="bfill",
)

# Alias under which the tuning cells receive the forecaster.
pipe_forecast = model_xgb

  warn(


In [2]:
from utils import load_stallion

# load_stallion is imported from the local `utils` module (not shown here).
# It returns two values; the first is discarded and the second is kept as the
# target `y`.
# NOTE(review): as_period=True presumably yields a Period-typed date level —
# the tracebacks further down show Period('2014-01', 'M') entries. Confirm
# against utils.load_stallion.
_, y = load_stallion(as_period=True)

In [3]:
# Shrink the panel to a handful of series so the example runs quickly:
# agencies 01-02, SKUs 01-03, and only dates after 2013-12. The index is
# flattened for column-based filtering and rebuilt afterwards.
y = (
    y.reset_index()
    .loc[lambda frame: frame["agency"].str.match(r"Agency_0[1-2]$")]
    .loc[lambda frame: frame["sku"].str.match(r"SKU_0[1-3]$")]
    .loc[lambda frame: frame["date"] > "2013-12"]
    .set_index(["agency", "sku", "date"])
)

In [4]:
from sktime.transformations.hierarchical.aggregate import Aggregator

# Build an Aggregator with flatten_single_levels=False and apply it to the
# filtered panel; reset() is called on the freshly built instance before
# fit_transform.
# NOTE(review): data_agg is not referenced by any later cell in view — the
# train/test split below is taken from `y`, not from data_agg. Confirm that is
# intended.
agg = Aggregator(flatten_single_levels=False)
data_agg = agg.reset().fit_transform(y)

In [5]:
from sktime.forecasting.model_selection import temporal_train_test_split

# Temporal hold-out split of the filtered panel with test_size=18
# (presumably the last 18 periods of each series — TODO confirm; the value
# matches fh = 18 in the tuning cell).
y_train, y_test = temporal_train_test_split(y, test_size=18)
# Unique values of the innermost index level of y_test (the "date" level set in
# the filter cell); passed as the forecasting horizon to fit/predict below.
test_fh = y_test.index.get_level_values(-1).unique()

In [6]:
from sktime.split import ExpandingWindowSplitter, SlidingWindowSplitter
import numpy as np

# --- Search spaces -----------------------------------------------------------
# Optuna: integer distribution over the number of boosting rounds.
parameter_search_space_optuna = {
    "estimator__xgbregressor__n_estimators": optuna.distributions.IntDistribution(
        low=10, high=3000
    ),
}

# Grid: 10, 20, ..., 2990 (299 candidates).
parameter_search_space_grid = {
    "estimator__xgbregressor__n_estimators": [*range(10, 3000, 10)],
}

# Same distribution under a doubly nested "forecaster__forecaster__" prefix;
# neither search object below uses this dict.
parameter_search_space_rrf = {
    "forecaster__forecaster__estimator__xgbregressor__n_estimators": optuna.distributions.IntDistribution(
        low=10, high=3000
    ),
}

# --- Cross-validation layout -------------------------------------------------
fh = 18  # number of steps ahead evaluated per fold
fold = 4  # number of folds the window arithmetic is sized for
step_length = 3  # shift between consecutive folds

# Count of distinct time points in the training data (innermost index level).
y_size = len(y_train.index.get_level_values(-1).unique())
# Initial window sized so that `fold` windows advanced by `step_length`, plus
# the horizon, fit inside the y_size available time points.
single_fold_length = y_size - step_length * (fold - 1) - fh
fold_strategy = ExpandingWindowSplitter(
    fh=np.arange(1, fh + 1),
    initial_window=single_fold_length,
    step_length=step_length,
)

# Seeded sampler handed to the Optuna search.
sampler = optuna.samplers.TPESampler(seed=42)

# --- Tuners ------------------------------------------------------------------
# Exhaustive grid search; error_score="raise" surfaces fit errors instead of
# scoring them.
grid = ForecastingGridSearchCV(
    forecaster=pipe_forecast,
    cv=fold_strategy,
    param_grid=parameter_search_space_grid,
    scoring=MeanAbsoluteError(),
    error_score="raise",
)

# Optuna-driven search limited to 5 evaluations.
htcv = ForecastingOptunaSearchCV(
    forecaster=pipe_forecast,
    cv=fold_strategy,
    param_grid=parameter_search_space_optuna,
    n_evals=5,
    sampler=sampler,
    scoring=MeanAbsoluteError(),
    error_score="raise",
)

  warn(


## Show Versions

In [7]:
import pandas as pd
# Print the installed pandas/dependency versions and platform details so the
# failure can be reproduced in a matching environment.
pd.show_versions()




INSTALLED VERSIONS
------------------
commit           : 2e218d10984e9919f0296931d92ea851c6a6faf5
python           : 3.10.6.final.0
python-bits      : 64
OS               : Linux
OS-release       : 5.15.153.1-microsoft-standard-WSL2
Version          : #1 SMP Fri Mar 29 23:14:13 UTC 2024
machine          : x86_64
processor        : x86_64
byteorder        : little
LC_ALL           : None
LANG             : C.UTF-8
LOCALE           : en_US.UTF-8

pandas           : 1.5.3
numpy            : 1.23.5
pytz             : 2024.2
dateutil         : 2.9.0.post0
setuptools       : 75.1.0
pip              : 24.2
Cython           : 3.0.11
pytest           : 7.4.2
hypothesis       : 6.112.1
sphinx           : None
blosc            : None
feather          : None
xlsxwriter       : None
lxml.etree       : None
html5lib         : None
pymysql          : None
psycopg2         : None
jinja2           : 3.1.4
IPython          : 8.27.0
pandas_datareader: None
bs4              : None
bottleneck       : None

## Check Estimator

In [None]:
# Run check_estimator on the unfitted grid-search wrapper.
# The author notes that VS Code crashes on this cell; the recorded run ends in
# a KeyboardInterrupt.
check_est_before_fit_grid = check_estimator(grid)

  warn(


**********************************************************************
File "/home/l29244/churn-planning-time-series/venv2/lib/python3.10/site-packages/sktime/forecasting/model_selection/_tune.py", line 552, in NoName
Failed example:
    gscv.fit(y)
Expected:
    ForecastingGridSearchCV(...)
Got:
    ForecastingGridSearchCV(cv=ExpandingWindowSplitter(fh=[1, 2, 3]),
                            forecaster=NaiveForecaster(),
                            param_grid={'strategy': ['last', 'mean', 'drift']})


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


KeyboardInterrupt: 

In [None]:
# Run check_estimator on the unfitted Optuna search wrapper (htcv). The
# recorded output for this cell is cut off mid-line.
check_est_before_fit_optuna = check_estimator(htcv)

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


**********************************************************************
File "/home/l29244/churn-planning-time-series/venv2/lib/python3.10/site-packages/sktime/forecasting/model_selection/_tune.py", line 1697, in NoName
Failed example:
    fh = ForecastingHorizon(y_test.index, is_relative=False).to_relative(
Exception raised:
    Traceback (most recent call last):
      File "/home/l29244/.pyenv/versions/3.10.6/lib/python3.10/doctest.py", line 1350, in __run
        exec(compile(example.source, filename, "single",
      File "<doctest NoName[18]>", line 1
        fh = ForecastingHorizon(y_test.index, is_relative=False).to_relative(
                                                                            ^
    SyntaxError: '(' was never closed
**********************************************************************
File "/home/l29244/churn-planning-time-series/venv2/lib/python3.10/site-packages/sktime/forecasting/model_selection/_tune.py", line 1698, in NoName
Failed example:
    cutoff

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
[I 2024-12-06 11:52:54,138] A new study created in memory with name: no-name-6d35c00c-d148-4f18-9159-a08f7d0fb1c0
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
[I 2024-12-06 11:56:13,338] A new study created in memory with name: no-name-23174dbe-72ac-43fe-bc24-6a19a2f65bd8
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  wa

In [11]:
# Run check_estimator on the bare RecursiveReductionForecaster. The recorded
# output reports two failures of test_hierarchical_with_exogeneous
# (y:1cols and y:2cols).
check_est_before_fit = check_estimator(model_xgb)

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


FAILED: test_hierarchical_with_exogeneous[RecursiveReductionForecaster-y:1cols]
FAILED: test_hierarchical_with_exogeneous[RecursiveReductionForecaster-y:2cols]


  warn(


## Fit

In [12]:
# Fitting the bare RecursiveReductionForecaster (XGBoost pipeline) works.
# test_fh holds the date values of the held-out test set.
only_rff_xgb_fitted = model_xgb.fit(
    y=y_train,
    fh=test_fh,
)

In [13]:
# Fitting the Optuna search wrapper fails in the recorded run with:
# TypeError: Cannot convert input [('Agency_01', 'SKU_01', Period('2014-01', 'M'))] of type <class 'tuple'> to Timestamp
optuna_fitted = htcv.fit(
    y=y_train,
    fh=test_fh,
)

  warn(
[I 2024-12-06 11:48:46,754] A new study created in memory with name: no-name-33999cc5-9aa2-43c9-ac13-eaa350e488c9
  warn(
  warn(


TypeError: Cannot convert input [('Agency_01', 'SKU_01', Period('2014-01', 'M'))] of type <class 'tuple'> to Timestamp

In [14]:
# Fitting the grid-search wrapper fails in the recorded run with:
# TypeError: Cannot convert input [('Agency_01', 'SKU_01', Period('2014-01', 'M'))] of type <class 'tuple'> to Timestamp

grid_fitted = grid.fit(
    y=y_train,
    fh=test_fh,
)

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


TypeError: Cannot convert input [('Agency_01', 'SKU_01', Period('2014-01', 'M'))] of type <class 'tuple'> to Timestamp

## Predict

In [15]:
# Predicting from the directly fitted forecaster fails in the recorded run with:
# TypeError: Cannot convert input [('Agency_01', 'SKU_01', Period('2014-01', 'M'))] of type <class 'tuple'> to Timestamp
only_rff_xgb_fitted.predict(fh=test_fh)

TypeError: Cannot convert input [('Agency_01', 'SKU_01', Period('2014-01', 'M'))] of type <class 'tuple'> to Timestamp

In [None]:
# Forecast from the tuned Optuna search object over the test horizon.
# NOTE(review): in the recorded run above htcv.fit raised a TypeError, so the
# table shown for this cell presumably comes from a different session and
# optuna_fitted would be undefined after Restart & Run All — TODO confirm.
optuna_fitted.predict(fh=test_fh)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,volume
agency,sku,date,Unnamed: 3_level_1
Agency_01,SKU_01,2016-07,84.921066
Agency_01,SKU_01,2016-08,114.410080
Agency_01,SKU_01,2016-09,106.359634
Agency_01,SKU_01,2016-10,86.365440
Agency_01,SKU_01,2016-11,80.918663
...,...,...,...
Agency_02,SKU_03,2017-08,19086.826172
Agency_02,SKU_03,2017-09,19086.826172
Agency_02,SKU_03,2017-10,19086.826172
Agency_02,SKU_03,2017-11,19086.826172


In [None]:
# Forecast from the tuned grid-search object over the test horizon.
# NOTE(review): grid.fit raised a TypeError in the recorded run, so grid_fitted
# is presumably undefined when this cell runs on a fresh kernel — TODO confirm.
grid_fitted.predict(fh=test_fh)