# Forecasting Optuna Search CV 
## Modulus Set 2

**Notebook Goal**
- A modeling pipeline that optimizes the hyperparameters of the sktime forecasters that have the [capability:pred_int tag](https://www.sktime.net/en/stable/examples/01b_forecasting_proba.html)
- This notebook focuses on the forecasters where `i mod 4 = 2`, where `i` is the forecaster's index in the registry table from the link above.
- The work will be based on this documentation: [ForecastingOptunaSearchCV](https://www.sktime.net/en/stable/api_reference/auto_generated/sktime.forecasting.model_selection.ForecastingOptunaSearchCV.html)

## Data Load

In [1]:
from sktime.forecasting.model_selection import (
    ForecastingOptunaSearchCV,
    )
from sktime.datasets import load_shampoo_sales
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)
from sktime.forecasting.base import ForecastingHorizon
from sktime.split import ExpandingWindowSplitter
from sktime.split import temporal_train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.series.detrend import Deseasonalizer, Detrender
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.trend import STLForecaster
import optuna
from  optuna.distributions import CategoricalDistribution

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Fixed seed for the Optuna sampler so repeated runs propose the same trials
# and the reported best parameters are reproducible.
SEED = 42

# Load the shampoo sales series and split off 6 observations as the test part.
y = load_shampoo_sales()
y_train, y_test = temporal_train_test_split(y=y, test_size=6)

# Express the test period as a horizon relative to the last training timestamp,
# so the same horizon can be reused by every cross-validation window.
fh = ForecastingHorizon(y_test.index, is_relative=False).to_relative(
    cutoff=y_train.index[-1]
)

# Expanding-window splitter: first window has 24 observations, then grows by 1.
cv = ExpandingWindowSplitter(fh=fh, initial_window=24, step_length=1)

# Pipeline applied to the target: detrend -> deseasonalize -> scale -> forecast.
# The sklearn scalers are wrapped so they can be used as series transformers.
# The final "forecaster" step is a placeholder; the search below swaps it out.
forecaster = TransformedTargetForecaster(
    steps=[
        ("detrender", Detrender()),
        ("deseasonalizer", Deseasonalizer()),
        ("scaler", TabularToSeriesAdaptor(RobustScaler())),
        ("minmax2", TabularToSeriesAdaptor(MinMaxScaler((1, 10)))),
        ("forecaster", NaiveForecaster()),
    ]
)

# Search space: toggle RobustScaler's `with_scaling` flag and choose which
# forecaster occupies the final pipeline step.
param_grid = {
    "scaler__transformer__with_scaling": CategoricalDistribution((True, False)),
    "forecaster": CategoricalDistribution((STLForecaster(), ThetaForecaster())),
}

gscv = ForecastingOptunaSearchCV(
    forecaster=forecaster,
    param_grid=param_grid,
    cv=cv,
    n_evals=10,
    # Explicitly seeded sampler: without it the sequence of sampled trials is
    # left to library defaults and may differ between runs.
    sampler=optuna.samplers.TPESampler(seed=SEED),
)

# NOTE(review): the search is fitted on the full series `y`, so `y_test` is
# only used to derive `fh` above and is not an untouched hold-out set.
gscv.fit(y)
print(f"{gscv.best_params_=}")

  warn(
[I 2025-01-24 14:42:36,387] A new study created in memory with name: no-name-e9993a8b-ea8f-4909-8b7a-0f2b2dfaed34


gscv.best_params_={'scaler__transformer__with_scaling': False, 'forecaster': STLForecaster()}


In [7]:
# Display the shampoo sales series held in `y` as the cell output.
y

Period
1991-01    266.0
1991-02    145.9
1991-03    183.1
1991-04    119.3
1991-05    180.3
1991-06    168.5
1991-07    231.8
1991-08    224.5
1991-09    192.8
1991-10    122.9
1991-11    336.5
1991-12    185.9
1992-01    194.3
1992-02    149.5
1992-03    210.1
1992-04    273.3
1992-05    191.4
1992-06    287.0
1992-07    226.0
1992-08    303.6
1992-09    289.9
1992-10    421.6
1992-11    264.5
1992-12    342.3
1993-01    339.7
1993-02    440.4
1993-03    315.9
1993-04    439.3
1993-05    401.3
1993-06    437.4
1993-07    575.5
1993-08    407.6
1993-09    682.0
1993-10    475.3
1993-11    581.3
1993-12    646.9
Freq: M, Name: Number of shampoo sales, dtype: float64

In [6]:
# Display the ForecastingOptunaSearchCV object held in `gscv` as the cell output.
gscv
