In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#| default_exp auto

# Auto
Automatic model selection.

In [None]:
#| export
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union

import numpy as np
import optuna
import utilsforecast.processing as ufp
from sklearn.base import BaseEstimator, clone
from sklearn.preprocessing import FunctionTransformer
from utilsforecast.compat import DataFrame
from utilsforecast.losses import smape
from utilsforecast.validation import validate_freq

from mlforecast import MLForecast
from mlforecast.core import Freq, _get_model_name, _name_models
from mlforecast.lag_transforms import ExponentiallyWeightedMean, RollingMean
from mlforecast.optimization import _TrialToConfig, mlforecast_objective
from mlforecast.target_transforms import Differences, LocalStandardScaler, GlobalSklearnTransformer
from mlforecast.utils import PredictionIntervals

In [None]:
#| export
def lightgbm_space(trial: optuna.Trial):
    """Default search space for LightGBM.

    Parameters
    ----------
    trial : optuna.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    dict
        Fixed settings (`bagging_freq`, `learning_rate`, `verbosity`) followed by the sampled values.
    """
    # settings that stay constant across trials
    params = dict(bagging_freq=1, learning_rate=0.05, verbosity=-1)
    params['n_estimators'] = trial.suggest_int('n_estimators', 20, 1000, log=True)
    # both regularization terms share the same log-scaled range
    for penalty in ('lambda_l1', 'lambda_l2'):
        params[penalty] = trial.suggest_float(penalty, 1e-8, 10.0, log=True)
    params['num_leaves'] = trial.suggest_int('num_leaves', 2, 4096, log=True)
    # feature and row subsampling fractions share the same range
    for fraction in ('feature_fraction', 'bagging_fraction'):
        params[fraction] = trial.suggest_float(fraction, 0.5, 1.0)
    params['objective'] = trial.suggest_categorical('objective', ['l1', 'l2'])
    return params

def xgboost_space(trial: optuna.Trial):
    """Default search space for XGBoost.

    Only parameters that XGBoost defines are sampled. `bagging_freq` and
    `min_data_in_leaf` are LightGBM parameter names, so sampling them only
    added unused dimensions to the search; row subsampling and the minimum
    leaf size are already covered by `subsample` and `min_child_weight`.

    Parameters
    ----------
    trial : optuna.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    dict
        Keyword arguments for the XGBoost regressor.
    """
    return {
        'n_estimators': trial.suggest_int('n_estimators', 20, 1000),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.2, log=True),
        'subsample': trial.suggest_float('subsample', 0.1, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 2, 10),
    }
    
def catboost_space(trial: optuna.Trial):
    """Default search space for CatBoost.

    Parameters
    ----------
    trial : optuna.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    dict
        `silent=True` followed by the sampled values.
    """
    params = {'silent': True}
    params['n_estimators'] = trial.suggest_int('n_estimators', 50, 1000)
    params['depth'] = trial.suggest_int('depth', 1, 10)
    params['learning_rate'] = trial.suggest_float('learning_rate', 1e-3, 0.2, log=True)
    # row and column subsampling fractions share the same range
    for fraction in ('subsample', 'colsample_bylevel'):
        params[fraction] = trial.suggest_float(fraction, 0.1, 1.0)
    # NOTE(review): sampled as a float; confirm CatBoost accepts non-integer values here
    params['min_data_in_leaf'] = trial.suggest_float('min_data_in_leaf', 1, 100)
    return params
    
def linear_regression_space(trial: optuna.Trial):
    """Default search space for linear regression: whether to fit an intercept."""
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    return dict(fit_intercept=use_intercept)
    
def ridge_space(trial: optuna.Trial):
    """Default search space for ridge regression: intercept and regularization strength."""
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    strength = trial.suggest_float('alpha', 0.001, 10.0)
    return {'fit_intercept': use_intercept, 'alpha': strength}
    
def lasso_space(trial: optuna.Trial):
    """Default search space for the lasso: intercept and regularization strength."""
    params = {}
    params['fit_intercept'] = trial.suggest_categorical('fit_intercept', [True, False])
    params['alpha'] = trial.suggest_float('alpha', 0.001, 10.0)
    return params
    
def elastic_net_space(trial: optuna.Trial):
    """Default search space for the elastic net: intercept, regularization strength and L1 ratio."""
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    strength = trial.suggest_float('alpha', 0.001, 10.0)
    l1_share = trial.suggest_float('l1_ratio', 0.0, 1.0)
    return dict(fit_intercept=use_intercept, alpha=strength, l1_ratio=l1_share)

def random_forest_space(trial: optuna.Trial):
    """Default search space for random forests.

    Parameters
    ----------
    trial : optuna.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    dict
        Sampled `n_estimators`, `max_depth`, `min_samples_split`, `max_features` and `criterion`.
    """
    # (name, lower bound, upper bound) of the integer hyperparameters, in sampling order
    int_ranges = (
        ('n_estimators', 50, 1000),
        ('max_depth', 1, 10),
        ('min_samples_split', 2, 100),
    )
    params = {name: trial.suggest_int(name, low, high) for name, low, high in int_ranges}
    params['max_features'] = trial.suggest_float('max_features', 0.5, 1.0)
    params['criterion'] = trial.suggest_categorical(
        'criterion', ['squared_error', 'absolute_error']
    )
    return params

class AutoModel:
    """Structure to hold a model and its search space

    The two arguments are stored unchanged in the `model` and `config` attributes.

    Parameters
    ----------
    model : BaseEstimator
        scikit-learn compatible regressor
    config : callable
        function that takes an optuna trial and produces a configuration
    """

    def __init__(self, model: BaseEstimator, config: _TrialToConfig):
        self.model, self.config = model, config

    def __repr__(self):
        # only the model's name is shown, the search space is omitted
        model_name = _get_model_name(self.model)
        return 'AutoModel(model={})'.format(model_name)

class AutoLightGBM(AutoModel):
    """LightGBM regressor paired with `lightgbm_space` unless a custom `config` is given.

    Parameters
    ----------
    config : callable, optional (default=None)
        function that takes an optuna trial and produces a configuration
    """

    def __init__(self, config: Optional[_TrialToConfig] = None):
        # imported here so the dependency is only needed when this class is instantiated
        from mlforecast.compat import LGBMRegressor

        estimator = LGBMRegressor()
        search_space = lightgbm_space if config is None else config
        super().__init__(estimator, search_space)

class AutoXGBoost(AutoModel):
    """XGBoost regressor paired with `xgboost_space` unless a custom `config` is given.

    Parameters
    ----------
    config : callable, optional (default=None)
        function that takes an optuna trial and produces a configuration
    """

    def __init__(self, config: Optional[_TrialToConfig] = None):
        # imported here so the dependency is only needed when this class is instantiated
        from mlforecast.compat import XGBRegressor

        estimator = XGBRegressor()
        search_space = xgboost_space if config is None else config
        super().__init__(estimator, search_space)

class AutoCatboost(AutoModel):
    """CatBoost regressor paired with `catboost_space` unless a custom `config` is given.

    Parameters
    ----------
    config : callable, optional (default=None)
        function that takes an optuna trial and produces a configuration
    """

    def __init__(self, config: Optional[_TrialToConfig] = None):
        # imported here so the dependency is only needed when this class is instantiated
        from mlforecast.compat import CatBoostRegressor

        estimator = CatBoostRegressor()
        search_space = catboost_space if config is None else config
        super().__init__(estimator, search_space)

class AutoLinearRegression(AutoModel):
    """Linear regression paired with `linear_regression_space` unless a custom `config` is given.

    Parameters
    ----------
    config : callable, optional (default=None)
        function that takes an optuna trial and produces a configuration
    """

    def __init__(self, config: Optional[_TrialToConfig] = None):
        from sklearn.linear_model import LinearRegression

        estimator = LinearRegression()
        search_space = linear_regression_space if config is None else config
        super().__init__(estimator, search_space)

class AutoRidge(AutoModel):
    """Ridge regression paired with `ridge_space` unless a custom `config` is given.

    Parameters
    ----------
    config : callable, optional (default=None)
        function that takes an optuna trial and produces a configuration
    """

    def __init__(self, config: Optional[_TrialToConfig] = None):
        from sklearn.linear_model import Ridge

        estimator = Ridge()
        search_space = ridge_space if config is None else config
        super().__init__(estimator, search_space)

class AutoLasso(AutoModel):
    """Lasso regression paired with `lasso_space` unless a custom `config` is given.

    Parameters
    ----------
    config : callable, optional (default=None)
        function that takes an optuna trial and produces a configuration
    """

    def __init__(self, config: Optional[_TrialToConfig] = None):
        from sklearn.linear_model import Lasso

        estimator = Lasso()
        search_space = lasso_space if config is None else config
        super().__init__(estimator, search_space)

class AutoElasticNet(AutoModel):
    """Elastic net paired with `elastic_net_space` unless a custom `config` is given.

    Parameters
    ----------
    config : callable, optional (default=None)
        function that takes an optuna trial and produces a configuration
    """

    def __init__(self, config: Optional[_TrialToConfig] = None):
        from sklearn.linear_model import ElasticNet

        estimator = ElasticNet()
        search_space = elastic_net_space if config is None else config
        super().__init__(estimator, search_space)

class AutoRandomForest(AutoModel):
    """Random forest paired with `random_forest_space` unless a custom `config` is given.

    Parameters
    ----------
    config : callable, optional (default=None)
        function that takes an optuna trial and produces a configuration
    """

    def __init__(self, config: Optional[_TrialToConfig] = None):
        from sklearn.ensemble import RandomForestRegressor

        estimator = RandomForestRegressor()
        search_space = random_forest_space if config is None else config
        super().__init__(estimator, search_space)

In [None]:
#| hide
from nbdev import show_doc

In [None]:
# Render the API documentation of AutoModel.
show_doc(AutoModel)

---

[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L112){target="_blank" style="float:right; font-size:smaller"}

### AutoModel

>      AutoModel (model:sklearn.base.BaseEstimator,
>                 config:Callable[[optuna.trial._trial.Trial],Dict[str,Any]])

*Structure to hold a model and its search space*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| model | BaseEstimator | scikit-learn compatible regressor |
| config | Callable | function that takes an optuna trial and produces a configuration |

In [None]:
#| export
class AutoMLForecast:
    """Hyperparameter optimization helper
    
    Parameters
    ----------
    models : list or dict
        Auto models to be optimized.
    freq : str or int
        pandas' or polars' offset alias or integer denoting the frequency of the series.
    season_length : int
        Length of the seasonal period. This is used for producing the feature space.
    init_config : callable, optional (default=None)
        Function that takes an optuna trial and produces a configuration passed to the MLForecast constructor.
    fit_config : callable, optional (default=None)
        Function that takes an optuna trial and produces a configuration passed to the MLForecast fit method.
    num_threads : int (default=1)
        Number of threads to use when computing the features.
    """
    def __init__(
        self,
        models: Union[List[AutoModel], Dict[str, AutoModel]],
        freq: Freq,
        season_length: int,
        init_config: Optional[_TrialToConfig] = None,
        fit_config: Optional[_TrialToConfig] = None,
        num_threads: int = 1,
    ):
        self.freq = freq
        self.season_length = season_length
        self.num_threads = num_threads
        if isinstance(models, list):
            # lists get automatically generated names, dicts are used as provided
            model_names = _name_models([_get_model_name(m) for m in models])
            models_with_names = dict(zip(model_names, models))
        else:
            models_with_names = models
        self.models = models_with_names
        if init_config is not None and not callable(init_config):
            raise ValueError('`init_config` must be a function.')
        self.init_config = init_config
        if fit_config is not None:
            if not callable(fit_config):
                raise ValueError('`fit_config` must be a function.')
            self.fit_config = fit_config
        else:
            # no extra arguments for MLForecast.fit by default
            self.fit_config = lambda trial: {}  # noqa: ARG005

    def __repr__(self):
        return f'AutoMLForecast(models={self.models})'

    def _seasonality_based_config(
        self,
        h: int,
        min_samples: int,
        min_value: float,
    ) -> _TrialToConfig:
        """Build the default search space for the MLForecast constructor.

        Parameters
        ----------
        h : int
            Forecast horizon.
        min_samples : int
            Number of samples used to decide if the seasonal differences are candidates.
        min_value : float
            Minimum value of the target. The log1p based transformations are only candidates when it's non-negative.

        Returns
        -------
        callable
            Function that takes an optuna trial and produces the
            `lags`, `target_transforms`, `lag_transforms` and `date_features` arguments.
        """
        # target transforms  
        candidate_targ_tfms: List[Any] = [
            None,
            [LocalStandardScaler()],
            [Differences([1]), LocalStandardScaler()],
        ]
        log1p_tfm = GlobalSklearnTransformer(
            FunctionTransformer(func=np.log1p, inverse_func=np.expm1)
        )
        if min_value >= 0:
            candidate_targ_tfms.extend(
                [
                    [log1p_tfm, LocalStandardScaler()],
                    [log1p_tfm, Differences([1]), LocalStandardScaler()],
                ]
            )
        # we leave two seasonal periods for the features and model
        if self.season_length > 1 and min_samples > 3 * self.season_length + 1:
            candidate_targ_tfms.append([Differences([1, self.season_length]), LocalStandardScaler()])
            if min_value >= 0:
                candidate_targ_tfms.append(
                    [log1p_tfm, Differences([1, self.season_length]), LocalStandardScaler()],
                )

        # lags
        candidate_lags = [None, [self.season_length]]
        seasonality2extra_candidate_lags = {
            7: [
                [7, 14],
                [7, 28],
            ],
            12: [range(1, 13)],
            24: [
                range(1, 25),
                range(24, 24 * 7 + 1, 24),
            ],
            52: [
                range(4, 53, 4),
            ]
        }
        if self.season_length in seasonality2extra_candidate_lags:
            candidate_lags.extend(
                seasonality2extra_candidate_lags[self.season_length]  # type: ignore
            )
        if h >= 2 * self.season_length:
            candidate_lags.extend(
                [
                    range(self.season_length, h + 1, self.season_length),  # type: ignore
                    [h],
                    [self.season_length, h],
                ]
            )

        # lag transforms
        candidate_lag_tfms = [None, {1: [ExponentiallyWeightedMean(0.9)]}]
        if self.season_length > 1:
            candidate_lag_tfms.append(
                {
                    1: [ExponentiallyWeightedMean(0.9)],
                    self.season_length: [
                        RollingMean(window_size=self.season_length, min_samples=1),
                    ]
                }
            )
        if self.season_length != h:
            candidate_lag_tfms.append(
                {
                    1: [ExponentiallyWeightedMean(0.9)],
                    self.season_length: [
                        RollingMean(window_size=self.season_length, min_samples=1),
                    ],
                    h: [
                        RollingMean(window_size=self.season_length, min_samples=1),
                    ]
                }
            )

        # date features
        seasonality2date_features = {
            1: ['year'],
            4: ['quarter', 'year'],
            7: ['weekday', 'month', 'year'],
            12: ['month', 'year'],
            24: ['hour', 'weekday', 'month', 'year'],
            52: ['week', 'year'],
            60: ['weekday', 'hour', 'second'],
        }
        candidate_date_features = seasonality2date_features.get(self.season_length, [])
        if isinstance(self.freq, int):
            # integer frequencies get no date features
            candidate_date_features = []

        def config(trial):
            # the trial picks the position of each candidate instead of the candidate itself
            # target transforms
            targ_tfms_idx = trial.suggest_categorical(
                'target_transforms_idx', range(len(candidate_targ_tfms))
            )
            target_transforms = candidate_targ_tfms[targ_tfms_idx]

            # lags
            lags_idx = trial.suggest_categorical('lags_idx', range(len(candidate_lags)))
            lags = candidate_lags[lags_idx]

            # lag transforms (there are always at least two candidates)
            lag_tfms_idx = trial.suggest_categorical(
                'lag_transforms_idx', range(len(candidate_lag_tfms))
            )
            lag_transforms = candidate_lag_tfms[lag_tfms_idx]

            # date features
            date_features = None
            if candidate_date_features:
                use_date_features = trial.suggest_int('use_date_features', 0, 1)
                if use_date_features:
                    date_features = candidate_date_features

            return {
                'lags': lags,
                'target_transforms': target_transforms,
                'lag_transforms': lag_transforms,
                'date_features': date_features,
            }

        return config

    def fit(
        self,
        df: DataFrame,
        n_windows: int,
        h: int,
        num_samples: int,
        refit: Union[bool, int] = False,
        loss: Optional[Callable[[DataFrame, DataFrame], float]] = None,
        id_col: str = 'unique_id',
        time_col: str = 'ds',
        target_col: str = 'y',
        study_kwargs: Optional[Dict[str, Any]] = None,
        optimize_kwargs: Optional[Dict[str, Any]] = None,
        fitted: bool = False,
        prediction_intervals: Optional[PredictionIntervals] = None,
    ) -> 'AutoMLForecast':
        """Carry out the optimization process.
        Each model is optimized independently and the best one is trained on all data
        
        Parameters
        ----------
        df : pandas or polars DataFrame
            Series data in long format.
        n_windows : int
            Number of windows to evaluate.
        h : int
            Forecast horizon.
        num_samples : int
            Number of trials to run
        refit : bool or int (default=False)
            Retrain model for each cross validation window.
            If False, the models are trained at the beginning and then used to predict each window.
            If positive int, the models are retrained every `refit` windows.
        loss : callable, optional (default=None)
            Function that takes the validation and train dataframes and produces a float.
            If `None` will use the average SMAPE across series.
        id_col : str (default='unique_id')
            Column that identifies each series.
        time_col : str (default='ds')
            Column that identifies each timestep, its values can be timestamps or integers.
        target_col : str (default='y')
            Column that contains the target.
        study_kwargs : dict, optional (default=None)
            Keyword arguments to be passed to the optuna.Study constructor.
            The provided dictionary is not modified.
        optimize_kwargs : dict, optional (default=None)
            Keyword arguments to be passed to the optuna.Study.optimize method.
        fitted : bool (default=False)
            Whether to compute the fitted values when retraining the best model.
        prediction_intervals : PredictionIntervals, optional (default=None)
            Configuration to calibrate prediction intervals when retraining the best model.

        Returns
        -------
        AutoMLForecast
            object with best models and optimization results
        """
        validate_freq(df[time_col], self.freq)
        if self.init_config is not None:
            init_config = self.init_config
        else:
            # size of the shortest series once the validation windows are removed
            min_size = ufp.counts_by_id(df, id_col)['counts'].min()
            min_train_size = min_size - n_windows * h
            init_config = self._seasonality_based_config(
                h=h,
                min_samples=min_train_size,
                min_value=df[target_col].min(),
            )

        if loss is None:
            def loss(df, train_df):  # noqa: ARG001
                # the column names must be forwarded, otherwise smape looks for its defaults
                return smape(
                    df, models=['model'], id_col=id_col, target_col=target_col
                )['model'].mean()
        # work on a copy so that the caller's dictionary isn't modified
        study_kwargs = {} if study_kwargs is None else dict(study_kwargs)
        if 'sampler' not in study_kwargs:
            # for reproducibility
            study_kwargs['sampler'] = optuna.samplers.TPESampler(seed=0)
        if optimize_kwargs is None:
            optimize_kwargs = {}
        # fit arguments that are always defined by the arguments of this method
        reserved_fit_params = (
            'fitted', 'prediction_intervals', 'id_col', 'time_col', 'target_col'
        )

        self.results_ = {}
        self.models_ = {}
        for name, auto_model in self.models.items():
            def config_fn(trial: optuna.Trial) -> Dict[str, Any]:
                return {
                    'model_params': auto_model.config(trial),
                    'mlf_init_params': {
                        **init_config(trial),
                        'num_threads': self.num_threads,
                    },
                    'mlf_fit_params': self.fit_config(trial)
                }

            objective = mlforecast_objective(
                df=df,
                config_fn=config_fn,
                loss=loss,
                model=auto_model.model,
                freq=self.freq,
                n_windows=n_windows,
                h=h,
                refit=refit,
                id_col=id_col,
                time_col=time_col,
                target_col=target_col,
            )
            study = optuna.create_study(direction='minimize', **study_kwargs)
            study.optimize(objective, n_trials=num_samples, **optimize_kwargs)
            self.results_[name] = study
            best_config = study.best_trial.user_attrs['config']
            best_fit_params = best_config['mlf_fit_params']
            for reserved in reserved_fit_params:
                best_fit_params.pop(reserved, None)
            best_model = clone(auto_model.model)
            best_model.set_params(**best_config['model_params'])
            self.models_[name] = MLForecast(
                models={name: best_model},
                freq=self.freq,
                **best_config['mlf_init_params']
            )
            # retrain the best configuration on all the data, using the same columns as the search
            self.models_[name].fit(
                df,
                id_col=id_col,
                time_col=time_col,
                target_col=target_col,
                fitted=fitted,
                prediction_intervals=prediction_intervals,
                **best_fit_params,
            )
        return self

    def predict(
        self,
        h: int,
        X_df: Optional[DataFrame] = None,
        level: Optional[List[Union[int, float]]] = None,
    ) -> DataFrame:
        """Compute forecasts

        Parameters
        ----------
        h : int
            Number of periods to predict.
        X_df : pandas or polars DataFrame, optional (default=None)
            Dataframe with the future exogenous features. Should have the id column and the time column.
        level : list of ints or floats, optional (default=None)
            Confidence levels between 0 and 100 for prediction intervals.

        Returns
        -------
        pandas or polars DataFrame
            Predictions for each series and timestep, with one column per model.
        """
        all_preds = None
        for model in self.models_.values():
            preds = model.predict(h=h, X_df=X_df, level=level)
            if all_preds is None:
                all_preds = preds
            else:
                # only the columns that aren't in the result yet are appended
                model_cols = [c for c in preds.columns if c not in all_preds.columns]
                all_preds = ufp.horizontal_concat([all_preds, preds[model_cols]])
        return all_preds

    def save(self, path: Union[str, Path]) -> None:
        """Save AutoMLForecast objects

        Each fitted model is saved to a subdirectory named after the model.

        Parameters
        ----------
        path : str or pathlib.Path
            Directory where artifacts will be stored."""
        for name, model in self.models_.items():
            model.save(f'{path}/{name}')

    def forecast_fitted_values(
        self,
        level: Optional[List[Union[int, float]]] = None,
    ) -> DataFrame:
        """Access in-sample predictions.

        Parameters
        ----------
        level : list of ints or floats, optional (default=None)
            Confidence levels between 0 and 100 for prediction intervals.

        Returns
        -------
        pandas or polars DataFrame
            Dataframe with predictions for the training set
        """
        fitted_vals = None
        for model in self.models_.values():
            model_fitted = model.forecast_fitted_values(level=level)
            if fitted_vals is None:
                fitted_vals = model_fitted
            else:
                # the target is already in the result, so it's dropped before joining
                fitted_vals = ufp.join(
                    fitted_vals,
                    ufp.drop_columns(model_fitted, model.ts.target_col),
                    on=[model.ts.id_col, model.ts.time_col],
                    how='inner',
                )
        return fitted_vals

In [None]:
# Render the API documentation of AutoMLForecast.
show_doc(AutoMLForecast)

---

[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L239){target="_blank" style="float:right; font-size:smaller"}

### AutoMLForecast

>      AutoMLForecast
>                      (models:Union[List[__main__.AutoModel],Dict[str,__main__.
>                      AutoModel]], freq:Union[int,str], season_length:int, init
>                      _config:Optional[Callable[[optuna.trial._trial.Trial],Dic
>                      t[str,Any]]]=None, fit_config:Optional[Callable[[optuna.t
>                      rial._trial.Trial],Dict[str,Any]]]=None,
>                      num_threads:int=1)

*Hyperparameter optimization helper*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| models | Union |  | Auto models to be optimized. |
| freq | Union |  | pandas' or polars' offset alias or integer denoting the frequency of the series. |
| season_length | int |  | Length of the seasonal period. This is used for producing the feature space. |
| init_config | Optional | None | Function that takes an optuna trial and produces a configuration passed to the MLForecast constructor. |
| fit_config | Optional | None | Function that takes an optuna trial and produces a configuration passed to the MLForecast fit method. |
| num_threads | int | 1 | Number of threads to use when computing the features. |

In [None]:
# Render the API documentation of AutoMLForecast.fit.
show_doc(AutoMLForecast.fit)

---

[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L431){target="_blank" style="float:right; font-size:smaller"}

### AutoMLForecast.fit

>      AutoMLForecast.fit
>                          (df:Union[pandas.core.frame.DataFrame,polars.datafram
>                          e.frame.DataFrame], n_windows:int, h:int,
>                          num_samples:int, refit:Union[bool,int]=False, loss:Op
>                          tional[Callable[[Union[pandas.core.frame.DataFrame,po
>                          lars.dataframe.frame.DataFrame],Union[pandas.core.fra
>                          me.DataFrame,polars.dataframe.frame.DataFrame]],float
>                          ]]=None, id_col:str='unique_id', time_col:str='ds',
>                          target_col:str='y',
>                          study_kwargs:Optional[Dict[str,Any]]=None,
>                          optimize_kwargs:Optional[Dict[str,Any]]=None,
>                          fitted:bool=False, prediction_intervals:Optional[mlfo
>                          recast.utils.PredictionIntervals]=None)

*Carry out the optimization process.
Each model is optimized independently and the best one is trained on all data*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| df | Union |  | Series data in long format. |
| n_windows | int |  | Number of windows to evaluate. |
| h | int |  | Forecast horizon. |
| num_samples | int |  | Number of trials to run |
| refit | Union | False | Retrain model for each cross validation window.<br>If False, the models are trained at the beginning and then used to predict each window.<br>If positive int, the models are retrained every `refit` windows. |
| loss | Optional | None | Function that takes the validation and train dataframes and produces a float.<br>If `None` will use the average SMAPE across series. |
| id_col | str | unique_id | Column that identifies each series. |
| time_col | str | ds | Column that identifies each timestep, its values can be timestamps or integers. |
| target_col | str | y | Column that contains the target.         |
| study_kwargs | Optional | None | Keyword arguments to be passed to the optuna.Study constructor. |
| optimize_kwargs | Optional | None | Keyword arguments to be passed to the optuna.Study.optimize method. |
| fitted | bool | False | Whether to compute the fitted values when retraining the best model. |
| prediction_intervals | Optional | None | Configuration to calibrate prediction intervals when retraining the best model. |
| **Returns** | **AutoMLForecast** |  | **object with best models and optimization results** |

In [None]:
# Render the API documentation of AutoMLForecast.predict.
show_doc(AutoMLForecast.predict)

---

[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L555){target="_blank" style="float:right; font-size:smaller"}

### AutoMLForecast.predict

>      AutoMLForecast.predict (h:int, X_df:Union[pandas.core.frame.DataFrame,pol
>                              ars.dataframe.frame.DataFrame,NoneType]=None,
>                              level:Optional[List[Union[int,float]]]=None)

*Compute forecasts*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| h | int |  | Number of periods to predict. |
| X_df | Union | None | Dataframe with the future exogenous features. Should have the id column and the time column. |
| level | Optional | None | Confidence levels between 0 and 100 for prediction intervals. |
| **Returns** | **Union** |  | **Predictions for each series and timestep, with one column per model.** |

In [None]:
# Render the API documentation of AutoMLForecast.save.
show_doc(AutoMLForecast.save)

---

[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L583){target="_blank" style="float:right; font-size:smaller"}

### AutoMLForecast.save

>      AutoMLForecast.save (path:Union[str,pathlib.Path])

*Save AutoMLForecast objects*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| path | Union | Directory where artifacts will be stored. |
| **Returns** | **None** |  |

In [None]:
# Render the API documentation of AutoMLForecast.forecast_fitted_values.
show_doc(AutoMLForecast.forecast_fitted_values)

---

[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/auto.py#L593){target="_blank" style="float:right; font-size:smaller"}

### AutoMLForecast.forecast_fitted_values

>      AutoMLForecast.forecast_fitted_values
>                                             (level:Optional[List[Union[int,flo
>                                             at]]]=None)

*Access in-sample predictions.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| level | Optional | None | Confidence levels between 0 and 100 for prediction intervals. |
| **Returns** | **Union** |  | **Dataframe with predictions for the training set** |

In [None]:
from datasetsforecast.m4 import M4, M4Evaluation, M4Info
from sklearn.linear_model import Ridge
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder

In [None]:
def train_valid_split(group):
    """Split an M4 group into train and validation sets.

    The last `horizon` rows of every series (as defined in `M4Info` for the group)
    form the validation set and the remaining rows the training set.

    Parameters
    ----------
    group : str
        Name of the M4 group.

    Returns
    -------
    tuple of pandas DataFrames
        Train set (with a fresh index) and validation set (with its original index).
    """
    series, *_ = M4.load(group=group, directory='data')
    series['ds'] = series['ds'].astype('int')
    n_valid = M4Info[group].horizon
    by_series = series.groupby('unique_id')
    valid = by_series.tail(n_valid).copy()
    train = series.drop(index=valid.index).reset_index(drop=True)
    return train, valid

In [None]:
# one-hot encode the series identifier and pass the remaining features through
id_encoder = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), ['unique_id'])],
    remainder='passthrough',
)
ridge_pipeline = make_pipeline(id_encoder, Ridge())


def ridge_pipeline_space(trial):
    """Ridge search space with its keys prefixed by `ridge__` to address the pipeline step."""
    return {f'ridge__{name}': value for name, value in ridge_space(trial).items()}


auto_ridge = AutoModel(ridge_pipeline, ridge_pipeline_space)

In [None]:
# only show optuna's errors
optuna.logging.set_verbosity(optuna.logging.ERROR)

group = 'Weekly'
train, valid = train_valid_split(group)
# the identifier is used as a static feature, so both sets share the same categorical dtype
train = train.astype({'unique_id': 'category'})
valid = valid.astype({'unique_id': train['unique_id'].dtype})

info = M4Info[group]
h, season_length = info.horizon, info.seasonality

auto_mlf = AutoMLForecast(
    models={'lgb': AutoLightGBM(), 'ridge': auto_ridge},
    freq=1,
    season_length=season_length,
    fit_config=lambda trial: {'static_features': ['unique_id']},
    num_threads=2,
)
auto_mlf.fit(
    train,
    n_windows=2,
    h=h,
    num_samples=2,
    optimize_kwargs={'timeout': 60},
    fitted=True,
    prediction_intervals=PredictionIntervals(n_windows=2, h=h),
)
# forecasts of every tuned model with 80% intervals
auto_mlf.predict(h=h, level=[80])

Unnamed: 0,unique_id,ds,lgb,lgb-lo-80,lgb-hi-80,ridge,ridge-lo-80,ridge-hi-80
0,W1,2180,35529.435224,35061.835362,35997.035086,36110.921202,35880.445097,36341.397307
1,W1,2181,35521.764894,34973.035617,36070.494171,36195.175757,36051.013811,36339.337702
2,W1,2182,35537.417268,34960.050939,36114.783596,36107.528852,35784.062169,36430.995536
3,W1,2183,35538.058206,34823.640706,36252.475705,36027.139248,35612.635725,36441.642771
4,W1,2184,35614.611211,34627.023739,36602.198683,36092.858489,35389.690977,36796.026000
...,...,...,...,...,...,...,...,...
4662,W99,2292,15071.536978,14484.617399,15658.456557,15319.146221,14869.410567,15768.881875
4663,W99,2293,15058.145278,14229.686322,15886.604234,15299.549555,14584.269352,16014.829758
4664,W99,2294,15042.493434,14096.380636,15988.606232,15271.744712,14365.349338,16178.140086
4665,W99,2295,15042.144846,14037.053904,16047.235787,15250.070504,14403.428791,16096.712216


In [None]:
# In-sample predictions with 95% intervals; `fit` was called with `fitted=True` above.
auto_mlf.forecast_fitted_values(level=[95])

Unnamed: 0,unique_id,ds,y,lgb,lgb-lo-95,lgb-hi-95,ridge,ridge-lo-95,ridge-hi-95
0,W1,15,1071.06,1060.584344,599.618355,1521.550334,1076.990151,556.535492,1597.444810
1,W1,16,1073.73,1072.669242,611.703252,1533.635232,1083.633276,563.178617,1604.087936
2,W1,17,1066.97,1072.452128,611.486139,1533.418118,1084.724311,564.269652,1605.178970
3,W1,18,1066.17,1065.837828,604.871838,1526.803818,1080.127197,559.672538,1600.581856
4,W1,19,1064.43,1065.214681,604.248691,1526.180671,1080.636826,560.182167,1601.091485
...,...,...,...,...,...,...,...,...,...
361881,W99,2279,15738.54,15887.661228,15721.237195,16054.085261,15927.918181,15723.222760,16132.613603
361882,W99,2280,15388.13,15755.943789,15589.519756,15922.367823,15841.599064,15636.903642,16046.294485
361883,W99,2281,15187.62,15432.224701,15265.800668,15598.648735,15584.462232,15379.766811,15789.157654
361884,W99,2282,15172.27,15177.040831,15010.616797,15343.464864,15396.243223,15191.547801,15600.938644


In [None]:
#| polars
import polars as pl

In [None]:
#| polars
# same training data as a polars dataframe, with the identifier as a string
train_pl = pl.from_pandas(train.astype({'unique_id': 'str'}))

auto_mlf = AutoMLForecast(
    models={'ridge': AutoRidge()},
    freq=1,
    season_length=season_length,
    num_threads=2,
)
auto_mlf.fit(
    train_pl,
    n_windows=2,
    h=h,
    num_samples=2,
    optimize_kwargs={'timeout': 60},
    fitted=True,
    prediction_intervals=PredictionIntervals(n_windows=2, h=h),
)
# forecasts with 80% intervals
auto_mlf.predict(h=h, level=[80])

unique_id,ds,ridge,ridge-lo-80,ridge-hi-80
str,i64,f64,f64,f64
"""W1""",2180,35046.096663,34046.69521,36045.498116
"""W1""",2181,34743.269216,33325.847975,36160.690457
"""W1""",2182,34489.591086,32591.254559,36387.927614
"""W1""",2183,34270.768179,32076.507727,36465.02863
"""W1""",2184,34124.021857,31352.454121,36895.589593
…,…,…,…,…
"""W99""",2292,14719.457096,13983.308582,15455.605609
"""W99""",2293,14631.552077,13928.874336,15334.229818
"""W99""",2294,14532.905239,13642.840118,15422.97036
"""W99""",2295,14446.065443,13665.088667,15227.04222


In [None]:
#| polars
# In-sample predictions with 95% intervals; `fit` was called with `fitted=True` above.
auto_mlf.forecast_fitted_values(level=[95])

unique_id,ds,y,ridge,ridge-lo-95,ridge-hi-95
str,i64,f64,f64,f64,f64
"""W1""",14,1061.96,1249.326428,488.765249,2009.887607
"""W1""",15,1071.06,1246.067836,485.506657,2006.629015
"""W1""",16,1073.73,1254.027897,493.466718,2014.589076
"""W1""",17,1066.97,1254.475948,493.914769,2015.037126
"""W1""",18,1066.17,1248.306754,487.745575,2008.867933
…,…,…,…,…,…
"""W99""",2279,15738.54,15754.558812,15411.968645,16097.148979
"""W99""",2280,15388.13,15655.780865,15313.190698,15998.371032
"""W99""",2281,15187.62,15367.498468,15024.908301,15710.088635
"""W99""",2282,15172.27,15172.591423,14830.001256,15515.18159
