In [None]:
"""Frequency-based parameter spaces: placeholder dicts that still need expansion."""

from mlforecast.target_transforms import Differences, LocalStandardScaler


# Per-frequency search presets. Each preset carries the bounds for the lag
# count and a list of candidate target-transform pipelines (one pipeline per
# inner list). Every preset builds its own transform instances.
hourly = dict(
    min_lags=1,
    max_lags=24,
    target_transforms=[
        [LocalStandardScaler()],
        [Differences([1])],
        [Differences([1]), LocalStandardScaler()],
    ],
)

weekly = dict(
    min_lags=1,
    max_lags=52,
    target_transforms=[
        [LocalStandardScaler()],
        [Differences([1])],
        [Differences([1]), LocalStandardScaler()],
    ],
)

monthly = dict(
    min_lags=1,
    max_lags=12,
    target_transforms=[
        [LocalStandardScaler()],
        [Differences([1])],
        [Differences([1]), LocalStandardScaler()],
    ],
)

daily = dict(
    min_lags=1,
    max_lags=7,
    target_transforms=[
        [LocalStandardScaler()],
        [Differences([1])],
        [Differences([1]), LocalStandardScaler()],
    ],
)


In [None]:
"""Model Specific param space functions"""
def lightgbm(trial):
    """Draw one LightGBM parameter set from an Optuna trial.

    Parameters
    ----------
    trial : object
        Anything exposing ``suggest_int``, ``suggest_float`` and
        ``suggest_categorical`` (normally an ``optuna`` trial).

    Returns
    -------
    dict
        Sampled values keyed by the same names used in the suggest calls.
    """
    space = {}
    space["n_estimators"] = trial.suggest_int("n_estimators", 20, 1000)
    space["min_child_samples"] = trial.suggest_int("min_child_samples", 1, 100)
    # Both penalties are drawn on a linear scale (no ``log=True``).
    space["lambda_l1"] = trial.suggest_float("lambda_l1", 1e-8, 10.0)
    space["lambda_l2"] = trial.suggest_float("lambda_l2", 1e-8, 10.0)
    space["num_leaves"] = trial.suggest_int("num_leaves", 2, 512)
    space["feature_fraction"] = trial.suggest_float("feature_fraction", 0.1, 1.0)
    space["bagging_fraction"] = trial.suggest_float("bagging_fraction", 0.1, 1.0)
    space["bagging_freq"] = trial.suggest_int("bagging_freq", 0, 15)
    space["objective"] = trial.suggest_categorical(
        "objective", ["regression", "regression_l1"]
    )
    return space
    
def xgboost(trial):
    """Draw one XGBoost parameter set from an Optuna trial.

    Only names that XGBoost documents are sampled. The earlier
    ``bagging_freq`` and ``min_data_in_leaf`` entries were LightGBM parameter
    names (and were sampled as floats); they added search dimensions without
    being XGBoost settings, so they are no longer part of the space.

    Parameters
    ----------
    trial : object
        Anything exposing ``suggest_int`` and ``suggest_float`` (normally an
        ``optuna`` trial).

    Returns
    -------
    dict
        Sampled values keyed by the same names used in the suggest calls.
    """
    return {
        "n_estimators": trial.suggest_int(name="n_estimators", low=50, high=1000),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        # Log scale: the range spans several orders of magnitude.
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, .2, log=True),
        'subsample': trial.suggest_float('subsample', .1, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', .1, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 1.0, log=True),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 1.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 2, 10),
    }
    
def catboost(trial):
    """Draw one CatBoost parameter set from an Optuna trial.

    ``min_data_in_leaf`` is a sample count, so it is drawn with
    ``suggest_int`` rather than as a float.

    Parameters
    ----------
    trial : object
        Anything exposing ``suggest_int`` and ``suggest_float`` (normally an
        ``optuna`` trial).

    Returns
    -------
    dict
        Sampled values keyed by the same names used in the suggest calls.
    """
    return {
        "n_estimators": trial.suggest_int(name="n_estimators", low=50, high=1000),
        'depth': trial.suggest_int('depth', 1, 10),
        # Log scale: the range spans several orders of magnitude.
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, .2, log=True),
        'subsample': trial.suggest_float('subsample', .1, 1.0),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', .1, 1.0),
        # NOTE(review): CatBoost's docs describe min_data_in_leaf as usable only
        # with the Lossguide/Depthwise grow policies, and no grow_policy is
        # sampled here -- confirm this key is honoured with the default policy.
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 100),
    }
    
def linear_regression(trial):
    """Draw the linear-regression search space (intercept on/off) from a trial.

    Returns
    -------
    dict
        ``{"fit_intercept": <bool chosen by the trial>}``.
    """
    use_intercept = trial.suggest_categorical("fit_intercept", [True, False])
    return dict(fit_intercept=use_intercept)
    
def ridge(trial):
    """Draw the ridge search space from a trial.

    Returns
    -------
    dict
        ``fit_intercept`` (categorical True/False) and ``alpha`` (float in
        [0.001, 10.0], linear scale).
    """
    intercept_choice = trial.suggest_categorical("fit_intercept", [True, False])
    penalty = trial.suggest_float("alpha", 0.001, 10.0)
    return {"fit_intercept": intercept_choice, "alpha": penalty}
    
def lasso(trial):
    """Draw the lasso search space from a trial.

    Returns
    -------
    dict
        ``fit_intercept`` (categorical True/False) and ``alpha`` (float in
        [0.001, 10.0], linear scale).
    """
    sampled = dict()
    sampled["fit_intercept"] = trial.suggest_categorical("fit_intercept", [True, False])
    sampled["alpha"] = trial.suggest_float("alpha", 0.001, 10.0)
    return sampled
    
def elasticnet(trial):
    """Draw the elastic-net search space from a trial.

    Returns
    -------
    dict
        ``fit_intercept`` (categorical True/False), ``alpha`` (float in
        [0.001, 10.0]) and ``l1_ratio`` (float in [0.0, 1.0]).
    """
    intercept_choice = trial.suggest_categorical("fit_intercept", [True, False])
    penalty = trial.suggest_float("alpha", 0.001, 10.0)
    mix = trial.suggest_float("l1_ratio", 0.0, 1.0)
    return {
        "fit_intercept": intercept_choice,
        "alpha": penalty,
        "l1_ratio": mix,
    }

def random_forest(trial):
    """Draw one random-forest parameter set from an Optuna trial.

    Every value is registered with the trial under the same name as its dict
    key. Optuna reports best parameters by the registered name, so a mismatch
    (previously ``min_samples_split`` was registered as ``min_child_samples``)
    would hand the model constructor a keyword it does not accept.

    Parameters
    ----------
    trial : object
        Anything exposing ``suggest_int``, ``suggest_float`` and
        ``suggest_categorical`` (normally an ``optuna`` trial).

    Returns
    -------
    dict
        Sampled values keyed by the same names used in the suggest calls.
    """
    return {
        "n_estimators": trial.suggest_int(name="n_estimators", low=50, high=1000),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        # scikit-learn requires an integer min_samples_split to be at least 2.
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 100),
        'max_features': trial.suggest_float('max_features', .5, 1.0),
        # NOTE(review): 'poisson' presumably needs a non-negative target, which
        # scaled or differenced series may violate -- confirm it belongs here.
        "criterion": trial.suggest_categorical("criterion", ['squared_error', 'poisson']),
    }

In [None]:

import optuna
import numpy as np
from typing import List, Optional
from utilsforecast.compat import DataFrame
from utilsforecast.losses import rmse
from mlforecast.forecast import MLForecast
from mlforecast.target_transforms import LocalStandardScaler
from mlforecast.core import (
    Freq,
    Models,
    _name_models,
)


class Optimizer:
    """Search model hyperparameters and MLForecast feature settings with Optuna.

    One Optuna study is run per model class. Each trial samples model
    hyperparameters (from the matching entry of ``model_configs``) together
    with a lag count and an index into each list of candidate target
    transforms, lag transforms and date features. The trial is scored by the
    mean loss over ``MLForecast.cross_validation`` windows.
    """

    # Keys of a parameter dict that are MLForecast arguments rather than model hyperparameters.
    _mlforecast_params = ('lag_transforms', 'target_transforms', 'date_features', 'static_features', 'id_col', 'time_col', 'target_col', 'lags')
    # Parameters sampled as an integer index into a list of candidates
    # stored on the instance under the same attribute name.
    _optuna_list_params = ('lag_transforms', 'target_transforms', 'date_features')
    # Sentinel for "argument omitted". ``None`` cannot be used because an
    # explicit ``None`` already means "a single candidate: no transform".
    _UNSET = object()

    def __init__(
        self,
        freq: Freq,
        models: Models,
        model_configs=None,
        min_lags=1,
        max_lags=4,
        target_transforms=_UNSET,
        date_features=None,
        lag_transforms=None,
        id_col: str = "unique_id",
        time_col: str = "ds",
        target_col: str = "y",
        static_features: Optional[List[str]] = None,
    ):
        """Store the search configuration.

        Parameters
        ----------
        freq : Freq
            Frequency of the time column, forwarded to ``MLForecast``.
        models : Models
            Model classes to tune. Each one is instantiated with the sampled
            hyperparameters as keyword arguments.
        model_configs : sequence of callables, optional
            One callable per entry in ``models`` (matched by position); each
            takes an Optuna trial and returns a dict of model keyword
            arguments. Required by ``optimize``.
        min_lags, max_lags : int
            Bounds used when sampling the number of lags.
        target_transforms : list, optional
            Candidate values for MLForecast's ``target_transforms``; one
            candidate (itself a list of transforms, or ``None``) is selected
            per trial. When omitted, the single candidate
            ``[LocalStandardScaler()]`` is used. Passing ``None`` means a
            single candidate with no transform.
        date_features : list, optional
            Candidate values for MLForecast's ``date_features``. ``None``
            means a single candidate with no date features.
        lag_transforms : list, optional
            Candidate values for MLForecast's ``lag_transforms``. ``None``
            means a single candidate with no lag transforms.
        id_col : str (default='unique_id')
            Column that identifies each series.
        time_col : str (default='ds')
            Column that identifies each timestep; its values can be
            timestamps or integers.
        target_col : str (default='y')
            Column that contains the target.
        static_features : list of str, optional (default=None)
            Names of the features that are static and will be repeated when
            forecasting. If `None`, will consider all columns (except id_col
            and time_col) as static.
        """
        if target_transforms is Optimizer._UNSET:
            # The argument is a list of candidates, so the default must be a
            # list holding one pipeline, not a flat list of transforms.
            # Built per instance so no transform object is shared via a
            # mutable default argument.
            target_transforms = [[LocalStandardScaler()]]
        self.freq = freq
        self.models = models
        self.model_names = _name_models([m.__name__ for m in models])
        self.model_configs = model_configs
        self.target_transforms = Optimizer._sanitize_params(target_transforms)
        self.lag_transforms = Optimizer._sanitize_params(lag_transforms)
        self.date_features = Optimizer._sanitize_params(date_features)
        self.param_configs = Optimizer._build_optuna_param(
            min_lags,
            max_lags,
            self.target_transforms,
            self.lag_transforms,
            self.date_features
        )
        self.id_col = id_col
        self.time_col = time_col
        self.target_col = target_col
        self.static_features = static_features

    @staticmethod
    def _sanitize_params(parameter):
        """Return ``parameter`` as a list of candidates; ``None`` becomes ``[None]``."""
        if parameter is None:
            return [parameter]
        return parameter

    @staticmethod
    def _sanitize_output(optuna_param, mlforecast_param):
        """Map a sampled index back to the candidate it refers to."""
        return mlforecast_param[optuna_param]

    @staticmethod
    def _build_output(model_obj, params, mlforecast_params):
        """Split ``params`` into model kwargs and MLForecast kwargs.

        Returns a dict with ``'models'`` (a one-element list holding
        ``model_obj`` instantiated with the non-MLForecast entries) plus every
        entry of ``params`` whose key is listed in ``mlforecast_params``.
        """
        model_kwargs = {}
        output_dict = {}
        for key, value in params.items():
            if key in mlforecast_params:
                output_dict[key] = value
            else:
                model_kwargs[key] = value
        # 'models' is assigned last so it takes the same precedence as before
        # should ``params`` ever contain a 'models' key of its own.
        return {'models': [model_obj(**model_kwargs)], **output_dict}

    @staticmethod
    def _build_optuna_param(
        min_lags,
        max_lags,
        target_transforms,
        lag_transforms,
        date_features
    ):
        """Build the callable that samples MLForecast settings from a trial.

        The returned function yields a lag *count* and, for each candidate
        list, an integer index into that list.
        """
        def configs(trial):
            return {
                # ``suggest_int`` includes ``high``, so up to ``max_lags + 1``
                # lags can be drawn. NOTE(review): confirm the extra lag beyond
                # ``max_lags`` is intended.
                'lags': trial.suggest_int(name="lags", low=min_lags, high=max_lags + 1),
                'target_transforms': trial.suggest_int("target_transforms", 0, len(target_transforms)-1),
                'date_features': trial.suggest_int("date_features", 0, len(date_features)-1),
                'lag_transforms': trial.suggest_int("lag_transforms", 0, len(lag_transforms)-1),
            }
        return configs

    def scorer(self, model_params, param_params, loss):
        """Cross-validate one sampled configuration and return its mean loss.

        Parameters
        ----------
        model_params : dict
            Keyword arguments for the model currently being tuned.
        param_params : dict
            Sampled MLForecast settings: a lag count under ``'lags'`` and an
            index for each name in ``_optuna_list_params``.
        loss : callable
            Called as ``loss(df, models=[name], id_col='id_cutoff')`` (plus
            ``target_col`` when it is not ``'y'``); must return a frame with
            one column per model.

        Returns
        -------
        float
            Mean loss across series and cutoffs, or ``np.inf`` when the
            configuration raises.
        """
        # Based on https://nixtlaverse.nixtla.io/mlforecast/docs/how-to-guides/cross_validation.html
        try:
            model_obj = self.models[self.model_iter]
            model_name = self.model_names[self.model_iter]
            mlf_params = dict(param_params)  # leave the caller's dict untouched
            if 'lags' in mlf_params:
                # MLForecast expects explicit lags, so a count k becomes [1..k].
                mlf_params['lags'] = list(range(1, mlf_params['lags'] + 1))
            for name in Optimizer._optuna_list_params:
                mlf_params[name] = getattr(self, name)[mlf_params[name]]
            fcst = MLForecast(
                # Passing a dict pins the prediction column to ``model_name``;
                # a list would name it after the class, which can differ from
                # ``self.model_names`` when the same class appears twice.
                models={model_name: model_obj(**model_params)},
                freq=self.freq,
                **mlf_params,
            )
            cv_results = fcst.cross_validation(
                self.df,
                dropna=self.dropna,
                n_windows=self.n_windows,
                h=self.h,
                step_size=self.step_size,
                id_col=self.id_col,
                time_col=self.time_col,
                target_col=self.target_col,
                static_features=self.static_features,
            )
            # Score each (series, cutoff) pair separately, then average.
            # NOTE(review): this column assignment is pandas-style; polars
            # input presumably fails here -- confirm.
            cv_results['id_cutoff'] = (
                cv_results[self.id_col].astype(str) + '_' + cv_results['cutoff'].astype(str)
            )
            loss_kwargs = {'models': [model_name], 'id_col': 'id_cutoff'}
            if self.target_col != 'y':
                # Only sent when needed so custom losses without a
                # ``target_col`` argument keep working for the default column.
                loss_kwargs['target_col'] = self.target_col
            score = loss(cv_results, **loss_kwargs)[model_name].mean()
        except Exception as e:
            # Best effort: a failing configuration gets the worst possible
            # score so the study can continue.
            score = np.inf
            print(f'ERROR WHILE TUNING: {e}')
        return score

    def objective(self, trial):
        """Optuna objective: sample model and MLForecast settings, then score them."""
        model_params = self.model_configs[self.model_iter](trial)
        param_params = self.param_configs(trial)
        return self.scorer(model_params, param_params, self.loss)

    def optimize(
        self,
        df: DataFrame,
        n_windows: int,
        h: int,
        n_trials=100,
        timeout=None,
        loss=rmse,
        step_size: Optional[int] = None,
        dropna: bool = True,
        seed: Optional[int] = None,
    ):
        """Optimize the provided models and params.

        Parameters
        ----------
        df : pandas or polars DataFrame
            Series data in long format.
        n_windows : int
            Number of windows to evaluate.
        h : int
            Forecast horizon.
        n_trials : int (default=100)
            Number of Optuna trials per model.
        timeout : float, optional
            Forwarded to ``study.optimize``.
        loss : callable (default=rmse)
            Loss used to score the cross-validation results.
        step_size : int, optional
            Forwarded to ``MLForecast.cross_validation``.
        dropna : bool (default=True)
            Drop rows with missing values produced by the transformations.
        seed : int, optional
            Seed for the TPE sampler of every study.

        Returns
        -------
        tuple of (list, list)
            ``param_list`` holds one dict per model with a ``'models'`` entry
            (the model built with its best hyperparameters) plus the best
            MLForecast settings; ``optuna_studies`` holds the matching studies.

        Raises
        ------
        TypeError
            If ``model_configs`` was not provided.
        """
        if self.model_configs is None:
            # Fail up front with a clear message instead of the opaque
            # "'NoneType' object is not subscriptable" from inside the study.
            raise TypeError(
                "model_configs is required: pass one trial -> params callable per model"
            )
        self.df = df
        self.loss = loss
        self.n_windows = n_windows
        self.h = h
        self.step_size = step_size
        self.dropna = dropna
        optuna_studies = []
        param_list = []
        self.raw_params = []
        for i, model in enumerate(self.models):
            self.model_iter = i  # tells objective/scorer which model is being tuned
            study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=seed))
            study.optimize(
                self.objective,
                n_trials=n_trials,
                timeout=timeout)
            best_params = dict(study.best_params)
            # Translate the sampled values into what MLForecast accepts:
            # lag count -> explicit lag list, candidate index -> candidate.
            best_params['lags'] = list(range(1, best_params['lags'] + 1))
            for name in Optimizer._optuna_list_params:
                best_params[name] = Optimizer._sanitize_output(best_params[name], getattr(self, name))
            self.raw_params.append(best_params)
            output_dict = Optimizer._build_output(model, best_params, Optimizer._mlforecast_params)
            param_list.append(output_dict)
            optuna_studies.append(study)
        return param_list, optuna_studies