In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Make the parent of the current working directory importable. The path is
# computed once and inserted at position 1 (sys.path[0] is left untouched).
# The guard keeps the cell idempotent: re-running it does not add the same
# entry to sys.path again, while a path that only appears later in sys.path
# is still promoted to position 1 as before.
project_root = str(Path.cwd().parent)
if project_root not in sys.path[:2]:
    sys.path.insert(1, project_root)
project_root

'/home/ubuntu/varios/skforecast'

In [2]:
# Print the version of the skforecast package that was actually imported, so
# the notebook records which release the outputs below were produced with.
import skforecast
print(skforecast.__version__)

0.13.0


In [3]:
import numpy as np
import pandas as pd
from skforecast.datasets import fetch_dataset
import skforecast
import lightgbm
import sklearn
from lightgbm import LGBMRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.feature_selection import RFECV
from skforecast.ForecasterBaseline import ForecasterEquivalentDate
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from skforecast.model_selection import bayesian_search_forecaster
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries



In [4]:
# Data download
# ==============================================================================
# Fetch the 'vic_electricity' dataset with raw=True and print a summary of its
# columns and dtypes.
# NOTE(review): raw=True presumably skips any index/dtype preprocessing — the
# info() output shows a RangeIndex and 'Time'/'Date' stored as object; confirm
# against the fetch_dataset documentation.
datos = fetch_dataset(name='vic_electricity', raw=True)
datos.info()

vic_electricity
---------------
Half-hourly electricity demand for Victoria, Australia
O'Hara-Wild M, Hyndman R, Wang E, Godahewa R (2022).tsibbledata: Diverse
Datasets for 'tsibble'. https://tsibbledata.tidyverts.org/,
https://github.com/tidyverts/tsibbledata/.
https://tsibbledata.tidyverts.org/reference/vic_elec.html
Shape of the dataset: (52608, 5)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52608 entries, 0 to 52607
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Time         52608 non-null  object 
 1   Demand       52608 non-null  float64
 2   Temperature  52608 non-null  float64
 3   Date         52608 non-null  object 
 4   Holiday      52608 non-null  bool   
dtypes: bool(1), float64(2), object(2)
memory usage: 1.7+ MB


In [5]:
# Keep only the first 1000 rows to make the searches below fast.
# `.head()` returns a slice of the original frame; take an explicit copy so
# that adding a column writes to an independent DataFrame instead of a slice
# (avoids pandas' SettingWithCopyWarning / ambiguous view-vs-copy semantics).
datos = datos.head(1000).copy()

# Second series, identical to 'Demand', so that a two-column frame is
# available for the multi-series forecaster.
datos['Demand_2'] = datos['Demand'].copy()

In [6]:
# Bayesian hyperparameter search
# ==============================================================================
# `lags=24` is only an initial value: it is modified during the search, which
# picks the lags from the candidates in `lags_grid`.
forecaster = ForecasterAutoreg(
    regressor=LGBMRegressor(random_state=15926, verbose=-1),
    lags=24,
)

# Lags used as predictors (candidate configurations for the search)
lags_grid = [
    24,
    [1, 2, 3, 23, 24, 25, 47, 48, 49],
]

# Hyperparameter search space
def search_space(trial):
    """
    Suggest one candidate configuration for a search trial.

    Parameters
    ----------
    trial : object
        Object exposing `suggest_int`, `suggest_float` and
        `suggest_categorical`.
        NOTE(review): presumably an optuna trial supplied by the search
        function — confirm.

    Returns
    -------
    dict
        Suggested values keyed by 'n_estimators', 'max_depth',
        'learning_rate', 'reg_alpha', 'reg_lambda' and 'lags'.
    """
    # One suggestion per statement, requested in a fixed order: the sequence
    # of `suggest_*` calls is kept stable so results stay comparable between
    # runs that use the same random state.
    candidate = {}
    candidate['n_estimators'] = trial.suggest_int('n_estimators', 600, 1200, step=100)
    candidate['max_depth'] = trial.suggest_int('max_depth', 3, 10, step=1)
    candidate['learning_rate'] = trial.suggest_float('learning_rate', 0.01, 0.5)
    candidate['reg_alpha'] = trial.suggest_float('reg_alpha', 0, 1, step=0.1)
    candidate['reg_lambda'] = trial.suggest_float('reg_lambda', 0, 1, step=0.1)
    # `lags_grid` is looked up in the notebook namespace when the function runs.
    candidate['lags'] = trial.suggest_categorical('lags', lags_grid)
    return candidate

# Run the search on the 'Demand' series. The call returns two objects, bound
# here to `resultados_busqueda` and `frozen_trial`. With return_best=True the
# forecaster is refitted with the best lags and parameters found (see the
# message printed by the cell).
resultados_busqueda, frozen_trial = bayesian_search_forecaster(
    forecaster=forecaster,
    y=datos['Demand'],
    steps=24,
    metric='mean_absolute_error',
    search_space=search_space,
    initial_train_size=500,
    refit=False,
    n_trials=3,  # Increase for a more exhaustive search
    random_state=123,
    return_best=True,
    n_jobs='auto',
    verbose=False,
    show_progress=True,
)

  0%|          | 0/3 [00:00<?, ?it/s]

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1  2  3 23 24 25 47 48 49] 
  Parameters: {'n_estimators': 800, 'max_depth': 8, 'learning_rate': 0.09942094792221498, 'reg_alpha': 0.1, 'reg_lambda': 0.5}
  Backtesting metric: 381.4106015718004



In [7]:
# Bayesian hyperparameter search (multi-series forecaster)
# ==============================================================================
# `lags=24` is only an initial value: it is modified during the search, which
# picks the lags from the candidates in `lags_grid`.
forecaster = ForecasterAutoregMultiSeries(
    regressor=LGBMRegressor(random_state=15926, verbose=-1),
    lags=24,
)

# Lags used as predictors (candidate configurations for the search)
lags_grid = [
    24,
    [1, 2, 3, 23, 24, 25, 47, 48, 49],
]

# Hyperparameter search space
def search_space(trial):
    """
    Suggest one candidate configuration for a search trial.

    Parameters
    ----------
    trial : object
        Object exposing `suggest_int`, `suggest_float` and
        `suggest_categorical`.
        NOTE(review): presumably an optuna trial supplied by the search
        function — confirm.

    Returns
    -------
    dict
        Suggested values keyed by 'n_estimators', 'max_depth',
        'learning_rate', 'reg_alpha', 'reg_lambda' and 'lags'.
    """
    # One suggestion per statement, requested in a fixed order: the sequence
    # of `suggest_*` calls is kept stable so results stay comparable between
    # runs that use the same random state.
    candidate = {}
    candidate['n_estimators'] = trial.suggest_int('n_estimators', 600, 1200, step=100)
    candidate['max_depth'] = trial.suggest_int('max_depth', 3, 10, step=1)
    candidate['learning_rate'] = trial.suggest_float('learning_rate', 0.01, 0.5)
    candidate['reg_alpha'] = trial.suggest_float('reg_alpha', 0, 1, step=0.1)
    candidate['reg_lambda'] = trial.suggest_float('reg_lambda', 0, 1, step=0.1)
    # `lags_grid` is looked up in the notebook namespace when the function runs.
    candidate['lags'] = trial.suggest_categorical('lags', lags_grid)
    return candidate

# Run the search on the two series 'Demand' and 'Demand_2'. The call returns
# two objects, bound here to `resultados_busqueda` and `frozen_trial`. With
# return_best=True the forecaster is refitted with the best lags and
# parameters found (see the message printed by the cell).
resultados_busqueda, frozen_trial = bayesian_search_forecaster_multiseries(
    forecaster=forecaster,
    series=datos[['Demand', 'Demand_2']],
    steps=24,
    metric='mean_absolute_error',
    search_space=search_space,
    initial_train_size=500,
    refit=False,
    n_trials=3,  # Increase for a more exhaustive search
    random_state=123,
    return_best=True,
    n_jobs='auto',
    verbose=False,
    show_progress=True,
)

  0%|          | 0/3 [00:00<?, ?it/s]

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1  2  3 23 24 25 47 48 49] 
  Parameters: {'n_estimators': 800, 'max_depth': 8, 'learning_rate': 0.09942094792221498, 'reg_alpha': 0.1, 'reg_lambda': 0.5}
  Backtesting metric: 344.1008153314714
  Levels: ['Demand', 'Demand_2']

