In [1]:
# Development setup
# ==============================================================================
# Load the autoreload extension and enable mode 2 so that edits to imported
# modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Add the directory two levels above the current working directory to the
# import path (at position 1).
# NOTE(review): presumably this is the repository root, so the local skforecast
# checkout is imported instead of an installed release — confirm
sys.path.insert(1, str(Path.cwd().parent.parent))
# Turn off the jedi-based completer for IPython tab completion
%config Completer.use_jedi = False

In [11]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd

from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries.model_selection_multiseries import _evaluate_grid_hyperparameters_multiseries

from sklearn.model_selection import ParameterGrid
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

## ForecasterAutoregMultiSeries

In [15]:
# Simulated data: two target series and two exogenous variables
# ==============================================================================
# Seed NumPy's global generator so the random series are identical on every
# Restart & Run All; without it the outputs of the cells below change per run.
np.random.seed(123)

y_1 = pd.Series(np.random.rand(20))
y_2 = pd.Series(np.random.rand(20))
exog_1 = pd.Series(np.random.rand(20))
exog_2 = pd.Series(np.random.rand(20))

# 20 daily observations starting on 2022-01-01; 'date' becomes the index below
df = pd.DataFrame({'s1': y_1, 's2': y_2, 'exog_1': exog_1, 'exog_2': exog_2,
                   'date':pd.date_range(start='2022-01-01', periods=20, freq='1D')})

df = df.set_index('date')
df = df.asfreq('1D')
df = df.sort_index()

# NOTE(review): `max_lag` and `lags` are not used by any visible cell; kept in
# case cells outside this view rely on them
max_lag=3
lags=np.array([1,2,3])

print(df.shape)
df.tail(3)

(20, 4)


Unnamed: 0_level_0,s1,s2,exog_1,exog_2
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-18,0.55397,0.635592,0.107618,0.835754
2022-01-19,0.951948,0.809366,0.780165,0.643191
2022-01-20,0.947452,0.868955,0.750736,0.155978


In [16]:
# Instantiate the multi-series forecaster and train it on both series
# ==============================================================================
series_train = df[['s1', 's2']]
exog_train   = df[['exog_1', 'exog_2']]

# Linear model on the last 3 lags of each series
forecaster = ForecasterAutoregMultiSeries(regressor=LinearRegression(), lags=3)
forecaster.fit(series=series_train, exog=exog_train)

# Last expression: show the forecaster summary
forecaster

ForecasterAutoregMultiSeries 
Regressor: LinearRegression() 
Lags: [1 2 3] 
Window size: 3 
Included exogenous: True 
Type of exogenous variable: <class 'pandas.core.frame.DataFrame'> 
Exogenous variables names: ['exog_1', 'exog_2'] 
Training range: [Timestamp('2022-01-01 00:00:00'), Timestamp('2022-01-20 00:00:00')] 
Training index type: DatetimeIndex 
Training index frequency: D 
Regressor parameters: {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'normalize': 'deprecated', 'positive': False} 
Creation date: 2022-06-06 15:46:17 
Last fit date: 2022-06-06 15:46:17 
Skforecast version: 0.5.dev1 

In [17]:
# Predict
# ==============================================================================
steps = 3

# Local seeded generator: the future exogenous values are reproducible
# regardless of the state of NumPy's global generator
rng_test = np.random.default_rng(123)
exog_1_test = pd.Series(rng_test.random(steps))
exog_2_test = pd.Series(rng_test.random(steps))

# The exogenous values passed to predict() must cover the forecast horizon,
# i.e. start one step AFTER the last training timestamp. Generating
# `steps + 1` dates from the last training date and dropping the first one
# yields exactly the `steps` dates that follow the training range.
future_index = pd.date_range(start=df.index[-1], periods=steps + 1, freq='1D')[1:]

df_test = pd.DataFrame({'exog_1': exog_1_test, 'exog_2': exog_2_test,
                        'date': future_index})

df_test = df_test.set_index('date')
df_test = df_test.asfreq('1D')
df_test = df_test.sort_index()

# Forecast `steps` values ahead for series 's2'
predictions = forecaster.predict(
                steps = steps,
                level = 's2',
                exog = df_test[['exog_1', 'exog_2']]
              )

print(forecaster.level)
predictions

s2


2022-01-21    0.438949
2022-01-22    0.565801
2022-01-23    0.561593
Freq: D, Name: pred, dtype: float64

In [18]:
# Interval forecast for level 's2' over the same horizon and exog values
# ==============================================================================
exog_future = df_test[['exog_1', 'exog_2']]
predictions = forecaster.predict_interval(steps=steps, level='s2', exog=exog_future)

# Show which level the forecaster used, then the pred / lower / upper table
print(forecaster.level)
predictions

s2


Unnamed: 0,pred,lower_bound,upper_bound
2022-01-21,0.438949,0.041156,0.837512
2022-01-22,0.565801,0.173634,0.946079
2022-01-23,0.561593,0.16746,0.948181


In [19]:
# Backtesting of level 's1': refit on every fold, fixed-size training window
# ==============================================================================
# With fixed_train_size=True the training window keeps its initial length
# (n=12) and slides forward by `steps` observations on each fold.
metric, predictions_backtest = backtesting_forecaster_multiseries(
    forecaster=forecaster,
    series=df[['s1', 's2']],
    level='s1',
    steps=4,
    metric='mean_squared_error',
    initial_train_size=12,
    refit=True,
    fixed_train_size=True,
    verbose=True,
)

# Display the metric value together with the per-date predictions
metric, predictions_backtest

Information of backtesting process
----------------------------------
Number of observations used for initial training: 12
Number of observations used for backtesting: 8
    Number of folds: 2
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   2022-01-01 00:00:00 -- 2022-01-12 00:00:00  (n=12)
    Validation: 2022-01-13 00:00:00 -- 2022-01-16 00:00:00  (n=4)
Data partition in fold: 1
    Training:   2022-01-05 00:00:00 -- 2022-01-16 00:00:00  (n=12)
    Validation: 2022-01-17 00:00:00 -- 2022-01-20 00:00:00  (n=4)



(0.11641854659291993,
                 pred
 2022-01-13  0.452132
 2022-01-14  0.265883
 2022-01-15  0.296377
 2022-01-16  0.371779
 2022-01-17  0.377557
 2022-01-18  0.375606
 2022-01-19  0.396852
 2022-01-20  0.394283)

In [20]:
# Backtesting of level 's1': refit on every fold, expanding training window
# ==============================================================================
# With fixed_train_size=False the training window always starts at the first
# observation and grows by `steps` observations on each fold (n=12, then n=16).
metric, predictions_backtest = backtesting_forecaster_multiseries(
    forecaster=forecaster,
    series=df[['s1', 's2']],
    level='s1',
    steps=4,
    metric='mean_squared_error',
    initial_train_size=12,
    refit=True,
    fixed_train_size=False,
    verbose=True,
)

# Display the metric value together with the per-date predictions
metric, predictions_backtest

Information of backtesting process
----------------------------------
Number of observations used for initial training: 12
Number of observations used for backtesting: 8
    Number of folds: 2
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   2022-01-01 00:00:00 -- 2022-01-12 00:00:00  (n=12)
    Validation: 2022-01-13 00:00:00 -- 2022-01-16 00:00:00  (n=4)
Data partition in fold: 1
    Training:   2022-01-01 00:00:00 -- 2022-01-16 00:00:00  (n=16)
    Validation: 2022-01-17 00:00:00 -- 2022-01-20 00:00:00  (n=4)



(0.10939179494350493,
                 pred
 2022-01-13  0.452132
 2022-01-14  0.265883
 2022-01-15  0.296377
 2022-01-16  0.371779
 2022-01-17  0.471189
 2022-01-18  0.400364
 2022-01-19  0.452489
 2022-01-20  0.442104)

In [21]:
# Backtesting of level 's1': no refit between folds
# ==============================================================================
# With refit=False the training partition reported for every fold is the
# initial 12 observations; `fixed_train_size` is not passed in this variant.
metric, predictions_backtest = backtesting_forecaster_multiseries(
    forecaster=forecaster,
    series=df[['s1', 's2']],
    level='s1',
    steps=4,
    metric='mean_squared_error',
    initial_train_size=12,
    refit=False,
    verbose=True,
)

# Display the metric value together with the per-date predictions
metric, predictions_backtest

Information of backtesting process
----------------------------------
Number of observations used for initial training: 12
Number of observations used for backtesting: 8
    Number of folds: 2
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   2022-01-01 00:00:00 -- 2022-01-12 00:00:00  (n=12)
    Validation: 2022-01-13 00:00:00 -- 2022-01-16 00:00:00  (n=4)
Data partition in fold: 1
    Training:   2022-01-01 00:00:00 -- 2022-01-12 00:00:00  (n=12)
    Validation: 2022-01-17 00:00:00 -- 2022-01-20 00:00:00  (n=4)



(0.12193079896247457,
                 pred
 2022-01-13  0.452132
 2022-01-14  0.265883
 2022-01-15  0.296377
 2022-01-16  0.371779
 2022-01-17  0.402262
 2022-01-18  0.349748
 2022-01-19  0.407087
 2022-01-20  0.366944)

## Hyperparameter optimization

In [52]:
# Grid search over lags and regressor hyperparameters
# ==============================================================================
# The `lags` value given here is only a placeholder; the search overwrites it
# with each candidate from `lags_grid`.
forecaster = ForecasterAutoregMultiSeries(
    regressor=RandomForestRegressor(random_state=123),
    lags=3,
)

# Candidate lag configurations
lags_grid = [2, 3]

# Candidate regressor hyperparameters, expanded into a list with one dict per
# combination
param_grid = list(ParameterGrid({'n_estimators': [10, 12], 'max_depth': [3, 5]}))

# return_best=True: the forecaster is refitted with the best configuration found
results_grid = _evaluate_grid_hyperparameters_multiseries(
    forecaster=forecaster,
    series=df[['s1', 's2']],
    param_grid=param_grid,
    lags_grid=lags_grid,
    steps=4,
    metric='mean_squared_error',
    initial_train_size=12,
    refit=True,
    fixed_train_size=True,
    return_best=True,
    verbose=False,
)

Number of models compared: 16.


loop lags_grid:   0%|                                               | 0/2 [00:00<?, ?it/s]
loop param_grid:   0%|                                              | 0/4 [00:00<?, ?it/s][A
loop param_grid:  75%|████████████████████████████▌         | 3/4 [00:00<00:00, 20.76it/s][A
loop lags_grid:  50%|███████████████████▌                   | 1/2 [00:00<00:00,  4.91it/s][A
loop param_grid:   0%|                                              | 0/4 [00:00<?, ?it/s][A
loop param_grid:  50%|███████████████████                   | 2/4 [00:00<00:00, 17.38it/s][A
loop param_grid: 100%|██████████████████████████████████████| 4/4 [00:00<00:00, 18.76it/s][A
loop lags_grid: 100%|███████████████████████████████████████| 2/2 [00:00<00:00,  4.69it/s][A

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [1 2] 
  Parameters: {'max_depth': 3, 'n_estimators': 10}
  Backtesting metric: 0.08568393805096622
  Levels: ['s1', 's2'] 
  Levels weights: {'s1': 0.5, 's2': 0.5} 






In [53]:
# Display the grid-search results table (the rows shown below are ordered by
# ascending metric)
results_grid

Unnamed: 0,levels,lags,params,metric,max_depth,n_estimators
0,"[s1, s2]","[1, 2]","{'max_depth': 3, 'n_estimators': 10}",0.085684,3,10
1,"[s1, s2]","[1, 2]","{'max_depth': 3, 'n_estimators': 12}",0.08741,3,12
2,"[s1, s2]","[1, 2]","{'max_depth': 5, 'n_estimators': 10}",0.094286,5,10
3,"[s1, s2]","[1, 2]","{'max_depth': 5, 'n_estimators': 12}",0.097078,5,12
4,"[s1, s2]","[1, 2, 3]","{'max_depth': 3, 'n_estimators': 10}",0.105619,3,10
6,"[s1, s2]","[1, 2, 3]","{'max_depth': 5, 'n_estimators': 10}",0.109143,5,10
7,"[s1, s2]","[1, 2, 3]","{'max_depth': 5, 'n_estimators': 12}",0.112304,5,12
5,"[s1, s2]","[1, 2, 3]","{'max_depth': 3, 'n_estimators': 12}",0.117046,3,12


In [25]:
# NOTE(review): stale cell — its execution count (25) is lower than that of the
# grid search cell above (52), and the table below reports levels [s2] only,
# unlike the In [53] output. Presumably left over from an earlier experiment;
# re-run or remove it so the notebook survives Restart & Run All.
results_grid

Unnamed: 0,levels,lags,params,metric,max_depth,n_estimators
4,[s2],"[1, 2, 3]","{'max_depth': 3, 'n_estimators': 10}",0.085284,3,10
5,[s2],"[1, 2, 3]","{'max_depth': 3, 'n_estimators': 12}",0.085913,3,12
7,[s2],"[1, 2, 3]","{'max_depth': 5, 'n_estimators': 12}",0.088803,5,12
6,[s2],"[1, 2, 3]","{'max_depth': 5, 'n_estimators': 10}",0.093848,5,10
0,[s2],"[1, 2]","{'max_depth': 3, 'n_estimators': 10}",0.099212,3,10
1,[s2],"[1, 2]","{'max_depth': 3, 'n_estimators': 12}",0.106265,3,12
2,[s2],"[1, 2]","{'max_depth': 5, 'n_estimators': 10}",0.108759,5,10
3,[s2],"[1, 2]","{'max_depth': 5, 'n_estimators': 12}",0.111977,5,12


In [33]:
# Manual check of the aggregated metric with equal level weights (simple mean).
# 0.106265 is the 's2' metric for lags [1, 2], n_estimators=12 in the [s2]-only
# table above; 0.078421 is presumably the matching 's1' metric, which is not
# shown anywhere in this notebook — verify.
(0.078421 + 0.106265)/2

0.09234300000000001

In [28]:
# NOTE(review): stale cell (execution count 28, lower than the grid search's 52).
# The top-row metric below (0.092343) equals the equal-weight average computed
# in the scratch cell just above, but differs from the In [53] output for the
# same configuration, so this table comes from a different run. Re-run or remove.
results_grid

Unnamed: 0,levels,lags,params,metric,max_depth,n_estimators
1,"[s1, s2]","[1, 2]","{'max_depth': 3, 'n_estimators': 12}",0.092343,3,12
0,"[s1, s2]","[1, 2]","{'max_depth': 3, 'n_estimators': 10}",0.101738,3,10
5,"[s1, s2]","[1, 2, 3]","{'max_depth': 3, 'n_estimators': 12}",0.106701,3,12
7,"[s1, s2]","[1, 2, 3]","{'max_depth': 5, 'n_estimators': 12}",0.108936,5,12
2,"[s1, s2]","[1, 2]","{'max_depth': 5, 'n_estimators': 10}",0.109556,5,10
4,"[s1, s2]","[1, 2, 3]","{'max_depth': 3, 'n_estimators': 10}",0.112275,3,10
6,"[s1, s2]","[1, 2, 3]","{'max_depth': 5, 'n_estimators': 10}",0.116062,5,10
3,"[s1, s2]","[1, 2]","{'max_depth': 5, 'n_estimators': 12}",0.122368,5,12


In [37]:
# Manual check of the aggregated metric with level weights 0.82 and 0.18 (the
# same values as the `levels_weights` dict defined further down).
# NOTE(review): 0.078421 / 0.106265 are presumably the per-level metrics for
# 's1' / 's2' taken from earlier runs — verify.
0.078421*0.82 + 0.106265*0.18

0.08343292

In [36]:
# NOTE(review): stale cell (execution count 36, lower than the grid search's 52).
# The top-row metric below (0.083433) equals the 0.82 / 0.18 weighted value
# computed in the scratch cell just above; the visible grid search call passes
# no weights, so this table comes from a different run. Re-run or remove.
results_grid

Unnamed: 0,levels,lags,params,metric,max_depth,n_estimators
1,"[s1, s2]","[1, 2]","{'max_depth': 3, 'n_estimators': 12}",0.083433,3,12
0,"[s1, s2]","[1, 2]","{'max_depth': 3, 'n_estimators': 10}",0.103354,3,10
2,"[s1, s2]","[1, 2]","{'max_depth': 5, 'n_estimators': 10}",0.110067,5,10
5,"[s1, s2]","[1, 2, 3]","{'max_depth': 3, 'n_estimators': 12}",0.120005,3,12
7,"[s1, s2]","[1, 2, 3]","{'max_depth': 5, 'n_estimators': 12}",0.121821,5,12
3,"[s1, s2]","[1, 2]","{'max_depth': 5, 'n_estimators': 12}",0.129018,5,12
4,"[s1, s2]","[1, 2, 3]","{'max_depth': 3, 'n_estimators': 10}",0.129549,3,10
6,"[s1, s2]","[1, 2, 3]","{'max_depth': 5, 'n_estimators': 10}",0.130279,5,10


In [47]:
# Names of the target series columns, as a plain Python list
cols = df[['s1', 's2']].columns.tolist()

In [43]:
# Weight assigned to each level (series name -> weight)
levels_weights = dict(s1=0.82, s2=0.18)

In [50]:
# Print 1 when the series columns and the weight keys differ, either in
# content or in order (the comparison is between two lists)
weight_keys = [*levels_weights]
if not (cols == weight_keys):
    print(1)