In [2]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

'/home/ubuntu/varios/skforecast'

In [102]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

from skforecast.datasets import fetch_dataset
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries


# Data download
# ==============================================================================
data = fetch_dataset(name="items_sales")
data.head()
exog = pd.DataFrame({
    'exog_1': np.random.normal(loc=0, scale=1, size=data.shape[0]),
    'exog_2': np.random.normal(loc=0, scale=1, size=data.shape[0]),
}, index=data.index)

end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()


# Create and train ForecasterAutoregMultiSeries
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
                 regressor          = LGBMRegressor(random_state=123, verbose=-1),
                 lags               = 5,
                 encoding           = 'onehot',
                 transformer_series = None,
                 transformer_exog   = None,
                 weight_func        = None,
                 series_weights     = None,
                 differentiation    = None,
                 dropna_from_series = False,
                 fit_kwargs         = None,
                 forecaster_id      = None
             )

forecaster.fit(series=data_train, exog=exog.loc[data_train.index])
forecaster

items_sales
-----------
Simulated time series for the sales of 3 different items.
Simulated data.
Shape of the dataset: (1097, 3)


ForecasterAutoregMultiSeries 
Regressor: LGBMRegressor(random_state=123, verbose=-1) 
Lags: [1 2 3 4 5] 
Transformer for series: None 
Transformer for exog: None 
Series encoding: onehot 
Window size: 5 
Series levels (names): ['item_1', 'item_2', 'item_3'] 
Series weights: None 
Weight function included: False 
Differentiation order: None 
Exogenous included: True 
Type of exogenous variable: <class 'pandas.core.frame.DataFrame'> 
Exogenous variables names: ['exog_1', 'exog_2'] 
Training range: ["'item_1': ['2012-01-01', '2014-07-15']", "'item_2': ['2012-01-01', '2014-07-15']", "'item_3': ['2012-01-01', '2014-07-15']"] 
Training index type: DatetimeIndex 
Training index frequency: D 
Regressor parameters: boosting_type: gbdt, class_weight: None, colsample_bytree: 1.0, importance_type: split, learning_rate: 0.1, ... 
fit_kwargs: {} 
Creation date: 2024-08-16 12:30:58 
Last fit date: 2024-08-16 12:30:58 
Skforecast version: 0.13.0 
Python version: 3.12.4 
Forecaster id: None 

In [91]:
def _recursive_predict_all_levels(
    self,
    steps: int,
    levels: list,
    last_window: np.ndarray,
    exog: Optional[np.ndarray] = None,
) -> np.ndarray:
    """
    Predict n steps for one or multiple levels. It is an iterative process
    in which, each prediction, is used as a predictor for the next step.

    Parameters
    ----------
    steps : int
        Number of future steps predicted.
    levels : list
        Time series to be predicted.
    last_window : numpy ndarray
        Series values used to create the features (lags) needed in the
        first iteration of the prediction (t + 1).
    exog : numpy ndarray, default `None`
        Exogenous variable/s included as predictor/s.

    Returns
    -------
    predictions : numpy ndarray
        Predicted values.

    """
    n_levels = len(levels)

    if self.encoding is not None:
        if self.encoding == "onehot":
            levels_encoded = np.zeros(
                (n_levels, len(self.series_col_names)), dtype=float
            )
            for idx, level in enumerate(levels):
                if level in self.series_col_names:
                    levels_encoded[idx, self.series_col_names.index(level)] = 1.0
        else:
            levels_encoded = np.array(
                [self.encoding_mapping.get(level, None) for level in levels],
                dtype="float64",
            ).reshape(-1, 1)
        levels_encoded_shape = levels_encoded.shape[1]
    else:
        levels_encoded_shape = 0

    lags_shape = len(self.lags)
    exog_shape = exog.shape[1] if exog is not None else 0

    features_shape = lags_shape + levels_encoded_shape + exog_shape
    features = np.full(shape=(n_levels, features_shape), fill_value=np.nan, dtype=float)
    if self.encoding is not None:
        features[lags_shape : lags_shape + levels_encoded_shape] = levels_encoded

    predictions = np.full(shape=(steps, n_levels), fill_value=np.nan, dtype=float)
    last_window = np.concatenate((last_window, predictions), axis=0)

    for i in range(steps):

        features[:, :lags_shape] = last_window[-self.lags - (steps - i), :]
        if exog is not None:
            features[:, -exog_shape:] = exog[i,].transpose()

        with warnings.catch_warnings():
            # Suppress scikit-learn warning: "X does not have valid feature names,
            # but NoOpTransformer was fitted with feature names".
            warnings.simplefilter("ignore", category=UserWarning)
            pred = self.regressor.predict(features.reshape(1, -1)).ravel()[0]
            predictions[1, :] = pred

        # Update `last_window` values. The first position is discarded and
        # the new prediction is added at the end.
        last_window[-(steps - i), :] = pred

    return predictions

NameError: name 'Optional' is not defined

In [103]:
levels = ['item_1', 'item_2', 'item_3']
n_levels = len(levels)
lags = np.array([1, 2, 3, 4, 5])
steps = 2
lags_shape = len(lags)
exog_shape = 2
encoding = 'onehot'
series_col_names = levels
encoding_mapping = {'item_1': 0, 'item_2': 1, 'item_3': 2}
exog = np.full(shape=(steps, exog_shape), fill_value=99, dtype=float)

In [104]:
if encoding is not None:
    if encoding == 'onehot':
        levels_encoded = np.zeros((n_levels, len(series_col_names)), dtype=float)
        for idx, level in enumerate(levels):
            if level in series_col_names:
                levels_encoded[idx, series_col_names.index(level)] = 1.
    else:
        levels_encoded = np.array([encoding_mapping.get(level, None) for level in levels], dtype='float64').reshape(-1, 1)
    levels_encoded_shape = levels_encoded.shape[1]
else:
    levels_encoded_shape = 0
levels_encoded

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [105]:
last_window =  np.full(shape=(lags_shape, n_levels), fill_value=5, dtype=float)
predictions =  np.full(shape=(steps, n_levels), fill_value=np.nan, dtype=float)
print(last_window)
print(predictions)
last_window = np.concatenate((last_window, predictions), axis=0)
last_window

[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]]
[[nan nan nan]
 [nan nan nan]]


array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [nan, nan, nan],
       [nan, nan, nan]])

In [106]:
features_shape = lags_shape + levels_encoded_shape + exog_shape
features = np.full(shape=(n_levels, features_shape), fill_value=np.nan, dtype=float)
features

array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]])

In [107]:
if encoding is not None:
    features[:, lags_shape:lags_shape + levels_encoded_shape] = levels_encoded
features

array([[nan, nan, nan, nan, nan,  1.,  0.,  0., nan, nan],
       [nan, nan, nan, nan, nan,  0.,  1.,  0., nan, nan],
       [nan, nan, nan, nan, nan,  0.,  0.,  1., nan, nan]])

In [108]:
i = 0
features[:, :lags_shape] = last_window[-lags - (steps - i), :].transpose()
features


array([[ 5.,  5.,  5.,  5.,  5.,  1.,  0.,  0., nan, nan],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  1.,  0., nan, nan],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  0.,  1., nan, nan]])

In [109]:
if exog is not None:
    features[:, -exog_shape:] = exog[i, ].transpose()
features

array([[ 5.,  5.,  5.,  5.,  5.,  1.,  0.,  0., 99., 99.],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  1.,  0., 99., 99.],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  0.,  1., 99., 99.]])

In [112]:
predictions

array([[nan, nan, nan],
       [nan, nan, nan]])

In [114]:
pred = forecaster.regressor.predict(features)
pred

array([11.40817988, 11.38620842, 11.40817988])

In [116]:
predictions[i, :] = pred
predictions

array([[11.40817988, 11.38620842, 11.40817988],
       [        nan,         nan,         nan]])

In [117]:
last_window

array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [nan, nan, nan],
       [nan, nan, nan]])

In [119]:
last_window[-(steps - i), :] = pred
last_window

array([[ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [11.40817988, 11.38620842, 11.40817988],
       [        nan,         nan,         nan]])