In [28]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'c:\\Users\\Joaquín Amat\\Documents\\GitHub\\skforecast'

In [29]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

from skforecast.datasets import fetch_dataset
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries


# Data download
# ==============================================================================
#data = fetch_dataset(name="items_sales")
#data.to_parquet('items_sales.parquet', index=True)
data = pd.read_parquet('items_sales.parquet')
data.head()
exog = pd.DataFrame({
    'exog_1': np.random.normal(loc=0, scale=1, size=data.shape[0]),
    'exog_2': np.random.normal(loc=0, scale=1, size=data.shape[0]),
}, index=data.index)

end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()


# Create and train ForecasterAutoregMultiSeries
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
                 regressor          = LGBMRegressor(random_state=123, verbose=-1),
                 lags               = 5,
                 encoding           = 'onehot',
                 transformer_series = None,
                 transformer_exog   = None,
                 weight_func        = None,
                 series_weights     = None,
                 differentiation    = None,
                 dropna_from_series = False,
                 fit_kwargs         = None,
                 forecaster_id      = None
             )

forecaster.fit(series=data_train, exog=exog.loc[data_train.index])
forecaster



ForecasterAutoregMultiSeries 
Regressor: LGBMRegressor(random_state=123, verbose=-1) 
Lags: [1 2 3 4 5] 
Transformer for series: None 
Transformer for exog: None 
Series encoding: onehot 
Window size: 5 
Series levels (names): ['item_1', 'item_2', 'item_3'] 
Series weights: None 
Weight function included: False 
Differentiation order: None 
Exogenous included: True 
Type of exogenous variable: <class 'pandas.core.frame.DataFrame'> 
Exogenous variables names: ['exog_1', 'exog_2'] 
Training range: ["'item_1': ['0', '926']", "'item_2': ['0', '926']", "'item_3': ['0', '926']"] 
Training index type: RangeIndex 
Training index frequency: 1 
Regressor parameters: boosting_type: gbdt, class_weight: None, colsample_bytree: 1.0, importance_type: split, learning_rate: 0.1, ... 
fit_kwargs: {} 
Creation date: 2024-08-17 10:51:29 
Last fit date: 2024-08-17 10:51:29 
Skforecast version: 0.13.0 
Python version: 3.12.4 
Forecaster id: None 

In [30]:
levels = ['item_1', 'item_2', 'item_3']
n_levels = len(levels)
lags = np.array([1, 2, 3, 4, 5])
steps = 2
lags_shape = len(lags)
exog_shape = 2
encoding = 'onehot'
series_col_names = levels
encoding_mapping = {'item_1': 0, 'item_2': 1, 'item_3': 2}
# Exog es un diccionario donde las claves son el step y los son numpy arrays en los
# que cada fila es un level y cada columna es una variable exogena.
exog = {
    1:np.full(shape=(steps, exog_shape), fill_value=99, dtype=float),
    2:np.full(shape=(steps, exog_shape), fill_value=999, dtype=float),
}
exog

{1: array([[99., 99.],
        [99., 99.]]),
 2: array([[999., 999.],
        [999., 999.]])}

In [31]:
if encoding is not None:
    if encoding == 'onehot':
        levels_encoded = np.zeros((n_levels, len(series_col_names)), dtype=float)
        for idx, level in enumerate(levels):
            if level in series_col_names:
                levels_encoded[idx, series_col_names.index(level)] = 1.
    else:
        levels_encoded = np.array([encoding_mapping.get(level, None) for level in levels], dtype='float64').reshape(-1, 1)
    levels_encoded_shape = levels_encoded.shape[1]
else:
    levels_encoded_shape = 0
levels_encoded

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [32]:
last_window =  np.full(shape=(lags_shape, n_levels), fill_value=5, dtype=float)
predictions =  np.full(shape=(steps, n_levels), fill_value=np.nan, dtype=float)
print(last_window)
print(predictions)
last_window = np.concatenate((last_window, predictions), axis=0)
last_window

[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]]
[[nan nan nan]
 [nan nan nan]]


array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [nan, nan, nan],
       [nan, nan, nan]])

In [33]:
features_shape = lags_shape + levels_encoded_shape + exog_shape
features = np.full(shape=(n_levels, features_shape), fill_value=np.nan, dtype=float)
features

array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]])

In [34]:
if encoding is not None:
    features[:, lags_shape:lags_shape + levels_encoded_shape] = levels_encoded
features

array([[nan, nan, nan, nan, nan,  1.,  0.,  0., nan, nan],
       [nan, nan, nan, nan, nan,  0.,  1.,  0., nan, nan],
       [nan, nan, nan, nan, nan,  0.,  0.,  1., nan, nan]])

In [35]:
step = 1
i = 0
features[:, :lags_shape] = last_window[-lags - (steps - i), :].transpose()
features


array([[ 5.,  5.,  5.,  5.,  5.,  1.,  0.,  0., nan, nan],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  1.,  0., nan, nan],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  0.,  1., nan, nan]])

In [36]:
if exog is not None:
    features[:, -exog_shape:] = exog[step][i, ].transpose()
features

array([[ 5.,  5.,  5.,  5.,  5.,  1.,  0.,  0., 99., 99.],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  1.,  0., 99., 99.],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  0.,  1., 99., 99.]])

In [37]:
predictions

array([[nan, nan, nan],
       [nan, nan, nan]])

In [38]:
pred = forecaster.regressor.predict(features)
pred

array([10.75166845, 10.60901505, 10.75166845])

In [39]:
predictions[i, :] = pred
predictions

array([[10.75166845, 10.60901505, 10.75166845],
       [        nan,         nan,         nan]])

In [40]:
last_window

array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [nan, nan, nan],
       [nan, nan, nan]])

In [41]:
last_window[-(steps - i), :] = pred
last_window

array([[ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [10.75166845, 10.60901505, 10.75166845],
       [        nan,         nan,         nan]])

In [42]:
#data = fetch_dataset(name="items_sales")
#data.to_parquet('items_sales_2.parquet', index=True)
data = pd.read_parquet('items_sales_2.parquet')
data = data.asfreq('D')
data.head()
exog = pd.DataFrame({
    'exog_1': np.random.normal(loc=0, scale=1, size=data.shape[0]),
    'exog_2': np.random.normal(loc=0, scale=1, size=data.shape[0]),
}, index=data.index)

end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()

forecaster.fit(series=data_train, exog=exog.loc[data_train.index])

(
            last_window_values_dict,
            exog_values_dict,
            levels,
            prediction_index,
            _
        ) = forecaster._create_predict_inputs(
    steps       = 2,
    levels      = None,
    last_window = None,
    exog        = exog.loc[data_test.index]
)

In [43]:
exog_values_dict

{'item_1': array([[0.309224  , 0.46507128],
        [0.45410965, 0.57300797]]),
 'item_2': array([[0.309224  , 0.46507128],
        [0.45410965, 0.57300797]]),
 'item_3': array([[0.309224  , 0.46507128],
        [0.45410965, 0.57300797]])}

In [44]:
exog_values = np.concat(list(exog_values_dict.values()))
exog_values

array([[0.309224  , 0.46507128],
       [0.45410965, 0.57300797],
       [0.309224  , 0.46507128],
       [0.45410965, 0.57300797],
       [0.309224  , 0.46507128],
       [0.45410965, 0.57300797]])

In [45]:
exog_values_dict_2 = {}
for i in range(steps):
    exog_values_dict_2[i+1] = exog_values[i::steps, :]
exog_values_dict_2
    

{1: array([[0.309224  , 0.46507128],
        [0.309224  , 0.46507128],
        [0.309224  , 0.46507128]]),
 2: array([[0.45410965, 0.57300797],
        [0.45410965, 0.57300797],
        [0.45410965, 0.57300797]])}

In [46]:
predictions_old = forecaster.predict(steps=50, exog=exog.loc[data_test.index])

In [47]:
predictions_new = forecaster.predict_new(steps=50, exog=exog.loc[data_test.index])

In [48]:
predictions_old.equals(predictions_new)

True

In [49]:
n_series = 1000
n=365
index = pd.date_range(start='2021-01-01',periods=n, freq="D")
data = pd.DataFrame(index=index)
for i in range(n_series):
    data[f"series_{i+1}"] = np.random.normal(size=n)
data

  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.ra

Unnamed: 0,series_1,series_2,series_3,series_4,series_5,series_6,series_7,series_8,series_9,series_10,...,series_991,series_992,series_993,series_994,series_995,series_996,series_997,series_998,series_999,series_1000
2021-01-01,0.385253,-0.546739,0.107267,0.342178,-0.538943,0.128043,1.474854,1.268088,-2.618430,0.380283,...,-1.263943,-2.559166,-0.096237,-1.006167,-0.727323,1.301617,0.778482,0.228658,2.026948,-1.492443
2021-01-02,-2.421784,-2.251803,-1.624448,1.468397,-0.157217,-0.648902,-0.134597,0.580626,2.717483,0.676409,...,-1.637674,0.881047,-1.012933,-1.102724,2.131592,1.821182,0.558613,-0.819953,-0.295392,-0.681771
2021-01-03,-0.003952,1.529835,-1.142532,0.322376,0.312275,1.387458,-1.154200,1.124284,-0.917663,0.255957,...,-0.042752,0.694784,-0.216726,2.492841,2.301552,0.299355,0.539274,-0.551236,1.872228,0.281925
2021-01-04,-1.204778,0.204666,1.262535,0.302506,-0.215642,-1.484829,0.282531,0.069232,-0.894341,-1.290878,...,-1.187938,-0.640535,-1.403928,-0.158196,0.466426,0.099680,0.397384,0.572043,0.896807,-0.445963
2021-01-05,-0.632131,0.207034,-0.400760,1.131071,-0.470929,-0.822672,0.730837,1.157421,0.504307,0.600473,...,-0.564097,-0.320959,2.081341,-0.683245,-0.283321,0.654337,-0.785095,-1.039053,-1.035904,0.445633
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,0.931506,-0.333126,-0.564401,-1.294957,0.256349,0.362575,0.428314,-0.357398,0.697497,2.840381,...,-1.473189,-2.039687,1.000584,-0.438598,-0.668966,-0.253931,-0.635006,-0.022132,-0.604317,-0.392076
2021-12-28,0.419512,1.094982,-1.121767,0.159491,-0.132650,1.142348,-0.984443,0.099168,0.185402,1.872414,...,-0.808249,0.296443,0.598089,2.952214,-0.942110,1.185089,1.022446,-0.347032,0.240625,1.807602
2021-12-29,-1.817501,-0.029083,-0.294048,0.747690,0.474790,-0.115736,0.369263,-0.531749,-0.101367,1.038845,...,-1.347617,1.814894,0.079214,1.489778,1.639185,0.343543,0.611729,-0.427378,0.071341,-0.311484
2021-12-30,-0.296495,0.958373,-0.549366,0.983113,0.660091,0.504700,-0.108179,-1.485987,-0.701141,1.766665,...,0.506084,0.373134,-3.079920,1.415315,0.434276,0.100494,-0.942873,0.800068,-0.407302,1.417773


In [50]:
forecaster.fit(series = data)

In [51]:
%%timeit
forecaster.predict(steps=5)

2.95 s ± 269 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [52]:
%%timeit
forecaster.predict_new(steps=5)

245 ms ± 34.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [53]:
forecaster.predict(steps=5).equals(forecaster.predict_new(steps=5))

True