In [22]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'c:\\Users\\Joaquín Amat\\Documents\\GitHub\\skforecast'

In [23]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

from skforecast.datasets import fetch_dataset
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries


# Data download
# ==============================================================================
#data = fetch_dataset(name="items_sales")
#data.to_parquet('items_sales.parquet', index=True)
data = pd.read_parquet('items_sales.parquet')
data.head()
exog = pd.DataFrame({
    'exog_1': np.random.normal(loc=0, scale=1, size=data.shape[0]),
    'exog_2': np.random.normal(loc=0, scale=1, size=data.shape[0]),
}, index=data.index)

end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()


# Create and train ForecasterAutoregMultiSeries
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
                 regressor          = LGBMRegressor(random_state=123, verbose=-1),
                 lags               = 5,
                 encoding           = 'onehot',
                 transformer_series = None,
                 transformer_exog   = None,
                 weight_func        = None,
                 series_weights     = None,
                 differentiation    = None,
                 dropna_from_series = False,
                 fit_kwargs         = None,
                 forecaster_id      = None
             )

forecaster.fit(series=data_train, exog=exog.loc[data_train.index])
forecaster



ForecasterAutoregMultiSeries 
Regressor: LGBMRegressor(random_state=123, verbose=-1) 
Lags: [1 2 3 4 5] 
Transformer for series: None 
Transformer for exog: None 
Series encoding: onehot 
Window size: 5 
Series levels (names): ['item_1', 'item_2', 'item_3'] 
Series weights: None 
Weight function included: False 
Differentiation order: None 
Exogenous included: True 
Type of exogenous variable: <class 'pandas.core.frame.DataFrame'> 
Exogenous variables names: ['exog_1', 'exog_2'] 
Training range: ["'item_1': ['0', '926']", "'item_2': ['0', '926']", "'item_3': ['0', '926']"] 
Training index type: RangeIndex 
Training index frequency: 1 
Regressor parameters: boosting_type: gbdt, class_weight: None, colsample_bytree: 1.0, importance_type: split, learning_rate: 0.1, ... 
fit_kwargs: {} 
Creation date: 2024-08-17 11:20:33 
Last fit date: 2024-08-17 11:20:33 
Skforecast version: 0.13.0 
Python version: 3.12.4 
Forecaster id: None 

In [24]:
levels = ['item_1', 'item_2', 'item_3']
n_levels = len(levels)
lags = np.array([1, 2, 3, 4, 5])
steps = 2
lags_shape = len(lags)
exog_shape = 2
encoding = 'onehot'
series_col_names = levels
encoding_mapping = {'item_1': 0, 'item_2': 1, 'item_3': 2}
# Exog es un diccionario donde las claves son el step y los son numpy arrays en los
# que cada fila es un level y cada columna es una variable exogena.
exog = {
    1:np.full(shape=(steps, exog_shape), fill_value=99, dtype=float),
    2:np.full(shape=(steps, exog_shape), fill_value=999, dtype=float),
}
exog

{1: array([[99., 99.],
        [99., 99.]]),
 2: array([[999., 999.],
        [999., 999.]])}

In [25]:
if encoding is not None:
    if encoding == 'onehot':
        levels_encoded = np.zeros((n_levels, len(series_col_names)), dtype=float)
        for idx, level in enumerate(levels):
            if level in series_col_names:
                levels_encoded[idx, series_col_names.index(level)] = 1.
    else:
        levels_encoded = np.array([encoding_mapping.get(level, None) for level in levels], dtype='float64').reshape(-1, 1)
    levels_encoded_shape = levels_encoded.shape[1]
else:
    levels_encoded_shape = 0
levels_encoded

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [26]:
last_window =  np.full(shape=(lags_shape, n_levels), fill_value=5, dtype=float)
predictions =  np.full(shape=(steps, n_levels), fill_value=np.nan, dtype=float)
print(last_window)
print(predictions)
last_window = np.concatenate((last_window, predictions), axis=0)
last_window

[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]]
[[nan nan nan]
 [nan nan nan]]


array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [nan, nan, nan],
       [nan, nan, nan]])

In [27]:
features_shape = lags_shape + levels_encoded_shape + exog_shape
features = np.full(shape=(n_levels, features_shape), fill_value=np.nan, dtype=float)
features

array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]])

In [28]:
if encoding is not None:
    features[:, lags_shape:lags_shape + levels_encoded_shape] = levels_encoded
features

array([[nan, nan, nan, nan, nan,  1.,  0.,  0., nan, nan],
       [nan, nan, nan, nan, nan,  0.,  1.,  0., nan, nan],
       [nan, nan, nan, nan, nan,  0.,  0.,  1., nan, nan]])

In [29]:
step = 1
i = 0
features[:, :lags_shape] = last_window[-lags - (steps - i), :].transpose()
features


array([[ 5.,  5.,  5.,  5.,  5.,  1.,  0.,  0., nan, nan],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  1.,  0., nan, nan],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  0.,  1., nan, nan]])

In [30]:
if exog is not None:
    features[:, -exog_shape:] = exog[step][i, ].transpose()
features

array([[ 5.,  5.,  5.,  5.,  5.,  1.,  0.,  0., 99., 99.],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  1.,  0., 99., 99.],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  0.,  1., 99., 99.]])

In [31]:
predictions

array([[nan, nan, nan],
       [nan, nan, nan]])

In [32]:
pred = forecaster.regressor.predict(features)
pred

array([12.92505194, 12.67123086, 13.06597161])

In [33]:
predictions[i, :] = pred
predictions

array([[12.92505194, 12.67123086, 13.06597161],
       [        nan,         nan,         nan]])

In [34]:
last_window

array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [nan, nan, nan],
       [nan, nan, nan]])

In [35]:
last_window[-(steps - i), :] = pred
last_window

array([[ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [12.92505194, 12.67123086, 13.06597161],
       [        nan,         nan,         nan]])

In [36]:
#data = fetch_dataset(name="items_sales")
#data.to_parquet('items_sales_2.parquet', index=True)
data = pd.read_parquet('items_sales_2.parquet')
data = data.asfreq('D')
data.head()
exog = pd.DataFrame({
    'exog_1': np.random.normal(loc=0, scale=1, size=data.shape[0]),
    'exog_2': np.random.normal(loc=0, scale=1, size=data.shape[0]),
}, index=data.index)

end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()

forecaster.fit(series=data_train, exog=exog.loc[data_train.index])

(
            last_window_values_dict,
            exog_values_dict,
            levels,
            prediction_index,
            _
        ) = forecaster._create_predict_inputs(
    steps       = 2,
    levels      = None,
    last_window = None,
    exog        = exog.loc[data_test.index]
)

In [37]:
exog_values_dict

{'item_1': array([[ 1.35268457, -0.8408269 ],
        [-0.52355794,  1.06808229]]),
 'item_2': array([[ 1.35268457, -0.8408269 ],
        [-0.52355794,  1.06808229]]),
 'item_3': array([[ 1.35268457, -0.8408269 ],
        [-0.52355794,  1.06808229]])}

In [38]:
exog_values = np.concat(list(exog_values_dict.values()))
exog_values

array([[ 1.35268457, -0.8408269 ],
       [-0.52355794,  1.06808229],
       [ 1.35268457, -0.8408269 ],
       [-0.52355794,  1.06808229],
       [ 1.35268457, -0.8408269 ],
       [-0.52355794,  1.06808229]])

In [39]:
exog_values_dict_2 = {}
for i in range(steps):
    exog_values_dict_2[i+1] = exog_values[i::steps, :]
exog_values_dict_2
    

{1: array([[ 1.35268457, -0.8408269 ],
        [ 1.35268457, -0.8408269 ],
        [ 1.35268457, -0.8408269 ]]),
 2: array([[-0.52355794,  1.06808229],
        [-0.52355794,  1.06808229],
        [-0.52355794,  1.06808229]])}

In [46]:
predictions_old = forecaster.predict(steps=50, exog=exog.loc[data_test.index])

In [47]:
predictions_new = forecaster.predict_new(steps=50, exog=exog.loc[data_test.index])

In [48]:
predictions_old.equals(predictions_new)

True

In [49]:
n_series = 1000
n=365
index = pd.date_range(start='2021-01-01',periods=n, freq="D")
data = pd.DataFrame(index=index)
for i in range(n_series):
    data[f"series_{i+1}"] = np.random.normal(size=n)
data

  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.ra

Unnamed: 0,series_1,series_2,series_3,series_4,series_5,series_6,series_7,series_8,series_9,series_10,...,series_991,series_992,series_993,series_994,series_995,series_996,series_997,series_998,series_999,series_1000
2021-01-01,0.406959,0.781412,2.145115,0.764731,0.376214,-0.042247,-0.914161,-0.541617,-0.889500,0.168426,...,1.369095,-0.169771,-2.114391,-0.049101,0.767924,-0.618530,-0.023383,0.834772,0.337903,-1.135176
2021-01-02,-0.579615,-0.304340,-0.937455,0.250440,-0.014133,0.537913,3.524318,0.284322,-0.873990,0.602891,...,-0.109506,-0.172210,-2.211210,0.552471,2.026242,0.814681,2.389490,0.921593,0.936692,0.446536
2021-01-03,0.238705,2.067274,-0.071668,-0.317778,-0.981422,-0.371923,-1.054304,-0.874942,-0.000596,-0.263920,...,-1.227184,0.087526,1.321698,1.444148,-0.305799,-0.594362,1.270950,0.741529,2.352885,-0.434023
2021-01-04,-0.110353,1.043471,-0.587150,0.996828,0.289161,-0.402703,-1.837330,-0.065318,-0.136285,-0.060709,...,2.384085,-0.370962,-0.408669,-1.643405,0.220739,-0.512599,-1.565575,-0.714055,-0.397501,0.627271
2021-01-05,-0.188771,-0.655184,-0.496772,0.722567,1.125849,0.739921,-0.058075,-1.746798,1.492872,0.163631,...,0.379669,0.072349,-0.382009,-1.088258,-0.483211,-0.877050,0.823628,-0.422009,0.564739,-0.705260
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,0.878206,0.851660,-0.566279,0.362162,0.250390,-1.743420,-0.118865,-0.447480,-1.317584,-1.660888,...,0.669900,0.804400,0.998844,-0.570624,0.538672,0.377569,-0.088107,-0.767196,0.404804,1.413955
2021-12-28,1.296372,0.931109,0.033011,-1.594373,-1.121167,0.125849,0.055196,-0.521530,-1.636330,-1.028128,...,-2.224454,-0.576949,-0.129624,-1.233961,0.573314,-0.771293,0.492860,-0.449070,0.468295,0.200734
2021-12-29,-1.408955,0.432037,-1.496001,-0.021966,-0.562176,-0.685156,-0.880407,-0.654587,-0.988412,-1.228541,...,-0.198642,-0.244582,0.217183,0.201371,0.302790,1.640987,-0.237651,-1.384825,-0.419839,-0.026939
2021-12-30,-0.328865,0.309410,0.927123,0.830060,-1.080565,0.458992,-0.180931,0.256131,0.214592,-0.196064,...,-0.291660,-0.900789,-1.277453,-0.245256,0.561847,0.436391,-0.779076,-1.826527,0.017244,0.476831


In [50]:
forecaster.fit(series = data)

In [51]:
%%timeit
forecaster.predict(steps=5)

3.12 s ± 119 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [54]:
%%timeit
forecaster.predict_new(steps=5)

427 ms ± 63.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [55]:
forecaster.predict(steps=5).equals(forecaster.predict_new(steps=5))

True