In [181]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'c:\\Users\\Joaquín Amat\\Documents\\GitHub\\skforecast'

In [182]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

from skforecast.datasets import fetch_dataset
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries


# Data download
# ==============================================================================
#data = fetch_dataset(name="items_sales")
#data.to_parquet('items_sales.parquet', index=True)
data = pd.read_parquet('items_sales.parquet')
data.head()
exog = pd.DataFrame({
    'exog_1': np.random.normal(loc=0, scale=1, size=data.shape[0]),
    'exog_2': np.random.normal(loc=0, scale=1, size=data.shape[0]),
}, index=data.index)

end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()


# Create and train ForecasterAutoregMultiSeries
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
                 regressor          = LGBMRegressor(random_state=123, verbose=-1),
                 lags               = 5,
                 encoding           = 'onehot',
                 transformer_series = None,
                 transformer_exog   = None,
                 weight_func        = None,
                 series_weights     = None,
                 differentiation    = None,
                 dropna_from_series = False,
                 fit_kwargs         = None,
                 forecaster_id      = None
             )

forecaster.fit(series=data_train, exog=exog.loc[data_train.index])
forecaster



ForecasterAutoregMultiSeries 
Regressor: LGBMRegressor(random_state=123, verbose=-1) 
Lags: [1 2 3 4 5] 
Transformer for series: None 
Transformer for exog: None 
Series encoding: onehot 
Window size: 5 
Series levels (names): ['item_1', 'item_2', 'item_3'] 
Series weights: None 
Weight function included: False 
Differentiation order: None 
Exogenous included: True 
Type of exogenous variable: <class 'pandas.core.frame.DataFrame'> 
Exogenous variables names: ['exog_1', 'exog_2'] 
Training range: ["'item_1': ['0', '926']", "'item_2': ['0', '926']", "'item_3': ['0', '926']"] 
Training index type: RangeIndex 
Training index frequency: 1 
Regressor parameters: boosting_type: gbdt, class_weight: None, colsample_bytree: 1.0, importance_type: split, learning_rate: 0.1, ... 
fit_kwargs: {} 
Creation date: 2024-08-17 00:25:34 
Last fit date: 2024-08-17 00:25:34 
Skforecast version: 0.13.0 
Python version: 3.12.4 
Forecaster id: None 

In [183]:
levels = ['item_1', 'item_2', 'item_3']
n_levels = len(levels)
lags = np.array([1, 2, 3, 4, 5])
steps = 2
lags_shape = len(lags)
exog_shape = 2
encoding = 'onehot'
series_col_names = levels
encoding_mapping = {'item_1': 0, 'item_2': 1, 'item_3': 2}
# Exog es un diccionario donde las claves son el step y los son numpy arrays en los
# que cada fila es un level y cada columna es una variable exogena.
exog = {
    1:np.full(shape=(steps, exog_shape), fill_value=99, dtype=float),
    2:np.full(shape=(steps, exog_shape), fill_value=999, dtype=float),
}
exog

{1: array([[99., 99.],
        [99., 99.]]),
 2: array([[999., 999.],
        [999., 999.]])}

In [184]:
if encoding is not None:
    if encoding == 'onehot':
        levels_encoded = np.zeros((n_levels, len(series_col_names)), dtype=float)
        for idx, level in enumerate(levels):
            if level in series_col_names:
                levels_encoded[idx, series_col_names.index(level)] = 1.
    else:
        levels_encoded = np.array([encoding_mapping.get(level, None) for level in levels], dtype='float64').reshape(-1, 1)
    levels_encoded_shape = levels_encoded.shape[1]
else:
    levels_encoded_shape = 0
levels_encoded

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [185]:
last_window =  np.full(shape=(lags_shape, n_levels), fill_value=5, dtype=float)
predictions =  np.full(shape=(steps, n_levels), fill_value=np.nan, dtype=float)
print(last_window)
print(predictions)
last_window = np.concatenate((last_window, predictions), axis=0)
last_window

[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]]
[[nan nan nan]
 [nan nan nan]]


array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [nan, nan, nan],
       [nan, nan, nan]])

In [186]:
features_shape = lags_shape + levels_encoded_shape + exog_shape
features = np.full(shape=(n_levels, features_shape), fill_value=np.nan, dtype=float)
features

array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]])

In [187]:
if encoding is not None:
    features[:, lags_shape:lags_shape + levels_encoded_shape] = levels_encoded
features

array([[nan, nan, nan, nan, nan,  1.,  0.,  0., nan, nan],
       [nan, nan, nan, nan, nan,  0.,  1.,  0., nan, nan],
       [nan, nan, nan, nan, nan,  0.,  0.,  1., nan, nan]])

In [188]:
step = 1
i = 0
features[:, :lags_shape] = last_window[-lags - (steps - i), :].transpose()
features


array([[ 5.,  5.,  5.,  5.,  5.,  1.,  0.,  0., nan, nan],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  1.,  0., nan, nan],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  0.,  1., nan, nan]])

In [189]:
if exog is not None:
    features[:, -exog_shape:] = exog[step][i, ].transpose()
features

array([[ 5.,  5.,  5.,  5.,  5.,  1.,  0.,  0., 99., 99.],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  1.,  0., 99., 99.],
       [ 5.,  5.,  5.,  5.,  5.,  0.,  0.,  1., 99., 99.]])

In [190]:
predictions

array([[nan, nan, nan],
       [nan, nan, nan]])

In [191]:
pred = forecaster.regressor.predict(features)
pred

array([10.5630835 , 10.61145335, 10.4745736 ])

In [192]:
predictions[i, :] = pred
predictions

array([[10.5630835 , 10.61145335, 10.4745736 ],
       [        nan,         nan,         nan]])

In [193]:
last_window

array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [nan, nan, nan],
       [nan, nan, nan]])

In [194]:
last_window[-(steps - i), :] = pred
last_window

array([[ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [ 5.        ,  5.        ,  5.        ],
       [10.5630835 , 10.61145335, 10.4745736 ],
       [        nan,         nan,         nan]])

In [195]:
#data = fetch_dataset(name="items_sales")
#data.to_parquet('items_sales_2.parquet', index=True)
data = pd.read_parquet('items_sales_2.parquet')
data = data.asfreq('D')
data.head()
exog = pd.DataFrame({
    'exog_1': np.random.normal(loc=0, scale=1, size=data.shape[0]),
    'exog_2': np.random.normal(loc=0, scale=1, size=data.shape[0]),
}, index=data.index)

end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()

forecaster.fit(series=data_train, exog=exog.loc[data_train.index])

(
            last_window_values_dict,
            exog_values_dict,
            levels,
            prediction_index,
            _
        ) = forecaster._create_predict_inputs(
    steps       = 2,
    levels      = None,
    last_window = None,
    exog        = exog.loc[data_test.index]
)

In [196]:
exog_values_dict

{'item_1': array([[ 0.71745929,  0.28669447],
        [-0.94999823, -0.94994859]]),
 'item_2': array([[ 0.71745929,  0.28669447],
        [-0.94999823, -0.94994859]]),
 'item_3': array([[ 0.71745929,  0.28669447],
        [-0.94999823, -0.94994859]])}

In [197]:
exog_values = np.concat(list(exog_values_dict.values()))
exog_values

array([[ 0.71745929,  0.28669447],
       [-0.94999823, -0.94994859],
       [ 0.71745929,  0.28669447],
       [-0.94999823, -0.94994859],
       [ 0.71745929,  0.28669447],
       [-0.94999823, -0.94994859]])

In [198]:
exog_values_dict_2 = {}
for i in range(steps):
    exog_values_dict_2[i+1] = exog_values[i::steps, :]
exog_values_dict_2
    

{1: array([[0.71745929, 0.28669447],
        [0.71745929, 0.28669447],
        [0.71745929, 0.28669447]]),
 2: array([[-0.94999823, -0.94994859],
        [-0.94999823, -0.94994859],
        [-0.94999823, -0.94994859]])}

In [199]:
predictions_old = forecaster.predict(steps=50, exog=exog.loc[data_test.index])

In [200]:
predictions_new = forecaster.predict_new(steps=50, exog=exog.loc[data_test.index])

In [201]:
predictions_old.equals(predictions_new)

True

In [207]:
n_series = 1000
n=365
index = pd.date_range(start='2021-01-01',periods=n, freq="D")
data = pd.DataFrame(index=index)
for i in range(n_series):
    data[f"series_{i+1}"] = np.random.normal(size=n)
data

  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.random.normal(size=n)
  data[f"series_{i+1}"] = np.ra

Unnamed: 0,series_1,series_2,series_3,series_4,series_5,series_6,series_7,series_8,series_9,series_10,...,series_991,series_992,series_993,series_994,series_995,series_996,series_997,series_998,series_999,series_1000
2021-01-01,0.459607,0.812563,1.226431,2.179314,-1.154215,-0.429972,-0.856183,1.679390,-0.829975,-0.781760,...,-1.238420,1.031412,-0.379043,1.117513,-0.288710,-1.522633,0.869183,0.834237,0.024823,-0.941784
2021-01-02,0.070358,-1.256666,-1.425716,-1.086238,-1.132526,-0.105147,0.156575,0.552880,-0.387301,-0.792668,...,0.793684,0.275992,0.362580,-0.920835,0.692968,-0.537294,-1.277848,-0.554939,2.573138,0.842919
2021-01-03,-1.602398,-1.070124,-1.348625,0.637208,1.734226,-0.349824,0.037684,0.111998,1.431537,0.523031,...,0.186039,-0.283988,-1.777567,0.564709,0.226534,1.363176,0.624943,-0.564722,0.001652,0.242821
2021-01-04,-1.648315,-1.915004,0.954499,-0.518224,-0.315183,-1.033250,0.817404,1.595141,-0.568498,-0.279967,...,-2.102379,0.857071,0.547570,-0.922204,1.528727,-0.131264,0.235377,-0.876907,2.679344,1.782418
2021-01-05,0.054588,-0.307976,2.080736,0.691205,-0.431460,-0.276800,1.363193,-0.677981,-0.006413,1.014594,...,0.412186,-0.535376,-0.985532,0.169298,-0.277085,-0.168362,0.312922,-1.789081,-0.157171,-0.588903
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,0.657898,0.822759,1.242705,0.969374,0.814548,-0.825139,0.489492,1.201646,0.585756,-0.954348,...,1.123042,-1.113476,0.286141,-0.250320,0.242213,0.316973,-1.027013,0.400637,0.999708,0.591375
2021-12-28,1.074457,0.568025,-1.184057,1.882000,0.591357,0.010179,-1.160976,0.200898,1.896302,-0.844265,...,-1.110428,-0.624965,-0.198429,3.121358,0.006099,0.193604,-0.751852,-0.318429,-1.279038,1.348535
2021-12-29,-1.272056,-0.391036,-0.489025,1.805529,0.370529,0.878203,-0.215573,1.374078,0.922482,-0.013045,...,-1.122657,-0.225483,-0.121312,0.381117,0.757074,0.410061,1.990995,-0.011111,-1.196892,-0.458690
2021-12-30,-0.214785,-0.070895,-0.623965,1.060798,0.040662,0.782276,0.507518,-1.198748,-1.039933,0.760274,...,0.927136,-0.275238,-0.297127,-1.245870,-0.858263,-0.719257,0.856057,-0.749774,1.233099,-1.845302


In [208]:
forecaster.fit(series = data)

In [218]:
%%timeit
forecaster.predict(steps=5)

4.33 s ± 803 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [219]:
%%timeit
forecaster.predict_new(steps=5)

219 ms ± 7.66 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [221]:
forecaster.predict(steps=5).equals(forecaster.predict_new(steps=5))

True