In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Put the parent of the current working directory on the import path
# (presumably so the local checkout of the package is importable - confirm),
# then echo the inserted path as the cell output.
repo_root = str(Path.cwd().parent)
sys.path.insert(1, repo_root)
repo_root

'c:\\Users\\jaesc2\\GitHub\\skforecast'

In [2]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from lightgbm import LGBMRegressor

from skforecast.datasets import fetch_dataset
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries

In [3]:
# Data download
# ==============================================================================
# Fetch the dataset by name and preview its first rows.
dataset_name = "items_sales"
data = fetch_dataset(name=dataset_name)
data.head()

items_sales
-----------
Simulated time series for the sales of 3 different items.
Simulated data.
Shape of the dataset: (1097, 3)


Unnamed: 0_level_0,item_1,item_2,item_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-01-01,8.253175,21.047727,19.429739
2012-01-02,22.777826,26.578125,28.009863
2012-01-03,27.549099,31.751042,32.078922
2012-01-04,25.895533,24.567708,27.252276
2012-01-05,21.379238,18.191667,20.357737


In [4]:
# Split data into train-test
# ==============================================================================
# The cut-off sits just before midnight: both label slices below include their
# bounds, yet the printed summary shows train ending on 2014-07-15 and test
# starting on 2014-07-16, i.e. no row is shared between the two partitions.
end_train = '2014-07-15 23:59:00'
data_train, data_test = (
    data.loc[:end_train].copy(),
    data.loc[end_train:].copy(),
)

# Date range and number of rows of each partition.
print(
    f"Train dates : {data_train.index.min()} --- {data_train.index.max()}   "
    f"(n={len(data_train)})"
)
print(
    f"Test dates  : {data_test.index.min()} --- {data_test.index.max()}   "
    f"(n={len(data_test)})"
)

Train dates : 2012-01-01 00:00:00 --- 2014-07-15 00:00:00   (n=927)
Test dates  : 2014-07-16 00:00:00 --- 2015-01-01 00:00:00   (n=170)


In [51]:
# Generate exogenous variable month
# ==============================================================================
# `assign` returns a new frame, so `data` itself is left untouched.
data_exog = data.assign(month=data.index.month)

# Split data into train-test
# ==============================================================================
end_train = '2014-07-15 23:59:00'
data_exog_train = data_exog.loc[:end_train].copy()
data_exog_test  = data_exog.loc[end_train:].copy()

data_exog_train.head(3)

Unnamed: 0_level_0,item_1,item_2,item_3,month
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-01-01,8.253175,21.047727,19.429739,1
2012-01-02,22.777826,26.578125,28.009863,1
2012-01-03,27.549099,31.751042,32.078922,1


In [53]:
# Create and fit a Forecaster Multi-Series
# ==============================================================================
# Every optional argument is spelled out so the configuration used by the
# experiments below is fully visible in the notebook.
regressor = LGBMRegressor(random_state=123, n_estimators=10, verbose=-1)
forecaster = ForecasterAutoregMultiSeries(
    regressor          = regressor,
    lags               = 3,
    encoding           = 'ordinal',
    transformer_series = None,
    transformer_exog   = None,
    weight_func        = None,
    series_weights     = None,
    differentiation    = None,
    dropna_from_series = False,
    fit_kwargs         = None,
    forecaster_id      = None,
)

# The three item columns are the modelled series; `month` is the exogenous
# feature.
series_cols = ['item_1', 'item_2', 'item_3']
forecaster.fit(
    series = data_exog_train[series_cols],
    exog   = data_exog_train[['month']],
)

In [54]:
# Build the training matrices from the series only (no `exog` is passed here)
# and keep the first returned element, which is displayed as `X_train`.
train_matrices = forecaster.create_train_X_y(series=data_train)
X_train = train_matrices[0]
X_train

Unnamed: 0_level_0,lag_1,lag_2,lag_3,_level_skforecast
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-01-04,27.549099,22.777826,8.253175,0
2012-01-05,25.895533,27.549099,22.777826,0
2012-01-06,21.379238,25.895533,27.549099,0
2012-01-07,21.106643,21.379238,25.895533,0
2012-01-08,20.533871,21.106643,21.379238,0
...,...,...,...,...
2014-07-11,9.677730,12.199832,14.072343,2
2014-07-12,10.396751,9.677730,12.199832,2
2014-07-13,16.139173,10.396751,9.677730,2
2014-07-14,13.028927,16.139173,10.396751,2


In [7]:
# Fabricate a last window for a level that was not part of `fit` ('item_4'):
# the stored last window of 'item_1' shifted up by 10.
lw = (forecaster.last_window['item_1'] + 10).to_frame(name='item_4')
lw

Unnamed: 0_level_0,item_4
date,Unnamed: 1_level_1
2014-07-13,32.609388
2014-07-14,33.307307
2014-07-15,35.980745


In [10]:
# One-step forecast for the unknown level, supplying its own last window.
forecaster.predict(
    steps       = 1,
    levels      = 'item_4',
    last_window = lw
)



Unnamed: 0,item_4
2014-07-16,23.893397


In [9]:
# Without an explicit `last_window`, the forecaster has no stored window for
# the unknown level 'item_4' and `predict` raises a ValueError. The error is
# what this cell demonstrates, so it is caught and printed: an uncaught
# exception would stop "Restart & Run All" at this cell.
try:
    predictions_no_window = forecaster.predict(
        steps       = 1,
        levels      = 'item_4',
        last_window = None
    )
except ValueError as exc:
    predictions_no_window = None
    print(f"{type(exc).__name__}: {exc}")

# Displays the forecast if the call unexpectedly succeeds; shows nothing
# (None) when the error above was raised.
predictions_no_window



ValueError: No series to predict. None of the series ['item_4'] are present in `last_window` attribute. Provide `last_window` as argument in predict method.

In [11]:
# Build the prediction input matrices for the unknown level and keep the entry
# stored under 'item_4'.
predict_matrices = forecaster.create_predict_X(
    steps       = 1,
    levels      = 'item_4',
    last_window = lw
)
X_predict = predict_matrices['item_4']
X_predict



Unnamed: 0,lag_1,lag_2,lag_3,_level_skforecast
2014-07-16,35.980745,33.307307,32.609388,


In [None]:
# Call the underlying regressor directly on the prediction matrix, bypassing
# the forecaster's own `predict`.
fitted_regressor = forecaster.regressor
fitted_regressor.predict(X_predict)

array([23.89339724])

In [None]:
# Set the level-encoding column to NaN explicitly. `assign` builds a new frame
# instead of writing into the object obtained from `create_predict_X`, so the
# frame shown by the earlier cell is not mutated behind the reader's back, and
# no chained-assignment warning can be triggered if that object is a view.
# Re-running this cell gives the same result.
X_predict = X_predict.assign(_level_skforecast=np.nan)
X_predict

Unnamed: 0,lag_1,lag_2,lag_3,_level_skforecast
2014-07-16,35.980745,33.307307,32.609388,


In [None]:
# Same direct regressor call as before, now with the level column explicitly
# set to NaN in `X_predict`.
fitted_regressor = forecaster.regressor
fitted_regressor.predict(X_predict)

array([23.89339724])

In [None]:
# Last window holding the series stored by the forecaster plus the fabricated
# 'item_4' ('item_1' shifted up by 10), appended as the last column.
lw = pd.DataFrame(forecaster.last_window).assign(
    item_4=lambda window: window['item_1'] + 10
)
lw

Unnamed: 0_level_0,item_1,item_2,item_3,item_4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-07-13,22.609388,8.1,13.028927,32.609388
2014-07-14,23.307307,10.895833,9.315334,33.307307
2014-07-15,25.980745,10.489583,9.908915,35.980745


In [None]:
# Forecast a level seen during fit ('item_1') together with the unknown one
# ('item_4'), both taken from the same user-supplied last window.
forecaster.predict(
    steps       = 1,
    levels      = ['item_1', 'item_4'],
    last_window = lw
)



Unnamed: 0,item_1,item_4
2014-07-16,22.778094,23.893397


In [None]:
# Prediction interval for the unknown level, using the supplied last window.
forecaster.predict_interval(
    steps       = 1,
    levels      = 'item_4',
    last_window = lw
)



Unnamed: 0,item_4,item_4_lower_bound,item_4_upper_bound
2014-07-16,23.893397,18.890479,28.713671


In [None]:
# Prediction intervals for a known level and the unknown one in a single call.
forecaster.predict_interval(
    steps       = 1,
    levels      = ['item_1', 'item_4'],
    last_window = lw
)



Unnamed: 0,item_1,item_1_lower_bound,item_1_upper_bound,item_4,item_4_lower_bound,item_4_upper_bound
2014-07-16,22.778094,21.672791,27.20174,23.893397,18.890479,28.713671


In [None]:
# With `last_window=None` there is no stored window for the unknown level
# 'item_4', and `predict_interval` raises a ValueError. The error is the
# demonstration, so it is caught and printed: an uncaught exception would stop
# "Restart & Run All" at this cell.
try:
    intervals_no_window = forecaster.predict_interval(
        steps       = 1,
        levels      = 'item_4',
        last_window = None
    )
except ValueError as exc:
    intervals_no_window = None
    print(f"{type(exc).__name__}: {exc}")

# Displays the intervals if the call unexpectedly succeeds; shows nothing
# (None) when the error above was raised.
intervals_no_window



ValueError: No series to predict. None of the series ['item_4'] are present in `last_window` attribute. Provide `last_window` as argument in predict method.

In [None]:
# Mixed request without a user-supplied last window: 'item_1' was seen during
# fit, 'item_4' was not. The displayed result contains columns for 'item_1'
# only.
forecaster.predict_interval(
    steps       = 1,
    levels      = ['item_1', 'item_4'],
    last_window = None
)



Unnamed: 0,item_1,item_1_lower_bound,item_1_upper_bound
2014-07-16,22.778094,21.672791,27.20174


In [None]:
# Register the forecaster's own in-sample residuals as its out-of-sample
# residuals.
in_sample_residuals = forecaster.in_sample_residuals
forecaster.set_out_sample_residuals(residuals=in_sample_residuals)

In [None]:
# List the keys under which out-of-sample residuals are stored.
stored_residuals = forecaster.out_sample_residuals
stored_residuals.keys()

dict_keys(['item_1', 'item_2', 'item_3', '_unknown_level'])

In [None]:
# Prediction interval for the unknown level, this time requesting
# out-of-sample residuals (`in_sample_residuals=False`).
forecaster.predict_interval(
    steps               = 1,
    levels              = 'item_4',
    last_window         = lw,
    in_sample_residuals = False
)



Unnamed: 0,item_4,item_4_lower_bound,item_4_upper_bound
2014-07-16,23.893397,19.701222,29.869204


In [87]:
# Rebuild the data and a fresh forecaster from scratch so that the timing
# comparison below does not depend on state left on the previous `forecaster`
# object by earlier cells (e.g. the out-of-sample residuals set on it).

# Generate exogenous variable month
# ==============================================================================
data_exog = data.copy()
data_exog['month'] = data_exog.index.month

# Split data into train-test
# ==============================================================================
end_train = '2014-07-15 23:59:00'
data_exog_train = data_exog.loc[:end_train, :].copy()
data_exog_test  = data_exog.loc[end_train:, :].copy()

# Create and fit a Forecaster Multi-Series
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
                 regressor          = LGBMRegressor(random_state=123, n_estimators=10, verbose=-1),
                 lags               = 3,
                 encoding           = 'ordinal',
                 transformer_series = None,
                 transformer_exog   = None,
                 weight_func        = None,
                 series_weights     = None,
                 differentiation    = None,
                 dropna_from_series = False,
                 fit_kwargs         = None,
                 forecaster_id      = None
             )

forecaster.fit(
    series = data_exog_train[['item_1', 'item_2', 'item_3']], 
    exog   = data_exog_train[['month']]
)

In [88]:
# Prepare the inputs consumed by the recursive-prediction methods compared in
# the cells below.
steps = 5
level = 'item_1'

# `_create_predict_inputs` is a private method that returns five values here.
# The last one is not needed and is bound to `_unused` rather than `_`,
# because assigning to `_` overrides IPython's "last output" variable for the
# rest of the session.
(
    last_window_values_dict,
    exog_values_dict,
    levels,
    prediction_index,
    _unused
) = forecaster._create_predict_inputs(
    steps       = steps,
    levels      = level,
    exog        = data_exog_test[['month']]
)


In [89]:
# Run the current recursive predictor on the prepared inputs for `level`; the
# displayed array is the reference the alternative implementation is compared
# against.
recursive_inputs = {
    'steps': steps,
    'level': level,
    'last_window': last_window_values_dict[level],
    'exog': exog_values_dict[level],
}
forecaster._recursive_predict(**recursive_inputs)

array([22.80722376, 21.06453624, 20.16995613, 20.79247993, 21.18398596])

In [90]:
# Same inputs through the `_recursive_predict_new` variant, to check that it
# reproduces the values returned by `_recursive_predict`.
recursive_inputs = {
    'steps': steps,
    'level': level,
    'last_window': last_window_values_dict[level],
    'exog': exog_values_dict[level],
}
forecaster._recursive_predict_new(**recursive_inputs)

array([22.80722376, 21.06453624, 20.16995613, 20.79247993, 21.18398596])

In [91]:
%%timeit -r 10 -n 100

# Timed statement: `_recursive_predict` on the inputs prepared for `level`.
# The `%%timeit -r 10 -n 100` line above requests 10 runs of 100 loops each.
# The two dictionary look-ups are evaluated inside the timed body, so they are
# included in the measurement.
forecaster._recursive_predict(
    steps       = steps,
    level       = level,
    last_window = last_window_values_dict[level],
    exog        = exog_values_dict[level]
)

2.2 ms ± 279 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [95]:
%%timeit -r 10 -n 100

# Timed statement: the `_recursive_predict_new` variant, called with exactly
# the same arguments and `%%timeit` settings (10 runs of 100 loops) as the
# `_recursive_predict` timing, so the two measurements are comparable.
forecaster._recursive_predict_new(
    steps       = steps,
    level       = level,
    last_window = last_window_values_dict[level],
    exog        = exog_values_dict[level]
)

2.04 ms ± 159 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [93]:
%%timeit -r 10 -n 100

# Timed statement: the `_recursive_predict_old` variant, with the same
# arguments and `%%timeit` settings (10 runs of 100 loops) as the other two
# timings.
# NOTE(review): unlike `_recursive_predict_new`, the output of this variant is
# not displayed anywhere in this section before it is timed - confirm that it
# returns the same values.
forecaster._recursive_predict_old(
    steps       = steps,
    level       = level,
    last_window = last_window_values_dict[level],
    exog        = exog_values_dict[level]
)

1.97 ms ± 221 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
