In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Make the parent of the working directory importable and display the
# path that was added.
project_root = str(Path.cwd().parent)
sys.path.insert(1, project_root)
project_root

'c:\\Users\\jaesc2\\GitHub\\skforecast'

In [4]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error

from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries

In [46]:
# Data download
# ==============================================================================
url = (
    'https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/'
    'data/simulated_items_sales.csv'
)

# Data preparation
# ==============================================================================
# Parse the dates, index the frame by them, enforce a daily frequency and sort
# chronologically in a single chain.
data = (
    pd.read_csv(url, sep=',')
    .assign(date=lambda raw: pd.to_datetime(raw['date'], format='%Y-%m-%d'))
    .set_index('date')
    .asfreq('D')
    .sort_index()
)
data.head()

Unnamed: 0_level_0,item_1,item_2,item_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-01-01,8.253175,21.047727,19.429739
2012-01-02,22.777826,26.578125,28.009863
2012-01-03,27.549099,31.751042,32.078922
2012-01-04,25.895533,24.567708,27.252276
2012-01-05,21.379238,18.191667,20.357737


In [47]:
# First and last timestamp of the index, i.e. the date range covered by `data`
data.index.min(), data.index.max()

(Timestamp('2012-01-01 00:00:00'), Timestamp('2015-01-01 00:00:00'))

In [48]:
# Delete observations of item_2 and item_3
# ==============================================================================
# Assign through a single `.loc` call on the frame. The chained form
# `data['item_2'].loc[...] = np.nan` writes to an intermediate Series, which is
# not guaranteed to propagate to `data` (and never does under Copy-on-Write).
data.loc[:'2013-01-01', 'item_2'] = np.nan
data.loc[:'2013-07-13', 'item_3'] = np.nan

# Number of missing values per column after the deletion
data.isna().sum()

item_1      0
item_2    367
item_3    560
dtype: int64

In [49]:
# Add exog
# ==============================================================================
# The exogenous variable is a running counter 0, 1, 2, ... with one value per row
n_obs = data.shape[0]
data['exog_1'] = np.arange(n_obs)
data.head()

Unnamed: 0_level_0,item_1,item_2,item_3,exog_1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-01-01,8.253175,,,0
2012-01-02,22.777826,,,1
2012-01-03,27.549099,,,2
2012-01-04,25.895533,,,3
2012-01-05,21.379238,,,4


In [50]:
# Split data into train-test
# ==============================================================================
# Everything up to `end_train` goes to the training partition and everything
# from `end_train` onwards to the test partition.
end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train].copy()
data_test = data.loc[end_train:].copy()

train_summary = (
    f"Train dates : {data_train.index.min()} --- {data_train.index.max()}   "
    f"(n={len(data_train)})"
)
test_summary = (
    f"Test dates  : {data_test.index.min()} --- {data_test.index.max()}   "
    f"(n={len(data_test)})"
)
print(train_summary)
print(test_summary)

Train dates : 2012-01-01 00:00:00 --- 2014-07-15 00:00:00   (n=927)
Test dates  : 2014-07-16 00:00:00 --- 2015-01-01 00:00:00   (n=170)


In [51]:
# Create and fit forecaster multi series
# ==============================================================================
# Ridge regressor on 5 lags; transformers and weighting are explicitly disabled.
ridge = Ridge(random_state=123)
forecaster = ForecasterAutoregMultiSeries(
    regressor=ridge,
    lags=5,
    transformer_series=None,
    transformer_exog=None,
    weight_func=None,
    series_weights=None,
)

# Fit on the three item columns, with `exog_1` as the exogenous variable
item_columns = ['item_1', 'item_2', 'item_3']
forecaster.fit(
    series=data_train[item_columns],
    exog=data_train['exog_1'],
)
forecaster

ForecasterAutoregMultiSeries 
Regressor: Ridge(random_state=123) 
Lags: [1 2 3 4 5] 
Transformer for series: None 
Transformer for exog: None 
Window size: 5 
Series levels (names): ['item_1', 'item_2', 'item_3'] 
Series weights: None 
Weight function included: False 
Exogenous included: True 
Type of exogenous variable: <class 'pandas.core.series.Series'> 
Exogenous variables names: ['exog_1'] 
Training range: [Timestamp('2012-01-01 00:00:00'), Timestamp('2014-07-15 00:00:00')] 
Training index type: DatetimeIndex 
Training index frequency: D 
Regressor parameters: {'alpha': 1.0, 'copy_X': True, 'fit_intercept': True, 'max_iter': None, 'positive': False, 'random_state': 123, 'solver': 'auto', 'tol': 0.0001} 
fit_kwargs: {} 
Creation date: 2023-06-06 18:35:24 
Last fit date: 2023-06-06 18:35:24 
Skforecast version: 0.9.0 
Python version: 3.10.11 
Forecaster id: None 

In [56]:
# Build the training matrices from the three item columns (no exog is passed
# here) and print their shapes and index boundaries.
item_columns = ['item_1', 'item_2', 'item_3']
X_train, y_train, y_index, y_train_index = forecaster.create_train_X_y(data_train[item_columns])

# data_train

print("data_train", data_train.shape, "", sep="\n")

# X_train

# Rows per series: count where the column named after the series equals 1
len_1, len_2, len_3 = (int((X_train[name] == 1.).sum()) for name in item_columns)
total_len = len_1 + len_2 + len_3

print("X_train", X_train.shape, sep="\n")
print('item_1 len:', len_1)
print('item_2 len:', len_2)
print('item_3 len:', len_3)
print("len_1 + len_2 + len_3:", total_len)
print("")

# y_train

print("y_train", y_train.shape, "", sep="\n")

# y_index

print("y_index", y_index.shape, sep="\n")
print("data_train:", data_train.index[0], data_train.index[-1])
print("y_index:", y_index[0], y_index[-1])
print("")

# y_train_index

# Offsets at which the second and third groups of rows begin
start_2 = len_1
start_3 = len_1 + len_2

print("y_train_index", y_train_index.shape, sep="\n")
print(y_train_index[0], y_train_index[start_2 - 1])
print(y_train_index[start_2], y_train_index[start_3 - 1])
print(y_train_index[start_3], y_train_index[total_len - 1])
print("")


data_train
(927, 4)

X_train
(1839, 8)
item_1 len: 922
item_2 len: 555
item_3 len: 362
len_1 + len_2 + len_3: 1839

y_train
(1839,)

y_index
(927,)
data_train: 2012-01-01 00:00:00 2014-07-15 00:00:00
y_index: 2012-01-01 00:00:00 2014-07-15 00:00:00

y_train_index
(1839,)
2012-01-06 00:00:00 2014-07-15 00:00:00
2013-01-07 00:00:00 2014-07-15 00:00:00
2013-07-19 00:00:00 2014-07-15 00:00:00



In [57]:
# Inspect the `last_window` attribute of the fitted forecaster; the saved
# output shows the final 5 training dates for each item.
forecaster.last_window

Unnamed: 0_level_0,item_1,item_2,item_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-07-11,25.662128,11.002083,10.396751
2014-07-12,23.773923,11.008333,16.139173
2014-07-13,22.609388,8.1,13.028927
2014-07-14,23.307307,10.895833,9.315334
2014-07-15,25.980745,10.489583,9.908915


In [58]:
# Predict with the `exog_1` column of the test partition as exog; the saved
# output has 5 rows, starting the day after the training range ends.
forecaster.predict(5, exog=data_test['exog_1'])

Unnamed: 0,item_1,item_2,item_3
2014-07-16,24.96464,11.386619,11.927721
2014-07-17,23.862185,12.156144,13.466882
2014-07-18,23.376065,12.224168,13.771249
2014-07-19,23.253302,12.649076,13.537742
2014-07-20,23.440808,12.958848,13.657885


## Tests

In [13]:
import re
import pytest
import numpy as np
import pandas as pd
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.exceptions import MissingValuesExogWarning
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

In [19]:
# Three levels on a common daily index; 'l2' and 'l3' start with missing values
dates = pd.date_range("1990-01-01", periods=10, freq='D')
series = pd.DataFrame(
    {
        'l1': np.arange(10, dtype=float),
        'l2': [np.nan, np.nan, 2., 3., 4., 5., 6., 7., 8., 9.],
        'l3': [np.nan, np.nan, np.nan, np.nan, 4., 5., 6., 7., 8., 9.],
    },
    index=dates,
)

# One numeric and one categorical exogenous column on the same index
exog = pd.DataFrame(
    {
        'col_1': [7.5, 24.4, 60.3, 57.3, 50.7, 41.4, 24.4, 87.2, 47.4, 23.8],
        'col_2': ['a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b'],
    },
    index=dates,
)

# Scale the numeric exog column and one-hot encode the categorical one
transformer_exog = ColumnTransformer(
    [
        ('scale', StandardScaler(), ['col_1']),
        ('onehot', OneHotEncoder(), ['col_2']),
    ],
    remainder='passthrough',
    verbose_feature_names_out=False,
)

forecaster = ForecasterAutoregMultiSeries(
    regressor=LinearRegression(),
    lags=3,
    transformer_series=StandardScaler(),
    transformer_exog=transformer_exog,
)
results = forecaster.create_train_X_y(series=series, exog=exog)

In [37]:
# Print the second element returned by `create_train_X_y` (unpacked as
# `y_train` in an earlier cell) as a flat list of values.
print(list(results[1].values))

[-0.5222329678670935, -0.17407765595569785, 0.17407765595569785, 0.5222329678670935, 0.8703882797784892, 1.2185435916898848, 1.5666989036012806, -0.2182178902359924, 0.2182178902359924, 0.6546536707079772, 1.091089451179962, 1.5275252316519468, 0.29277002188455997, 0.8783100656536799, 1.4638501094227998]


In [38]:
# Hand-written design matrix: three lag columns followed by two indicator
# columns, where the first 7 rows belong to series_1 and the last 7 to series_2.
lag_values = np.array(
    [
        [0.58142898, 0.65138268, 0.12362923],
        [0.72969992, 0.58142898, 0.65138268],
        [0.97790567, 0.72969992, 0.58142898],
        [0.56924731, 0.97790567, 0.72969992],
        [0.85369084, 0.56924731, 0.97790567],
        [0.75425194, 0.85369084, 0.56924731],
        [0.08167939, 0.75425194, 0.85369084],
        [0.72350895, 0.11599708, 0.51328688],
        [0.10305721, 0.72350895, 0.11599708],
        [0.20581485, 0.10305721, 0.72350895],
        [0.41262027, 0.20581485, 0.10305721],
        [0.82107767, 0.41262027, 0.20581485],
        [0.0107816 , 0.82107767, 0.41262027],
        [0.94951918, 0.0107816 , 0.82107767],
    ]
)
series_1_flag = np.repeat([1., 0.], 7)
series_2_flag = 1. - series_1_flag

X_train = pd.DataFrame(
    data=np.column_stack([lag_values, series_1_flag, series_2_flag]),
    columns=['lag_1', 'lag_2', 'lag_3', 'series_1', 'series_2'],
)

In [40]:
# Display X_train without the rows labelled 7, 8 and 9 (the result is not
# assigned back to X_train).
# NOTE(review): the saved output stops at row 12 although X_train as defined
# in the previous cell has rows 0-13; the output looks stale, so re-run to confirm.
X_train.drop([7, 8, 9])

Unnamed: 0,lag_1,lag_2,lag_3,series_1,series_2
0,0.581429,0.651383,0.123629,1.0,0.0
1,0.7297,0.581429,0.651383,1.0,0.0
2,0.977906,0.7297,0.581429,1.0,0.0
3,0.569247,0.977906,0.7297,1.0,0.0
4,0.853691,0.569247,0.977906,1.0,0.0
5,0.754252,0.853691,0.569247,1.0,0.0
6,0.081679,0.754252,0.853691,1.0,0.0
10,0.41262,0.205815,0.103057,0.0,1.0
11,0.821078,0.41262,0.205815,0.0,1.0
12,0.010782,0.821078,0.41262,0.0,1.0


In [43]:
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import series
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import exog
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import exog_predict

# Copy of the fixture series with the first 10 observations of column '2' set
# to NaN. A single positional `.iloc` assignment on the frame is used because
# the chained form `new_series['2'].iloc[:10] = ...` writes to an intermediate
# Series and is not guaranteed to update `new_series`.
new_series = series.copy()
new_series.iloc[:10, new_series.columns.get_loc('2')] = np.nan

# Scale the numeric exog column and one-hot encode the categorical one
transformer_exog = ColumnTransformer(
                        [('scale', StandardScaler(), ['col_1']),
                        ('onehot', OneHotEncoder(), ['col_2'])],
                        remainder = 'passthrough',
                        verbose_feature_names_out = False
                    )
# The stray `..` token that sat between the arguments was a syntax error and
# has been removed.
forecaster = ForecasterAutoregMultiSeries(
                    regressor          = LinearRegression(),
                    lags               = 5,
                    transformer_series = StandardScaler(),
                    transformer_exog   = transformer_exog,
                )
# NOTE(review): the forecaster is fitted on `series`, not on `new_series`, so
# the NaN-injected copy above is unused in this cell. Confirm which is intended.
forecaster.fit(series=series, exog=exog)
predictions = forecaster.predict(steps=5, exog=exog_predict)
predictions

Unnamed: 0,1,2
50,0.532673,0.554964
51,0.44478,0.57788
52,0.525796,0.663891
53,0.573911,0.657898
54,0.546336,0.584119


In [44]:
# Show the predictions as a plain NumPy array (presumably to copy the values
# into a test expectation; confirm).
predictions.to_numpy()

array([[0.53267333, 0.55496412],
       [0.44478046, 0.57787982],
       [0.52579563, 0.66389117],
       [0.57391142, 0.65789846],
       [0.54633594, 0.5841187 ]])

In [2]:
# Fixtures

from skforecast.model_selection_multiseries.tests.fixtures_model_selection_multiseries import series

In [5]:
# Preview the first two rows of the imported fixture `series`
series.head(2)

Unnamed: 0,l1,l2
0,0.696469,0.120629
1,0.286139,0.826341


In [6]:
# Copy of the fixture with the first 10 observations of level 'l2' set to NaN.
# A single positional `.iloc` assignment on the frame is used because the
# chained form `new_series['l2'].iloc[:10] = ...` writes to an intermediate
# Series and is not guaranteed to update `new_series`.
new_series = series.copy()
new_series.iloc[:10, new_series.columns.get_loc('l2')] = np.nan

In [9]:
# Forecaster to be backtested: Ridge regressor on 2 lags
forecaster = ForecasterAutoregMultiSeries(
    regressor=Ridge(random_state=123),
    lags=2,
)

# Level 'l1' is also passed, renamed to 'exog_1', as the exogenous variable
exog_1 = new_series['l1'].rename('exog_1')
# All observations except the last 20 form the initial training set
n_initial_train = len(new_series) - 20

metrics_levels, backtest_predictions = backtesting_forecaster_multiseries(
    forecaster=forecaster,
    series=new_series,
    steps=5,
    levels='l1',
    metric='mean_absolute_error',
    initial_train_size=n_initial_train,
    gap=5,
    allow_incomplete_fold=False,
    refit=True,
    fixed_train_size=True,
    exog=exog_1,
    interval=[5, 95],
    n_boot=150,
    random_state=123,
    in_sample_residuals=True,
    verbose=False,
)

  0%|          | 0/3 [00:00<?, ?it/s]

In [10]:
# Show the backtesting metrics as a NumPy array; the saved output holds one
# (level, metric value) row for 'l1'.
metrics_levels.to_numpy()

array([['l1', 0.1355099897175138]], dtype=object)

In [19]:
# Show the backtest predictions as a NumPy array; the saved output has three
# columns (presumably the point prediction and the bounds of the [5, 95]
# interval; confirm against the frame's column names).
backtest_predictions.to_numpy()

array([[0.42170236, 0.21384761, 0.6338432 ],
       [0.47242371, 0.23550111, 0.68264162],
       [0.66450589, 0.4128072 , 0.87549193],
       [0.67311586, 0.46534536, 0.89662809],
       [0.487886  , 0.27487735, 0.65050566],
       [0.52759202, 0.30254031, 0.7316064 ],
       [0.33021382, 0.11679102, 0.52056867],
       [0.4226274 , 0.16874895, 0.61548288],
       [0.44647715, 0.22033296, 0.65301466],
       [0.61435678, 0.37793909, 0.81285962],
       [0.41671844, 0.13568381, 0.67943385],
       [0.47385345, 0.19025177, 0.76849597],
       [0.62360146, 0.39888165, 0.85075261],
       [0.49407875, 0.24011478, 0.78855477],
       [0.51234652, 0.26828928, 0.81339595]])