In [3]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

'/home/ubuntu/varios/skforecast'

In [4]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import HistGradientBoostingRegressor
from lightgbm import LGBMRegressor

from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries

# Data download
# ==============================================================================
url = (
    'https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/'
    'data/simulated_items_sales.csv'
)

# Data preparation
# ==============================================================================
# Parse the 'date' column, use it as the index, conform the index to daily
# frequency and sort chronologically, all in one chain.
data = (
    pd.read_csv(url, sep=',')
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
    .set_index('date')
    .asfreq('D')
    .sort_index()
)
# Not the last expression of the cell, so this preview is not rendered.
data.head()

# Split data into train-test
# ==============================================================================
end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train].copy()
data_test  = data.loc[end_train:].copy()

# One summary line per partition: first date, last date and number of rows.
for label, partition in (('Train dates :', data_train), ('Test dates  :', data_test)):
    print(
        f"{label} {partition.index.min()} --- {partition.index.max()}   "
        f"(n={len(partition)})"
    )

Train dates : 2012-01-01 00:00:00 --- 2014-07-15 00:00:00   (n=927)
Test dates  : 2014-07-16 00:00:00 --- 2015-01-01 00:00:00   (n=170)


In [15]:
# Multi-series forecaster: LightGBM, series id encoded as 'ordinal_category'
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
    regressor=LGBMRegressor(random_state=123, verbose=-1),
    lags=24,
    encoding='ordinal_category',
    transformer_series=None,
    transformer_exog=None,
    weight_func=None,
    series_weights=None,
    forecaster_id=None,
)
forecaster.fit(series=data_train)

# Mapping from series name to the integer code used by the encoding
print(forecaster.encoding_mapping)

# Names of the columns LightGBM reports as categorical, if it reports any
booster = forecaster.regressor.booster_
cat_index = booster.params.get('categorical_column')
if cat_index is not None:
    features = booster.feature_name()
    print([features[i] for i in cat_index])

# Rebuild the training matrices only to inspect the dtype of each predictor
(
    X_train,
    y_train,
    series_indexes,
    series_col_names,
    exog_col_names,
    exog_dtypes,
) = forecaster.create_train_X_y(data_train)
print(X_train.dtypes)

forecaster.predict(steps=10)

{'item_1': 0, 'item_2': 1, 'item_3': 2}
['_level_skforecast']
lag_1                 float64
lag_2                 float64
lag_3                 float64
lag_4                 float64
lag_5                 float64
lag_6                 float64
lag_7                 float64
lag_8                 float64
lag_9                 float64
lag_10                float64
lag_11                float64
lag_12                float64
lag_13                float64
lag_14                float64
lag_15                float64
lag_16                float64
lag_17                float64
lag_18                float64
lag_19                float64
lag_20                float64
lag_21                float64
lag_22                float64
lag_23                float64
lag_24                float64
_level_skforecast    category
dtype: object


Unnamed: 0,item_1,item_2,item_3
2014-07-16,25.860322,10.589852,11.821867
2014-07-17,25.710671,11.249642,10.875323
2014-07-18,25.25406,11.214945,12.383875
2014-07-19,24.135732,11.39277,11.752806
2014-07-20,21.717562,11.092514,11.108611
2014-07-21,22.869126,11.367802,10.028264
2014-07-22,25.297145,10.610384,9.839174
2014-07-23,25.480385,12.443352,12.550434
2014-07-24,25.52458,11.56397,11.957162
2014-07-25,25.376043,11.425697,10.186801


In [16]:
# Multi-series forecaster: LightGBM, series id encoded as 'ordinal'
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
    regressor=LGBMRegressor(random_state=123, verbose=-1),
    lags=24,
    encoding='ordinal',
    transformer_series=None,
    transformer_exog=None,
    weight_func=None,
    series_weights=None,
    forecaster_id=None,
)
forecaster.fit(series=data_train)

# Mapping from series name to the integer code used by the encoding
print(forecaster.encoding_mapping)

# Names of the columns LightGBM reports as categorical, if it reports any
booster = forecaster.regressor.booster_
cat_index = booster.params.get('categorical_column')
if cat_index is not None:
    features = booster.feature_name()
    print([features[i] for i in cat_index])

# Rebuild the training matrices only to inspect the dtype of each predictor
(
    X_train,
    y_train,
    series_indexes,
    series_col_names,
    exog_col_names,
    exog_dtypes,
) = forecaster.create_train_X_y(data_train)
print(X_train.dtypes)

forecaster.predict(steps=10)

{'item_1': 0, 'item_2': 1, 'item_3': 2}
lag_1                float64
lag_2                float64
lag_3                float64
lag_4                float64
lag_5                float64
lag_6                float64
lag_7                float64
lag_8                float64
lag_9                float64
lag_10               float64
lag_11               float64
lag_12               float64
lag_13               float64
lag_14               float64
lag_15               float64
lag_16               float64
lag_17               float64
lag_18               float64
lag_19               float64
lag_20               float64
lag_21               float64
lag_22               float64
lag_23               float64
lag_24               float64
_level_skforecast      int64
dtype: object


Unnamed: 0,item_1,item_2,item_3
2014-07-16,25.906323,10.522491,12.034587
2014-07-17,25.807194,10.623789,10.503966
2014-07-18,25.127355,11.299802,12.206434
2014-07-19,23.902609,11.441606,12.61874
2014-07-20,21.660527,11.658107,12.148873
2014-07-21,22.756076,11.377895,10.66693
2014-07-22,25.064381,10.869464,10.115581
2014-07-23,25.070926,12.064482,11.608842
2014-07-24,25.149565,10.882162,11.782225
2014-07-25,25.121728,10.755604,10.307792


In [17]:
# Multi-series forecaster: LightGBM, series id encoded as 'onehot'
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
    regressor=LGBMRegressor(random_state=123, verbose=-1),
    lags=24,
    encoding='onehot',
    transformer_series=None,
    transformer_exog=None,
    weight_func=None,
    series_weights=None,
    forecaster_id=None,
)
forecaster.fit(series=data_train)

# Mapping from series name to the integer code used by the encoding
print(forecaster.encoding_mapping)

# Names of the columns LightGBM reports as categorical, if it reports any
booster = forecaster.regressor.booster_
cat_index = booster.params.get('categorical_column')
if cat_index is not None:
    features = booster.feature_name()
    print([features[i] for i in cat_index])

# Rebuild the training matrices only to inspect the dtype of each predictor
(
    X_train,
    y_train,
    series_indexes,
    series_col_names,
    exog_col_names,
    exog_dtypes,
) = forecaster.create_train_X_y(data_train)
print(X_train.dtypes)

forecaster.predict(steps=10)

{'item_1': 0, 'item_2': 1, 'item_3': 2}
lag_1     float64
lag_2     float64
lag_3     float64
lag_4     float64
lag_5     float64
lag_6     float64
lag_7     float64
lag_8     float64
lag_9     float64
lag_10    float64
lag_11    float64
lag_12    float64
lag_13    float64
lag_14    float64
lag_15    float64
lag_16    float64
lag_17    float64
lag_18    float64
lag_19    float64
lag_20    float64
lag_21    float64
lag_22    float64
lag_23    float64
lag_24    float64
item_1      int64
item_2      int64
item_3      int64
dtype: object


Unnamed: 0,item_1,item_2,item_3
2014-07-16,25.860322,10.589852,11.821867
2014-07-17,25.710671,11.249642,10.875323
2014-07-18,25.25406,11.214945,12.383875
2014-07-19,24.135732,11.39277,11.752806
2014-07-20,21.717562,11.092514,11.108611
2014-07-21,22.869126,11.367802,10.028264
2014-07-22,25.297145,10.610384,9.839174
2014-07-23,25.480385,12.443352,12.550434
2014-07-24,25.52458,11.56397,11.957162
2014-07-25,25.376043,11.425697,10.186801


In [25]:
# Show which input features the fitted regressor treats as categorical.
# `is_categorical_` is an attribute of scikit-learn's
# HistGradientBoostingRegressor; the forecaster fitted by the cell right above
# wraps an LGBMRegressor, so an unguarded lookup only works when a
# HistGradientBoosting-based forecaster is left over in the kernel from an
# out-of-order run. Guard the lookup so the cell also survives
# Restart & Run All (it then displays nothing).
regressor = forecaster.regressor
categorical_mask = getattr(regressor, 'is_categorical_', None)
regressor.feature_names_in_[categorical_mask] if categorical_mask is not None else None

array(['_level_skforecast'], dtype=object)

In [26]:
# Multi-series forecaster: HistGradientBoostingRegressor with the series-id
# column declared as categorical, series id encoded as 'ordinal_category'
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
    regressor=HistGradientBoostingRegressor(
        random_state=123,
        categorical_features=['_level_skforecast'],
    ),
    lags=24,
    encoding='ordinal_category',
    transformer_series=None,
    transformer_exog=None,
    weight_func=None,
    series_weights=None,
    forecaster_id=None,
)
forecaster.fit(series=data_train)

# Mapping from series name to the integer code used by the encoding
print(forecaster.encoding_mapping)

# Names of the features the fitted regressor flags as categorical, if any
fitted_regressor = forecaster.regressor
cat_index = fitted_regressor.is_categorical_
if cat_index is not None:
    features = fitted_regressor.feature_names_in_
    print(features[cat_index])

# Rebuild the training matrices only to inspect the dtype of each predictor
(
    X_train,
    y_train,
    series_indexes,
    series_col_names,
    exog_col_names,
    exog_dtypes,
) = forecaster.create_train_X_y(data_train)
print(X_train.dtypes)

forecaster.predict(steps=10)

{'item_1': 0, 'item_2': 1, 'item_3': 2}
['_level_skforecast']
lag_1                 float64
lag_2                 float64
lag_3                 float64
lag_4                 float64
lag_5                 float64
lag_6                 float64
lag_7                 float64
lag_8                 float64
lag_9                 float64
lag_10                float64
lag_11                float64
lag_12                float64
lag_13                float64
lag_14                float64
lag_15                float64
lag_16                float64
lag_17                float64
lag_18                float64
lag_19                float64
lag_20                float64
lag_21                float64
lag_22                float64
lag_23                float64
lag_24                float64
_level_skforecast    category
dtype: object


Unnamed: 0,item_1,item_2,item_3
2014-07-16,25.792675,11.173958,12.077978
2014-07-17,25.546403,10.943413,10.099237
2014-07-18,25.233413,11.594754,12.391873
2014-07-19,23.950772,11.603061,12.225715
2014-07-20,21.515123,11.582226,11.18319
2014-07-21,22.310733,11.005379,10.95011
2014-07-22,24.708569,11.527696,10.707694
2014-07-23,25.092788,11.724573,12.762186
2014-07-24,25.394611,10.466541,11.711561
2014-07-25,25.516455,10.938599,11.257149
