In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

'/home/ubuntu/varios/skforecast'

In [8]:
# Libraries
# ==============================================================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skforecast.datasets import fetch_dataset
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection import backtesting_forecaster
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
# from sklearn.metrics import mean_absolute_error
# from skforecast.metrics import mean_absolute_scaled_error

In [9]:
import sklearn
import skforecast

print(f'sklearn: {sklearn.__version__}')
print(f'skforecast: {skforecast.__version__}')

sklearn: 1.4.2
skforecast: 0.12.1


In [7]:
#!pip install skforecast
!pip uninstall skforecast -y

Collecting skforecast
  Using cached skforecast-0.12.1-py3-none-any.whl.metadata (22 kB)
Using cached skforecast-0.12.1-py3-none-any.whl (560 kB)
Installing collected packages: skforecast
Successfully installed skforecast-0.12.1


## Single series

In [3]:
# Download data
# ==============================================================================
data = fetch_dataset(
    name="h2o", raw=True, kwargs_read_csv={"names": ["y", "datetime"], "header": 0},
    verbose=False
)

# Data preprocessing
# ==============================================================================
data['datetime'] = pd.to_datetime(data['datetime'], format='%Y-%m-%d')
data = data.set_index('datetime')
data = data.asfreq('MS')
data = data['y']
data = data.sort_index()

# Train-validation dates
# ==============================================================================
end_train = '2002-01-01 23:59:00'

In [4]:
# Backtesting forecaster
# ==============================================================================
forecaster = ForecasterAutoreg(
                 regressor = RandomForestRegressor(random_state=123),
                 lags      = 1 
             )

metric, predictions = backtesting_forecaster(
                          forecaster            = forecaster,
                          y                     = data,
                          steps                 = 10,
                          metric                = 'mean_absolute_scaled_error',
                          initial_train_size    = len(data.loc[:end_train]),
                          fixed_train_size      = False,
                          gap                   = 0,
                          allow_incomplete_fold = True,
                          refit                 = False,
                          n_jobs                = 'auto',
                          verbose               = False,
                          show_progress         = True  
                      )

print(f"Backtesting MASE: {metric}")

# Manual check
# ==============================================================================
mae_foreast = mean_absolute_error(data.loc[end_train:], predictions)
naive_in_sample_foreast = data.loc[:end_train].shift(1).dropna()
mae_in_sample = mean_absolute_error(data.loc[naive_in_sample_foreast.index], naive_in_sample_foreast)
print(f"mae forecast: {mae_foreast}")
print(f"mae in sample: {mae_in_sample}")
print(f"mase: {mae_foreast/mae_in_sample}")
assert metric == mae_foreast/mae_in_sample

  0%|          | 0/8 [00:00<?, ?it/s]

Backtesting MASE: 2.344471182248395
mae forecast: 0.19922359134545478
mae in sample: 0.08497591817460318
mase: 2.344471182248395


In [5]:
# Backtesting forecaster
# ==============================================================================
forecaster = ForecasterAutoreg(
                 regressor = RandomForestRegressor(random_state=123),
                 lags      = 1 
             )

metric, predictions = backtesting_forecaster(
                          forecaster            = forecaster,
                          y                     = data,
                          steps                 = 10,
                          metric                = 'mean_absolute_scaled_error',
                          initial_train_size    = len(data.loc[:end_train]),
                          fixed_train_size      = False,
                          gap                   = 0,
                          allow_incomplete_fold = True,
                          refit                 = 3,
                          n_jobs                = 'auto',
                          verbose               = True,
                          show_progress         = True  
                      )

print(f"Backtesting MASE: {metric}")

# Manual check
# ==============================================================================
refit_intervals = [
    ('1991-07-01 00:00:00', '2002-01-01 00:00:00'),
    ('1991-07-01 00:00:00', '2004-07-01 00:00:00'),
    ('1991-07-01 00:00:00', '2007-01-01 00:00:00')
]

mae_foreast = mean_absolute_error(data.loc[end_train:], predictions)
y_train = pd.concat([data.loc[start:end] for start, end in refit_intervals])
y_train = y_train.loc[~y_train.index.duplicated(keep='first')]
naive_in_sample_foreast = y_train.shift(1).dropna()
mae_in_sample = mean_absolute_error(y_train.loc[naive_in_sample_foreast.index], naive_in_sample_foreast)
print(f"mae forecast: {mae_foreast}")
print(f"mae in sample: {mae_in_sample}")
print(f"mase: {mae_foreast/mae_in_sample}")
assert metric == mae_foreast/mae_in_sample

Information of backtesting process
----------------------------------
Number of observations used for initial training: 127
Number of observations used for backtesting: 77
    Number of folds: 8
    Number of steps per fold: 10
    Number of steps to exclude from the end of each train set before test (gap): 0
    Last fold only includes 7 observations.

Fold: 0
    Training:   1991-07-01 00:00:00 -- 2002-01-01 00:00:00  (n=127)
    Validation: 2002-02-01 00:00:00 -- 2002-11-01 00:00:00  (n=10)
Fold: 1
    Training:   1991-07-01 00:00:00 -- 2002-01-01 00:00:00  (n=127)
    Validation: 2002-12-01 00:00:00 -- 2003-09-01 00:00:00  (n=10)
Fold: 2
    Training:   1991-07-01 00:00:00 -- 2002-01-01 00:00:00  (n=127)
    Validation: 2003-10-01 00:00:00 -- 2004-07-01 00:00:00  (n=10)
Fold: 3
    Training:   1991-07-01 00:00:00 -- 2004-07-01 00:00:00  (n=157)
    Validation: 2004-08-01 00:00:00 -- 2005-05-01 00:00:00  (n=10)
Fold: 4
    Training:   1991-07-01 00:00:00 -- 2004-07-01 00:00:00  (n=1

  0%|          | 0/8 [00:00<?, ?it/s]

Backtesting MASE: 2.462755601999461
mae forecast: 0.23185816123376632
mae in sample: 0.09414582634408604
mase: 2.462755601999461


## Multiseries

In [6]:
# Download data
# ==============================================================================
data = fetch_dataset(name="items_sales")
data.head()

# Split data into train-val-test
# ==============================================================================
end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()

items_sales
-----------
Simulated time series for the sales of 3 different items.
Simulated data.
Shape of the dataset: (1097, 3)


In [7]:
# Backtesting forecaster: no aggregation
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
                 regressor = Ridge(),
                 lags      = 1 
             )

metric, predictions = backtesting_forecaster_multiseries(
                          forecaster            = forecaster,
                          series                = data,
                          steps                 = 10,
                          metric                = 'mean_absolute_scaled_error',
                          initial_train_size    = len(data.loc[:end_train]),
                          fixed_train_size      = True,
                          gap                   = 0,
                          allow_incomplete_fold = True,
                          refit                 = False,
                          n_jobs                = 'auto',
                          verbose               = False,
                          show_progress         = True  
                      )

print("Backtesting MASE:")
display(metric)

# Manual check
# ==============================================================================
mae_foreast = mean_absolute_error(
    data.loc[end_train:], predictions, multioutput="raw_values"
)
naive_in_sample_foreast = data.loc[:end_train].shift(1).dropna().drop_duplicates()
mae_in_sample = mean_absolute_error(
    data.loc[naive_in_sample_foreast.index],
    naive_in_sample_foreast,
    multioutput="raw_values",
)
mase = mae_foreast / mae_in_sample
print(f"mase: {mase}")
assert (metric['mean_absolute_scaled_error'] == mase).all()



  0%|          | 0/17 [00:00<?, ?it/s]

TypeError: mean_absolute_scaled_error() missing 1 required positional argument: 'y_train'

In [None]:
# Backtesting forecaster: aggregation
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
                 regressor = Ridge(),
                 lags      = 1 
             )

metric, predictions = backtesting_forecaster_multiseries(
                          forecaster            = forecaster,
                          series                = data,
                          steps                 = 10,
                          metric                = ['mean_absolute_scaled_error', 'mean_absolute_error'],
                          add_aggregated_metric = True,
                          initial_train_size    = len(data.loc[:end_train]),
                          fixed_train_size      = True,
                          gap                   = 0,
                          allow_incomplete_fold = True,
                          refit                 = False,
                          n_jobs                = 'auto',
                          verbose               = False,
                          show_progress         = True  
                      )
display(metric)

# Manual check
# ==============================================================================
mae_foreast = mean_absolute_error(
    data.loc[end_train:], predictions, multioutput="raw_values"
)
naive_in_sample_foreast = data.loc[:end_train].shift(1).dropna().drop_duplicates()
mae_in_sample_foreast = mean_absolute_error(
    data.loc[naive_in_sample_foreast.index],
    naive_in_sample_foreast,
    multioutput="raw_values",
)
mase = mae_foreast/mae_in_sample
average_mase = mase.mean()
weighted_average_mase = np.average(mase, weights=predictions.notna().sum())
average_mae = mae_foreast.mean()
weighted_average_mae = np.average(mae_foreast, weights=predictions.notna().sum())

pooled_y_true_y_pred = pd.merge(
    data.melt(ignore_index=False).reset_index(names='datetime'),
    predictions.melt(ignore_index=False).reset_index(names='datetime'),
    on=['datetime', 'variable'],
    suffixes=('_true', '_pred'),
    how='inner'
)
pooled_mae_foreast = mean_absolute_error(
    pooled_y_true_y_pred['value_true'],
    pooled_y_true_y_pred['value_pred']
)
pooled_naive_in_sample_foreast = np.concatenate([np.diff(data.loc[:end_train, col]) for col in data.columns])
pooled_mae_in_sample_forecast = np.mean(np.abs(pooled_naive_in_sample_foreast))
pooled_mase = pooled_mae_foreast/pooled_mae_in_sample_forecast
expected_results = pd.DataFrame({
    'levels': ['average', 'weighted_average', 'pooling'],
    'mean_absolute_scaled_error': [average_mase, weighted_average_mase, pooled_mase],
    'mean_absolute_error': [average_mae, weighted_average_mae, pooled_mae_foreast]
})

pd.testing.assert_frame_equal(
    metric.query('levels in ["average", "weighted_average", "pooling"]').reset_index(drop=True),
    expected_results
)



  0%|          | 0/17 [00:00<?, ?it/s]

Unnamed: 0,levels,mean_absolute_scaled_error,mean_absolute_error
0,item_1,1.091527,1.69132
1,item_2,1.563339,3.707672
2,item_3,0.959262,3.571717
3,average,1.204709,2.990236
4,weighted_average,1.204709,2.990236
5,pooling,1.17348,2.990236


In [None]:
# Backtesting forecaster
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
                 regressor = Ridge(),
                 lags      = 1 
             )

metric, predictions = backtesting_forecaster_multiseries(
                          forecaster            = forecaster,
                          series                = data,
                          steps                 = 10,
                          metric                = ['mean_absolute_scaled_error', 'mean_absolute_error'],
                          initial_train_size    = len(data.loc[:end_train]),
                          fixed_train_size      = True,
                          gap                   = 0,
                          allow_incomplete_fold = True,
                          refit                 = 10,
                          n_jobs                = 'auto',
                          verbose               = True,
                          show_progress         = True  
                      )

print("Backtesting MASE:")
display(metric)

# Manual check
# ==============================================================================
# Manual check
refit_intervals = [
    ('2012-01-01 00:00:00', '2014-07-15 00:00:00'),
    ('2012-04-10 00:00:00', '2014-10-23 00:00:00')
]

mae_forecast = mean_absolute_error(
    data.loc[end_train:], predictions, multioutput="raw_values"
)
data_train = pd.concat([data.loc[start:end, :] for start, end in refit_intervals])
data_train = data_train.loc[~data_train.index.duplicated(keep='first'), :]
naive_in_sample_foreast = data_train.shift(1).dropna()
mae_in_sample = mean_absolute_error(
    data.loc[naive_in_sample_foreast.index],
    naive_in_sample_foreast,
    multioutput="raw_values",
)
mase = mae_forecast/mae_in_sample
mase_average = mase.mean()
mase_weighted_average = np.average(mase, weights=predictions.notna().sum())

print(f"mae forecast: {mae_forecast}")
print(f"mae in sample: {mae_in_sample}")
print(f"mase: {mase}")
print(f"mase average: {mase_average}")
print(f"mase weighted average: {mase_weighted_average}")
assert (metric.loc[metric['levels']=='average', 'mean_absolute_scaled_error'] == mase_average).all()
assert (metric.loc[metric['levels']=='weighted_average', 'mean_absolute_scaled_error'] == mase_weighted_average).all()

Information of backtesting process
----------------------------------
Number of observations used for initial training: 927
Number of observations used for backtesting: 170
    Number of folds: 17
    Number of steps per fold: 10
    Number of steps to exclude from the end of each train set before test (gap): 0

Fold: 0
    Training:   2012-01-01 00:00:00 -- 2014-07-15 00:00:00  (n=927)
    Validation: 2014-07-16 00:00:00 -- 2014-07-25 00:00:00  (n=10)
Fold: 1
    Training:   2012-01-01 00:00:00 -- 2014-07-15 00:00:00  (n=927)
    Validation: 2014-07-26 00:00:00 -- 2014-08-04 00:00:00  (n=10)
Fold: 2
    Training:   2012-01-01 00:00:00 -- 2014-07-15 00:00:00  (n=927)
    Validation: 2014-08-05 00:00:00 -- 2014-08-14 00:00:00  (n=10)
Fold: 3
    Training:   2012-01-01 00:00:00 -- 2014-07-15 00:00:00  (n=927)
    Validation: 2014-08-15 00:00:00 -- 2014-08-24 00:00:00  (n=10)
Fold: 4
    Training:   2012-01-01 00:00:00 -- 2014-07-15 00:00:00  (n=927)
    Validation: 2014-08-25 00:00:00 --



  0%|          | 0/17 [00:00<?, ?it/s]

Backtesting MASE:


Unnamed: 0,levels,mean_absolute_scaled_error,mean_absolute_error
0,item_1,1.106355,1.690569
1,item_2,1.571983,3.670363
2,item_3,1.005813,3.715934


mae forecast: [1.69056888 3.67036308 3.71593437]
mae in sample: [1.52805244 2.33486251 3.69445992]
mase: [1.10635527 1.57198253 1.00581261]
mase average: 1.2280501371035948
mase weighted average: 1.228050137103595


In [None]:
# Download data
# ==============================================================================
data = fetch_dataset(name="items_sales")
data.head()

# Split data into train-val-test
# ==============================================================================
end_train = '2014-07-15 23:59:00'
end_val = '2014-10-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_val = data.loc[end_train:end_val, :].copy()
data_test  = data.loc[end_val:, :].copy()

items_sales
-----------
Simulated time series for the sales of 3 different items.
Simulated data.
Shape of the dataset: (1097, 3)


In [None]:
forecaster = ForecasterAutoregMultiSeries(
                 regressor = Ridge(),
                 lags      = 1 
             )
lags_grid =[1, 2, 3]
param_grid = {'alpha': [0, 1, 10000000000]}

results = grid_search_forecaster_multiseries(
                          forecaster            = forecaster,
                          series                = data.loc[:end_val, :],
                          lags_grid             = lags_grid,
                          param_grid            = param_grid,
                          steps                 = 12,
                          metric                = ['mean_absolute_error', 'mean_absolute_scaled_error'],
                          aggregate_metric      = 'pooling',
                          refit                 = False,
                          initial_train_size    = len(data.loc[:end_train]),
                          fixed_train_size      = True,
                          return_best           = False,
                          n_jobs                = 'auto',
                          verbose               = False,
                          show_progress         = True
                      )

results

9 models compared for 3 level(s). Number of iterations: 9.




lags grid:   0%|          | 0/3 [00:00<?, ?it/s]

params grid:   0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0,aggregation,lags,lags_label,params,mean_absolute_error,mean_absolute_scaled_error,alpha
0,pooling,"[1, 2, 3]","[1, 2, 3]",{'alpha': 0},3.011987,1.182015,0
1,pooling,"[1, 2, 3]","[1, 2, 3]",{'alpha': 1},3.012102,1.182061,1
2,pooling,"[1, 2]","[1, 2]",{'alpha': 0},3.285467,1.289339,0
3,pooling,"[1, 2]","[1, 2]",{'alpha': 1},3.285631,1.289404,1
4,pooling,[1],[1],{'alpha': 0},3.411347,1.338739,0
5,pooling,[1],[1],{'alpha': 1},3.411571,1.338827,1
6,pooling,"[1, 2, 3]","[1, 2, 3]",{'alpha': 10000000000},4.9861,1.956731,10000000000
7,pooling,"[1, 2]","[1, 2]",{'alpha': 10000000000},4.988957,1.957852,10000000000
8,pooling,[1],[1],{'alpha': 10000000000},4.99071,1.95854,10000000000


In [None]:
forecaster = ForecasterAutoregMultiSeries(
                 regressor = Ridge(),
                 lags      = 1,
                 encoding= 'ordinal'
             )

def search_space(trial):
    search_space  = {
        'lags'             : trial.suggest_categorical('lags', [3, 5]),
        'alpha'            : trial.suggest_float('alpha', 0.1, 1.0),
    }
    
    return search_space

results, best_trial = bayesian_search_forecaster_multiseries(
                          forecaster            = forecaster,
                          series                = data.loc[:end_val, :],
                          search_space          = search_space,
                          steps                 = 12,
                          metric                = 'mean_absolute_error',
                          aggregate_metric      = 'weighted_average',
                          refit                 = False,
                          initial_train_size    = len(data.loc[:end_train]),
                          fixed_train_size      = True,
                          n_trials              = 10,
                          random_state          = 123,
                          return_best           = False,
                          n_jobs                = 'auto',
                          verbose               = False,
                          show_progress         = True,
                          engine                = 'optuna',
                          kwargs_create_study   = {},
                          kwargs_study_optimize = {}
                      )

results.head(4)



  0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,aggregation,lags,params,mean_absolute_error,alpha
0,weighted_average,"[1, 2, 3, 4, 5]",{'alpha': 0.4807958141120149},2.562235,0.480796
1,weighted_average,"[1, 2, 3, 4, 5]",{'alpha': 0.667878511469039},2.562239,0.667879
2,weighted_average,"[1, 2, 3, 4, 5]",{'alpha': 0.6709608626961889},2.562239,0.670961
3,weighted_average,"[1, 2, 3]",{'alpha': 0.2579065805327433},3.012016,0.257907


In [10]:
# Unit test backtesting_forecaster_multiseries
# ==============================================================================
import re
import pytest
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
from lightgbm import LGBMRegressor
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error

from skforecast.exceptions import IgnoredArgumentWarning
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.ForecasterAutoregMultiSeriesCustom import ForecasterAutoregMultiSeriesCustom
from skforecast.ForecasterAutoregMultiVariate import ForecasterAutoregMultiVariate
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import backtesting_forecaster_multivariate

# Fixtures
from skforecast.model_selection_multiseries.tests.fixtures_model_selection_multiseries import series
THIS_DIR = "/home/ubuntu/varios/skforecast/skforecast/model_selection_multiseries/tests"
series_dict = joblib.load(THIS_DIR+ "/" + 'fixture_sample_multi_series.joblib')
exog_dict = joblib.load(THIS_DIR+ "/" +'fixture_sample_multi_series_exog.joblib')
end_train = "2016-07-31 23:59:00"
series_dict_train = {k: v.loc[:end_train,] for k, v in series_dict.items()}
exog_dict_train = {k: v.loc[:end_train,] for k, v in exog_dict.items()}
series_dict_test = {k: v.loc[end_train:,] for k, v in series_dict.items()}
exog_dict_test = {k: v.loc[end_train:,] for k, v in exog_dict.items()}
series_with_nans = series.copy()
series_with_nans.loc[:10, 'l2'] = np.nan

def create_predictors(y): # pragma: no cover
    """
    Create first 2 lags of a time series.
    """
    lags = y[-1:-3:-1]

    return lags

def create_predictors_14(y): # pragma: no cover
    """
    Create first 14 lags of a time series.
    """
    lags = y[-1:-15:-1]

    return lags


In [11]:
forecaster = ForecasterAutoregMultiSeries(regressor=Ridge(random_state=123), 
                            lags=2, transformer_series=None,
                            encoding='onehot')


n_validation = 20
steps = 5
gap = 3

metrics_levels, backtest_predictions = backtesting_forecaster_multiseries(
                    forecaster            = forecaster,
                    series                = series_with_nans,
                    steps                 = steps,
                    levels                = 'l1',
                    metric                = 'mean_absolute_error',
                    initial_train_size    = len(series_with_nans) - n_validation,
                    gap                   = gap,
                    allow_incomplete_fold = True,
                    refit                 = False,
                    fixed_train_size      = False,
                    exog                  = series_with_nans['l1'].rename('exog_1'),
                    interval              = [5, 95],
                    n_boot                = 150,
                    random_state          = 123,
                    in_sample_residuals   = True,
                    verbose               = False
                )

expected_metric = pd.DataFrame({'levels': ['l1'], 
                                    'mean_absolute_error': [0.11243765852459384]})
metrics_levels

  0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0,levels,mean_absolute_error
0,l1,0.112515
