In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Put the parent of the current working directory on the import path (right
# after the first entry) and echo it so the location can be checked visually.
parent_dir = str(Path.cwd().parent)
sys.path.insert(1, parent_dir)
parent_dir

'c:\\Users\\jaesc2\\GitHub\\skforecast'

In [2]:
from typing import Union, Tuple, Optional, Callable
import numpy as np
import pandas as pd
import warnings
import logging
from copy import deepcopy
from joblib import Parallel, delayed, cpu_count
from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import ParameterSampler
import optuna
from optuna.samplers import TPESampler, RandomSampler

from skforecast.exceptions import LongTrainingWarning
from skforecast.exceptions import IgnoredArgumentWarning
from skforecast.utils import check_backtesting_input
from skforecast.model_selection.model_selection import _create_backtesting_folds
from skforecast.model_selection.model_selection import _get_metric

In [3]:
def _create_backtesting_folds_2(
    data: Union[pd.Series, pd.DataFrame],
    window_size: int,
    initial_train_size: Union[int, None],
    test_size: int,
    externally_fitted: bool=False,
    refit: Optional[Union[bool, int]]=False,
    fixed_train_size: bool=True,
    gap: int=0,
    allow_incomplete_fold: bool=True,
    return_all_indexes: bool=False,
    verbose: bool=True
) -> list:
    """
    This function is designed to work after passing the `check_backtesting_input` 
    function from `skforecast.utils`.

    Provides train/test indices (position) to split time series data samples that
    are observed at fixed time intervals, in train/test sets. In each split, test
    indices must be higher than before.

    Three arrays are returned for each fold with the position of train, test
    including the gap, and test excluding the gap. The gap is the number of
    samples to exclude from the end of each train set before the test set. The
    test excluding the gap is the one that must be used to make evaluate the
    model. The test including the gap is provided for convenience.

    Returned indexes are not the indexes of the original time series, but the
    positional indexes of the samples in the time series. For example, if the   
    original time series is `y = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]`, the
    returned indexes for the first fold if  `test_size = 4`, `gap = 1` and 
    `initial_train_size = 2` are: `[[0, 1], [2, 3, 4, 5], [3, 4, 5]]]`. This means
    that the first fold is using the samples with positional indexes 0 and 1 in
    the time series as training set, and the samples with positional indexes 2,
    3, 4 and 5 as test set, but only the samples with positional indexes 3, 4 and
    5 should be used to evaluate the model since `gap = 1`. The second fold would
    be `[[0, 1, 2, 3], [4, 5, 6, 7], [5, 6, 7]]`, and so on.

    Each fold also provides information as to whether the Forecaster needs to 
    be trained or not.
    
    Parameters
    ----------
    data : pandas Series, pandas DataFrame
        Time series values.
    window_size : int
        Size of the window needed to create the predictors.
    initial_train_size : int, None
        Size of the training set in the first fold. If `None` or 0, the initial
        fold does not include a training set.
    test_size : int
        Size of the test set in each fold.
    externally_fitted : bool, default `False`
        Flag indicating whether the forecaster is already trained. Only used when 
        `initial_train_size` is None and `refit` is False.
    refit : bool, int, default `False`
        Whether to re-fit the forecaster in each iteration. If `refit` is an integer, 
        the Forecaster will be trained every that number of iterations.
    fixed_train_size : bool, default `True`
        If True, train size doesn't increase but moves by `steps` in each iteration.
    gap : int, default `0`
        Number of samples to be excluded after the end of each training set and 
        before the test set.
    allow_incomplete_fold : bool, default `True`
        Last fold is allowed to have a smaller number of samples than the 
        `test_size`. If `False`, the last fold is excluded.
    return_all_indexes : bool, default `False`
        If `True`, return all the indexes included in each fold. If `False`, return
        only the first and last index of each partition in each fold.
    verbose : bool, default `True`
        Print information if the folds created.

    Returns
    -------
    folds : list
        List containing the indices (position) of `y` for training, test including
        the gap, test excluding the gap for each fold, and whether fir the Forecaster.
    
    """
    
    idx = range(len(data))
    folds = []
    i = 0
    last_fold_excluded = False

    while initial_train_size + (i * test_size) + gap < len(data):

        if refit:
            # If fixed_train_size the train size doesn't increase but moves by 
            # `test_size` positions in each iteration. If False, the train size
            # increases by `test_size` in each iteration.
            train_idx_start = i * (test_size) if fixed_train_size else 0
            train_idx_end = initial_train_size + i * (test_size)
            test_idx_start = train_idx_end
        else:
            # The train size doesn't increase and doesn't move.
            train_idx_start = 0
            train_idx_end = initial_train_size
            test_idx_start = initial_train_size + i * (test_size)
            
        last_window_start = test_idx_start - window_size 
        test_idx_end = test_idx_start + gap + test_size
    
        partitions = [
            idx[train_idx_start : train_idx_end],
            idx[last_window_start : test_idx_start],
            idx[test_idx_start : test_idx_end],
            idx[test_idx_start + gap : test_idx_end]
        ]
        folds.append(partitions)
        i += 1

    if not allow_incomplete_fold:
        if len(folds[-1][2]) < test_size:
            folds = folds[:-1]
            last_fold_excluded = True

    # Replace partitions inside folds with length 0 with None
    folds = [[partition if len(partition) > 0 else None 
              for partition in fold] 
             for fold in folds]

    # Create a flag to know whether to train the forecaster
    if refit == 0:
        refit = False
        
    if isinstance(refit, bool):
        fit_forecaster = [refit]*len(folds)
        fit_forecaster[0] = True
    else:
        fit_forecaster = [False]*len(folds)
        for i in range(0, len(fit_forecaster), refit): 
            fit_forecaster[i] = True
    
    for i in range(len(folds)): 
        folds[i].append(fit_forecaster[i])
        if fit_forecaster[i] is False:
            folds[i][0] = folds[i-1][0]

    # This is done to allow parallelization. The first Forecaster fit is 
    # outside the auxiliary function.
    fit_forecaster[0] = False
    
    if verbose:
        print("Information of backtesting process")
        print("----------------------------------")
        if externally_fitted:
            print(f"An already trained forecaster is to be used. Window size: {window_size}")
        else:
            print(f"Number of observations used for initial training: {initial_train_size}")
        print(f"Number of observations used for backtesting: {len(data) - initial_train_size}")
        print(f"    Number of folds: {len(folds)}")
        print(f"    Number of steps per fold: {test_size}")
        print(f"    Number of steps to exclude from the end of each train set before test (gap): {gap}")
        if last_fold_excluded:
            print("    Last fold has been excluded because it was incomplete.")
        if len(folds[-1][3]) < test_size:
            print(f"    Last fold only includes {len(folds[-1][3])} observations.")
        print("")

        for i, fold in enumerate(folds):
            training_start    = data.index[fold[0][0]] if fold[0] is not None else None
            training_end      = data.index[fold[0][-1]] if fold[0] is not None else None
            training_length   = len(fold[0]) if fold[0] is not None else 0
            validation_start  = data.index[fold[3][0]]
            validation_end    = data.index[fold[3][-1]]
            validation_length = len(fold[3])
            print(f"Fold: {i}")
            if not externally_fitted:
                print(
                    f"    Training:   {training_start} -- {training_end}  (n={training_length})"
                )
            print(
                f"    Validation: {validation_start} -- {validation_end}  (n={validation_length})"
            )
        print("")

    if not return_all_indexes:
        # +1 to prevent iloc pandas from deleting the last observation
        folds = [
            [[fold[0][0], fold[0][-1]+1], 
             [fold[1][0], fold[1][-1]+1], 
             [fold[2][0], fold[2][-1]+1],
             [fold[3][0], fold[3][-1]+1],
             fold[4]] 
            for fold in folds
        ]

    return folds

In [8]:
from skforecast.model_selection.model_selection import _create_backtesting_folds

# Toy daily series of 100 observations used to inspect the generated folds.
y = pd.Series(np.arange(100))
y.index = pd.date_range(start='2022-01-01', periods=100, freq='D')

# Fold configuration. `refit` holds the value that is really passed to the
# function (it used to be set to `False` here while `2` was hard-coded in the
# call, which made the variable misleading).
initial_train_size = 50
gap = 5
test_size = 7
refit = 2
allow_incomplete_fold = False

folds = _create_backtesting_folds_2(
            data                  = y,
            window_size           = 5,
            initial_train_size    = initial_train_size,
            test_size             = test_size,
            externally_fitted     = False,
            refit                 = refit,
            fixed_train_size      = False,
            gap                   = gap,
            allow_incomplete_fold = allow_incomplete_fold,
            return_all_indexes    = False,
            verbose               = False
        )

In [9]:
# Show the folds built above. With `return_all_indexes=False` every fold is
# [train, last_window, test_incl_gap, test_excl_gap, fit_flag], each partition
# given as a [start, end) pair of positions.
folds

[[[0, 50], [45, 50], [50, 62], [55, 62], True],
 [[0, 50], [52, 57], [57, 69], [62, 69], False],
 [[0, 64], [59, 64], [64, 76], [69, 76], True],
 [[0, 64], [66, 71], [71, 83], [76, 83], False],
 [[0, 78], [73, 78], [78, 90], [83, 90], True],
 [[0, 78], [80, 85], [85, 97], [90, 97], False],
 [[0, 92], [87, 92], [92, 100], [97, 100], True]]

In [43]:
# Truthiness probe: `refit = 0` is falsy, so the `if` body is skipped and
# nothing is printed.
refit = 0

if refit:
    print(2)

In [56]:
refit = 4

# Scratch version of the fit-flag logic: a boolean `refit` is repeated for
# every fold, an integer `refit` marks every `refit`-th fold starting at 0.
n_folds = len(folds)
if isinstance(refit, bool):
    fit_forecaster = [refit] * n_folds
else:
    fit_forecaster = [False] * n_folds
    for i in range(0, n_folds, refit):
        fit_forecaster[i] = True

# NOTE: `folds` is mutated in place, so every re-run of this cell appends one
# more flag to each fold.
for i, flag in enumerate(fit_forecaster):
    folds[i].append(flag)
    if flag is False:
        # A fold that is not refitted reuses the training partition of the
        # previous fold.
        folds[i][0] = folds[i - 1][0]

folds

[[[0, 91], [70, 82], [75, 82], 1, True, False, True, True, True],
 [[0, 91], [77, 89], [82, 89], 1, True, False, False, False, False],
 [[0, 91], [84, 96], [89, 96], 1, True, False, True, False, False],
 [[0, 91], [91, 100], [96, 100], 1, True, False, False, True, False]]

In [25]:
# Append an extra `True` element to the first fold. This mutates `folds` in
# place, so every re-run of the cell appends one more element.
folds[0].append(True)

In [72]:
# Scratch list of three `False` flags.
aa = [False] * 3

In [74]:
# Overwrite the first flag in place and display the resulting list.
aa[0] = True
aa

[True, False, False]

In [21]:
# Probe of an `n_boot` validation expression: -1 passes the type check but is
# negative, so the condition is true and 1 is printed.
n_boot = -1

if not isinstance(n_boot, (bool, int, np.integer)) or n_boot < 0:
    print(1)

1


# _backtesting_forecaster

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection.model_selection import _backtesting_forecaster

from skforecast.model_selection.tests.fixtures_model_selection import y
from skforecast.model_selection.tests.fixtures_model_selection import exog
from skforecast.model_selection.tests.fixtures_model_selection import out_sample_residuals

In [20]:
%%timeit -n 200 -r 4

# Benchmark body (executed repeatedly by `%%timeit`): build a forecaster and
# backtest it with `refit = True` and no prediction interval.
forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

# Hold out the last `n_backtest` observations; `y_train` is only used for its
# length (the initial training size).
n_backtest = 12
y_train = y[:-n_backtest]

metric, backtest_predictions = _backtesting_forecaster(
                                    forecaster          = forecaster,
                                    y                   = y,
                                    exog                = None,
                                    refit = True,
                                    initial_train_size  = len(y_train),
                                    steps               = 4,
                                    metric              = 'mean_squared_error',
                                    interval            = None,
                                    n_boot              = 500,
                                    random_state        = 123,
                                    in_sample_residuals = True,
                                    n_jobs              = 1,
                                    verbose             = False,
                                    show_progress = False
                                )

6.27 ms ± 74 µs per loop (mean ± std. dev. of 4 runs, 200 loops each)


In [18]:
%%timeit -n 200 -r 4

# Benchmark body (executed repeatedly by `%%timeit`): build a forecaster and
# backtest it with `refit = True` and no prediction interval.
# NOTE(review): identical to the previous benchmark cell; presumably re-run
# against a different version of the library code to compare timings -- confirm.
forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

# Hold out the last `n_backtest` observations; `y_train` is only used for its
# length (the initial training size).
n_backtest = 12
y_train = y[:-n_backtest]

metric, backtest_predictions = _backtesting_forecaster(
                                    forecaster          = forecaster,
                                    y                   = y,
                                    exog                = None,
                                    refit = True,
                                    initial_train_size  = len(y_train),
                                    steps               = 4,
                                    metric              = 'mean_squared_error',
                                    interval            = None,
                                    n_boot              = 500,
                                    random_state        = 123,
                                    in_sample_residuals = True,
                                    n_jobs              = 1,
                                    verbose             = False,
                                    show_progress = False
                                )

6.29 ms ± 60.9 µs per loop (mean ± std. dev. of 4 runs, 200 loops each)


## Tests

In [9]:
import pytest
import numpy as np
import pandas as pd
from pytest import approx
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from skforecast.model_selection.model_selection import backtesting_forecaster

from skforecast.model_selection.tests.fixtures_model_selection import y
from skforecast.model_selection.tests.fixtures_model_selection import exog
from skforecast.model_selection.tests.fixtures_model_selection import out_sample_residuals

In [13]:
# Direct forecaster (Ridge, 3 lags, 8 steps) backtested on the fixture series
# `y` / `exog` with an integer `refit`.
forecaster = ForecasterAutoregDirect(
                     regressor = Ridge(random_state=123), 
                     lags      = 3,
                     steps     = 8
                 )

# Hold out the last `n_backtest` observations; `y_train` is only used for its
# length (the initial training size).
n_backtest = 20
y_train = y[:-n_backtest]

# 20 held-out observations with 2 steps per fold: the progress bar reports 10
# folds. `refit = 2` with `fixed_train_size = False`; presumably the forecaster
# is re-trained every second fold on a growing window -- confirm against the
# library documentation.
metric, backtest_predictions = backtesting_forecaster(
                                    forecaster            = forecaster,
                                    y                     = y,
                                    exog                  = exog,
                                    refit                 = 2,
                                    initial_train_size    = len(y_train),
                                    fixed_train_size      = False,
                                    gap                   = 0,
                                    allow_incomplete_fold = True,
                                    steps                 = 2,
                                    metric                = 'mean_squared_error',
                                    interval              = [5, 95],
                                    n_boot                = 500,
                                    random_state          = 123,
                                    in_sample_residuals   = True,
                                    verbose               = False
                                )

  0%|          | 0/10 [00:00<?, ?it/s]

In [14]:
# Value returned as `metric` by the backtesting call above
# (`metric = 'mean_squared_error'`).
metric

0.06099110404144631

In [12]:
# Backtest predictions as a plain array (three columns per predicted step).
# NOTE(review): presumably prediction, lower bound and upper bound for
# `interval = [5, 95]` -- confirm.
backtest_predictions.to_numpy()

array([[0.55616986, 0.15288789, 0.89198752],
       [0.48751797, 0.14866438, 0.83169303],
       [0.57764391, 0.17436194, 0.91346157],
       [0.51298667, 0.17413308, 0.85716173],
       [0.47430051, 0.0796644 , 0.82587748],
       [0.49192271, 0.14609696, 0.95959395],
       [0.52213783, 0.12750172, 0.8737148 ],
       [0.54492575, 0.1991    , 1.012597  ],
       [0.52501537, 0.13641764, 0.86685356],
       [0.4680474 , 0.08515461, 0.81792677],
       [0.51059498, 0.12199725, 0.85243317],
       [0.53067132, 0.14777853, 0.88055069],
       [0.4430938 , 0.0509291 , 0.69854202],
       [0.49911716, 0.1231365 , 0.8711497 ],
       [0.44546347, 0.05329877, 0.70091169],
       [0.46530749, 0.08932683, 0.83734003],
       [0.46901878, 0.08173407, 0.82098555],
       [0.55371362, 0.14618224, 0.98199137],
       [0.60759064, 0.22030593, 0.9595574 ],
       [0.50415336, 0.09662198, 0.93243111]])

In [43]:
# Copies of the fixtures with a daily DatetimeIndex of 50 periods starting on
# 2022-01-01, so the originals are left untouched.
y_with_index = y.copy()
y_with_index.index = pd.date_range(start='2022-01-01', periods=50, freq='D')
exog_with_index = exog.copy()
exog_with_index.index = pd.date_range(start='2022-01-01', periods=50, freq='D')

forecaster = ForecasterAutoreg(regressor=Ridge(random_state=123), lags=3)

# Backtest the last 20 observations with a gap of 3, 4 steps per fold, an
# integer `refit` and incomplete folds disallowed. `verbose = True` prints the
# resulting folds (4 folds in the output below).
metric, backtest_predictions = backtesting_forecaster(
                                    forecaster            = forecaster,
                                    y                     = y_with_index,
                                    exog                  = exog_with_index,
                                    refit                 = 3,
                                    initial_train_size    = len(y_with_index) - 20,
                                    fixed_train_size      = True,
                                    gap                   = 3,
                                    allow_incomplete_fold = False,
                                    steps                 = 4,
                                    metric                = 'mean_squared_error',
                                    interval              = [5, 95],
                                    n_boot                = 500,
                                    random_state          = 123,
                                    in_sample_residuals   = True,
                                    verbose               = True,
                                    n_jobs                = 1
                                )
# Convert the predictions to daily frequency; the name is rebound to the result.
backtest_predictions = backtest_predictions.asfreq('D')

Information of backtesting process
----------------------------------
Number of observations used for initial training: 30
Number of observations used for backtesting: 20
    Number of folds: 4
    Number of steps per fold: 4
    Number of steps to exclude from the end of each train set before test (gap): 3
    Last fold has been excluded because it was incomplete.

Fold: 0
    Training:   2022-01-01 00:00:00 -- 2022-01-30 00:00:00  (n=30)
    Validation: 2022-02-03 00:00:00 -- 2022-02-06 00:00:00  (n=4)
Fold: 1
    Training:   2022-01-01 00:00:00 -- 2022-01-30 00:00:00  (n=30)
    Validation: 2022-02-07 00:00:00 -- 2022-02-10 00:00:00  (n=4)
Fold: 2
    Training:   2022-01-01 00:00:00 -- 2022-01-30 00:00:00  (n=30)
    Validation: 2022-02-11 00:00:00 -- 2022-02-14 00:00:00  (n=4)
Fold: 3
    Training:   2022-01-13 00:00:00 -- 2022-02-11 00:00:00  (n=30)
    Validation: 2022-02-15 00:00:00 -- 2022-02-18 00:00:00  (n=4)



  0%|          | 0/4 [00:00<?, ?it/s]

In [33]:
# Value returned as `metric` by a backtesting call.
# NOTE(review): this cell's execution count (33) is lower than the one of the
# backtesting cell above (43), so the value shown may come from an earlier
# run -- re-run to confirm.
metric

0.060991643719298785

In [36]:
# Manual reproduction of the first fold: fit on the first 30 observations
# (the initial training size used in the backtest above).
forecaster.fit(
    y    = y_with_index.iloc[:30],
    exog = exog_with_index.iloc[:30]
)

In [37]:
# Fold 0 by hand: predict 7 steps (gap of 3 + 4 evaluated steps) right after
# the training data and keep only the last 4, i.e. the ones after the gap.
forecaster.predict(7, exog=exog_with_index.iloc[30:37]).tail(4)

2022-02-03    0.518786
2022-02-04    0.492698
2022-02-05    0.493804
2022-02-06    0.514675
Freq: D, Name: pred, dtype: float64

In [39]:
# Fold 1 by hand (no refit): predict 7 steps (gap of 3 + 4 evaluated steps)
# starting at position 34 and keep only the last 4. The exog slice now has
# exactly 7 rows (34:41), like the sibling cells; it used to be 34:42, one row
# more than the number of predicted steps.
forecaster.predict(7, last_window=y_with_index.iloc[:34], 
                   exog=exog_with_index.iloc[34:41]).tail(4)

2022-02-07    0.526900
2022-02-08    0.517320
2022-02-09    0.512903
2022-02-10    0.503343
Freq: D, Name: pred, dtype: float64

In [40]:
# Fold 2 by hand (no refit): predict 7 steps (gap of 3 + 4 evaluated steps)
# starting at position 38 and keep only the last 4, i.e. the ones after the gap.
forecaster.predict(7, last_window=y_with_index.iloc[:38], 
                   exog=exog_with_index.iloc[38:45]).tail(4)

2022-02-11    0.501715
2022-02-12    0.509469
2022-02-13    0.502004
2022-02-14    0.504360
Freq: D, Name: pred, dtype: float64

In [44]:
# Fold 3 by hand: refit on positions 12 to 41 (2022-01-13 -- 2022-02-11, the
# training window printed for this fold), then predict 7 steps (gap of 3 + 4
# evaluated steps) and keep only the last 4.
forecaster.fit(
    y    = y_with_index.iloc[12:42],
    exog = exog_with_index.iloc[12:42]
)
forecaster.predict(7, exog=exog_with_index.iloc[42:49]).tail(4)

2022-02-15    0.453419
2022-02-16    0.526217
2022-02-17    0.504778
2022-02-18    0.522353
Freq: D, Name: pred, dtype: float64

In [46]:
# Display the backtest predictions to compare them with the manual forecasts.
backtest_predictions

Unnamed: 0,pred,lower_bound,upper_bound
2022-02-03,0.518786,0.17324,0.877714
2022-02-04,0.492698,0.152233,0.868473
2022-02-05,0.493804,0.140035,0.812267
2022-02-06,0.514675,0.182259,0.915069
2022-02-07,0.5269,0.181354,0.885829
2022-02-08,0.51732,0.176855,0.893095
2022-02-09,0.512903,0.159134,0.831365
2022-02-10,0.503343,0.170928,0.903737
2022-02-11,0.501715,0.156169,0.860643
2022-02-12,0.509469,0.169004,0.885245


In [35]:
# Backtest predictions as a plain array; the columns follow the displayed
# table: pred, lower_bound, upper_bound.
backtest_predictions.to_numpy()

array([[0.51878642, 0.17324039, 0.87771447],
       [0.49269791, 0.15223278, 0.86847335],
       [0.49380441, 0.14003517, 0.81226679],
       [0.51467463, 0.18225936, 0.91506854],
       [0.52690045, 0.18135443, 0.8858285 ],
       [0.51731996, 0.17685482, 0.8930954 ],
       [0.51290311, 0.15913388, 0.83136549],
       [0.50334306, 0.17092779, 0.90373697],
       [0.50171526, 0.15616923, 0.86064331],
       [0.50946908, 0.16900395, 0.88524452],
       [0.50200357, 0.14823433, 0.82046595],
       [0.50436041, 0.17194514, 0.90475432],
       [0.4534189 , 0.09320851, 0.83561058],
       [0.52621695, 0.17305963, 0.91133042],
       [0.50477802, 0.10690002, 0.88750077],
       [0.52235258, 0.1700762 , 0.91128311]])