In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Insert the parent of the current working directory at position 1 of the
# import path, then display that directory as the cell output.
# NOTE(review): presumably this makes the local skforecast checkout importable
# when the notebook lives one level below the repository root -- confirm.
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

'c:\\Users\\jaesc2\\GitHub\\skforecast'

In [2]:
from typing import Union, Tuple, Optional, Callable
import numpy as np
import pandas as pd

In [57]:
def _create_backtesting_folds_2(
    data: Union[pd.Series, pd.DataFrame],
    initial_train_size: Union[int, None],
    test_size: int,
    externally_fitted: bool=False,
    refit: Optional[Union[bool, int]]=False,
    fixed_train_size: bool=True,
    gap: int=0,
    allow_incomplete_fold: bool=True,
    return_all_indexes: bool=False,
    verbose: bool=True
) -> list:
    """    
    """
    
    idx = range(len(data))
    folds = []
    i = 0
    last_fold_excluded = False

    while initial_train_size + (i * test_size) + gap < len(data):

        if refit:
            # If fixed_train_size the train size doesn't increase but moves by 
            # `test_size` positions in each iteration. If False, the train size
            # increases by `test_size` in each iteration.
            train_idx_start = i * (test_size) if fixed_train_size else 0
            train_idx_end = initial_train_size + i * (test_size)
            test_idx_start = train_idx_end
        else:
            # The train size doesn't increase and doesn't move.
            train_idx_start = 0
            train_idx_end = initial_train_size
            test_idx_start = initial_train_size + i * (test_size)

        test_idx_end = test_idx_start + gap + test_size
    
        partitions = [
            idx[train_idx_start : train_idx_end],
            idx[test_idx_start : test_idx_end],
            idx[test_idx_start + gap : test_idx_end]
        ]
        folds.append(partitions)
        i += 1

    if not allow_incomplete_fold:
        if len(folds[-1][2]) < test_size:
            folds = folds[:-1]
            last_fold_excluded = True

    # Replace partitions inside folds with length 0 with None
    folds = [[partition if len(partition) > 0 else None 
              for partition in fold] 
             for fold in folds]

    # Create a flag to know whether to train the forecaster
    if isinstance(refit, bool):
        fit_forecaster = [refit]*len(folds)
        fit_forecaster[0] = True
    else:
        fit_forecaster = [False]*len(folds)
        for i in range(0, len(fit_forecaster), refit): fit_forecaster[i] = True
    
    for i in range(len(folds)): 
        folds[i].append(fit_forecaster[i])
        if fit_forecaster[i] is False:
            folds[i][0] = folds[i-1][0]
    
    if verbose:
        print("Information of backtesting process")
        print("----------------------------------")
        if externally_fitted:
            print(f"An already trained forecaster is to be used. Window size: {initial_train_size}")
        else:
            print(f"Number of observations used for initial training: {initial_train_size}")
        print(f"Number of observations used for backtesting: {len(data) - initial_train_size}")
        print(f"    Number of folds: {len(folds)}")
        print(f"    Number of steps per fold: {test_size}")
        print(f"    Number of steps to exclude from the end of each train set before test (gap): {gap}")
        if last_fold_excluded:
            print("    Last fold has been excluded because it was incomplete.")
        if len(folds[-1][2]) < test_size:
            print(f"    Last fold only includes {len(folds[-1][2])} observations.")
        print("")

        for i, fold in enumerate(folds):
            training_start    = data.index[fold[0][0]] if fold[0] is not None else None
            training_end      = data.index[fold[0][-1]] if fold[0] is not None else None
            training_length   = len(fold[0]) if fold[0] is not None else 0
            validation_start  = data.index[fold[2][0]]
            validation_end    = data.index[fold[2][-1]]
            validation_length = len(fold[2])
            print(f"Fold: {i}")
            if not externally_fitted:
                print(
                    f"    Training:   {training_start} -- {training_end}  (n={training_length})"
                )
            print(
                f"    Validation: {validation_start} -- {validation_end}  (n={validation_length})"
            )
        print("")

    if not return_all_indexes:
        # +1 to prevent iloc pandas from deleting the last observation
        folds = [
            [[fold[0][0], fold[0][-1]+1], 
             [fold[1][0], fold[1][-1]+1], 
             [fold[2][0], fold[2][-1]+1],
             fold[3]] 
            for fold in folds
        ]

    return folds

In [67]:
from skforecast.model_selection.model_selection import _create_backtesting_folds

# Toy daily series: the values 0..99 indexed from 2022-01-01.
y = pd.Series(
    np.arange(100),
    index=pd.date_range(start='2022-01-01', periods=100, freq='D')
)

# Fold configuration shared with the cells below.
initial_train_size, gap, test_size = 50, 5, 7
refit, allow_incomplete_fold = False, True

# Build the folds in compact [start, end] form without printing the summary.
folds = _create_backtesting_folds_2(
    data=y,
    initial_train_size=initial_train_size,
    test_size=test_size,
    externally_fitted=False,
    refit=refit,
    fixed_train_size=False,
    gap=gap,
    allow_incomplete_fold=allow_incomplete_fold,
    return_all_indexes=False,
    verbose=False,
)

In [68]:
# Display the folds built in the previous cell.
# NOTE(review): `_create_backtesting_folds_2` forces the fit flag of the first
# fold to True when `refit` is a bool, so an output whose first fold ends in
# False presumably comes from an earlier revision -- re-run to confirm.
folds

[[[0, 50], [50, 62], [55, 62], False],
 [[0, 50], [57, 69], [62, 69], False],
 [[0, 50], [64, 76], [69, 76], False],
 [[0, 50], [71, 83], [76, 83], False],
 [[0, 50], [78, 90], [83, 90], False],
 [[0, 50], [85, 97], [90, 97], False],
 [[0, 50], [92, 100], [97, 100], False]]

In [56]:
refit = 4

# One flag per fold: a bool `refit` is repeated for every fold, an integer
# `refit` flags every `refit`-th fold starting at position 0.
fit_forecaster = (
    [refit] * len(folds)
    if isinstance(refit, bool)
    else [position % refit == 0 for position in range(len(folds))]
)

# Append the flag to each fold in place; a fold that is not refitted takes the
# training partition of the fold before it. Re-running this cell appends again.
for i, fold in enumerate(folds):
    fold.append(fit_forecaster[i])
    if fit_forecaster[i] is False:
        fold[0] = folds[i-1][0]

folds

[[[0, 91], [70, 82], [75, 82], 1, True, False, True, True, True],
 [[0, 91], [77, 89], [82, 89], 1, True, False, False, False, False],
 [[0, 91], [84, 96], [89, 96], 1, True, False, True, False, False],
 [[0, 91], [91, 100], [96, 100], 1, True, False, False, True, False]]

In [25]:
# Append an extra `True` element to the first fold in place. `folds` is
# mutated, so every run of this cell adds one more element.
folds[0].append(True)

In [72]:
# Three-element list of False flags used for the in-place assignment check.
aa = [False] * 3

In [74]:
# Set only the first flag in place, then display the list to check that the
# other entries are unchanged.
aa[0] = True
aa

[True, False, False]