# Backtesting with fixed training set

In [3]:
# Notebook setup
# ==============================================================================
# Load IPython's autoreload extension in mode 2 so that edited modules are
# re-imported before each cell execution.
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Make the directory two levels above the current working directory importable
# (presumably the repository root holding the local skforecast sources —
# TODO confirm against the repository layout).
sys.path.insert(1, str(Path.cwd().parent.parent))
# Turn off the jedi-based tab completer.
%config Completer.use_jedi = False

In [7]:
# All mocked backtesting results in this notebook were generated with
# skforecast 0.4.2
# ==============================================================================
# NOTE(review): the recorded output of this cell is `False`, i.e. the kernel
# that last executed it did not have version 0.4.2 installed.
import skforecast
skforecast.__version__ == '0.4.2'

False

In [8]:
# !pip uninstall skforecast -y

In [9]:
# Unit test __init__
# ==============================================================================
import pytest
from pytest import approx
from typing import Union, Tuple, Optional, Any
import numpy as np
import pandas as pd
import warnings
import logging
from copy import deepcopy
from tqdm import tqdm
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import ParameterGrid
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from skforecast.model_selection import backtesting_forecaster
from skforecast.model_selection.model_selection import _get_metric
from skforecast.model_selection.model_selection import _backtesting_forecaster_refit
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregMultiOutput import ForecasterAutoregMultiOutput
import warnings

## `_backtesting_forecaster_refit` with `fixed_train_size`

### Original

In [10]:
def _backtesting_forecaster_refit(
    forecaster,
    y: pd.Series,
    steps: int,
    metric: Union[str, callable],
    initial_train_size: int,
    exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
    interval: Optional[list]=None,
    n_boot: int=500,
    random_state: int=123,
    in_sample_residuals: bool=True,
    verbose: bool=False,
    set_out_sample_residuals: Any='deprecated'
) -> Tuple[np.array, pd.DataFrame]:
    '''
    Backtest a forecaster, re-fitting it on an expanding window before every
    fold. The work is done on a deep copy, so the forecaster passed in is left
    untouched.

    For fold `i` (0-based):
        - The copy is fitted on the first `initial_train_size + i * steps`
          observations of `y` (and of `exog`, when given).
        - The next `steps` observations are predicted. If the number of
          backtesting observations is not a multiple of `steps`, the last
          fold predicts only the remaining observations.

    Parameters
    ----------
    forecaster : ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregMultiOutput
        Forecaster model.

    y : pandas Series
        Time series values.

    steps : int
        Number of steps predicted in each fold.

    metric : str, callable
        Metric used to score the predictions.

        If string:
            {'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error'}

        If callable:
            Function with arguments y_true, y_pred that returns a float.

    initial_train_size : int
        Number of observations used to fit the forecaster in the first fold.

    exog : pandas Series, pandas DataFrame, default `None`
        Exogenous variable/s included as predictor/s. Must have the same
        number of observations as `y` and be aligned so that y[i] is
        regressed on exog[i].

    interval : list, default `None`
        Percentiles (between 0 and 100 inclusive) of the prediction interval.
        If `None`, `predict` is used and no interval is estimated; otherwise
        `predict_interval` is used. Only available for forecaster of type
        ForecasterAutoreg and ForecasterAutoregCustom.

    n_boot : int, default `500`
        Number of bootstrapping iterations passed to `predict_interval`.

    random_state : int, default `123`
        Seed passed to `predict_interval`.

    in_sample_residuals : bool, default `True`
        Passed to `predict_interval`. If `True`, residuals from the training
        data are used as proxy of prediction error; if `False`,
        out_sample_residuals already stored in the forecaster are used.

    verbose : bool, default `False`
        Print number of folds and the index limits of the training and
        validation sets of every fold.

    set_out_sample_residuals : 'deprecated'
        Deprecated since version 0.4.2, will be removed on version 0.5.0.
        It is not used by this function.

    Returns
    -------
    metric_value : numpy ndarray shape (1,)
        Value of the metric.

    backtest_predictions : pandas DataFrame
        Predictions of all folds concatenated.
            column pred = predictions.
            column lower_bound = lower bound of the interval.
            column upper_bound = upper bound of the interval.
        The bound columns are only present if `interval` is not `None`.

    '''

    model = deepcopy(forecaster)
    score_fn = _get_metric(metric=metric) if isinstance(metric, str) else metric

    n_backtest_obs = len(y) - initial_train_size
    folds = int(np.ceil(n_backtest_obs / steps))
    remainder = n_backtest_obs % steps
    last_fold = folds - 1

    if verbose:
        print("Information of backtesting process")
        print("----------------------------------")
        print(f"Number of observations used for initial training: {initial_train_size}")
        print(f"Number of observations used for backtesting: {n_backtest_obs}")
        print(f"    Number of folds: {folds}")
        print(f"    Number of steps per fold: {steps}")
        if remainder != 0:
            print(f"    Last fold only includes {remainder} observations.")
        print("")
        for fold in range(folds):
            cutoff = initial_train_size + fold * steps
            print(f"Data partition in fold: {fold}")
            print(f"    Training:   {y.index[0]} -- {y.index[cutoff - 1]}")
            # The last fold always ends at the final observation of `y`.
            if fold < last_fold:
                validation_end = y.index[cutoff + steps - 1]
            else:
                validation_end = y.index[-1]
            print(f"    Validation: {y.index[cutoff]} -- {validation_end}")
        print("")

    if folds > 50:
        print(
            f"Forecaster will be fit {folds} times. This can take substantial amounts of time. "
            f"If not feasible, try with `refit = False`. \n"
        )

    fold_outputs = []
    for fold in range(folds):
        # Expanding window: training always starts at the first observation
        # and ends right before the fold's validation block.
        cutoff = initial_train_size + fold * steps

        # Only the last fold can be shorter than `steps`.
        if fold == last_fold and remainder != 0:
            horizon = remainder
        else:
            horizon = steps

        fit_args = {'y': y.iloc[:cutoff]}
        predict_args = {'steps': horizon}
        if exog is not None:
            # Slicing past the end of `exog` is clipped, so in a shorter last
            # fold this window has exactly `horizon` rows.
            predict_args['exog'] = exog.iloc[cutoff:cutoff + steps, ]
            fit_args['exog'] = exog.iloc[:cutoff, ]

        model.fit(**fit_args)

        if interval is None:
            fold_pred = model.predict(**predict_args)
        else:
            fold_pred = model.predict_interval(
                            **predict_args,
                            interval            = interval,
                            n_boot              = n_boot,
                            random_state        = random_state,
                            in_sample_residuals = in_sample_residuals
                        )

        fold_outputs.append(fold_pred)

    backtest_predictions = pd.concat(fold_outputs)
    if isinstance(backtest_predictions, pd.Series):
        backtest_predictions = pd.DataFrame(backtest_predictions)

    validation_stop = initial_train_size + len(backtest_predictions)
    metric_value = score_fn(
                       y_true = y.iloc[initial_train_size: validation_stop],
                       y_pred = backtest_predictions['pred']
                   )

    return np.array([metric_value]), backtest_predictions

In [18]:
# Toy data for the manual check of the current implementation: a series with
# the values 0..39 and a random-forest forecaster using 3 lags.
y = pd.Series(np.arange(40))
forecaster = ForecasterAutoreg(
                regressor = RandomForestRegressor(random_state=123),
                lags      = 3 
             )
forecaster.fit(y=y)

# The last 15 observations are reserved for backtesting; the first 25 form
# the initial training set.
n_backtest = 15
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

In [22]:
# Same fold arithmetic as in the backtesting function, for 4 steps per fold:
# number of folds (rounded up) and size of the incomplete last fold.
folds = int(np.ceil((len(y) - len(y_train)) / 4))
remainder = (len(y) - len(y_train)) % 4
print(len(y_train), folds, remainder)

25 4 3


In [20]:
# Reference run with the library's public `backtesting_forecaster` and
# `refit=True`; `verbose=True` prints the training/validation limits of
# every fold.
metric, predictions_backtest = backtesting_forecaster(
                                    forecaster = forecaster,
                                    y          = y,
                                    initial_train_size = len(y_train),
                                    steps      = 4,
                                    metric     = 'mean_squared_error',
                                    refit      = True,
                                    verbose    = True
                               )

# print(metric, predictions_backtest)

Information of backtesting process
----------------------------------
Number of observations used for initial training: 25
Number of observations used for backtesting: 15
    Number of folds: 4
    Number of steps per fold: 4
    Last fold only includes 3 observations.

Data partition in fold: 0
    Training:   0 -- 24
    Validation: 25 -- 28
Data partition in fold: 1
    Training:   0 -- 28
    Validation: 29 -- 32
Data partition in fold: 2
    Training:   0 -- 32
    Validation: 33 -- 36
Data partition in fold: 3
    Training:   0 -- 36
    Validation: 37 -- 39

[9.85436667]      pred
25  23.33
26  23.33
27  23.33
28  23.33
29  27.52
30  27.52
31  27.52
32  27.52
33  31.53
34  31.53
35  31.53
36  31.53
37  35.43
38  35.43
39  35.43


###  New

In [13]:
def _backtesting_forecaster_refit_n(
    forecaster,
    y: pd.Series,
    steps: int,
    metric: Union[str, callable],
    initial_train_size: int,
    fixed_train_size: bool=False,
    exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
    interval: Optional[list]=None,
    n_boot: int=500,
    random_state: int=123,
    in_sample_residuals: bool=True,
    verbose: bool=False,
    set_out_sample_residuals: Any='deprecated'
) -> Tuple[np.array, pd.DataFrame]:
    '''
    Backtesting of forecaster model with a re-fitting strategy. A copy of the
    original forecaster is created so it is not modified during the process.

    In each iteration:
        - Fit forecaster with the training set.
        - A number of `steps` ahead are predicted.
        - The end of the training set moves forward by `steps` observations.
          If `fixed_train_size` is `False`, the training set always starts at
          the first observation, so it grows by `steps`. If `True`, its start
          moves forward by `steps` too, so its size stays equal to
          `initial_train_size`.
        - The model is re-fitted using the new training set.

    If the number of observations available for backtesting is not a multiple
    of `steps`, the last fold predicts only the remaining observations.

    Parameters
    ----------
    forecaster : ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregMultiOutput
        Forecaster model.

    y : pandas Series
        Training time series values.

    steps : int
        Number of steps to predict.

    metric : str, callable
        Metric used to quantify the goodness of fit of the model.

        If string:
            {'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error'}

        If callable:
            Function with arguments y_true, y_pred that returns a float.

    initial_train_size : int
        Number of samples in the initial train split. The backtest forecaster is
        trained using the first `initial_train_size` observations.

    fixed_train_size : bool, default `False`
        If `True`, the train size does not increase; the training window
        moves by `steps` in each iteration.

    exog : pandas Series, pandas DataFrame, default `None`
        Exogenous variable/s included as predictor/s. Must have the same
        number of observations as `y` and should be aligned so that y[i] is
        regressed on exog[i].

    interval : list, default `None`
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive. If `None`, no
        intervals are estimated. Only available for forecaster of type ForecasterAutoreg
        and ForecasterAutoregCustom.

    n_boot : int, default `500`
        Number of bootstrapping iterations used to estimate prediction
        intervals.

    random_state : int, default `123`
        Sets a seed to the random generator, so that boot intervals are always
        deterministic.

    in_sample_residuals : bool, default `True`
        If `True`, residuals from the training data are used as proxy of
        prediction error to create prediction intervals. If `False`, out_sample_residuals
        are used if they are already stored inside the forecaster.

    verbose : bool, default `False`
        Print number of folds and index of training and validation sets used for backtesting.

    set_out_sample_residuals : 'deprecated'
        Deprecated since version 0.4.2, will be removed on version 0.5.0.
        It is not used by this function.

    Returns
    -------
    metric_value : numpy ndarray shape (1,)
        Value of the metric.

    backtest_predictions : pandas DataFrame
        Value of predictions and their estimated interval if `interval` is not `None`.
            column pred = predictions.
            column lower_bound = lower bound of the interval.
            column upper_bound = upper bound of the interval.

    '''

    forecaster = deepcopy(forecaster)
    if isinstance(metric, str):
        metric = _get_metric(metric=metric)

    n_backtest_obs = len(y) - initial_train_size
    folds = int(np.ceil(n_backtest_obs / steps))
    remainder = n_backtest_obs % steps
    last_fold = folds - 1

    if verbose:
        print("Information of backtesting process")
        print("----------------------------------")
        print(f"Number of observations used for initial training: {initial_train_size}")
        print(f"Number of observations used for backtesting: {n_backtest_obs}")
        print(f"    Number of folds: {folds}")
        print(f"    Number of steps per fold: {steps}")
        if remainder != 0:
            print(f"    Last fold only includes {remainder} observations.")
        print("")
        for i in range(folds):
            # Same window limits as in the fitting loop below.
            train_idx_start = i * steps if fixed_train_size else 0
            train_idx_end = initial_train_size + i * steps
            print(f"Data partition in fold: {i}")
            print(f"    Training:   {y.index[train_idx_start]} -- {y.index[train_idx_end - 1]}")
            # The last fold always ends at the final observation of `y`.
            if i < last_fold:
                validation_end = y.index[train_idx_end + steps - 1]
            else:
                validation_end = y.index[-1]
            print(f"    Validation: {y.index[train_idx_end]} -- {validation_end}")
        print("")

    if folds > 50:
        print(
            f"Forecaster will be fit {folds} times. This can take substantial amounts of time. "
            f"If not feasible, try with `refit = False`. \n"
        )

    backtest_predictions = []
    for i in range(folds):
        # The end of the training window advances by `steps` in every fold.
        # Its start stays at 0 (expanding window) or advances by `steps`
        # as well (fixed-size sliding window).
        train_idx_start = i * steps if fixed_train_size else 0
        train_idx_end = initial_train_size + i * steps

        # Only the last fold can be shorter than `steps`. A separate name is
        # used so the `steps` argument is never overwritten.
        if i == last_fold and remainder != 0:
            fold_steps = remainder
        else:
            fold_steps = steps

        fit_args = {'y': y.iloc[train_idx_start:train_idx_end]}
        predict_args = {'steps': fold_steps}
        if exog is not None:
            fit_args['exog'] = exog.iloc[train_idx_start:train_idx_end, ]
            # Slicing past the end of `exog` is clipped, so in a shorter last
            # fold this window has exactly `fold_steps` rows.
            predict_args['exog'] = exog.iloc[train_idx_end:train_idx_end + steps, ]

        forecaster.fit(**fit_args)

        if interval is None:
            pred = forecaster.predict(**predict_args)
        else:
            pred = forecaster.predict_interval(
                       **predict_args,
                       interval            = interval,
                       n_boot              = n_boot,
                       random_state        = random_state,
                       in_sample_residuals = in_sample_residuals
                   )

        backtest_predictions.append(pred)

    backtest_predictions = pd.concat(backtest_predictions)
    if isinstance(backtest_predictions, pd.Series):
        backtest_predictions = pd.DataFrame(backtest_predictions)

    metric_value = metric(
                    y_true = y.iloc[initial_train_size: initial_train_size + len(backtest_predictions)],
                    y_pred = backtest_predictions['pred']
                   )

    # Wrap the score in a shape-(1,) array, as documented above and as the
    # non-fixed implementation `_backtesting_forecaster_refit` returns it.
    return np.array([metric_value]), backtest_predictions

In [129]:
# Toy data for the manual check of the new implementation: a series with the
# values 0..39 and a random-forest forecaster using 3 lags.
y = pd.Series(np.arange(40))
forecaster = ForecasterAutoreg(
                regressor = RandomForestRegressor(random_state=123),
                lags      = 3 
             )
forecaster.fit(y=y)

# The last 15 observations are reserved for backtesting; the first 25 form
# the initial training set.
n_backtest = 15
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

In [130]:
# Run the new implementation with `fixed_train_size = True`: the training
# window is expected to keep 25 observations and to move 4 steps forward in
# every fold (checked against the partitions printed by `verbose = True`).
metric, predictions_backtest = _backtesting_forecaster_refit_n(
                                    forecaster = forecaster,
                                    y          = y,
                                    initial_train_size = len(y_train),
                                    fixed_train_size = True,
                                    steps      = 4,
                                    metric     = 'mean_squared_error',
                                    verbose    = True
                               )

print(metric, predictions_backtest)

Information of backtesting process
----------------------------------
Number of observations used for initial training: 25
Number of observations used for backtesting: 15
    Number of folds: 4
    Number of steps per fold: 4
    Last fold only includes 3 observations.

Data partition in fold: 0
    Training:   0 -- 24
    Validation: 25 -- 28
Data partition in fold: 1
    Training:   4 -- 28
    Validation: 29 -- 32
Data partition in fold: 2
    Training:   8 -- 32
    Validation: 33 -- 36
Data partition in fold: 3
    Training:   12 -- 36
    Validation: 37 -- 39

[10.59823333]      pred
25  23.33
26  23.33
27  23.33
28  23.33
29  27.33
30  27.33
31  27.33
32  27.33
33  31.33
34  31.33
35  31.33
36  31.33
37  35.33
38  35.33
39  35.33


## Testing

In [47]:
# Fixtures Testing Backtesting
# ==============================================================================
# Generate the raw values of the test fixtures. With the seed fixed at 123,
# three consecutive draws of 50 uniform values are taken, in this order:
# series, exogenous variable, out-of-sample residuals. The printed arrays are
# the source of the hard-coded fixtures defined in the next cell.
np.random.seed(123)
y_rnd = np.random.rand(50)
print(y_rnd)
exog_rnd = np.random.rand(50)
print(exog_rnd)
out_sample_residuals_rnd = np.random.rand(50)
out_sample_residuals_rnd

[0.69646919 0.28613933 0.22685145 0.55131477 0.71946897 0.42310646
 0.9807642  0.68482974 0.4809319  0.39211752 0.34317802 0.72904971
 0.43857224 0.0596779  0.39804426 0.73799541 0.18249173 0.17545176
 0.53155137 0.53182759 0.63440096 0.84943179 0.72445532 0.61102351
 0.72244338 0.32295891 0.36178866 0.22826323 0.29371405 0.63097612
 0.09210494 0.43370117 0.43086276 0.4936851  0.42583029 0.31226122
 0.42635131 0.89338916 0.94416002 0.50183668 0.62395295 0.1156184
 0.31728548 0.41482621 0.86630916 0.25045537 0.48303426 0.98555979
 0.51948512 0.61289453]
[0.12062867 0.8263408  0.60306013 0.54506801 0.34276383 0.30412079
 0.41702221 0.68130077 0.87545684 0.51042234 0.66931378 0.58593655
 0.6249035  0.67468905 0.84234244 0.08319499 0.76368284 0.24366637
 0.19422296 0.57245696 0.09571252 0.88532683 0.62724897 0.72341636
 0.01612921 0.59443188 0.55678519 0.15895964 0.15307052 0.69552953
 0.31876643 0.6919703  0.55438325 0.38895057 0.92513249 0.84167
 0.35739757 0.04359146 0.30476807 0.398185

array([0.51312815, 0.66662455, 0.10590849, 0.13089495, 0.32198061,
       0.66156434, 0.84650623, 0.55325734, 0.85445249, 0.38483781,
       0.3167879 , 0.35426468, 0.17108183, 0.82911263, 0.33867085,
       0.55237008, 0.57855147, 0.52153306, 0.00268806, 0.98834542,
       0.90534158, 0.20763586, 0.29248941, 0.52001015, 0.90191137,
       0.98363088, 0.25754206, 0.56435904, 0.80696868, 0.39437005,
       0.73107304, 0.16106901, 0.60069857, 0.86586446, 0.98352161,
       0.07936579, 0.42834727, 0.20454286, 0.45063649, 0.54776357,
       0.09332671, 0.29686078, 0.92758424, 0.56900373, 0.457412  ,
       0.75352599, 0.74186215, 0.04857903, 0.7086974 , 0.83924335])

In [13]:
# Fixtures Testing Backtesting
# ==============================================================================
# Hard-coded copies of the arrays printed by the generation cell above
# (np.random.seed(123) followed by np.random.rand(50) draws), so the tests do
# not depend on the state of the random generator.

# Target series (50 observations, default RangeIndex).
y = pd.Series(
    np.array([0.69646919, 0.28613933, 0.22685145, 0.55131477, 0.71946897,
              0.42310646, 0.9807642 , 0.68482974, 0.4809319 , 0.39211752,
              0.34317802, 0.72904971, 0.43857224, 0.0596779 , 0.39804426,
              0.73799541, 0.18249173, 0.17545176, 0.53155137, 0.53182759,
              0.63440096, 0.84943179, 0.72445532, 0.61102351, 0.72244338,
              0.32295891, 0.36178866, 0.22826323, 0.29371405, 0.63097612,
              0.09210494, 0.43370117, 0.43086276, 0.4936851 , 0.42583029,
              0.31226122, 0.42635131, 0.89338916, 0.94416002, 0.50183668,
              0.62395295, 0.1156184 , 0.31728548, 0.41482621, 0.86630916,
              0.25045537, 0.48303426, 0.98555979, 0.51948512, 0.61289453]))

# Exogenous variable with the same number of observations as `y`.
exog = pd.Series(
    np.array([0.12062867, 0.8263408 , 0.60306013, 0.54506801, 0.34276383,
              0.30412079, 0.41702221, 0.68130077, 0.87545684, 0.51042234,
              0.66931378, 0.58593655, 0.6249035 , 0.67468905, 0.84234244,
              0.08319499, 0.76368284, 0.24366637, 0.19422296, 0.57245696,
              0.09571252, 0.88532683, 0.62724897, 0.72341636, 0.01612921,
              0.59443188, 0.55678519, 0.15895964, 0.15307052, 0.69552953,
              0.31876643, 0.6919703 , 0.55438325, 0.38895057, 0.92513249,
              0.84167   , 0.35739757, 0.04359146, 0.30476807, 0.39818568,
              0.70495883, 0.99535848, 0.35591487, 0.76254781, 0.59317692,
              0.6917018 , 0.15112745, 0.39887629, 0.2408559 , 0.34345601]), 
    name='exog')

# Values to be used as out-of-sample residuals of a forecaster
# (presumably for the prediction-interval tests — TODO confirm against the
# cells that consume it).
out_sample_residuals = pd.Series(
    np.array([0.51312815, 0.66662455, 0.10590849, 0.13089495, 0.32198061,
              0.66156434, 0.84650623, 0.55325734, 0.85445249, 0.38483781,
              0.3167879 , 0.35426468, 0.17108183, 0.82911263, 0.33867085,
              0.55237008, 0.57855147, 0.52153306, 0.00268806, 0.98834542,
              0.90534158, 0.20763586, 0.29248941, 0.52001015, 0.90191137,
              0.98363088, 0.25754206, 0.56435904, 0.80696868, 0.39437005,
              0.73107304, 0.16106901, 0.60069857, 0.86586446, 0.98352161,
              0.07936579, 0.42834727, 0.20454286, 0.45063649, 0.54776357,
              0.09332671, 0.29686078, 0.92758424, 0.56900373, 0.457412  ,
              0.75352599, 0.74186215, 0.04857903, 0.7086974 , 0.83924335]),
    name='out_sample_residuals')

### Mocked backtesting No interval

#### No interval no exog no remainder

In [10]:
# Mocked backtesting no exog no remainder
# ==============================================================================
# Backtests the last 12 observations of `y` in folds of 4 steps (12 / 4 leaves
# no remainder) and displays the values that are frozen as mocked results.
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3 
             )
forecaster.fit(y=y)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
                                    forecaster = forecaster,
                                    y          = y,
                                    initial_train_size = len(y_train),
                                    steps      = 4,
                                    metric     = 'mean_squared_error',
                                    verbose    = True
                               )

print(metric)
# Cross-check of the returned metric. `_get_metric` is the helper brought into
# scope by the notebook's import cell; the un-prefixed `get_metric` is not
# imported there and would raise NameError on a fresh kernel.
metric_value = _get_metric('mean_squared_error')(
                    y_true = y.iloc[len(y_train) : len(y_train) + len(backtest_predictions)],
                    y_pred = backtest_predictions['pred']
                   )
print(metric_value)
backtest_predictions.pred.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 12
    Number of folds: 3
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 41
Data partition in fold: 1
    Training:   0 -- 41
    Validation: 42 -- 45
Data partition in fold: 2
    Training:   0 -- 45
    Validation: 46 -- 49

[0.06598803]
0.06598802629306816


array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.38969292,
       0.52778339, 0.49152015, 0.4841678 , 0.4076433 , 0.50904672,
       0.50249462, 0.49232817])

In [11]:
# Mocked reference values (no exog, no remainder) checked against the results
# currently held in `metric` and `backtest_predictions`.
metric_mocked_no_exog_no_remainder = np.array(0.06598802629306816)
# Tolerance-based check instead of exact float equality: the last digits of the
# metric may differ between platforms, and the predictions below are already
# compared with the default tolerance of `assert_frame_equal`.
np.testing.assert_allclose(metric, metric_mocked_no_exog_no_remainder)
backtest_predictions_mocked_no_exog_no_remainder = pd.DataFrame({'pred':np.array([0.55717779, 0.43355138, 0.54969767,
                                                                                  0.52945466, 0.38969292, 0.52778339,
                                                                                  0.49152015, 0.4841678 , 0.4076433 ,
                                                                                  0.50904672, 0.50249462, 0.49232817])
                                                                }, index=np.arange(38, 50))
pd.testing.assert_frame_equal(backtest_predictions, backtest_predictions_mocked_no_exog_no_remainder)

#### No interval no exog yes remainder

In [13]:
# Mocked backtesting no exog yes remainder
# ==============================================================================
# Backtests the last 12 observations of `y` in folds of 5 steps, so the last
# fold only holds the 2 remaining observations.
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3 
             )
forecaster.fit(y=y)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
                                    forecaster = forecaster,
                                    y          = y,
                                    initial_train_size = len(y_train),
                                    steps      = 5,
                                    metric     = 'mean_squared_error',
                                    verbose    = True
                               )

print(metric)
# Cross-check of the returned metric. `_get_metric` is the helper brought into
# scope by the notebook's import cell; the un-prefixed `get_metric` is not
# imported there and would raise NameError on a fresh kernel.
metric_value = _get_metric('mean_squared_error')(
                    y_true = y.iloc[len(y_train) : len(y_train) + len(backtest_predictions)],
                    y_pred = backtest_predictions['pred']
                   )
print(metric_value)
backtest_predictions.pred.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 12
    Number of folds: 3
    Number of steps per fold: 5
    Last fold only includes 2 observations.

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 42
Data partition in fold: 1
    Training:   0 -- 42
    Validation: 43 -- 47
Data partition in fold: 2
    Training:   0 -- 47
    Validation: 48 -- 49

[0.06916732]
0.06916732087926723


array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.48308861,
       0.5096801 , 0.49519677, 0.47997916, 0.49177914, 0.495797  ,
       0.57738724, 0.44370472])

In [14]:
# Mocked reference values (no exog, yes remainder) checked against the results
# currently held in `metric` and `backtest_predictions`.
metric_mocked_no_exog_yes_remainder = np.array(0.06916732087926723)
# Tolerance-based check instead of exact float equality: the last digits of the
# metric may differ between platforms, and the predictions below are already
# compared with the default tolerance of `assert_frame_equal`.
np.testing.assert_allclose(metric, metric_mocked_no_exog_yes_remainder)
backtest_predictions_mocked_no_exog_yes_remainder = pd.DataFrame({'pred':np.array([0.55717779, 0.43355138, 0.54969767,
                                                                                   0.52945466, 0.48308861, 0.5096801 ,
                                                                                   0.49519677, 0.47997916, 0.49177914,
                                                                                   0.495797  , 0.57738724, 0.44370472])
                                                                 }, index=np.arange(38, 50))
pd.testing.assert_frame_equal(backtest_predictions, backtest_predictions_mocked_no_exog_yes_remainder)

#### No interval yes exog no remainder

In [15]:
# Mocked backtesting yes exog no remainder
# ==============================================================================
# Backtests the last 12 observations of `y` with `exog` in folds of 4 steps
# (12 / 4 leaves no remainder).
assert len(y) == len(exog)
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3 
             )
forecaster.fit(y=y, exog=exog)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
                                    forecaster = forecaster,
                                    y          = y,
                                    exog       = exog,
                                    initial_train_size = len(y_train),
                                    steps      = 4,
                                    metric     = 'mean_squared_error',
                                    verbose    = True
                               )

print(metric)
# Cross-check of the returned metric. `_get_metric` is the helper brought into
# scope by the notebook's import cell; the un-prefixed `get_metric` is not
# imported there and would raise NameError on a fresh kernel.
metric_value = _get_metric('mean_squared_error')(
                    y_true = y.iloc[len(y_train) : len(y_train) + len(backtest_predictions)],
                    y_pred = backtest_predictions['pred']
                   )
print(metric_value)
backtest_predictions.pred.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 12
    Number of folds: 3
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 41
Data partition in fold: 1
    Training:   0 -- 41
    Validation: 42 -- 45
Data partition in fold: 2
    Training:   0 -- 45
    Validation: 46 -- 49

[0.05663345]
0.05663345135204598


  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '


array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.42295275,
       0.46286083, 0.43618422, 0.43552906, 0.48687517, 0.55455072,
       0.55577332, 0.53943402])

In [16]:
# Mocked reference values (yes exog, no remainder) checked against the results
# currently held in `metric` and `backtest_predictions`.
metric_mocked_yes_exog_no_remainder = np.array(0.05663345135204598)
# Tolerance-based check instead of exact float equality: the last digits of the
# metric may differ between platforms, and the predictions below are already
# compared with the default tolerance of `assert_frame_equal`.
np.testing.assert_allclose(metric, metric_mocked_yes_exog_no_remainder)
backtest_predictions_mocked_yes_exog_no_remainder = pd.DataFrame({'pred':np.array([0.59059622, 0.47257504, 0.53024098, 
                                                                                   0.46163343, 0.42295275, 0.46286083,
                                                                                   0.43618422, 0.43552906, 0.48687517,
                                                                                   0.55455072, 0.55577332, 0.53943402]
                                                                                 )
                                                                 }, index=np.arange(38, 50))
pd.testing.assert_frame_equal(backtest_predictions, backtest_predictions_mocked_yes_exog_no_remainder)

#### No interval yes exog yes remainder

In [17]:
# Mocked backtesting yes exog yes remainder
# ==============================================================================
# Backtests the last 12 observations of `y` with `exog` in folds of 5 steps, so
# the last fold only holds the 2 remaining observations.
assert len(y) == len(exog)
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3 
             )
forecaster.fit(y=y, exog=exog)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
                                    forecaster = forecaster,
                                    y          = y,
                                    exog       = exog,
                                    initial_train_size = len(y_train),
                                    steps      = 5,
                                    metric     = 'mean_squared_error',
                                    verbose    = True
                               )

print(metric)
# Cross-check of the returned metric. `_get_metric` is the helper brought into
# scope by the notebook's import cell; the un-prefixed `get_metric` is not
# imported there and would raise NameError on a fresh kernel.
metric_value = _get_metric('mean_squared_error')(
                    y_true = y.iloc[len(y_train) : len(y_train) + len(backtest_predictions)],
                    y_pred = backtest_predictions['pred']
                   )
print(metric_value)
backtest_predictions.pred.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 12
    Number of folds: 3
    Number of steps per fold: 5
    Last fold only includes 2 observations.

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 42
Data partition in fold: 1
    Training:   0 -- 42
    Validation: 43 -- 47
Data partition in fold: 2
    Training:   0 -- 47
    Validation: 48 -- 49

[0.06172396]
0.061723961096013524


  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '


array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.50035119,
       0.43595809, 0.4349167 , 0.42381237, 0.55165332, 0.53442833,
       0.65361802, 0.51297419])

In [18]:
# Mocked reference values (yes exog, yes remainder) checked against the results
# currently held in `metric` and `backtest_predictions`.
metric_mocked_yes_exog_yes_remainder = np.array(0.061723961096013524)
# Tolerance-based check instead of exact float equality: the last digits of the
# metric may differ between platforms, and the predictions below are already
# compared with the default tolerance of `assert_frame_equal`.
np.testing.assert_allclose(metric, metric_mocked_yes_exog_yes_remainder)
backtest_predictions_mocked_yes_exog_yes_remainder = pd.DataFrame({'pred':np.array([0.59059622, 0.47257504, 0.53024098,
                                                                                    0.46163343, 0.50035119, 0.43595809,
                                                                                    0.4349167 , 0.42381237, 0.55165332,
                                                                                    0.53442833, 0.65361802, 0.51297419]
                                                                                  )
                                                                 }, index=np.arange(38, 50))
pd.testing.assert_frame_equal(backtest_predictions, backtest_predictions_mocked_yes_exog_yes_remainder)

### Interval

#### Interval no exog no remainder

In [54]:
# Mocked backtesting interval no exog no remainder
# ==============================================================================
# Backtests the last 12 observations of `y` in folds of 4 steps (12 / 4 leaves
# no remainder), requesting interval = [5, 95].
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3 
             )
forecaster.fit(y=y)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
                                    forecaster = forecaster,
                                    y          = y,
                                    initial_train_size = len(y_train),
                                    steps      = 4,
                                    metric     = 'mean_squared_error',
                                    interval   = [5, 95],
                                    n_boot     = 500,
                                    random_state = 123,
                                    verbose    = True
                               )

print(metric)
# Cross-check of the returned metric. `_get_metric` is the helper brought into
# scope by the notebook's import cell; the un-prefixed `get_metric` is not
# imported there and would raise NameError on a fresh kernel.
metric_value = _get_metric('mean_squared_error')(
                    y_true = y.iloc[len(y_train) : len(y_train) + len(backtest_predictions)],
                    y_pred = backtest_predictions['pred']
                   )
print(metric_value)
# Only the last un-commented expression of the cell is displayed.
backtest_predictions.pred.values
backtest_predictions.lower_bound.values
# backtest_predictions.upper_bound.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 12
    Number of folds: 3
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 41
Data partition in fold: 1
    Training:   0 -- 41
    Validation: 42 -- 45
Data partition in fold: 2
    Training:   0 -- 45
    Validation: 46 -- 49

[0.06598803]
0.06598802629306816


array([0.19882822, 0.08272406, 0.18106389, 0.18777395, 0.03520425,
       0.12926384, 0.0495347 , 0.04527341, 0.0113795 , 0.13676538,
       0.12478441, 0.06814153])

In [55]:
# Mocked reference values (interval, no exog, no remainder) checked against the
# results currently held in `metric` and `backtest_predictions`.
metric_mocked_interval_no_exog_no_remainder = np.array(0.06598802629306816)
# Tolerance-based check instead of exact float equality: the last digits of the
# metric may differ between platforms, and the predictions below are already
# compared with the default tolerance of `assert_frame_equal`.
np.testing.assert_allclose(metric, metric_mocked_interval_no_exog_no_remainder)

backtest_predictions_mocked_interval_no_exog_no_remainder = pd.DataFrame({
    'pred':np.array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.38969292, 0.52778339, 
                     0.49152015, 0.4841678, 0.4076433, 0.50904672, 0.50249462, 0.49232817]),
    'lower_bound':np.array([0.19882822, 0.08272406, 0.18106389, 0.18777395, 0.03520425, 0.12926384,
                            0.0495347 , 0.04527341, 0.0113795 , 0.13676538, 0.12478441, 0.06814153]),
    'upper_bound':np.array([0.95368172, 0.81704742, 0.93685716, 0.9407976 , 0.78486946, 0.93084605,
                            0.84533191, 0.90255909, 0.80099612, 0.88747244, 0.88292664, 0.88718366])
                                                                         }, index=np.arange(38, 50))

pd.testing.assert_frame_equal(backtest_predictions, backtest_predictions_mocked_interval_no_exog_no_remainder)

#### Interval no exog yes remainder

In [56]:
# Mocked backtesting interval no exog yes remainder
# ==============================================================================
# Backtests the last 12 observations of `y` in folds of 5 steps, so the last
# fold only holds the 2 remaining observations, requesting interval = [5, 95].
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3 
             )
forecaster.fit(y=y)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
                                    forecaster = forecaster,
                                    y          = y,
                                    initial_train_size = len(y_train),
                                    steps      = 5,
                                    metric     = 'mean_squared_error',
                                    interval   = [5, 95],
                                    n_boot     = 500,
                                    random_state = 123,
                                    verbose    = True
                               )

print(metric)
# Cross-check of the returned metric. `_get_metric` is the helper brought into
# scope by the notebook's import cell; the un-prefixed `get_metric` is not
# imported there and would raise NameError on a fresh kernel.
metric_value = _get_metric('mean_squared_error')(
                    y_true = y.iloc[len(y_train) : len(y_train) + len(backtest_predictions)],
                    y_pred = backtest_predictions['pred']
                   )
print(metric_value)
# Only the last expression of the cell is displayed.
backtest_predictions.pred.values
backtest_predictions.lower_bound.values
backtest_predictions.upper_bound.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 12
    Number of folds: 3
    Number of steps per fold: 5
    Last fold only includes 2 observations.

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 42
Data partition in fold: 1
    Training:   0 -- 42
    Validation: 43 -- 47
Data partition in fold: 2
    Training:   0 -- 47
    Validation: 48 -- 49

[0.06916732]
0.06916732087926723


array([0.95368172, 0.81704742, 0.93685716, 0.9407976 , 0.85396419,
       0.86172991, 0.88313129, 0.82354636, 0.93875053, 0.86176335,
       0.96037185, 0.84205069])

In [57]:
# Mocked reference values (interval, no exog, yes remainder) checked against the
# results currently held in `metric` and `backtest_predictions`.
metric_mocked_interval_no_exog_yes_remainder = np.array(0.06916732087926723)
# Tolerance-based check instead of exact float equality: the last digits of the
# metric may differ between platforms, and the predictions below are already
# compared with the default tolerance of `assert_frame_equal`.
np.testing.assert_allclose(metric, metric_mocked_interval_no_exog_yes_remainder)

backtest_predictions_mocked_interval_no_exog_yes_remainder = pd.DataFrame({
    'pred':np.array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.48308861, 0.5096801 , 
                     0.49519677, 0.47997916, 0.49177914, 0.495797  , 0.57738724, 0.44370472]),
    'lower_bound':np.array([0.19882822, 0.08272406, 0.18106389, 0.18777395, 0.1238825 , 0.06681772,
                            0.09795868, 0.08383945, 0.10160946, 0.08917676, 0.23321023, 0.08685352]),
    'upper_bound':np.array([0.95368172, 0.81704742, 0.93685716, 0.9407976 , 0.85396419, 0.86172991,
                            0.88313129, 0.82354636, 0.93875053, 0.86176335, 0.96037185, 0.84205069])
                                                                }, index=np.arange(38, 50))

pd.testing.assert_frame_equal(backtest_predictions, backtest_predictions_mocked_interval_no_exog_yes_remainder)

#### Interval yes exog no remainder

In [58]:
# Mocked backtesting interval yes exog no remainder
# ==============================================================================
# Backtests the last 12 observations of `y` with `exog` in folds of 4 steps
# (12 / 4 leaves no remainder), requesting interval = [5, 95].
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3 
             )
forecaster.fit(y=y, exog=exog)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
                                    forecaster = forecaster,
                                    y          = y,
                                    exog       = exog,
                                    initial_train_size = len(y_train),
                                    steps      = 4,
                                    metric     = 'mean_squared_error',
                                    interval   = [5, 95],
                                    n_boot     = 500,
                                    random_state = 123,
                                    verbose    = True
                               )

print(metric)
# Cross-check of the returned metric. `_get_metric` is the helper brought into
# scope by the notebook's import cell; the un-prefixed `get_metric` is not
# imported there and would raise NameError on a fresh kernel.
metric_value = _get_metric('mean_squared_error')(
                    y_true = y.iloc[len(y_train) : len(y_train) + len(backtest_predictions)],
                    y_pred = backtest_predictions['pred']
                   )
print(metric_value)
# Only the last un-commented expression of the cell is displayed.
backtest_predictions.pred.values
# backtest_predictions.lower_bound.values
backtest_predictions.upper_bound.values

  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '


Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 12
    Number of folds: 3
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 41
Data partition in fold: 1
    Training:   0 -- 41
    Validation: 42 -- 45
Data partition in fold: 2
    Training:   0 -- 45
    Validation: 46 -- 49



  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '


[0.05663345]
0.05663345135204598


array([0.95777604, 0.88685543, 0.90755063, 0.87811336, 0.8225198 ,
       0.81894689, 0.81179723, 0.84420112, 0.89407425, 0.93903702,
       0.91748574, 0.93705358])

In [59]:
# Mocked reference values (interval, yes exog, no remainder) checked against the
# results currently held in `metric` and `backtest_predictions`.
metric_mocked_interval_yes_exog_no_remainder = np.array(0.05663345135204598)
# Tolerance-based check instead of exact float equality: the last digits of the
# metric may differ between platforms, and the predictions below are already
# compared with the default tolerance of `assert_frame_equal`.
np.testing.assert_allclose(metric, metric_mocked_interval_yes_exog_no_remainder)

backtest_predictions_mocked_interval_yes_exog_no_remainder = pd.DataFrame({
    'pred':np.array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.42295275, 0.46286083,
                     0.43618422, 0.43552906, 0.48687517, 0.55455072, 0.55577332, 0.53943402]),
    'lower_bound':np.array([0.24619375, 0.10545295, 0.13120713, 0.08044217, 0.07440334, 0.11331854,
                            0.01436362, 0.02747413, 0.14867238, 0.19834047, 0.19884259, 0.16964474]),
    'upper_bound':np.array([0.95777604, 0.88685543, 0.90755063, 0.87811336, 0.8225198 , 0.81894689,
                            0.81179723, 0.84420112, 0.89407425, 0.93903702, 0.91748574, 0.93705358])
                                                                         }, index=np.arange(38, 50))

pd.testing.assert_frame_equal(backtest_predictions, backtest_predictions_mocked_interval_yes_exog_no_remainder)

#### Interval yes exog yes remainder

In [60]:
# Mocked backtesting interval yes exog yes remainder
# ==============================================================================
# Backtests the last 12 observations of `y` with `exog` in folds of 5 steps, so
# the last fold only holds the 2 remaining observations, requesting
# interval = [5, 95].
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3 
             )
forecaster.fit(y=y, exog=exog)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
                                    forecaster = forecaster,
                                    y          = y,
                                    exog       = exog,
                                    initial_train_size = len(y_train),
                                    steps      = 5,
                                    metric     = 'mean_squared_error',
                                    interval   = [5, 95],
                                    n_boot     = 500,
                                    random_state = 123,
                                    verbose    = True
                               )

print(metric)
# Cross-check of the returned metric. `_get_metric` is the helper brought into
# scope by the notebook's import cell; the un-prefixed `get_metric` is not
# imported there and would raise NameError on a fresh kernel.
metric_value = _get_metric('mean_squared_error')(
                    y_true = y.iloc[len(y_train) : len(y_train) + len(backtest_predictions)],
                    y_pred = backtest_predictions['pred']
                   )
print(metric_value)
# Only the last expression of the cell is displayed.
backtest_predictions.pred.values
backtest_predictions.lower_bound.values
backtest_predictions.upper_bound.values

  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '


Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 12
    Number of folds: 3
    Number of steps per fold: 5
    Last fold only includes 2 observations.

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 42
Data partition in fold: 1
    Training:   0 -- 42
    Validation: 43 -- 47
Data partition in fold: 2
    Training:   0 -- 47
    Validation: 48 -- 49



  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '
  ('`exog` has DatetimeIndex index but no frequency. The index is '


[0.06172396]
0.061723961096013524


array([0.95777604, 0.88685543, 0.90755063, 0.87811336, 0.86891022,
       0.74808834, 0.80296989, 0.77919033, 0.97680126, 0.8877086 ,
       1.07608747, 0.90555785])

In [61]:
# Mocked reference values (interval, yes exog, yes remainder) checked against
# the results currently held in `metric` and `backtest_predictions`.
metric_mocked_interval_yes_exog_yes_remainder = np.array(0.061723961096013524)
# Tolerance-based check instead of exact float equality: the last digits of the
# metric may differ between platforms, and the predictions below are already
# compared with the default tolerance of `assert_frame_equal`.
np.testing.assert_allclose(metric, metric_mocked_interval_yes_exog_yes_remainder)

backtest_predictions_mocked_interval_yes_exog_yes_remainder = pd.DataFrame({
    'pred':np.array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.50035119, 0.43595809,
                     0.4349167 , 0.42381237, 0.55165332, 0.53442833, 0.65361802, 0.51297419]),
    'lower_bound':np.array([0.24619375, 0.10545295, 0.13120713, 0.08044217, 0.13725077, 0.08041239,
                            0.05015513, 0.07677812, 0.17434611, 0.16051962, 0.29167326, 0.15775686]),
    'upper_bound':np.array([0.95777604, 0.88685543, 0.90755063, 0.87811336, 0.86891022, 0.74808834,
                            0.80296989, 0.77919033, 0.97680126, 0.8877086 , 1.07608747, 0.90555785])
                                                                         }, index=np.arange(38, 50))

pd.testing.assert_frame_equal(backtest_predictions, backtest_predictions_mocked_interval_yes_exog_yes_remainder)

### Tests

####  No interval

In [18]:
# Fixtures _backtesting_forecaster_refit No exog No remainder (skforecast==0.4.2)
# ==============================================================================
# Each fixture pairs the expected metric (plain float) with the expected
# single-column 'pred' frame indexed 38..49.
metric_mocked_no_exog_no_remainder = 0.06598802629306816
backtest_predictions_mocked_no_exog_no_remainder = pd.DataFrame(
    data  = {'pred': [0.55717779, 0.43355138, 0.54969767, 0.52945466,
                      0.38969292, 0.52778339, 0.49152015, 0.4841678 ,
                      0.4076433 , 0.50904672, 0.50249462, 0.49232817]},
    index = np.arange(38, 50)
)

# Fixtures _backtesting_forecaster_refit No exog Yes remainder (skforecast==0.4.2)
# ==============================================================================
metric_mocked_no_exog_yes_remainder = 0.06916732087926723
backtest_predictions_mocked_no_exog_yes_remainder = pd.DataFrame(
    data  = {'pred': [0.55717779, 0.43355138, 0.54969767, 0.52945466,
                      0.48308861, 0.5096801 , 0.49519677, 0.47997916,
                      0.49177914, 0.495797  , 0.57738724, 0.44370472]},
    index = np.arange(38, 50)
)

# Fixtures _backtesting_forecaster_refit Yes exog No remainder (skforecast==0.4.2)
# ==============================================================================
metric_mocked_yes_exog_no_remainder = 0.05663345135204598
backtest_predictions_mocked_yes_exog_no_remainder = pd.DataFrame(
    data  = {'pred': [0.59059622, 0.47257504, 0.53024098, 0.46163343,
                      0.42295275, 0.46286083, 0.43618422, 0.43552906,
                      0.48687517, 0.55455072, 0.55577332, 0.53943402]},
    index = np.arange(38, 50)
)

# Fixtures _backtesting_forecaster_refit Yes exog Yes remainder (skforecast==0.4.2)
# ==============================================================================
metric_mocked_yes_exog_yes_remainder = 0.061723961096013524
backtest_predictions_mocked_yes_exog_yes_remainder = pd.DataFrame(
    data  = {'pred': [0.59059622, 0.47257504, 0.53024098, 0.46163343,
                      0.50035119, 0.43595809, 0.4349167 , 0.42381237,
                      0.55165332, 0.53442833, 0.65361802, 0.51297419]},
    index = np.arange(38, 50)
)


def test_output_backtesting_forecaster_refit_no_exog_no_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit with the mocked results
    when no interval is requested. Setup: LinearRegression with lags=3, mocked
    Series y, no exog, 12 observations to backtest in folds of 4 steps
    (no remainder), metric='mean_squared_error'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])
    forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

    backtest_kwargs = dict(
        forecaster          = forecaster,
        y                   = y,
        exog                = None,
        initial_train_size  = initial_train_size,
        steps               = 4,
        metric              = 'mean_squared_error',
        interval            = None,
        n_boot              = 500,
        random_state        = 123,
        in_sample_residuals = True,
        verbose             = False
    )
    metric, backtest_predictions = _backtesting_forecaster_refit(**backtest_kwargs)

    assert metric_mocked_no_exog_no_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_no_exog_no_remainder,
        backtest_predictions
    )


def test_output_backtesting_forecaster_refit_no_exog_yes_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit with the mocked results
    when no interval is requested. Setup: LinearRegression with lags=3, mocked
    Series y, no exog, 12 observations to backtest in folds of 5 steps
    (2 remainder), metric='mean_squared_error'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])
    forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

    backtest_kwargs = dict(
        forecaster          = forecaster,
        y                   = y,
        exog                = None,
        initial_train_size  = initial_train_size,
        steps               = 5,
        metric              = 'mean_squared_error',
        interval            = None,
        n_boot              = 500,
        random_state        = 123,
        in_sample_residuals = True,
        verbose             = False
    )
    metric, backtest_predictions = _backtesting_forecaster_refit(**backtest_kwargs)

    assert metric_mocked_no_exog_yes_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_no_exog_yes_remainder,
        backtest_predictions
    )


def test_output_backtesting_forecaster_refit_yes_exog_no_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit (no interval) with the
    mocked results. Regressor is LinearRegression with lags=3, Series y is
    mocked, exog is mocked, 12 observations to backtest, steps=4 (no
    remainder), metric='mean_squared_error'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])
    autoreg = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

    metric, backtest_predictions = _backtesting_forecaster_refit(
        forecaster=autoreg,
        y=y,
        steps=4,
        metric='mean_squared_error',
        initial_train_size=initial_train_size,
        exog=exog,
        interval=None,
        n_boot=500,
        random_state=123,
        in_sample_residuals=True,
        verbose=False,
    )

    # Metric is compared with a tolerance, predictions frame-wise.
    assert metric_mocked_yes_exog_no_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_yes_exog_no_remainder,
        backtest_predictions,
    )


def test_output_backtesting_forecaster_refit_yes_exog_yes_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit (no interval) with the
    mocked results. Regressor is LinearRegression with lags=3, Series y is
    mocked, exog is mocked, 12 observations to backtest, steps=5 (the last
    fold only holds the 2 remaining observations), metric='mean_squared_error'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])
    autoreg = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

    metric, backtest_predictions = _backtesting_forecaster_refit(
        forecaster=autoreg,
        y=y,
        steps=5,
        metric='mean_squared_error',
        initial_train_size=initial_train_size,
        exog=exog,
        interval=None,
        n_boot=500,
        random_state=123,
        in_sample_residuals=True,
        verbose=False,
    )

    # Metric is compared with a tolerance, predictions frame-wise.
    assert metric_mocked_yes_exog_yes_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_yes_exog_yes_remainder,
        backtest_predictions,
    )

In [19]:
# Run the no-interval mocked tests in this kernel; a mismatch with the
# fixtures surfaces as an AssertionError raised by the failing call.
test_output_backtesting_forecaster_refit_no_exog_no_remainder_with_mocked()
test_output_backtesting_forecaster_refit_no_exog_yes_remainder_with_mocked()
test_output_backtesting_forecaster_refit_yes_exog_no_remainder_with_mocked()
test_output_backtesting_forecaster_refit_yes_exog_yes_remainder_with_mocked()

#### Interval

In [24]:
# Expected values used by the test_output_backtesting_forecaster_refit_interval_*
# tests defined below (all of them call with interval=[5, 95] and
# in_sample_residuals=True). Every frame has 12 rows indexed 38..49 with the
# point prediction ('pred') and the interval limits ('lower_bound', 'upper_bound').

# Fixtures _backtesting_forecaster_refit Interval No exog No remainder (skforecast==0.4.2)
# ==============================================================================
metric_mocked_interval_no_exog_no_remainder = 0.0659880262930681
backtest_predictions_mocked_interval_no_exog_no_remainder = pd.DataFrame({
    'pred':np.array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.38969292, 0.52778339, 
                     0.49152015, 0.4841678, 0.4076433, 0.50904672, 0.50249462, 0.49232817]),
    'lower_bound':np.array([0.19882822, 0.08272406, 0.18106389, 0.18777395, 0.03520425, 0.12926384,
                            0.0495347 , 0.04527341, 0.0113795 , 0.13676538, 0.12478441, 0.06814153]),
    'upper_bound':np.array([0.95368172, 0.81704742, 0.93685716, 0.9407976 , 0.78486946, 0.93084605,
                            0.84533191, 0.90255909, 0.80099612, 0.88747244, 0.88292664, 0.88718366])
                                                                         }, index=np.arange(38, 50))

# Fixtures _backtesting_forecaster_refit Interval No exog Yes remainder (skforecast==0.4.2)
# ==============================================================================
metric_mocked_interval_no_exog_yes_remainder = 0.06916732087926723
backtest_predictions_mocked_interval_no_exog_yes_remainder = pd.DataFrame({
    'pred':np.array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.48308861, 0.5096801 , 
                     0.49519677, 0.47997916, 0.49177914, 0.495797  , 0.57738724, 0.44370472]),
    'lower_bound':np.array([0.19882822, 0.08272406, 0.18106389, 0.18777395, 0.1238825 , 0.06681772,
                            0.09795868, 0.08383945, 0.10160946, 0.08917676, 0.23321023, 0.08685352]),
    'upper_bound':np.array([0.95368172, 0.81704742, 0.93685716, 0.9407976 , 0.85396419, 0.86172991,
                            0.88313129, 0.82354636, 0.93875053, 0.86176335, 0.96037185, 0.84205069])
                                                                }, index=np.arange(38, 50))

# Fixtures _backtesting_forecaster_refit Interval Yes exog No remainder (skforecast==0.4.2)
# ==============================================================================
metric_mocked_interval_yes_exog_no_remainder = 0.05663345135204598
backtest_predictions_mocked_interval_yes_exog_no_remainder = pd.DataFrame({
    'pred':np.array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.42295275, 0.46286083,
                     0.43618422, 0.43552906, 0.48687517, 0.55455072, 0.55577332, 0.53943402]),
    'lower_bound':np.array([0.24619375, 0.10545295, 0.13120713, 0.08044217, 0.07440334, 0.11331854,
                            0.01436362, 0.02747413, 0.14867238, 0.19834047, 0.19884259, 0.16964474]),
    'upper_bound':np.array([0.95777604, 0.88685543, 0.90755063, 0.87811336, 0.8225198 , 0.81894689,
                            0.81179723, 0.84420112, 0.89407425, 0.93903702, 0.91748574, 0.93705358])
                                                                         }, index=np.arange(38, 50))

# Fixtures _backtesting_forecaster_refit Interval Yes exog Yes remainder (skforecast==0.4.2)
# ==============================================================================
metric_mocked_interval_yes_exog_yes_remainder = 0.061723961096013524
backtest_predictions_mocked_interval_yes_exog_yes_remainder = pd.DataFrame({
    'pred':np.array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.50035119, 0.43595809,
                     0.4349167 , 0.42381237, 0.55165332, 0.53442833, 0.65361802, 0.51297419]),
    'lower_bound':np.array([0.24619375, 0.10545295, 0.13120713, 0.08044217, 0.13725077, 0.08041239,
                            0.05015513, 0.07677812, 0.17434611, 0.16051962, 0.29167326, 0.15775686]),
    'upper_bound':np.array([0.95777604, 0.88685543, 0.90755063, 0.87811336, 0.86891022, 0.74808834,
                            0.80296989, 0.77919033, 0.97680126, 0.8877086 , 1.07608747, 0.90555785])
                                                                         }, index=np.arange(38, 50))


def test_output_backtesting_forecaster_refit_interval_no_exog_no_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit with the mocked results
    when an interval is requested (interval=[5, 95]).
    Regressor is LinearRegression with lags=3, Series y is mocked, no exog,
    12 observations to backtest, steps=4 (no remainder),
    metric='mean_squared_error', 'in_sample_residuals = True'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])
    autoreg = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

    metric, backtest_predictions = _backtesting_forecaster_refit(
        forecaster=autoreg,
        y=y,
        steps=4,
        metric='mean_squared_error',
        initial_train_size=initial_train_size,
        exog=None,
        interval=[5, 95],
        n_boot=500,
        random_state=123,
        in_sample_residuals=True,
        verbose=False,
    )

    # Metric is compared with a tolerance, predictions and bounds frame-wise.
    assert metric_mocked_interval_no_exog_no_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_interval_no_exog_no_remainder,
        backtest_predictions,
    )


def test_output_backtesting_forecaster_refit_interval_no_exog_yes_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit with the mocked results
    when an interval is requested (interval=[5, 95]).
    Regressor is LinearRegression with lags=3, Series y is mocked, no exog,
    12 observations to backtest, steps=5 (the last fold only holds the
    2 remaining observations), metric='mean_squared_error',
    'in_sample_residuals = True'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])
    autoreg = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

    metric, backtest_predictions = _backtesting_forecaster_refit(
        forecaster=autoreg,
        y=y,
        steps=5,
        metric='mean_squared_error',
        initial_train_size=initial_train_size,
        exog=None,
        interval=[5, 95],
        n_boot=500,
        random_state=123,
        in_sample_residuals=True,
        verbose=False,
    )

    # Metric is compared with a tolerance, predictions and bounds frame-wise.
    assert metric_mocked_interval_no_exog_yes_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_interval_no_exog_yes_remainder,
        backtest_predictions,
    )


def test_output_backtesting_forecaster_refit_interval_yes_exog_no_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit with the mocked results
    when an interval is requested (interval=[5, 95]).
    Regressor is LinearRegression with lags=3, Series y is mocked, exog is
    mocked, 12 observations to backtest, steps=4 (no remainder),
    metric='mean_squared_error', 'in_sample_residuals = True'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])
    autoreg = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

    metric, backtest_predictions = _backtesting_forecaster_refit(
        forecaster=autoreg,
        y=y,
        steps=4,
        metric='mean_squared_error',
        initial_train_size=initial_train_size,
        exog=exog,
        interval=[5, 95],
        n_boot=500,
        random_state=123,
        in_sample_residuals=True,
        verbose=False,
    )

    # Metric is compared with a tolerance, predictions and bounds frame-wise.
    assert metric_mocked_interval_yes_exog_no_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_interval_yes_exog_no_remainder,
        backtest_predictions,
    )


def test_output_backtesting_forecaster_refit_interval_yes_exog_yes_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit with the mocked results
    when an interval is requested (interval=[5, 95]).
    Regressor is LinearRegression with lags=3, Series y is mocked, exog is
    mocked, 12 observations to backtest, steps=5 (the last fold only holds the
    2 remaining observations), metric='mean_squared_error',
    'in_sample_residuals = True'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])
    autoreg = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

    metric, backtest_predictions = _backtesting_forecaster_refit(
        forecaster=autoreg,
        y=y,
        steps=5,
        metric='mean_squared_error',
        initial_train_size=initial_train_size,
        exog=exog,
        interval=[5, 95],
        n_boot=500,
        random_state=123,
        in_sample_residuals=True,
        verbose=False,
    )

    # Metric is compared with a tolerance, predictions and bounds frame-wise.
    assert metric_mocked_interval_yes_exog_yes_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_interval_yes_exog_yes_remainder,
        backtest_predictions,
    )

In [None]:
# Run the interval mocked tests in this kernel; a mismatch with the fixtures
# surfaces as an AssertionError raised by the failing call.
test_output_backtesting_forecaster_refit_interval_no_exog_no_remainder_with_mocked()
test_output_backtesting_forecaster_refit_interval_no_exog_yes_remainder_with_mocked()
test_output_backtesting_forecaster_refit_interval_yes_exog_no_remainder_with_mocked()
test_output_backtesting_forecaster_refit_interval_yes_exog_yes_remainder_with_mocked()

#### Interval in_sample_residuals = False

In [22]:
# Forecaster fitted on the whole series y, then given the `out_sample_residuals`
# defined elsewhere in the notebook.
forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=3)
forecaster.fit(y=y)
forecaster.set_out_sample_residuals(
    residuals=out_sample_residuals,
    append=False,
)

In [23]:
# Show the first two out-of-sample residuals stored in the forecaster
forecaster.out_sample_residuals.head(2)

0    0.513128
1    0.666625
Name: out_sample_residuals, dtype: float64

In [14]:
# Mocked backtesting with interval, no exog, no remainder, out-of-sample residuals
# ==============================================================================
forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=3)
forecaster.fit(y=y)
forecaster.set_out_sample_residuals(residuals=out_sample_residuals, append=False)

# The last 12 observations are held out for backtesting
n_backtest = 12
y_train, y_backtest = y[:-n_backtest], y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
    forecaster=forecaster,
    y=y,
    steps=4,
    metric='mean_squared_error',
    initial_train_size=len(y_train),
    exog=None,
    interval=[5, 95],
    n_boot=500,
    random_state=123,
    in_sample_residuals=False,
    verbose=False,
)

# Recompute the metric from the returned predictions and the matching slice of y
metric = _get_metric('mean_squared_error')(
    y_true=y.iloc[len(y_train):len(y_train) + len(backtest_predictions)],
    y_pred=backtest_predictions['pred'],
)
print(metric)
display(
    backtest_predictions['pred'].values,
    backtest_predictions['lower_bound'].values,
    backtest_predictions['upper_bound'].values,
)

0.06598802629306816


array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.38969292,
       0.52778339, 0.49152015, 0.4841678 , 0.4076433 , 0.50904672,
       0.50249462, 0.49232817])

array([0.63654358, 0.62989756, 0.67156167, 0.70363265, 0.46905871,
       0.73507276, 0.64554988, 0.64183541, 0.48700909, 0.68845988,
       0.63865297, 0.60684242])

array([1.54070487, 1.5131313 , 1.56749058, 1.62564968, 1.37322   ,
       1.61930035, 1.54870568, 1.55335041, 1.39117037, 1.56935123,
       1.51973211, 1.50300901])

In [16]:
# Freeze the values printed by the mocked backtesting cell above as fixtures and
# check them against the `metric` / `backtest_predictions` still in the kernel.
metric_mocked_interval_out_sample_residuals_no_exog_no_remainder = 0.06598802629306816
# approx() rather than ==, as in the test functions: the metric is a computed
# float and an exact comparison is brittle.
assert metric_mocked_interval_out_sample_residuals_no_exog_no_remainder == approx(metric)

backtest_predictions_mocked_interval_out_sample_residuals_no_exog_no_remainder = pd.DataFrame(
    {
        'pred': np.array([
            0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.38969292, 0.52778339,
            0.49152015, 0.4841678 , 0.4076433 , 0.50904672, 0.50249462, 0.49232817,
        ]),
        'lower_bound': np.array([
            0.63654358, 0.62989756, 0.67156167, 0.70363265, 0.46905871, 0.73507276,
            0.64554988, 0.64183541, 0.48700909, 0.68845988, 0.63865297, 0.60684242,
        ]),
        'upper_bound': np.array([
            1.54070487, 1.5131313 , 1.56749058, 1.62564968, 1.37322   , 1.61930035,
            1.54870568, 1.55335041, 1.39117037, 1.56935123, 1.51973211, 1.50300901,
        ]),
    },
    index=np.arange(38, 50),
)

pd.testing.assert_frame_equal(
    backtest_predictions,
    backtest_predictions_mocked_interval_out_sample_residuals_no_exog_no_remainder,
)

In [18]:
# Fixtures _backtesting_forecaster_refit No exog No remainder (skforecast==0.4.2)
# ==============================================================================
# Expected metric and predictions (with 'lower_bound' / 'upper_bound') used by
# the in_sample_residuals=False test defined below; 12 rows indexed 38..49.
metric_mocked_interval_out_sample_residuals_no_exog_no_remainder = 0.06598802629306816
backtest_predictions_mocked_interval_out_sample_residuals_no_exog_no_remainder = pd.DataFrame({
    'pred':np.array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.38969292, 0.52778339, 
                     0.49152015, 0.4841678 , 0.4076433 , 0.50904672, 0.50249462, 0.49232817]),
    'lower_bound':np.array([0.63654358, 0.62989756, 0.67156167, 0.70363265, 0.46905871, 0.73507276, 
                            0.64554988, 0.64183541, 0.48700909, 0.68845988, 0.63865297, 0.60684242]),
    'upper_bound':np.array([1.54070487, 1.5131313 , 1.56749058, 1.62564968, 1.37322   , 1.61930035, 
                            1.54870568, 1.55335041, 1.39117037, 1.56935123, 1.51973211, 1.50300901])
                                                                         }, index=np.arange(38, 50))


def test_output_backtesting_forecaster_refit_interval_out_sample_residuals_no_exog_no_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit with the mocked results
    when an interval is requested (interval=[5, 95]) and out-of-sample
    residuals are set on the forecaster before backtesting.
    Regressor is LinearRegression with lags=3, Series y is mocked, no exog,
    12 observations to backtest, steps=4 (no remainder),
    metric='mean_squared_error', 'in_sample_residuals = False'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])

    autoreg = ForecasterAutoreg(regressor=LinearRegression(), lags=3)
    autoreg.set_out_sample_residuals(residuals=out_sample_residuals, append=False)

    metric, backtest_predictions = _backtesting_forecaster_refit(
        forecaster=autoreg,
        y=y,
        steps=4,
        metric='mean_squared_error',
        initial_train_size=initial_train_size,
        exog=None,
        interval=[5, 95],
        n_boot=500,
        random_state=123,
        in_sample_residuals=False,
        verbose=False,
    )

    # Metric is compared with a tolerance, predictions and bounds frame-wise.
    assert metric_mocked_interval_out_sample_residuals_no_exog_no_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_interval_out_sample_residuals_no_exog_no_remainder,
        backtest_predictions,
    )

In [19]:
# Run the out-of-sample residuals mocked test; a mismatch raises AssertionError
test_output_backtesting_forecaster_refit_interval_out_sample_residuals_no_exog_no_remainder_with_mocked()

#### Callable metric

In [15]:
# Mocked backtesting no exog no remainder with a callable metric (my_metric)
# ==============================================================================
def my_metric(y_true, y_pred):
    '''
    Callable metric: mean of the errors (y_true - y_pred) after dividing each
    of them by the number of observations in y_true.
    '''
    n_obs = len(y_true)
    scaled_errors = (y_true - y_pred) / n_obs

    return scaled_errors.mean()
    
forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=3)
forecaster.fit(y=y)

# The last 12 observations are held out for backtesting
n_backtest = 12
y_train, y_backtest = y[:-n_backtest], y[-n_backtest:]

metric, backtest_predictions = _backtesting_forecaster_refit(
    forecaster=forecaster,
    y=y,
    steps=4,
    metric=my_metric,
    initial_train_size=len(y_train),
    verbose=True,
)
print(metric)

# Same metric computed by hand from the returned predictions, for comparison
metric_value = my_metric(
    y_true=y.iloc[len(y_train):len(y_train) + len(backtest_predictions)],
    y_pred=backtest_predictions['pred'],
)
print(metric_value)
backtest_predictions['pred'].values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 12
    Number of folds: 3
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 41
Data partition in fold: 1
    Training:   0 -- 41
    Validation: 42 -- 45
Data partition in fold: 2
    Training:   0 -- 45
    Validation: 46 -- 49

0.005283745900436151
0.005283745900436151


array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.38969292,
       0.52778339, 0.49152015, 0.4841678 , 0.4076433 , 0.50904672,
       0.50249462, 0.49232817])

In [16]:
# Fixtures _backtesting_forecaster_refit No exog No remainder, callable metric (skforecast==0.4.2)
# ==============================================================================
# Freeze the values printed by the mocked cell above and check them against the
# `metric` / `backtest_predictions` still in the kernel.
my_metric_mocked_no_exog_no_remainder = 0.005283745900436151
# approx() rather than ==, as in the test functions: the metric is a computed
# float and an exact comparison is brittle.
assert my_metric_mocked_no_exog_no_remainder == approx(metric)

my_metric_backtest_predictions_mocked_no_exog_no_remainder = pd.DataFrame(
    {
        'pred': np.array([
            0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.38969292, 0.52778339,
            0.49152015, 0.4841678 , 0.4076433 , 0.50904672, 0.50249462, 0.49232817,
        ]),
    },
    index=np.arange(38, 50),
)
pd.testing.assert_frame_equal(
    backtest_predictions,
    my_metric_backtest_predictions_mocked_no_exog_no_remainder,
)

In [17]:
# Fixtures _backtesting_forecaster_refit No exog No remainder (skforecast==0.4.2)
# ==============================================================================
# Expected metric and predictions used by the callable-metric (my_metric) test
# defined below; 12 rows indexed 38..49, no interval columns.
my_metric_mocked_no_exog_no_remainder = 0.005283745900436151
my_metric_backtest_predictions_mocked_no_exog_no_remainder = pd.DataFrame({
    'pred':np.array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.38969292, 0.52778339,
                     0.49152015, 0.4841678 , 0.4076433 , 0.50904672, 0.50249462, 0.49232817])
                                                                }, index=np.arange(38, 50))


def my_metric(y_true, y_pred):
    '''
    Callable metric: mean of the errors (y_true - y_pred) after dividing each
    of them by the number of observations in y_true.
    '''
    n_obs = len(y_true)
    scaled_errors = (y_true - y_pred) / n_obs

    return scaled_errors.mean()

def test_calleable_metric_backtesting_forecaster_refit_no_exog_no_remainder_with_mocked():
    '''
    Compare the output of _backtesting_forecaster_refit (no interval) with the
    mocked results when the metric is passed as a callable.
    Regressor is LinearRegression with lags=3, Series y is mocked, no exog,
    12 observations to backtest, steps=4 (no remainder), metric=my_metric.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])
    autoreg = ForecasterAutoreg(regressor=LinearRegression(), lags=3)

    metric, backtest_predictions = _backtesting_forecaster_refit(
        forecaster=autoreg,
        y=y,
        steps=4,
        metric=my_metric,
        initial_train_size=initial_train_size,
        exog=None,
        interval=None,
        n_boot=500,
        random_state=123,
        in_sample_residuals=True,
        verbose=False,
    )

    # Metric is compared with a tolerance, predictions frame-wise.
    assert my_metric_mocked_no_exog_no_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        my_metric_backtest_predictions_mocked_no_exog_no_remainder,
        backtest_predictions,
    )

In [18]:
# Run the callable-metric mocked test; a mismatch raises AssertionError
test_calleable_metric_backtesting_forecaster_refit_no_exog_no_remainder_with_mocked()

#### Fixed train size

**Mocked backtesting no exog no remainder fixed train size**

In [35]:
# Mocked backtesting no exog no remainder fixed train size
# ==============================================================================
# A fixed training size is mocked fold by fold: each iteration makes a one-fold
# call to _backtesting_forecaster_refit on a window of y that starts `steps`
# observations later than the previous one, so every fold trains on exactly
# `initial_train_size` observations.
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3
             )
forecaster.fit(y=y)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

initial_train_size = len(y_train)
steps = 4
folds = int(np.ceil((len(y) - initial_train_size) / steps))

# Collect the per-fold frames and concatenate once after the loop.
fold_predictions = []
for i in range(folds):
    train_idx_i = i * steps
    train_idx_f = initial_train_size + i * steps
    # Training window followed by the `steps` observations to predict
    y_complete = y[train_idx_i:train_idx_f + steps]

    metric, backtest_predictions = _backtesting_forecaster_refit(
                                            forecaster          = forecaster,
                                            y                   = y_complete,
                                            exog                = None,
                                            initial_train_size  = initial_train_size,
                                            steps               = steps,
                                            metric              = 'mean_squared_error',
                                            interval            = None,
                                            n_boot              = 500,
                                            random_state        = 123,
                                            in_sample_residuals = True,
                                            verbose             = True
                                       )
    fold_predictions.append(backtest_predictions)

backtest_predictions_all = pd.concat(fold_predictions)

# Overall metric on the backtested span of y. The slice is anchored on
# `initial_train_size` so it does not depend on any variable mutated in the loop.
metric = _get_metric('mean_squared_error')(
                    y_true = y.iloc[initial_train_size : initial_train_size + len(backtest_predictions_all)],
                    y_pred = backtest_predictions_all['pred']
                   )
print(metric)
backtest_predictions_all.pred.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 4
    Number of folds: 1
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 41

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 4
    Number of folds: 1
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   4 -- 41
    Validation: 42 -- 45

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 4
    Number of folds: 1
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   8 -- 45
    Validation: 46 -- 49

0.06720844584333846


array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.34597367,
       0.50223873, 0.47833829, 0.46082257, 0.37810191, 0.49508366,
       0.48808014, 0.47323313])

In [36]:
# Freeze the values printed by the fixed-train-size mocked cell above as
# fixtures and check them against the `metric` / `backtest_predictions_all`
# still in the kernel.
metric_mocked_fixed_train_no_exog_no_remainder = 0.06720844584333846
# approx() rather than ==, as in the test functions: the metric is a computed
# float and an exact comparison is brittle.
assert metric_mocked_fixed_train_no_exog_no_remainder == approx(metric)

backtest_predictions_mocked_fixed_train_no_exog_no_remainder = pd.DataFrame(
    {
        'pred': np.array([
            0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.34597367, 0.50223873,
            0.47833829, 0.46082257, 0.37810191, 0.49508366, 0.48808014, 0.47323313,
        ]),
    },
    index=np.arange(38, 50),
)
pd.testing.assert_frame_equal(
    backtest_predictions_all,
    backtest_predictions_mocked_fixed_train_no_exog_no_remainder,
)

**Mocked backtesting no exog yes remainder fixed train size**

In [37]:
# Mocked backtesting no exog yes remainder fixed train size
# ==============================================================================
# A fixed training size is mocked fold by fold: each iteration makes a one-fold
# call to _backtesting_forecaster_refit on a window of y that starts `steps`
# observations later than the previous one, so every fold trains on exactly
# `initial_train_size` observations.
forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags      = 3
             )
forecaster.fit(y=y)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

initial_train_size = len(y_train)
steps = 5
folds = int(np.ceil((len(y) - initial_train_size) / steps))

# Collect the per-fold frames and concatenate once after the loop.
fold_predictions = []
for i in range(folds):
    train_idx_i = i * steps
    train_idx_f = initial_train_size + i * steps
    # Training window followed by the observations to predict. The end is
    # capped at len(y) because the last fold may hold fewer than `steps`.
    y_complete = y[train_idx_i:min(train_idx_f + steps, len(y))]

    metric, backtest_predictions = _backtesting_forecaster_refit(
                                            forecaster          = forecaster,
                                            y                   = y_complete,
                                            exog                = None,
                                            initial_train_size  = initial_train_size,
                                            steps               = steps,
                                            metric              = 'mean_squared_error',
                                            interval            = None,
                                            n_boot              = 500,
                                            random_state        = 123,
                                            in_sample_residuals = True,
                                            verbose             = True
                                       )
    fold_predictions.append(backtest_predictions)

backtest_predictions_all = pd.concat(fold_predictions)

# Overall metric on the backtested span of y. The slice is anchored on
# `initial_train_size` so it does not depend on any variable mutated in the loop.
metric = _get_metric('mean_squared_error')(
                    y_true = y.iloc[initial_train_size : initial_train_size + len(backtest_predictions_all)],
                    y_pred = backtest_predictions_all['pred']
                   )
print(metric)
backtest_predictions_all.pred.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 5
    Number of folds: 1
    Number of steps per fold: 5

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 42

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 5
    Number of folds: 1
    Number of steps per fold: 5

Data partition in fold: 0
    Training:   5 -- 42
    Validation: 43 -- 47

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 2
    Number of folds: 1
    Number of steps per fold: 5
    Last fold only includes 2 observations.

Data partition in fold: 0
    Training:   10 -- 47
    Validation: 48 -- 49

0.07217085374372428


array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.48308861,
       0.4909399 , 0.47942107, 0.46025344, 0.46649132, 0.47061725,
       0.57603136, 0.41480551])

In [39]:
# Expected results for the "no exog / yes remainder" fixed-train-size scenario,
# copied from the output of the mocked backtesting loop.
metric_mocked_fixed_train_no_exog_yes_remainder = 0.07217085374372428
# Compare with a tolerance instead of exact float equality, in line with the
# `approx` checks in the test functions and with the default (non-exact)
# comparison performed by `assert_frame_equal` below.
assert metric == approx(metric_mocked_fixed_train_no_exog_yes_remainder)
backtest_predictions_mocked_fixed_train_no_exog_yes_remainder = pd.DataFrame(
    {'pred': np.array([0.55717779, 0.43355138, 0.54969767, 0.52945466, 0.48308861, 0.4909399 ,
                       0.47942107, 0.46025344, 0.46649132, 0.47061725, 0.57603136, 0.41480551])},
    index = np.arange(38, 50)
)
pd.testing.assert_frame_equal(
    backtest_predictions_all,
    backtest_predictions_mocked_fixed_train_no_exog_yes_remainder
)

**Mocked backtesting: yes exog, no remainder, fixed train size**

In [40]:
# Mocked backtesting yes exog no remainder fixed train size
# ==============================================================================
# Fixed-train-size backtesting is emulated by hand: each fold calls
# `_backtesting_forecaster_refit` on a window of `y` / `exog` that contains
# exactly `initial_train_size` training observations followed by `steps`
# observations to predict. The window start moves `steps` forward per fold.
forecaster = ForecasterAutoreg(
                 regressor = LinearRegression(),
                 lags      = 3
             )
forecaster.fit(y=y, exog=exog)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

initial_train_size = len(y_train)
steps = 4
folds = int(np.ceil((len(y) - initial_train_size) / steps))

predictions_per_fold = []
for i in range(folds):
    train_idx_i = i * steps
    train_idx_f = initial_train_size + i * steps
    # Same window for the target and the exogenous variable.
    y_complete = y[train_idx_i:train_idx_f + steps]
    exog_complete = exog[train_idx_i:train_idx_f + steps]

    # The metric returned for a single fold is not needed; the metric over
    # the whole backtest is computed once all folds are available.
    fold_metric, backtest_predictions = _backtesting_forecaster_refit(
                                            forecaster          = forecaster,
                                            y                   = y_complete,
                                            exog                = exog_complete,
                                            initial_train_size  = initial_train_size,
                                            steps               = steps,
                                            metric              = 'mean_squared_error',
                                            interval            = None,
                                            n_boot              = 500,
                                            random_state        = 123,
                                            in_sample_residuals = True,
                                            verbose             = True
                                        )
    predictions_per_fold.append(backtest_predictions)

# Concatenate once instead of growing a DataFrame inside the loop.
backtest_predictions_all = pd.concat(predictions_per_fold)

# Backtested observations start right after the initial training window, so
# the slice is anchored on `initial_train_size` rather than on a loop variable.
metric = _get_metric('mean_squared_error')(
             y_true = y.iloc[initial_train_size : initial_train_size + len(backtest_predictions_all)],
             y_pred = backtest_predictions_all['pred']
         )
print(metric)
backtest_predictions_all.pred.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 4
    Number of folds: 1
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 41

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 4
    Number of folds: 1
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   4 -- 41
    Validation: 42 -- 45

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 4
    Number of folds: 1
    Number of steps per fold: 4

Data partition in fold: 0
    Training:   8 -- 45
    Validation: 46 -- 49

0.05758244401484334


array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.37689967,
       0.44267729, 0.42642836, 0.41604275, 0.45047245, 0.53784704,
       0.53726274, 0.51516772])

In [41]:
# Expected results for the "yes exog / no remainder" fixed-train-size scenario,
# copied from the output of the mocked backtesting loop.
metric_mocked_fixed_train_yes_exog_no_remainder = 0.05758244401484334
# Compare with a tolerance instead of exact float equality, in line with the
# `approx` checks in the test functions and with the default (non-exact)
# comparison performed by `assert_frame_equal` below.
assert metric == approx(metric_mocked_fixed_train_yes_exog_no_remainder)
backtest_predictions_mocked_fixed_train_yes_exog_no_remainder = pd.DataFrame(
    {'pred': np.array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.37689967, 0.44267729,
                       0.42642836, 0.41604275, 0.45047245, 0.53784704, 0.53726274, 0.51516772])},
    index = np.arange(38, 50)
)
pd.testing.assert_frame_equal(
    backtest_predictions_all,
    backtest_predictions_mocked_fixed_train_yes_exog_no_remainder
)

**Mocked backtesting: yes exog, yes remainder, fixed train size**

In [42]:
# Mocked backtesting yes exog yes remainder fixed train size
# ==============================================================================
# Fixed-train-size backtesting is emulated by hand: each fold calls
# `_backtesting_forecaster_refit` on a window of `y` / `exog` that contains
# exactly `initial_train_size` training observations followed by up to `steps`
# observations to predict. The window start moves `steps` forward per fold.
forecaster = ForecasterAutoreg(
                 regressor = LinearRegression(),
                 lags      = 3
             )
forecaster.fit(y=y, exog=exog)

n_backtest = 12
y_train = y[:-n_backtest]
y_backtest = y[-n_backtest:]

initial_train_size = len(y_train)
steps = 5
folds = int(np.ceil((len(y) - initial_train_size) / steps))

predictions_per_fold = []
for i in range(folds):
    train_idx_i = i * steps
    train_idx_f = initial_train_size + i * steps
    # In the last fold the slice end goes past the end of the data; slicing
    # clips it, so that fold only receives the observations that remain
    # (2 instead of 5 with these settings).
    y_complete = y[train_idx_i:train_idx_f + steps]
    exog_complete = exog[train_idx_i:train_idx_f + steps]

    # The metric returned for a single fold is not needed; the metric over
    # the whole backtest is computed once all folds are available.
    fold_metric, backtest_predictions = _backtesting_forecaster_refit(
                                            forecaster          = forecaster,
                                            y                   = y_complete,
                                            exog                = exog_complete,
                                            initial_train_size  = initial_train_size,
                                            steps               = steps,
                                            metric              = 'mean_squared_error',
                                            interval            = None,
                                            n_boot              = 500,
                                            random_state        = 123,
                                            in_sample_residuals = True,
                                            verbose             = True
                                        )
    predictions_per_fold.append(backtest_predictions)

# Concatenate once instead of growing a DataFrame inside the loop.
backtest_predictions_all = pd.concat(predictions_per_fold)

# Backtested observations start right after the initial training window, so
# the slice is anchored on `initial_train_size` rather than on a loop variable.
metric = _get_metric('mean_squared_error')(
             y_true = y.iloc[initial_train_size : initial_train_size + len(backtest_predictions_all)],
             y_pred = backtest_predictions_all['pred']
         )
print(metric)
backtest_predictions_all.pred.values

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 5
    Number of folds: 1
    Number of steps per fold: 5

Data partition in fold: 0
    Training:   0 -- 37
    Validation: 38 -- 42

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 5
    Number of folds: 1
    Number of steps per fold: 5

Data partition in fold: 0
    Training:   5 -- 42
    Validation: 43 -- 47

Information of backtesting process
----------------------------------
Number of observations used for initial training: 38
Number of observations used for backtesting: 2
    Number of folds: 1
    Number of steps per fold: 5
    Last fold only includes 2 observations.

Data partition in fold: 0
    Training:   10 -- 47
    Validation: 48 -- 49

0.06425019123005545


array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.50035119,
       0.41975558, 0.4256614 , 0.41176005, 0.52357817, 0.509974  ,
       0.65354628, 0.48210726])

In [43]:
# Expected results for the "yes exog / yes remainder" fixed-train-size scenario,
# copied from the output of the mocked backtesting loop.
metric_mocked_fixed_train_yes_exog_yes_remainder = 0.06425019123005545
# Compare with a tolerance instead of exact float equality, in line with the
# `approx` checks in the test functions and with the default (non-exact)
# comparison performed by `assert_frame_equal` below.
assert metric == approx(metric_mocked_fixed_train_yes_exog_yes_remainder)
backtest_predictions_mocked_fixed_train_yes_exog_yes_remainder = pd.DataFrame(
    {'pred': np.array([0.59059622, 0.47257504, 0.53024098, 0.46163343, 0.50035119, 0.41975558,
                       0.4256614 , 0.41176005, 0.52357817, 0.509974  , 0.65354628, 0.48210726])},
    index = np.arange(38, 50)
)
pd.testing.assert_frame_equal(
    backtest_predictions_all,
    backtest_predictions_mocked_fixed_train_yes_exog_yes_remainder
)

##### Test

In [48]:
# Fixtures _backtesting_forecaster_refit fixed_train_size (skforecast==0.4.2)
# ==============================================================================
# One (metric, predictions) pair per scenario. Every predictions frame has a
# single 'pred' column and is indexed by the backtested positions 38..49.

# No exog, No remainder
metric_mocked_fixed_train_no_exog_no_remainder = 0.06720844584333846
backtest_predictions_mocked_fixed_train_no_exog_no_remainder = pd.DataFrame(
    data  = {'pred': np.array([
                 0.55717779, 0.43355138, 0.54969767, 0.52945466,
                 0.34597367, 0.50223873, 0.47833829, 0.46082257,
                 0.37810191, 0.49508366, 0.48808014, 0.47323313
             ])},
    index = np.arange(38, 50)
)

# No exog, Yes remainder
metric_mocked_fixed_train_no_exog_yes_remainder = 0.07217085374372428
backtest_predictions_mocked_fixed_train_no_exog_yes_remainder = pd.DataFrame(
    data  = {'pred': np.array([
                 0.55717779, 0.43355138, 0.54969767, 0.52945466,
                 0.48308861, 0.4909399 , 0.47942107, 0.46025344,
                 0.46649132, 0.47061725, 0.57603136, 0.41480551
             ])},
    index = np.arange(38, 50)
)

# Yes exog, No remainder
metric_mocked_fixed_train_yes_exog_no_remainder = 0.05758244401484334
backtest_predictions_mocked_fixed_train_yes_exog_no_remainder = pd.DataFrame(
    data  = {'pred': np.array([
                 0.59059622, 0.47257504, 0.53024098, 0.46163343,
                 0.37689967, 0.44267729, 0.42642836, 0.41604275,
                 0.45047245, 0.53784704, 0.53726274, 0.51516772
             ])},
    index = np.arange(38, 50)
)

# Yes exog, Yes remainder
metric_mocked_fixed_train_yes_exog_yes_remainder = 0.06425019123005545
backtest_predictions_mocked_fixed_train_yes_exog_yes_remainder = pd.DataFrame(
    data  = {'pred': np.array([
                 0.59059622, 0.47257504, 0.53024098, 0.46163343,
                 0.50035119, 0.41975558, 0.4256614 , 0.41176005,
                 0.52357817, 0.509974  , 0.65354628, 0.48210726
             ])},
    index = np.arange(38, 50)
)


def test_output_backtesting_forecaster_refit_fixed_train_size_no_exog_no_remainder_with_mocked():
    '''
    Check that _backtesting_forecaster_refit with fixed_train_size=True
    reproduces the mocked results when no interval is requested.
    Setup: LinearRegression with lags=3, mocked Series y, no exog, last 12
    observations backtested with steps=4 (no remainder),
    metric='mean_squared_error'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])

    backtest_kwargs = dict(
        forecaster          = ForecasterAutoreg(regressor=LinearRegression(), lags=3),
        y                   = y,
        exog                = None,
        initial_train_size  = initial_train_size,
        fixed_train_size    = True,
        steps               = 4,
        metric              = 'mean_squared_error',
        interval            = None,
        n_boot              = 500,
        random_state        = 123,
        in_sample_residuals = True,
        verbose             = False
    )
    metric, backtest_predictions = _backtesting_forecaster_refit(**backtest_kwargs)

    assert metric_mocked_fixed_train_no_exog_no_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_fixed_train_no_exog_no_remainder,
        backtest_predictions
    )


def test_output_backtesting_forecaster_refit_fixed_train_size_no_exog_yes_remainder_with_mocked():
    '''
    Check that _backtesting_forecaster_refit with fixed_train_size=True
    reproduces the mocked results when no interval is requested.
    Setup: LinearRegression with lags=3, mocked Series y, no exog, last 12
    observations backtested with steps=5 (remainder of 2),
    metric='mean_squared_error'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])

    backtest_kwargs = dict(
        forecaster          = ForecasterAutoreg(regressor=LinearRegression(), lags=3),
        y                   = y,
        exog                = None,
        initial_train_size  = initial_train_size,
        fixed_train_size    = True,
        steps               = 5,
        metric              = 'mean_squared_error',
        interval            = None,
        n_boot              = 500,
        random_state        = 123,
        in_sample_residuals = True,
        verbose             = False
    )
    metric, backtest_predictions = _backtesting_forecaster_refit(**backtest_kwargs)

    assert metric_mocked_fixed_train_no_exog_yes_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_fixed_train_no_exog_yes_remainder,
        backtest_predictions
    )


def test_output_backtesting_forecaster_refit_fixed_train_size_yes_exog_no_remainder_with_mocked():
    '''
    Check that _backtesting_forecaster_refit with fixed_train_size=True
    reproduces the mocked results when no interval is requested.
    Setup: LinearRegression with lags=3, mocked Series y, mocked exog, last 12
    observations backtested with steps=4 (no remainder),
    metric='mean_squared_error'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])

    backtest_kwargs = dict(
        forecaster          = ForecasterAutoreg(regressor=LinearRegression(), lags=3),
        y                   = y,
        exog                = exog,
        initial_train_size  = initial_train_size,
        fixed_train_size    = True,
        steps               = 4,
        metric              = 'mean_squared_error',
        interval            = None,
        n_boot              = 500,
        random_state        = 123,
        in_sample_residuals = True,
        verbose             = False
    )
    metric, backtest_predictions = _backtesting_forecaster_refit(**backtest_kwargs)

    assert metric_mocked_fixed_train_yes_exog_no_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_fixed_train_yes_exog_no_remainder,
        backtest_predictions
    )


def test_output_backtesting_forecaster_refit_fixed_train_size_yes_exog_yes_remainder_with_mocked():
    '''
    Check that _backtesting_forecaster_refit with fixed_train_size=True
    reproduces the mocked results when no interval is requested.
    Setup: LinearRegression with lags=3, mocked Series y, mocked exog, last 12
    observations backtested with steps=5 (remainder of 2),
    metric='mean_squared_error'.
    '''
    n_backtest = 12
    initial_train_size = len(y[:-n_backtest])

    backtest_kwargs = dict(
        forecaster          = ForecasterAutoreg(regressor=LinearRegression(), lags=3),
        y                   = y,
        exog                = exog,
        initial_train_size  = initial_train_size,
        fixed_train_size    = True,
        steps               = 5,
        metric              = 'mean_squared_error',
        interval            = None,
        n_boot              = 500,
        random_state        = 123,
        in_sample_residuals = True,
        verbose             = False
    )
    metric, backtest_predictions = _backtesting_forecaster_refit(**backtest_kwargs)

    assert metric_mocked_fixed_train_yes_exog_yes_remainder == approx(metric)
    pd.testing.assert_frame_equal(
        backtest_predictions_mocked_fixed_train_yes_exog_yes_remainder,
        backtest_predictions
    )

In [49]:
# Run the four fixed-train-size tests in order; a failing assertion stops the cell.
for run_test in (
    test_output_backtesting_forecaster_refit_fixed_train_size_no_exog_no_remainder_with_mocked,
    test_output_backtesting_forecaster_refit_fixed_train_size_no_exog_yes_remainder_with_mocked,
    test_output_backtesting_forecaster_refit_fixed_train_size_yes_exog_no_remainder_with_mocked,
    test_output_backtesting_forecaster_refit_fixed_train_size_yes_exog_yes_remainder_with_mocked,
):
    run_test()