In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

'c:\\Users\\Joaquín Amat\\Documents\\GitHub\\skforecast'

In [2]:
# Libraries
# ======================================================================================
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import FunctionTransformer

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import grid_search_forecaster

import xgboost
import lightgbm
import catboost
import sklearn
import skforecast

print(f"xgboost version: {xgboost.__version__}")
print(f"lightgbm version: {lightgbm.__version__}")
print(f"catboost version: {catboost.__version__}")
print(f"sklearn version: {sklearn.__version__}")
print(f"skforecast version: {skforecast.__version__}")

  from .autonotebook import tqdm as notebook_tqdm


xgboost version: 1.7.5
lightgbm version: 3.3.5
catboost version: 1.1.1
sklearn version: 1.2.2
skforecast version: 0.8.0


In [3]:
# Data
# ======================================================================================
y = pd.Series(np.random.normal(size=50))
exog_1 = pd.Series([1]*25 + [2]*25, name='exog_1', dtype=float)
exog_2 = pd.Series([True]*25 + [False]*25, name='exog_2', dtype=bool)
exog   = pd.concat((exog_1, exog_2), axis=1)
#exog.iloc[0,1] = np.nan
exog.head()

Unnamed: 0,exog_1,exog_2
0,1.0,True
1,1.0,True
2,1.0,True
3,1.0,True
4,1.0,True


In [4]:
# Opción A
# ==============================================================================
pipeline_categorical = make_pipeline(
                                OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1, encoded_missing_value=-1),
                                FunctionTransformer(func=lambda x: x.astype('category'), feature_names_out= 'one-to-one')
                        )

transformer_exog = make_column_transformer(
                        (
                            pipeline_categorical,
                            make_column_selector(dtype_exclude=np.number)
                        ),
                        remainder="passthrough",
                        verbose_feature_names_out=False,
                   ).set_output(transform="pandas")

print(transformer_exog.fit_transform(exog).head(3).dtypes)
transformer_exog.fit_transform(exog).head(3)

exog_2    category
exog_1     float64
dtype: object


Unnamed: 0,exog_2,exog_1
0,1,1.0
1,1,1.0
2,1,1.0


In [5]:
# Opción B
# ==============================================================================
categorical_features = exog.select_dtypes(exclude=np.number).columns.tolist()
transformer_exog = make_column_transformer(
                        (
                            OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1, encoded_missing_value=-1),
                            categorical_features
                        ),
                        remainder="passthrough",
                        verbose_feature_names_out=False,
                   ).set_output(transform="pandas")

print(transformer_exog.fit_transform(exog).head(3).dtypes)
transformer_exog.fit_transform(exog).head(3)

exog_2      int32
exog_1    float64
dtype: object


Unnamed: 0,exog_2,exog_1
0,1,1.0
1,1,1.0
2,1,1.0


In [6]:
# Lightgbm
# ======================================================================================
forecaster = ForecasterAutoreg(
                regressor = LGBMRegressor(
                              n_estimators=10,
                              random_state=12345
                            ),
                lags = 5,
                transformer_exog = transformer_exog,
                fit_kwargs={'categorical_feature': categorical_features}
             )
X_train, y_train = forecaster.create_train_X_y(
                        y=y,
                        exog=exog
                   )
print(X_train.dtypes)
display(X_train.head(3))
forecaster.fit(y=y, exog=exog)

exog_predict = exog.copy()
exog_predict.index = pd.RangeIndex(50, 100)
forecaster.predict(steps=3, exog=exog_predict)

lag_1     float64
lag_2     float64
lag_3     float64
lag_4     float64
lag_5     float64
exog_2      int32
exog_1    float64
dtype: object


Unnamed: 0,lag_1,lag_2,lag_3,lag_4,lag_5,exog_2,exog_1
5,1.199886,-0.444439,-0.916398,-0.289643,1.383589,1,1.0
6,-0.848731,1.199886,-0.444439,-0.916398,-0.289643,1,1.0
7,0.803075,-0.848731,1.199886,-0.444439,-0.916398,1,1.0




50   -0.260417
51   -0.398754
52   -0.398754
Name: pred, dtype: float64

In [17]:
# XGBoost
# ======================================================================================
forecaster = ForecasterAutoreg(
                regressor = XGBRegressor(
                              tree_method='hist',
                              n_estimators=10,
                              random_state=12345,
                              enable_categorical=True,
                              feature_types =['q', 'q', 'q', 'q', 'q', 'c', 'q']
                            ),
                lags = 5,
                transformer_exog = transformer_exog
             )
X_train, y_train = forecaster.create_train_X_y(
                        y=y,
                        exog=exog
                   )
print(X_train.dtypes)
display(X_train.head(3))
forecaster.fit(y=y, exog=exog)

exog_predict = exog.copy()
exog_predict.index = pd.RangeIndex(50, 100)
forecaster.predict(steps=3, exog=exog_predict)

lag_1     float64
lag_2     float64
lag_3     float64
lag_4     float64
lag_5     float64
exog_2      int32
exog_1    float64
dtype: object


Unnamed: 0,lag_1,lag_2,lag_3,lag_4,lag_5,exog_2,exog_1
5,1.199886,-0.444439,-0.916398,-0.289643,1.383589,1,1.0
6,-0.848731,1.199886,-0.444439,-0.916398,-0.289643,1,1.0
7,0.803075,-0.848731,1.199886,-0.444439,-0.916398,1,1.0


50   -0.578407
51   -1.088601
52   -1.126142
Name: pred, dtype: float64

## ForecasterMultiseries

In [4]:
series = pd.DataFrame({'l1': pd.Series(np.array(
                                [0.69646919, 0.28613933, 0.22685145, 0.55131477, 0.71946897,
                                 0.42310646, 0.9807642 , 0.68482974, 0.4809319 , 0.39211752,
                                 0.34317802, 0.72904971, 0.43857224, 0.0596779 , 0.39804426,
                                 0.73799541, 0.18249173, 0.17545176, 0.53155137, 0.53182759,
                                 0.63440096, 0.84943179, 0.72445532, 0.61102351, 0.72244338,
                                 0.32295891, 0.36178866, 0.22826323, 0.29371405, 0.63097612,
                                 0.09210494, 0.43370117, 0.43086276, 0.4936851 , 0.42583029,
                                 0.31226122, 0.42635131, 0.89338916, 0.94416002, 0.50183668,
                                 0.62395295, 0.1156184 , 0.31728548, 0.41482621, 0.86630916,
                                 0.25045537, 0.48303426, 0.98555979, 0.51948512, 0.61289453]
                                )
                            ), 
                       'l2': pd.Series(np.array(
                                [0.12062867, 0.8263408 , 0.60306013, 0.54506801, 0.34276383,
                                 0.30412079, 0.41702221, 0.68130077, 0.87545684, 0.51042234,
                                 0.66931378, 0.58593655, 0.6249035 , 0.67468905, 0.84234244,
                                 0.08319499, 0.76368284, 0.24366637, 0.19422296, 0.57245696,
                                 0.09571252, 0.88532683, 0.62724897, 0.72341636, 0.01612921,
                                 0.59443188, 0.55678519, 0.15895964, 0.15307052, 0.69552953,
                                 0.31876643, 0.6919703 , 0.55438325, 0.38895057, 0.92513249,
                                 0.84167   , 0.35739757, 0.04359146, 0.30476807, 0.39818568,
                                 0.70495883, 0.99535848, 0.35591487, 0.76254781, 0.59317692,
                                 0.6917018 , 0.15112745, 0.39887629, 0.2408559 , 0.34345601]
                                )
                            )
                      }
         )
    
exog = pd.DataFrame({'col_1': pd.Series(np.array(
                                [0.51312815, 0.66662455, 0.10590849, 0.13089495, 0.32198061,
                                 0.66156434, 0.84650623, 0.55325734, 0.85445249, 0.38483781,
                                 0.3167879 , 0.35426468, 0.17108183, 0.82911263, 0.33867085,
                                 0.55237008, 0.57855147, 0.52153306, 0.00268806, 0.98834542,
                                 0.90534158, 0.20763586, 0.29248941, 0.52001015, 0.90191137,
                                 0.98363088, 0.25754206, 0.56435904, 0.80696868, 0.39437005,
                                 0.73107304, 0.16106901, 0.60069857, 0.86586446, 0.98352161,
                                 0.07936579, 0.42834727, 0.20454286, 0.45063649, 0.54776357,
                                 0.09332671, 0.29686078, 0.92758424, 0.56900373, 0.457412  ,
                                 0.75352599, 0.74186215, 0.04857903, 0.7086974 , 0.83924335]
                                )
                              ),
                     'col_2': ['a']*25 + ['b']*25}
       )

exog_predict = exog.copy()
exog_predict.index = pd.RangeIndex(start=50, stop=100)

In [8]:
series.head(3)

Unnamed: 0,l1,l2
0,0.696469,0.120629
1,0.286139,0.826341
2,0.226851,0.60306


In [94]:
from skforecast.utils import check_y
from skforecast.utils import check_exog
from skforecast.utils import check_exog_dtypes
from skforecast.utils import get_exog_dtypes
from skforecast.utils import transform_series
from skforecast.utils import preprocess_y
from skforecast.utils import preprocess_exog
from skforecast.utils import transform_dataframe
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from sklearn.base import clone
from copy import copy, deepcopy
import warnings

forecaster = ForecasterAutoregMultiSeries(LinearRegression(), lags=3)

In [95]:
if not isinstance(series, pd.DataFrame):
    raise TypeError(f"`series` must be a pandas DataFrame. Got {type(series)}.")

series_col_names = list(series.columns)

if forecaster.transformer_series is None:
    forecaster.transformer_series_ = {serie: None for serie in series_col_names}
elif not isinstance(forecaster.transformer_series, dict):
    forecaster.transformer_series_ = {serie: clone(forecaster.transformer_series) 
                                for serie in series_col_names}
else:
    forecaster.transformer_series_ = {serie: None for serie in series_col_names}
    # Only elements already present in transformer_series_ are updated
    forecaster.transformer_series_.update(
        (k, v) for k, v in deepcopy(forecaster.transformer_series).items() if k in forecaster.transformer_series_
    )
    series_not_in_transformer_series = set(series.columns) - set(forecaster.transformer_series.keys())
    if series_not_in_transformer_series:
        warnings.warn(
            (f"{series_not_in_transformer_series} not present in `transformer_series`."
                f" No transformation is applied to these series.")
        )

In [96]:
X_levels = []
X_train_col_names = [f"lag_{lag}" for lag in forecaster.lags]

for i, serie in enumerate(series.columns):

    y = series[serie]
    check_y(y=y)
    y = transform_series(
            series            = y,
            transformer       = forecaster.transformer_series_[serie],
            fit               = True,
            inverse_transform = False
        )

    y_values, y_index = preprocess_y(y=y)
    X_train_values, y_train_values = forecaster._create_lags(y=y_values)

    if i == 0:
        X_train = X_train_values
        y_train = y_train_values
    else:
        X_train = np.vstack((X_train, X_train_values))
        y_train = np.append(y_train, y_train_values)

    X_level = [serie]*len(X_train_values)
    X_levels.extend(X_level)

In [97]:
X_levels[:5]

['l1', 'l1', 'l1', 'l1', 'l1']

In [98]:
X_train[:5]

array([[0.22685145, 0.28613933, 0.69646919],
       [0.55131477, 0.22685145, 0.28613933],
       [0.71946897, 0.55131477, 0.22685145],
       [0.42310646, 0.71946897, 0.55131477],
       [0.9807642 , 0.42310646, 0.71946897]])

In [99]:
if exog is not None:
    if len(exog) != len(y):
        raise ValueError(
            f'`exog` must have same number of samples as `y`. '
            f'length `exog`: ({len(exog)}), length `y`: ({len(y)})'
        )
    check_exog(exog=exog, allow_nan=True)
    if isinstance(exog, pd.Series):
        exog = transform_series(
                    series            = exog,
                    transformer       = forecaster.transformer_exog,
                    fit               = True,
                    inverse_transform = False
                )
    else:
        exog = transform_dataframe(
                    df                = exog,
                    transformer       = forecaster.transformer_exog,
                    fit               = True,
                    inverse_transform = False
                )
    check_exog(exog=exog, allow_nan=False)
    _, exog_index = preprocess_exog(exog=exog, return_values=False)
    
    if not (exog_index[:len(y_index)] == y_index).all():
        raise ValueError(
            ("Different index for `y` and `exog`. They must be equal "
                "to ensure the correct alignment of values.")
        )
    forecaster.exog_dtypes = get_exog_dtypes(exog=exog)

In [100]:
exog_to_train = exog.iloc[forecaster.max_lag:, ]
check_exog_dtypes(exog_to_train)
exog_to_train = pd.concat([exog_to_train]*len(series_col_names)).reset_index(drop=True)

  ("`exog` may contain only `int`, `float` or `category` dtypes. Most "


In [101]:
print(len(exog_to_train))
exog_to_train.head(3)

94


Unnamed: 0,col_1,col_2
3,0.130895,a
4,0.321981,a
5,0.661564,a


In [102]:
X_levels = pd.Series(X_levels)
X_levels = pd.get_dummies(X_levels, dtype=float)
print(len(X_levels))
X_levels.head(3)

94


Unnamed: 0,l1,l2
0,1.0,0.0
1,1.0,0.0
2,1.0,0.0


In [71]:
X_train_col_names

['lag_1', 'lag_2', 'lag_3']

In [103]:
X_train = pd.DataFrame(
              data    = X_train,
              columns = X_train_col_names
          )
print(len(X_train))
X_train.head(3)

94


Unnamed: 0,lag_1,lag_2,lag_3
0,0.226851,0.286139,0.696469
1,0.551315,0.226851,0.286139
2,0.719469,0.551315,0.226851


In [104]:
X_train = pd.concat([X_train, X_levels, exog_to_train], axis=1)
X_train.head(3)

InvalidIndexError: Reindexing only valid with uniquely valued Index objects

In [74]:
X_train.dtypes

lag_1    float64
lag_2    float64
lag_3    float64
l1       float64
l2       float64
col_1    float64
col_2     object
dtype: object

In [23]:
X_train = np.column_stack((
                              X_train,
                              np.tile(exog.to_numpy()[forecaster.max_lag:, ], [series.shape[1], 1])
                          ))
X_train

array([[0.22685145, 0.28613933, 0.69646919, 0.13089495, 'a'],
       [0.55131477, 0.22685145, 0.28613933, 0.32198061, 'a'],
       [0.71946897, 0.55131477, 0.22685145, 0.66156434, 'a'],
       [0.42310646, 0.71946897, 0.55131477, 0.84650623, 'a'],
       [0.9807642, 0.42310646, 0.71946897, 0.55325734, 'a'],
       [0.68482974, 0.9807642, 0.42310646, 0.85445249, 'a'],
       [0.4809319, 0.68482974, 0.9807642, 0.38483781, 'a'],
       [0.39211752, 0.4809319, 0.68482974, 0.3167879, 'a'],
       [0.34317802, 0.39211752, 0.4809319, 0.35426468, 'a'],
       [0.72904971, 0.34317802, 0.39211752, 0.17108183, 'a'],
       [0.43857224, 0.72904971, 0.34317802, 0.82911263, 'a'],
       [0.0596779, 0.43857224, 0.72904971, 0.33867085, 'a'],
       [0.39804426, 0.0596779, 0.43857224, 0.55237008, 'a'],
       [0.73799541, 0.39804426, 0.0596779, 0.57855147, 'a'],
       [0.18249173, 0.73799541, 0.39804426, 0.52153306, 'a'],
       [0.17545176, 0.18249173, 0.73799541, 0.00268806, 'a'],
       [0.53155137

In [24]:
exog.head(3)

Unnamed: 0,col_1,col_2
0,0.513128,a
1,0.666625,a
2,0.105908,a


In [9]:
pd.concat([series, exog, None, None], axis=1).head(3)

Unnamed: 0,l1,l2,col_1,col_2
0,0.696469,0.120629,0.513128,a
1,0.286139,0.826341,0.666625,a
2,0.226851,0.60306,0.105908,a


In [5]:
pd.DataFrame(
            data = np.array([[4., 3., 2., 1., 0.],
                             [5., 4., 3., 2., 1.],
                             [6., 5., 4., 3., 2.],
                             [7., 6., 5., 4., 3.],
                             [8., 7., 6., 5., 4.]]),
            index   = np.array([5, 6, 7, 8, 9]),
            columns = ['lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5']
        ).assign(exog=[True]*5).astype({'exog': bool})

Unnamed: 0,lag_1,lag_2,lag_3,lag_4,lag_5,exog
5,4.0,3.0,2.0,1.0,0.0,True
6,5.0,4.0,3.0,2.0,1.0,True
7,6.0,5.0,4.0,3.0,2.0,True
8,7.0,6.0,5.0,4.0,3.0,True
9,8.0,7.0,6.0,5.0,4.0,True


In [7]:
y = pd.Series(np.arange(10), dtype=float)
exog = pd.Series([True]*10, name='exog', dtype=bool)
forecaster = ForecasterAutoreg(LinearRegression(), lags=5)
results = forecaster.create_train_X_y(y=y, exog=exog)



In [14]:
import re
import pytest
import numpy as np
import pandas as pd
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.exceptions import MissingValuesExogWarning
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

In [23]:
    series = pd.DataFrame({'1': pd.Series(np.arange(10, dtype=float)), 
                           '2': pd.Series(np.arange(10, dtype=float))})
    exog = pd.DataFrame({
               'exog_1': np.arange(100, 110, dtype=int),
               'exog_2': np.arange(1000, 1010, dtype=int)
           })

    forecaster = ForecasterAutoregMultiSeries(LinearRegression(), lags=5)
    results = forecaster.create_train_X_y(series=series, exog=exog)    

    expected = (
        pd.DataFrame(
            data = np.array([[4., 3., 2., 1., 0., 105., 1005., 1., 0.],
                             [5., 4., 3., 2., 1., 106., 1006., 1., 0.],
                             [6., 5., 4., 3., 2., 107., 1007., 1., 0.],
                             [7., 6., 5., 4., 3., 108., 1008., 1., 0.],
                             [8., 7., 6., 5., 4., 109., 1009., 1., 0.],
                             [4., 3., 2., 1., 0., 105., 1005., 0., 1.],
                             [5., 4., 3., 2., 1., 106., 1006., 0., 1.],
                             [6., 5., 4., 3., 2., 107., 1007., 0., 1.],
                             [7., 6., 5., 4., 3., 108., 1008., 0., 1.],
                             [8., 7., 6., 5., 4., 109., 1009., 0., 1.]]),
            index   = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            columns = ['lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5', 
                       'exog_1', 'exog_2', '1', '2']
        ).astype({'exog_1': int, 'exog_2': int}),
        pd.Series(
            data  = np.array([5, 6, 7, 8, 9, 5, 6, 7, 8, 9]),
            index = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            name  = 'y',
            dtype = float
        ),
        pd.RangeIndex(start=0, stop=len(series), step=1),
        pd.Index(np.array([5, 6, 7, 8, 9, 5, 6, 7, 8, 9]))
    )

    for i in range(len(expected)):
        if isinstance(expected[i], pd.DataFrame):
            pd.testing.assert_frame_equal(results[i], expected[i])
        elif isinstance(expected[i], pd.Series):
            pd.testing.assert_series_equal(results[i], expected[i])
        else:
            print(4)
            np.testing.assert_array_equal(results[i], expected[i])

4
4


In [36]:
v_exog_1 = [True]
v_exog_2 = [False]
dtype = bool

series = pd.DataFrame({'l1': pd.Series(np.arange(10, dtype=float)), 
                        'l2': pd.Series(np.arange(10, dtype=float))})
exog = pd.DataFrame({'exog_1': v_exog_1*10,
                        'exog_2': v_exog_2*10})

forecaster = ForecasterAutoregMultiSeries(LinearRegression(), lags=5)
results = forecaster.create_train_X_y(series=series, exog=exog)    

expected = (
    pd.DataFrame(
        data = np.array([[4., 3., 2., 1., 0.],
                            [5., 4., 3., 2., 1.],
                            [6., 5., 4., 3., 2.],
                            [7., 6., 5., 4., 3.],
                            [8., 7., 6., 5., 4.],
                            [4., 3., 2., 1., 0.],
                            [5., 4., 3., 2., 1.],
                            [6., 5., 4., 3., 2.],
                            [7., 6., 5., 4., 3.],
                            [8., 7., 6., 5., 4.]]),
        index   = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        columns = ['lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5']
    ).assign(exog_1=v_exog_1*5 + v_exog_1*5, 
                exog_2=v_exog_2*5 + v_exog_2*5, 
                l1=[1.]*5 + [0.]*5, 
                l2=[0.]*5 + [1.]*5).astype({'exog_1': dtype, 'exog_2': dtype}),
    pd.Series(
        data  = np.array([5, 6, 7, 8, 9, 5, 6, 7, 8, 9]),
        index = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        name  = 'y',
        dtype = float
    ),
    pd.RangeIndex(start=0, stop=len(series), step=1),
    pd.Index(np.array([5, 6, 7, 8, 9, 5, 6, 7, 8, 9]))
)

for i in range(len(expected)):
    if isinstance(expected[i], pd.DataFrame):
        pd.testing.assert_frame_equal(results[i], expected[i])
    elif isinstance(expected[i], pd.Series):
        pd.testing.assert_series_equal(results[i], expected[i])
    else:
        np.testing.assert_array_equal(results[i], expected[i])



In [53]:
exog = pd.Categorical([5, 6, 7, 8, 9]*2, categories=range(10))

In [55]:

series = pd.DataFrame({'l1': pd.Series(np.arange(10, dtype=float)), 
                        'l2': pd.Series(np.arange(10, dtype=float))})
forecaster = ForecasterAutoregMultiSeries(
                regressor          = LinearRegression(),
                lags               = 5,
                transformer_series = StandardScaler()
            )
results = forecaster.create_train_X_y(series=series)

In [64]:
results[1].to_numpy()

array([0.17407766, 0.52223297, 0.87038828, 1.21854359, 1.5666989 ,
       0.17407766, 0.52223297, 0.87038828, 1.21854359, 1.5666989 ])

In [87]:
series = pd.DataFrame({'1': np.arange(10, dtype=float), 
                        '2': np.arange(10, dtype=float)},
                        index = pd.date_range("1990-01-01", periods=10, freq='D'))
exog = pd.DataFrame({
            'col_1': [7.5, 24.4, 60.3, 57.3, 50.7, 41.4, 24.4, 87.2, 47.4, 23.8],
            'col_2': ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b']},
            index = pd.date_range("1990-01-01", periods=10, freq='D'))

transformer_exog = ColumnTransformer(
                        [('scale', StandardScaler(), ['col_1']),
                            ('onehot', OneHotEncoder(), ['col_2'])],
                        remainder = 'passthrough',
                        verbose_feature_names_out = False
                    )

forecaster = ForecasterAutoregMultiSeries(
                    regressor          = LinearRegression(),
                    lags               = 3,
                    transformer_series = StandardScaler(),
                    transformer_exog   = transformer_exog
                )
results = forecaster.create_train_X_y(series=series, exog=exog)

expected = (
    pd.DataFrame(
        data = np.array([
                    [-0.87038828, -1.21854359, -1.5666989 ,  0.67431975, 1., 0., 1., 0.],
                    [-0.52223297, -0.87038828, -1.21854359,  0.37482376, 1., 0., 1., 0.],
                    [-0.17407766, -0.52223297, -0.87038828, -0.04719331, 0., 1., 1., 0.],
                    [ 0.17407766, -0.17407766, -0.52223297, -0.81862236, 0., 1., 1., 0.],
                    [ 0.52223297,  0.17407766, -0.17407766,  2.03112731, 0., 1., 1., 0.],
                    [ 0.87038828,  0.52223297,  0.17407766,  0.22507577, 0., 1., 1., 0.],
                    [ 1.21854359,  0.87038828,  0.52223297, -0.84584926, 0., 1., 1., 0.],
                    [-0.87038828, -1.21854359, -1.5666989 ,  0.67431975, 1., 0., 0., 1.],
                    [-0.52223297, -0.87038828, -1.21854359,  0.37482376, 1., 0., 0., 1.],
                    [-0.17407766, -0.52223297, -0.87038828, -0.04719331, 0., 1., 0., 1.],
                    [ 0.17407766, -0.17407766, -0.52223297, -0.81862236, 0., 1., 0., 1.],
                    [ 0.52223297,  0.17407766, -0.17407766,  2.03112731, 0., 1., 0., 1.],
                    [ 0.87038828,  0.52223297,  0.17407766,  0.22507577, 0., 1., 0., 1.],
                    [ 1.21854359,  0.87038828,  0.52223297, -0.84584926, 0., 1., 0., 1.]]),
        index   = pd.RangeIndex(start=0, stop=14, step=1),
        columns = ['lag_1', 'lag_2', 'lag_3', 'col_1',
                    'col_2_a', 'col_2_b', '1', '2']
    ),
    pd.Series(
        data  = np.array([-0.52223297, -0.17407766,  0.17407766,  0.52223297,  0.87038828,
                            1.21854359,  1.5666989 , -0.52223297, -0.17407766,  0.17407766,
                            0.52223297,  0.87038828,  1.21854359,  1.5666989 ]),
        index = pd.RangeIndex(start=0, stop=14, step=1),
        name  = 'y',
        dtype = float
    ),
    pd.date_range("1990-01-01", periods=10, freq='D'),
    pd.Index(pd.DatetimeIndex(['1990-01-04', '1990-01-05', '1990-01-06', '1990-01-07', 
                                '1990-01-08', '1990-01-09', '1990-01-10', '1990-01-04',
                                '1990-01-05', '1990-01-06', '1990-01-07', '1990-01-08',
                                '1990-01-09', '1990-01-10']))
)

for i in range(len(expected)):
    if isinstance(expected[i], pd.DataFrame):
        pd.testing.assert_frame_equal(results[i], expected[i])
    elif isinstance(expected[i], pd.Series):
        pd.testing.assert_series_equal(results[i], expected[i])
    else:
        np.testing.assert_array_equal(results[i], expected[i])

In [86]:
results[0]

Unnamed: 0,lag_1,lag_2,lag_3,col_1,col_2_a,col_2_b,1,2
0,-0.870388,-1.218544,-1.566699,0.67432,1.0,0.0,1.0,0.0
1,-0.522233,-0.870388,-1.218544,0.374824,1.0,0.0,1.0,0.0
2,-0.174078,-0.522233,-0.870388,-0.047193,0.0,1.0,1.0,0.0
3,0.174078,-0.174078,-0.522233,-0.818622,0.0,1.0,1.0,0.0
4,0.522233,0.174078,-0.174078,2.031127,0.0,1.0,1.0,0.0
5,0.870388,0.522233,0.174078,0.225076,0.0,1.0,1.0,0.0
6,1.218544,0.870388,0.522233,-0.845849,0.0,1.0,1.0,0.0
7,-0.870388,-1.218544,-1.566699,0.67432,1.0,0.0,0.0,1.0
8,-0.522233,-0.870388,-1.218544,0.374824,1.0,0.0,0.0,1.0
9,-0.174078,-0.522233,-0.870388,-0.047193,0.0,1.0,0.0,1.0


In [80]:
results[3]

DatetimeIndex(['1990-01-04', '1990-01-05', '1990-01-06', '1990-01-07',
               '1990-01-08', '1990-01-09', '1990-01-10', '1990-01-04',
               '1990-01-05', '1990-01-06', '1990-01-07', '1990-01-08',
               '1990-01-09', '1990-01-10'],
              dtype='datetime64[ns]', freq=None)