In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

'c:\\Users\\jaesc2\\GitHub\\skforecast'

### FOrecasterMultiseries

In [2]:
################################################################################
#                        ForecasterAutoregMultiSeries                          #
#                                                                              #
# This work by skforecast team is licensed under the BSD 3-Clause License.     #
################################################################################
# coding=utf-8

from typing import Union, Tuple, Optional, Callable
import warnings
import logging
import sys
import numpy as np
import pandas as pd
import sklearn
import sklearn.pipeline
from sklearn.base import clone
from sklearn.preprocessing import StandardScaler
from copy import copy, deepcopy
import inspect

import skforecast
from skforecast.ForecasterBase import ForecasterBase
from skforecast.exceptions import IgnoredArgumentWarning
from skforecast.exceptions import MissingValuesExogWarning
from skforecast.utils import initialize_lags
from skforecast.utils import initialize_weights
from skforecast.utils import check_select_fit_kwargs
from skforecast.utils import check_y
from skforecast.utils import check_exog
from skforecast.utils import get_exog_dtypes
from skforecast.utils import check_exog_dtypes
from skforecast.utils import check_interval
from skforecast.utils import check_predict_input
from skforecast.utils import preprocess_y
from skforecast.utils import preprocess_last_window
from skforecast.utils import preprocess_exog
from skforecast.utils import expand_index
from skforecast.utils import transform_series
from skforecast.utils import transform_dataframe
from skforecast.preprocessing import TimeSeriesDifferentiator

logging.basicConfig(
    format = '%(name)-10s %(levelname)-5s %(message)s', 
    level  = logging.INFO,
)

class ForecasterAutoregMultiSeries(ForecasterBase):
    """    
    """
    
    def __init__(
        self,
        regressor: object,
        lags: Union[int, np.ndarray, list],
        transformer_series: Optional[Union[object, dict]]=StandardScaler(),
        transformer_exog: Optional[object]=None,
        weight_func: Optional[Union[Callable, dict]]=None,
        series_weights: Optional[dict]=None,
        differentiation: Optional[int]=None,
        fit_kwargs: Optional[dict]=None,
        forecaster_id: Optional[Union[str, int]]=None
    ) -> None:
        
        self.regressor               = regressor
        self.transformer_series      = transformer_series
        self.transformer_series_     = None
        self.transformer_exog        = transformer_exog
        self.weight_func             = weight_func
        self.weight_func_            = None
        self.source_code_weight_func = None
        self.series_weights          = series_weights
        self.series_weights_         = None
        self.differentiation         = differentiation
        self.differentiator          = None
        self.differentiator_         = None
        self.index_type              = None
        self.index_freq              = None
        self.index_values            = None
        self.training_range          = None
        self.last_window             = None
        self.included_exog           = False
        self.exog_type               = None
        self.exog_dtypes             = None
        self.exog_col_names          = None
        self.series_col_names        = None
        self.X_train_col_names       = None
        self.in_sample_residuals     = None
        self.out_sample_residuals    = None
        self.fitted                  = False
        self.creation_date           = pd.Timestamp.today().strftime('%Y-%m-%d %H:%M:%S')
        self.fit_date                = None
        self.skforecast_version      = skforecast.__version__
        self.python_version          = sys.version.split(" ")[0]
        self.forecaster_id           = forecaster_id
        
        self.lags = initialize_lags(type(self).__name__, lags)
        self.max_lag = max(self.lags)
        self.window_size = self.max_lag

        self.weight_func, self.source_code_weight_func, self.series_weights = initialize_weights(
            forecaster_name = type(self).__name__, 
            regressor       = regressor, 
            weight_func     = weight_func, 
            series_weights  = series_weights
        )

        if self.differentiation is not None:
            if not isinstance(differentiation, int) or differentiation < 1:
                raise ValueError(
                    (f"Argument `differentiation` must be an integer equal to or "
                     f"greater than 1. Got {differentiation}.")
                )
            self.window_size += self.differentiation
            self.differentiator = TimeSeriesDifferentiator(order=self.differentiation)

        self.fit_kwargs = check_select_fit_kwargs(
                              regressor  = regressor,
                              fit_kwargs = fit_kwargs
                          )


    def __repr__(
        self
    ) -> str:
        """
        Information displayed when a ForecasterAutoregMultiSeries object is printed.
        """

        if isinstance(self.regressor, sklearn.pipeline.Pipeline):
            name_pipe_steps = tuple(name + "__" for name in self.regressor.named_steps.keys())
            params = {key : value for key, value in self.regressor.get_params().items() \
                      if key.startswith(name_pipe_steps)}
        else:
            params = self.regressor.get_params()

        info = (
            f"{'=' * len(type(self).__name__)} \n"
            f"{type(self).__name__} \n"
            f"{'=' * len(type(self).__name__)} \n"
            f"Regressor: {self.regressor} \n"
            f"Lags: {self.lags} \n"
            f"Transformer for series: {self.transformer_series} \n"
            f"Transformer for exog: {self.transformer_exog} \n"
            f"Window size: {self.window_size} \n"
            f"Series levels (names): {self.series_col_names} \n"
            f"Series weights: {self.series_weights} \n"
            f"Weight function included: {True if self.weight_func is not None else False} \n"
            f"Exogenous included: {self.included_exog} \n"
            f"Type of exogenous variable: {self.exog_type} \n"
            f"Exogenous variables names: {self.exog_col_names} \n"
            f"Training range: {self.training_range.to_list() if self.fitted else None} \n"
            f"Training index type: {str(self.index_type).split('.')[-1][:-2] if self.fitted else None} \n"
            f"Training index frequency: {self.index_freq if self.fitted else None} \n"
            f"Regressor parameters: {params} \n"
            f"fit_kwargs: {self.fit_kwargs} \n"
            f"Creation date: {self.creation_date} \n"
            f"Last fit date: {self.fit_date} \n"
            f"Skforecast version: {self.skforecast_version} \n"
            f"Python version: {self.python_version} \n"
            f"Forecaster id: {self.forecaster_id} \n"
        )

        return info

    
    def _create_lags(
        self, 
        y: np.ndarray, 
        series_name: str
    ) -> Tuple[np.ndarray, np.ndarray]:
        """        
        """
          
        n_splits = len(y) - self.max_lag
        if n_splits <= 0:
            raise ValueError(
                (f"The maximum lag ({self.max_lag}) must be less than the length "
                 f"of the series '{series_name}', ({len(y)}).")
            )
        
        X_data = np.full(shape=(n_splits, len(self.lags)), fill_value=np.nan, dtype=float)

        for i, lag in enumerate(self.lags):
            X_data[:, i] = y[self.max_lag - lag: -lag]

        y_data = y[self.max_lag:]
            
        return X_data, y_data
    

    def _create_train_X_y_single_series(
        self,
        y: pd.Series,
        exog: Optional[Union[pd.Series, pd.DataFrame]]=None
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series]:
        """        
        """

        series_name = y.name
        check_y(y=y)
        y = transform_series(
                series            = y,
                transformer       = self.transformer_series_[series_name],
                fit               = True,
                inverse_transform = False
            )
        
        # TODO: check if this is necessary
        y_values, y_index = preprocess_y(y=y)

        if self.differentiation is not None:
            y_values = self.differentiator_[series_name].fit_transform(y_values)
        
        X_train, y_train = self._create_lags(y=y_values, series_name=series_name)
        X_train_lags = pd.DataFrame(
                        data    = X_train,
                        columns = self.X_train_col_names,
                        index   = y_index[self.max_lag: ]
                    )

        if self.included_exog:
            if exog is not None:
                # The first `self.max_lag` positions have to be removed from exog
                # since they are not in X_train.
                X_train_exog = exog.iloc[self.max_lag:, ]
            else:
                X_train_exog = pd.DataFrame(
                                data    = np.nan,
                                columns = ['_dummy_exog_col_to_keep_shape'],
                                index   = y_index[self.max_lag: ]
                            )
        else:
            X_train_exog = None

        y_train = pd.Series(
                    data  = y_train,
                    index = y_index[self.max_lag: ],
                    name  = 'y'
                )

        if self.differentiation is not None:
            y_train = y_train.iloc[self.differentiation: ]
            X_train_lags = X_train_lags.iloc[self.differentiation: ]
            if X_train_exog is not None:
                X_train_exog = X_train_exog.iloc[self.differentiation: ]

        # assert X_train_lags.index.equals(y_train.index)
        # assert X_train_exog.index.equals(y_train.index)
                        
        return X_train_lags, X_train_exog, y_train


    def create_train_X_y(
        self,
        series: Union[pd.DataFrame, dict],
        exog: Optional[Union[pd.Series, pd.DataFrame, dict]]=None
    ) -> Tuple[pd.DataFrame, pd.Series, pd.Index, pd.Index]:
        """
        Create training matrices from multiple time series and exogenous
        variables.
        
        Parameters
        ----------
        series : pandas DataFrame, dict
            Training time series.
        exog : pandas Series, pandas DataFrame, dict, default `None`
            Exogenous variable/s included as predictor/s. Must have the same
            number of observations as `series` and their indexes must be aligned.

        Returns
        -------
        X_train : pandas DataFrame
            Training values (predictors).
        y_train : pandas Series
            Values (target) of the time series related to each row of `X_train`.
            Shape: (len(series) - self.max_lag, )
        y_index : pandas Index
            Index of `series`.
        y_train_index: pandas Index
            Index of `y_train`.
        
        """

        if isinstance(series, pd.DataFrame):
            series_col_names = list(series.columns)
            series_dict = series.to_dict("series")
        elif isinstance(series, dict):
            series_col_names = list(series.keys())
            series_dict = series
        else:
            raise TypeError(f"`series` must be a pandas DataFrame or dict. Got {type(series)}.")
        
        # TODO: incluir reset en fit
        self.series_col_names = series_col_names
        
                    
        # Check that all series have the same index type and frequency
        indexes_dtypes = [series.index.dtype.name 
                        for series in series_dict.values()]
        if not len(set(indexes_dtypes)) == 1:
            raise TypeError("All series must have the same index type.")

        indexes_freq = [series.index.freq 
                        if isinstance(series.index, pd.DatetimeIndex) else series.index.step 
                        for series in series_dict.values()]
        if not len(set(indexes_freq)) == 1:
            raise ValueError("All series must have the same frequency/step.")
        
        exog_dict = {serie: None for serie in series_col_names}
        if exog is not None:
            
            if not isinstance(exog, (pd.Series, pd.DataFrame, dict)):
                raise TypeError(
                    (f"`exog` must be a pandas Series, DataFrame or dict. "
                     f"Got {type(exog)}.")
                )
            
            if not isinstance(exog, dict):
                exog_dict = {serie: exog for serie in series_col_names}
            else:
                # Only elements already present in exog_dict are updated
                exog_dict.update(
                    (k, v) for k, v in exog.items() 
                    if k in exog_dict
                )
                series_not_in_exog = set(series_col_names) - set(exog.keys())
                if series_not_in_exog:
                    warnings.warn(
                        (f"{series_not_in_exog} not present in `exog`. All values "
                         f"of the exogenous variables for these series will be NaN."),
                         MissingValuesExogWarning
                    )
            
            # Convert exog to dataframe if it is a series
            for k, v in exog_dict.items():
                check_exog(exog=v, allow_nan=True)
                if isinstance(v, pd.Series):
                    v = v.to_frame()
                exog_dict[k] = v

            exog_col_names = list(set(column for df in exog_dict.values() 
                                      for column in df.columns.to_list()))

            # Check that all exog have the same dtypes for common columns
            exog_dtype_dict = {col_name: set() for col_name in exog_col_names}
            for exog in exog_dict.values():
                for col_name in exog.columns:
                    exog_dtype_dict[col_name].add(exog[col_name].dtype.name)
            
            for col_name, dtypes in exog_dtype_dict.items():
                if len(dtypes) > 1:
                    raise TypeError(
                        (f"Column {col_name} has different dtypes in different exog "
                         f"DataFrames or Series.")
                    )
                
            self.included_exog = True
            self.exog_type = type(exog)
            self.exog_col_names = exog_col_names

            if len(set(self.exog_col_names) - set(self.series_col_names)) != len(self.exog_col_names):
                raise ValueError(
                    (f"`exog` cannot contain a column named the same as one of the "
                     f"series (column names of series).\n"
                     f"    `series` columns : {self.series_col_names}.\n"
                     f"    `exog`   columns : {self.exog_col_names}.")
                )

        if self.transformer_series is None:
            self.transformer_series_ = {serie: None for serie in series_col_names}
        elif not isinstance(self.transformer_series, dict):
            self.transformer_series_ = {serie: clone(self.transformer_series) 
                                        for serie in series_col_names}
        else:
            self.transformer_series_ = {serie: None for serie in series_col_names}
            # Only elements already present in transformer_series_ are updated
            self.transformer_series_.update(
                (k, v) for k, v in deepcopy(self.transformer_series).items() 
                if k in self.transformer_series_
            )
            series_not_in_transformer_series = set(series_col_names) - set(self.transformer_series.keys())
            if series_not_in_transformer_series:
                warnings.warn(
                    (f"{series_not_in_transformer_series} not present in `transformer_series`."
                     f" No transformation is applied to these series."),
                     IgnoredArgumentWarning
                )
        
        if self.differentiation is None:
            self.differentiator_ = {serie: None for serie in series_col_names}
        else:
            self.differentiator_ = {serie: clone(self.self.differentiator) 
                                    for serie in series_col_names}
            
        # Remove leading and trailing nans from each series.
        for k, v in series_dict.items():
            series_dict[k] = v.loc[v.first_valid_index() : v.last_valid_index()]
                    
        X_train_lags_buffer = []
        X_train_exog_buffer = []
        y_train_buffer = []

        self.X_train_col_names = [f"lag_{i}" for i in self.lags]
        for key in series_dict.keys():

            y = series_dict[key]
            exog_level = exog_dict[key]

            if self.included_exog:

                if y.index.dtype.name != exog_level.index.dtype.name:
                    raise TypeError(
                        (f"Series '{key}' and its `exog` must have the same index type.")
                    )
                
                y_freq = y.index.freq if isinstance(y.index, pd.DatetimeIndex) else y.index.step
                exog_freq = exog_level.index.freq if isinstance(exog_level.index, pd.DatetimeIndex) else exog_level.index.step
                if y_freq != exog_freq:
                    raise TypeError(
                        (f"Series '{key}' and its `exog` must have the same frequency/step.")
                    )

                index_intersection = y.index.intersection(exog_level.index)
                if len(index_intersection) == 0:
                    warnings.warn(
                        (f"Series '{key}' and its `exog` do not have the same index. "
                         f"All exog values will be NaN for the period of the series."),
                        MissingValuesExogWarning
                    )
                elif len(index_intersection) != len(y):
                    warnings.warn(
                        (f"Series '{key}' and its `exog` do not have the same length. "
                         f"Exog values will be NaN for the not matched period of the series."),
                        MissingValuesExogWarning
                    )
                    
                exog_level = exog_level.loc[index_intersection]
                if len(index_intersection) != len(y):
                    exog_level = exog_level.reindex(y.index, fill_value=np.nan)

            X_train_lags, X_train_exog, y_train = (
                self._create_train_X_y_single_series(y=y, exog=exog_level)
            )
            X_train_lags['_level_skforecast'] = key
            
            X_train_lags_buffer.append(X_train_lags)
            X_train_exog_buffer.append(X_train_exog)
            y_train_buffer.append(y_train)

        X_train = pd.concat(X_train_lags_buffer, axis=0)
        y_train = pd.concat(y_train_buffer, axis=0)

        X_train = pd.get_dummies(X_train, columns=['_level_skforecast'], dtype=float)
        X_train.columns = X_train.columns.str.replace('_level_skforecast', '')

        if self.included_exog:
            X_train_exog = pd.concat(X_train_exog_buffer, axis=0)
            if '_dummy_exog_col_to_keep_shape' in X_train_exog.columns:
                X_train_exog = X_train_exog.drop(columns=['_dummy_exog_col_to_keep_shape'])
            X_train_exog = transform_dataframe(
                               df                = X_train_exog,
                               transformer       = self.transformer_exog,
                               fit               = True,
                               inverse_transform = False
                           )
            
            check_exog_dtypes(X_train_exog)
            self.exog_dtypes = get_exog_dtypes(exog=X_train_exog)

            # TODO: check if this is necessary
            if not (X_train_exog.index == X_train.index).all():
                raise ValueError(
                    ("Different index for `y` and `exog`. They must be equal "
                     "to ensure the correct alignment of values.")
                )
            X_train = pd.concat([X_train, X_train_exog], axis=1)

        self.X_train_col_names = X_train.columns.to_list()
        y_train_index = y_train.index.to_numpy()

        # TODO: check if this is necessary
        y_index = None

        return X_train, y_train, y_index, y_train_index

    
    def create_sample_weights(
        self,
        series: pd.DataFrame,
        X_train: pd.DataFrame,
        y_train_index: pd.Index,
    )-> np.ndarray:
        """        
        """

        weights = None
        weights_samples = None
        weights_series = None

        if self.series_weights is not None:
            # Series not present in series_weights have a weight of 1 in all their samples.
            # Keys in series_weights not present in series are ignored.
            series_not_in_series_weights = set(series.columns) - set(self.series_weights.keys())
            if series_not_in_series_weights:
                warnings.warn(
                    (f"{series_not_in_series_weights} not present in `series_weights`. "
                     f"A weight of 1 is given to all their samples."),
                     IgnoredArgumentWarning
                )
            self.series_weights_ = {col: 1. for col in series.columns}
            self.series_weights_.update((k, v) for k, v in self.series_weights.items() 
                                        if k in self.series_weights_)
            weights_series = [np.repeat(self.series_weights_[serie], sum(X_train[serie])) 
                              for serie in series.columns]
            weights_series = np.concatenate(weights_series)

        if self.weight_func is not None:
            if isinstance(self.weight_func, Callable):
                self.weight_func_ = {col: copy(self.weight_func) 
                                     for col in series.columns}
            else:
                # Series not present in weight_func have a weight of 1 in all their samples
                series_not_in_weight_func = set(series.columns) - set(self.weight_func.keys())
                if series_not_in_weight_func:
                    warnings.warn(
                        (f"{series_not_in_weight_func} not present in `weight_func`. "
                         f"A weight of 1 is given to all their samples."),
                         IgnoredArgumentWarning
                    )
                self.weight_func_ = {col: lambda x: np.ones_like(x, dtype=float) 
                                     for col in series.columns}
                self.weight_func_.update((k, v) for k, v in self.weight_func.items() 
                                         if k in self.weight_func_)
                
            weights_samples = []
            for key in self.weight_func_.keys():
                idx = y_train_index[X_train[X_train[key] == 1.0].index]
                weights_samples.append(self.weight_func_[key](idx))
            weights_samples = np.concatenate(weights_samples)

        if weights_series is not None:
            weights = weights_series
            if weights_samples is not None:
                weights = weights * weights_samples
        else:
            if weights_samples is not None:
                weights = weights_samples

        if weights is not None:
            if np.isnan(weights).any():
                raise ValueError(
                    "The resulting `weights` cannot have NaN values."
                )
            if np.any(weights < 0):
                raise ValueError(
                    "The resulting `weights` cannot have negative values."
                )
            if np.sum(weights) == 0:
                raise ValueError(
                    ("The resulting `weights` cannot be normalized because "
                     "the sum of the weights is zero.")
                )

        return weights

        
    def fit(
        self,
        series: pd.DataFrame,
        exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
        store_in_sample_residuals: bool=True
    ) -> None:
        """
        
        """
        
        # Reset values in case the forecaster has already been fitted.
        self.series_col_names    = None
        self.index_type          = None
        self.index_freq          = None
        self.index_values        = None
        self.last_window         = None
        self.included_exog       = False
        self.exog_type           = None
        self.exog_dtypes         = None
        self.exog_col_names      = None
        self.series_col_names    = None
        self.X_train_col_names   = None
        self.in_sample_residuals = None
        self.fitted              = False
        self.training_range      = None

        X_train, y_train, y_index, y_train_index = self.create_train_X_y(series=series, exog=exog)
        sample_weight = self.create_sample_weights(
                            series        = series,
                            X_train       = X_train,
                            y_train_index = y_train_index,
                        )

        if sample_weight is not None:
            self.regressor.fit(
                X             = X_train,
                y             = y_train,
                sample_weight = sample_weight,
                **self.fit_kwargs
            )
        else:
            self.regressor.fit(X=X_train, y=y_train, **self.fit_kwargs)
            
        self.fitted = True
        self.fit_date = pd.Timestamp.today().strftime('%Y-%m-%d %H:%M:%S')
        self.training_range = y_index[[0, -1]]
        self.index_type = type(y_index)
        if isinstance(y_index, pd.DatetimeIndex):
            self.index_freq = y_index.freqstr
        else: 
            self.index_freq = y_index.step
        self.index_values = y_index

        in_sample_residuals = {}
        
        # This is done to save time during fit in functions such as backtesting()
        if store_in_sample_residuals:

            residuals = y_train - self.regressor.predict(X_train)

            for serie in series.columns:
                in_sample_residuals[serie] = residuals.loc[X_train[serie] == 1.].to_numpy()
                if len(in_sample_residuals[serie]) > 1000:
                    # Only up to 1000 residuals are stored
                    rng = np.random.default_rng(seed=123)
                    in_sample_residuals[serie] = rng.choice(
                                                     a       = in_sample_residuals[serie], 
                                                     size    = 1000, 
                                                     replace = False
                                                 )
        else:
            for serie in series.columns:
                in_sample_residuals[serie] = None

        self.in_sample_residuals = in_sample_residuals

        # The last time window of training data is stored so that lags needed as
        # predictors in the first iteration of `predict()` can be calculated.
        self.last_window = series.iloc[-self.max_lag:, ].copy()


    def _recursive_predict(
        self,
        steps: int,
        level: str,
        last_window: np.ndarray,
        exog: Optional[np.ndarray]=None
    ) -> np.ndarray:
        """
        
        """
        
        predictions = np.full(shape=steps, fill_value=np.nan)

        for i in range(steps):
            X = last_window[-self.lags].reshape(1, -1)
            if exog is not None:
                X = np.column_stack((X, exog[i, ].reshape(1, -1)))
            
            levels_dummies = np.zeros(shape=(1, len(self.series_col_names)), dtype=float)
            levels_dummies[0][self.series_col_names.index(level)] = 1.

            X = np.column_stack((X, levels_dummies.reshape(1, -1)))

            with warnings.catch_warnings():
                # Suppress scikit-learn warning: "X does not have valid feature names,
                # but NoOpTransformer was fitted with feature names".
                warnings.simplefilter("ignore")
                prediction = self.regressor.predict(X)
                predictions[i] = prediction.ravel()[0]

            # Update `last_window` values. The first position is discarded and 
            # the new prediction is added at the end.
            last_window = np.append(last_window[1:], prediction)

        return predictions

            
    def predict(
        self,
        steps: int,
        levels: Optional[Union[str, list]]=None,
        last_window: Optional[pd.DataFrame]=None,
        exog: Optional[Union[pd.Series, pd.DataFrame]]=None
    ) -> pd.DataFrame:
        """

        """
        
        if levels is None:
            levels = self.series_col_names
        elif isinstance(levels, str):
            levels = [levels]

        if last_window is None:
            last_window = self.last_window
        
        check_predict_input(
            forecaster_name  = type(self).__name__,
            steps            = steps,
            fitted           = self.fitted,
            included_exog    = self.included_exog,
            index_type       = self.index_type,
            index_freq       = self.index_freq,
            window_size      = self.window_size,
            last_window      = last_window,
            last_window_exog = None,
            exog             = exog,
            exog_type        = self.exog_type,
            exog_col_names   = self.exog_col_names,
            interval         = None,
            alpha            = None,
            max_steps        = None,
            levels           = levels,
            series_col_names = self.series_col_names
        )

        last_window = last_window.iloc[-self.window_size:, ].copy()
        
        if exog is not None:
            if isinstance(exog, pd.DataFrame):
                exog = transform_dataframe(
                           df                = exog,
                           transformer       = self.transformer_exog,
                           fit               = False,
                           inverse_transform = False
                       )
            else:
                exog = transform_series(
                           series            = exog,
                           transformer       = self.transformer_exog,
                           fit               = False,
                           inverse_transform = False
                       )
            check_exog_dtypes(exog=exog)
            exog_values = exog.to_numpy()[:steps]
        else:
            exog_values = None

        predictions = []

        for level in levels:

            last_window_level = transform_series(
                                    series            = last_window[level],
                                    transformer       = self.transformer_series_[level],
                                    fit               = False,
                                    inverse_transform = False
                                )
            last_window_values, last_window_index = preprocess_last_window(
                                                        last_window = last_window_level
                                                    )
                
            preds_level = self._recursive_predict(
                              steps       = steps,
                              level       = level,
                              last_window = last_window_values,
                              exog        = exog_values
                          )

            preds_level = pd.Series(
                              data  = preds_level,
                              index = expand_index(
                                          index = last_window_index,
                                          steps = steps
                                      ),
                              name = level
                          )

            preds_level = transform_series(
                              series            = preds_level,
                              transformer       = self.transformer_series_[level],
                              fit               = False,
                              inverse_transform = True
                          )

            predictions.append(preds_level)    

        predictions = pd.concat(predictions, axis=1)

        return predictions


    def predict_bootstrapping(
        self,
        steps: int,
        levels: Optional[Union[str, list]]=None,
        last_window: Optional[pd.DataFrame]=None,
        exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
        n_boot: int=500,
        random_state: int=123,
        in_sample_residuals: bool=True
    ) -> dict:
        """
        """
        
        if self.fitted:
            if levels is None:
                levels = self.series_col_names
            elif isinstance(levels, str):
                levels = [levels]

            if in_sample_residuals:
                if not set(levels).issubset(set(self.in_sample_residuals.keys())):
                    raise ValueError(
                        (f"Not `forecaster.in_sample_residuals` for levels: "
                         f"{set(levels) - set(self.in_sample_residuals.keys())}.")
                    )
                residuals_levels = self.in_sample_residuals
            else:
                if self.out_sample_residuals is None:
                    raise ValueError(
                        ("`forecaster.out_sample_residuals` is `None`. Use "
                         "`in_sample_residuals=True` or method "
                         "`set_out_sample_residuals()` before `predict_interval()`, "
                         "`predict_bootstrapping()`,`predict_quantiles()` or "
                         "`predict_dist()`.")
                    )
                else:
                    if not set(levels).issubset(set(self.out_sample_residuals.keys())):
                        raise ValueError(
                            (f"Not `forecaster.out_sample_residuals` for levels: "
                             f"{set(levels) - set(self.out_sample_residuals.keys())}. "
                             f"Use method `set_out_sample_residuals()`.")
                        )
                residuals_levels = self.out_sample_residuals
                    
            check_residuals = (
                "forecaster.in_sample_residuals" if in_sample_residuals
                else "forecaster.out_sample_residuals"
            )
            for level in levels:
                if residuals_levels[level] is None:
                    raise ValueError(
                        (f"forecaster residuals for level '{level}' are `None`. "
                         f"Check `{check_residuals}`.")
                    )
                elif (any(element is None for element in residuals_levels[level]) or
                      np.any(np.isnan(residuals_levels[level]))):
                    raise ValueError(
                        (f"forecaster residuals for level '{level}' contains `None` "
                         f"or `NaNs` values. Check `{check_residuals}`.")
                    )

        if last_window is None:
            last_window = self.last_window

        check_predict_input(
            forecaster_name  = type(self).__name__,
            steps            = steps,
            fitted           = self.fitted,
            included_exog    = self.included_exog,
            index_type       = self.index_type,
            index_freq       = self.index_freq,
            window_size      = self.window_size,
            last_window      = last_window,
            last_window_exog = None,
            exog             = exog,
            exog_type        = self.exog_type,
            exog_col_names   = self.exog_col_names,
            interval         = None,
            alpha            = None,
            max_steps        = None,
            levels           = levels,
            series_col_names = self.series_col_names
        )

        last_window = last_window.iloc[-self.window_size:, ].copy()

        if exog is not None:
            if isinstance(exog, pd.DataFrame):
                exog = transform_dataframe(
                           df                = exog,
                           transformer       = self.transformer_exog,
                           fit               = False,
                           inverse_transform = False
                       )
            else:
                exog = transform_series(
                           series            = exog,
                           transformer       = self.transformer_exog,
                           fit               = False,
                           inverse_transform = False
                       )
            exog_values = exog.to_numpy()[:steps]
        else:
            exog_values = None
        
        boot_predictions = {}

        for level in levels:
        
            last_window_level = transform_series(
                                    series            = last_window[level],
                                    transformer       = self.transformer_series_[level],
                                    fit               = False,
                                    inverse_transform = False
                                )
            last_window_values, last_window_index = preprocess_last_window(
                                                        last_window = last_window_level
                                                    )

            level_boot_predictions = np.full(
                                         shape      = (steps, n_boot),
                                         fill_value = np.nan,
                                         dtype      = float
                                     )
            rng = np.random.default_rng(seed=random_state)
            seeds = rng.integers(low=0, high=10000, size=n_boot)

            residuals = residuals_levels[level]

            for i in range(n_boot):
                # In each bootstraping iteration the initial last_window and exog 
                # need to be restored.
                last_window_boot = last_window_values.copy()
                exog_boot = exog_values.copy() if exog is not None else None

                rng = np.random.default_rng(seed=seeds[i])
                sample_residuals = rng.choice(
                                       a       = residuals,
                                       size    = steps,
                                       replace = True
                                   )

                for step in range(steps):

                    prediction = self._recursive_predict(
                                     steps       = 1,
                                     level       = level,
                                     last_window = last_window_boot,
                                     exog        = exog_boot 
                                 )
                    
                    prediction_with_residual = prediction + sample_residuals[step]
                    level_boot_predictions[step, i] = prediction_with_residual[0]

                    last_window_boot = np.append(
                                           last_window_boot[1:],
                                           prediction_with_residual
                                       )
                    if exog is not None:
                        exog_boot = exog_boot[1:]

            level_boot_predictions = pd.DataFrame(
                                         data    = level_boot_predictions,
                                         index   = expand_index(last_window_index, steps=steps),
                                         columns = [f"pred_boot_{i}" for i in range(n_boot)]
                                     )

            if self.transformer_series_[level]:
                for col in level_boot_predictions.columns:
                    level_boot_predictions[col] = transform_series(
                                                      series            = level_boot_predictions[col],
                                                      transformer       = self.transformer_series_[level],
                                                      fit               = False,
                                                      inverse_transform = True
                                                  )
            
            boot_predictions[level] = level_boot_predictions
        
        return boot_predictions


    def predict_interval(
        self,
        steps: int,
        levels: Optional[Union[str, list]]=None,
        last_window: Optional[pd.DataFrame]=None,
        exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
        interval: list=[5, 95],
        n_boot: int=500,
        random_state: int=123,
        in_sample_residuals: bool=True
    ) -> pd.DataFrame:
        """
        """
        
        if levels is None:
            levels = self.series_col_names
        elif isinstance(levels, str):
            levels = [levels]
        
        check_interval(interval=interval)

        preds = self.predict(
                    steps       = steps,
                    levels      = levels,
                    last_window = last_window,
                    exog        = exog
                )

        boot_predictions = self.predict_bootstrapping(
                               steps               = steps,
                               levels              = levels,
                               last_window         = last_window,
                               exog                = exog,
                               n_boot              = n_boot,
                               random_state        = random_state,
                               in_sample_residuals = in_sample_residuals
                           )

        interval = np.array(interval)/100
        predictions = []

        for level in levels:
            preds_interval = boot_predictions[level].quantile(q=interval, axis=1).transpose()
            preds_interval.columns = [f'{level}_lower_bound', f'{level}_upper_bound']
            predictions.append(preds[level])
            predictions.append(preds_interval)
        
        predictions = pd.concat(predictions, axis=1)

        return predictions

    
    def set_params(
        self, 
        params: dict
    ) -> None:
        """
        Set new values to the parameters of the scikit learn model stored in the
        forecaster.
        
        Parameters
        ----------
        params : dict
            Parameters values.

        Returns
        -------
        None
        
        """

        self.regressor = clone(self.regressor)
        self.regressor.set_params(**params)


    def set_fit_kwargs(
        self, 
        fit_kwargs: dict
    ) -> None:
        """
        Set new values for the additional keyword arguments passed to the `fit` 
        method of the regressor.
        
        Parameters
        ----------
        fit_kwargs : dict
            Dict of the form {"argument": new_value}.

        Returns
        -------
        None
        
        """

        self.fit_kwargs = check_select_fit_kwargs(self.regressor, fit_kwargs=fit_kwargs)
        
        
    def set_lags(
        self, 
        lags: Union[int, list, np.ndarray, range]
    ) -> None:
        """      
        Set new value to the attribute `lags`.
        Attributes `max_lag` and `window_size` are also updated.
        
        Parameters
        ----------
        lags : int, list, numpy ndarray, range
            Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1.

            - `int`: include lags from 1 to `lags` (included).
            - `list`, `1d numpy ndarray` or `range`: include only lags present in 
            `lags`, all elements must be int.

        Returns
        -------
        None
        
        """
        
        self.lags = initialize_lags(type(self).__name__, lags)            
        self.max_lag  = max(self.lags)
        self.window_size = max(self.lags)

## Tests

In [42]:
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import series
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

In [43]:
series = series.head(10)
print(series.shape)
series.columns = ['l1', 'l2']
series.head(2)

(10, 2)


Unnamed: 0,l1,l2
0,0.696469,0.120629
1,0.286139,0.826341


In [55]:
# Exog for series l1
exog_l1 = pd.DataFrame({
              'exog1': np.arange(10),
              'exog2': np.arange(50, 60),
          })


# Exog for series l2
exog_l2 = pd.Series(np.arange(100, 110), name='exog1')


# Dictionary with exog for each series
exog = {
    'l1': exog_l1,
    'l2': exog_l2
}

In [100]:
# Create forecaster
forecaster = ForecasterAutoregMultiSeries(
                 regressor = LinearRegression(),
                 lags = 3,
                 transformer_series = None
             )

In [4]:
import pandas as pd
import numpy as np

exog_l1 = pd.DataFrame({
              'exog1': np.arange(10),
              'exog2': np.arange(50, 60),
          })
exog_l1.index = pd.date_range(start='2020-01-01', periods=10, freq='D')

exog_l1.index.dtype.name

'datetime64[ns]'

In [17]:
exog_test = pd.DataFrame(np.array([3, 7, 15]))
exog_test.index = pd.to_datetime(['2020-01-03', '2020-01-07', '2020-01-15'])
exog_test

Unnamed: 0,0
2020-01-03,3
2020-01-07,7
2020-01-15,15


In [21]:
exog_test.index.dtype.name

'datetime64[ns]'

In [26]:
if not len(set(['A', None])) == 1:
    print(1)

1


In [29]:
series_test = pd.Series(np.arange(20))
series_test.name

In [30]:
series_test = pd.DataFrame(pd.Series(np.arange(20)), columns=['serie1'])
series_test.index = pd.date_range(start='2020-01-01', periods=20, freq='D')
series_test

Unnamed: 0,serie1
2020-01-01,0
2020-01-02,1
2020-01-03,2
2020-01-04,3
2020-01-05,4
2020-01-06,5
2020-01-07,6
2020-01-08,7
2020-01-09,8
2020-01-10,9


In [32]:
%%timeit

series_test.squeeze()

32.7 µs ± 477 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [33]:
%%timeit

series_test.iloc[:, 0]

27 µs ± 444 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [None]:
series_test2 = 

In [11]:
series_1 = pd.Series(np.arange(20))
series_1.index = pd.date_range(start='2020-01-01', periods=20, freq='D')

series_2 = pd.Series(np.arange(10))
series_2.index = pd.date_range(start='2020-01-05', periods=10, freq='D')

In [12]:
series_2.loc[series_1.index]

KeyError: "[Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00'), Timestamp('2020-01-03 00:00:00'), Timestamp('2020-01-04 00:00:00'), Timestamp('2020-01-15 00:00:00'), Timestamp('2020-01-16 00:00:00'), Timestamp('2020-01-17 00:00:00'), Timestamp('2020-01-18 00:00:00'), Timestamp('2020-01-19 00:00:00'), Timestamp('2020-01-20 00:00:00')] not in index"

In [35]:
a = None

if a:
    print(1)

In [5]:
from sklearn.base import clone
from skforecast.preprocessing import TimeSeriesDifferentiator

differentiator = TimeSeriesDifferentiator(order=1)
differentiator2 = clone(differentiator)
differentiator2

In [8]:
differentiator2.fit_transform(pd.Series(np.arange(10), index=pd.date_range(start='2020-01-01', periods=10, freq='D')))

  self.initial_values.append(X[0])
  self.last_values.append(X[-1])


array([nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [19]:
pd.concat([series_test, exog_test], axis=1)

Unnamed: 0,serie1,0
2020-01-01,0,
2020-01-02,1,
2020-01-03,2,3.0
2020-01-04,3,
2020-01-05,4,
2020-01-06,5,
2020-01-07,6,7.0
2020-01-08,7,
2020-01-09,8,
2020-01-10,9,


In [50]:
# Series without exog
# ==============================================================================

# X_train
display(forecaster.create_train_X_y(series=series)[0])

# y_train
display(forecaster.create_train_X_y(series=series)[1])

# y_index
display(forecaster.create_train_X_y(series=series)[2])

# y_train_index
display(forecaster.create_train_X_y(series=series)[3])

Unnamed: 0,lag_1,lag_2,lag_3,level
3,0.226851,0.286139,0.696469,l1
4,0.551315,0.226851,0.286139,l1
5,0.719469,0.551315,0.226851,l1
6,0.423106,0.719469,0.551315,l1
7,0.980764,0.423106,0.719469,l1
8,0.68483,0.980764,0.423106,l1
9,0.480932,0.68483,0.980764,l1
3,0.60306,0.826341,0.120629,l2
4,0.545068,0.60306,0.826341,l2
5,0.342764,0.545068,0.60306,l2


3    0.551315
4    0.719469
5    0.423106
6    0.980764
7    0.684830
8    0.480932
9    0.392118
3    0.545068
4    0.342764
5    0.304121
6    0.417022
7    0.681301
8    0.875457
9    0.510422
Name: y, dtype: float64

None

array([3, 4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

In [59]:
# Series with exog
# ==============================================================================

# X_train
display(forecaster.create_train_X_y(series=series, exog=exog)[0])

# y_train
display(forecaster.create_train_X_y(series=series, exog=exog)[1])

# y_index
display(forecaster.create_train_X_y(series=series, exog=exog)[2])

# y_train_index
display(forecaster.create_train_X_y(series=series, exog=exog)[3])



Unnamed: 0,lag_1,lag_2,lag_3,level,exog1,exog2
3,0.226851,0.286139,0.696469,l1,3,53.0
4,0.551315,0.226851,0.286139,l1,4,54.0
5,0.719469,0.551315,0.226851,l1,5,55.0
6,0.423106,0.719469,0.551315,l1,6,56.0
7,0.980764,0.423106,0.719469,l1,7,57.0
8,0.68483,0.980764,0.423106,l1,8,58.0
9,0.480932,0.68483,0.980764,l1,9,59.0
3,0.60306,0.826341,0.120629,l2,103,
4,0.545068,0.60306,0.826341,l2,104,
5,0.342764,0.545068,0.60306,l2,105,




3    0.551315
4    0.719469
5    0.423106
6    0.980764
7    0.684830
8    0.480932
9    0.392118
3    0.545068
4    0.342764
5    0.304121
6    0.417022
7    0.681301
8    0.875457
9    0.510422
Name: y, dtype: float64



None



array([3, 4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

## Datetime index

In [63]:
series_datetime = series.copy()
series_datetime.index = pd.date_range(start='2020-01-01', periods=len(series_datetime), freq='D')
series_datetime.head(2)

Unnamed: 0,l1,l2
2020-01-01,0.696469,0.120629
2020-01-02,0.286139,0.826341


In [64]:
exog_datetime = exog.copy()

for key in exog_datetime.keys():
    exog_datetime[key].index = series_datetime.index

In [66]:
# Series with exog datetime
# ==============================================================================

# X_train
print("X_train")
display(forecaster.create_train_X_y(series=series_datetime, exog=exog_datetime)[0])

# y_train
print("y_train")
display(forecaster.create_train_X_y(series=series_datetime, exog=exog_datetime)[1])

# y_index
print("y_index")
display(forecaster.create_train_X_y(series=series_datetime, exog=exog_datetime)[2])

# y_train_index
print("y_train_index")
display(forecaster.create_train_X_y(series=series_datetime, exog=exog_datetime)[3])

X_train




Unnamed: 0,lag_1,lag_2,lag_3,level,exog1,exog2
2020-01-04,0.226851,0.286139,0.696469,l1,3,53.0
2020-01-05,0.551315,0.226851,0.286139,l1,4,54.0
2020-01-06,0.719469,0.551315,0.226851,l1,5,55.0
2020-01-07,0.423106,0.719469,0.551315,l1,6,56.0
2020-01-08,0.980764,0.423106,0.719469,l1,7,57.0
2020-01-09,0.68483,0.980764,0.423106,l1,8,58.0
2020-01-10,0.480932,0.68483,0.980764,l1,9,59.0
2020-01-04,0.60306,0.826341,0.120629,l2,103,
2020-01-05,0.545068,0.60306,0.826341,l2,104,
2020-01-06,0.342764,0.545068,0.60306,l2,105,


y_train




2020-01-04    0.551315
2020-01-05    0.719469
2020-01-06    0.423106
2020-01-07    0.980764
2020-01-08    0.684830
2020-01-09    0.480932
2020-01-10    0.392118
2020-01-04    0.545068
2020-01-05    0.342764
2020-01-06    0.304121
2020-01-07    0.417022
2020-01-08    0.681301
2020-01-09    0.875457
2020-01-10    0.510422
Name: y, dtype: float64

y_index




None

y_train_index




array(['2020-01-04T00:00:00.000000000', '2020-01-05T00:00:00.000000000',
       '2020-01-06T00:00:00.000000000', '2020-01-07T00:00:00.000000000',
       '2020-01-08T00:00:00.000000000', '2020-01-09T00:00:00.000000000',
       '2020-01-10T00:00:00.000000000', '2020-01-04T00:00:00.000000000',
       '2020-01-05T00:00:00.000000000', '2020-01-06T00:00:00.000000000',
       '2020-01-07T00:00:00.000000000', '2020-01-08T00:00:00.000000000',
       '2020-01-09T00:00:00.000000000', '2020-01-10T00:00:00.000000000'],
      dtype='datetime64[ns]')

# Series dict

In [71]:
series_dict = series_datetime.to_dict('series')
series_dict

{'l1': 2020-01-01    0.696469
 2020-01-02    0.286139
 2020-01-03    0.226851
 2020-01-04    0.551315
 2020-01-05    0.719469
 2020-01-06    0.423106
 2020-01-07    0.980764
 2020-01-08    0.684830
 2020-01-09    0.480932
 2020-01-10    0.392118
 Freq: D, Name: l1, dtype: float64,
 'l2': 2020-01-01    0.120629
 2020-01-02    0.826341
 2020-01-03    0.603060
 2020-01-04    0.545068
 2020-01-05    0.342764
 2020-01-06    0.304121
 2020-01-07    0.417022
 2020-01-08    0.681301
 2020-01-09    0.875457
 2020-01-10    0.510422
 Freq: D, Name: l2, dtype: float64}

In [70]:
# series_dict with exog datetime
# ==============================================================================

# X_train
print("X_train")
display(forecaster.create_train_X_y(series=series_dict, exog=exog_datetime)[0])

# y_train
print("y_train")
display(forecaster.create_train_X_y(series=series_dict, exog=exog_datetime)[1])

# y_index
print("y_index")
display(forecaster.create_train_X_y(series=series_dict, exog=exog_datetime)[2])

# y_train_index
print("y_train_index")
display(forecaster.create_train_X_y(series=series_dict, exog=exog_datetime)[3])

X_train




Unnamed: 0,lag_1,lag_2,lag_3,level,exog1,exog2
2020-01-04,0.226851,0.286139,0.696469,l1,3,53.0
2020-01-05,0.551315,0.226851,0.286139,l1,4,54.0
2020-01-06,0.719469,0.551315,0.226851,l1,5,55.0
2020-01-07,0.423106,0.719469,0.551315,l1,6,56.0
2020-01-08,0.980764,0.423106,0.719469,l1,7,57.0
2020-01-09,0.68483,0.980764,0.423106,l1,8,58.0
2020-01-10,0.480932,0.68483,0.980764,l1,9,59.0
2020-01-04,0.60306,0.826341,0.120629,l2,103,
2020-01-05,0.545068,0.60306,0.826341,l2,104,
2020-01-06,0.342764,0.545068,0.60306,l2,105,


y_train




2020-01-04    0.551315
2020-01-05    0.719469
2020-01-06    0.423106
2020-01-07    0.980764
2020-01-08    0.684830
2020-01-09    0.480932
2020-01-10    0.392118
2020-01-04    0.545068
2020-01-05    0.342764
2020-01-06    0.304121
2020-01-07    0.417022
2020-01-08    0.681301
2020-01-09    0.875457
2020-01-10    0.510422
Name: y, dtype: float64

y_index




None

y_train_index




array(['2020-01-04T00:00:00.000000000', '2020-01-05T00:00:00.000000000',
       '2020-01-06T00:00:00.000000000', '2020-01-07T00:00:00.000000000',
       '2020-01-08T00:00:00.000000000', '2020-01-09T00:00:00.000000000',
       '2020-01-10T00:00:00.000000000', '2020-01-04T00:00:00.000000000',
       '2020-01-05T00:00:00.000000000', '2020-01-06T00:00:00.000000000',
       '2020-01-07T00:00:00.000000000', '2020-01-08T00:00:00.000000000',
       '2020-01-09T00:00:00.000000000', '2020-01-10T00:00:00.000000000'],
      dtype='datetime64[ns]')

## Series different lengths

In [73]:
series_diff = series_datetime.copy()
series_diff.iloc[:3, 1] = np.nan
series_diff.head(5)

# exog is greater that series

Unnamed: 0,l1,l2
2020-01-01,0.696469,
2020-01-02,0.286139,
2020-01-03,0.226851,
2020-01-04,0.551315,0.545068
2020-01-05,0.719469,0.342764


In [75]:
# series diff lengths with exog datetime
# ==============================================================================

# X_train
print("X_train")
display(forecaster.create_train_X_y(series=series_diff, exog=exog_datetime)[0])

# y_train
print("y_train")
display(forecaster.create_train_X_y(series=series_diff, exog=exog_datetime)[1])

# y_index
print("y_index")
display(forecaster.create_train_X_y(series=series_diff, exog=exog_datetime)[2])

# y_train_index
print("y_train_index")
display(forecaster.create_train_X_y(series=series_diff, exog=exog_datetime)[3])

X_train




Unnamed: 0,lag_1,lag_2,lag_3,level,exog1,exog2
2020-01-04,0.226851,0.286139,0.696469,l1,3,53.0
2020-01-05,0.551315,0.226851,0.286139,l1,4,54.0
2020-01-06,0.719469,0.551315,0.226851,l1,5,55.0
2020-01-07,0.423106,0.719469,0.551315,l1,6,56.0
2020-01-08,0.980764,0.423106,0.719469,l1,7,57.0
2020-01-09,0.68483,0.980764,0.423106,l1,8,58.0
2020-01-10,0.480932,0.68483,0.980764,l1,9,59.0
2020-01-07,0.304121,0.342764,0.545068,l2,106,
2020-01-08,0.417022,0.304121,0.342764,l2,107,
2020-01-09,0.681301,0.417022,0.304121,l2,108,


y_train




2020-01-04    0.551315
2020-01-05    0.719469
2020-01-06    0.423106
2020-01-07    0.980764
2020-01-08    0.684830
2020-01-09    0.480932
2020-01-10    0.392118
2020-01-07    0.417022
2020-01-08    0.681301
2020-01-09    0.875457
2020-01-10    0.510422
Name: y, dtype: float64

y_index




None

y_train_index




array(['2020-01-04T00:00:00.000000000', '2020-01-05T00:00:00.000000000',
       '2020-01-06T00:00:00.000000000', '2020-01-07T00:00:00.000000000',
       '2020-01-08T00:00:00.000000000', '2020-01-09T00:00:00.000000000',
       '2020-01-10T00:00:00.000000000', '2020-01-07T00:00:00.000000000',
       '2020-01-08T00:00:00.000000000', '2020-01-09T00:00:00.000000000',
       '2020-01-10T00:00:00.000000000'], dtype='datetime64[ns]')

In [78]:
# exog is same as series
exog_diff = exog_datetime.copy()
exog_diff['l2'] = pd.Series(np.arange(103, 110), name='exog1')
exog_diff['l2'].index = pd.date_range(start='2020-01-04', periods=len(exog_diff['l2']), freq='D')

In [79]:
# series_dict with exog datetime
# ==============================================================================

# X_train
print("X_train")
display(forecaster.create_train_X_y(series=series_diff, exog=exog_diff)[0])

# y_train
print("y_train")
display(forecaster.create_train_X_y(series=series_diff, exog=exog_diff)[1])

# y_index
print("y_index")
display(forecaster.create_train_X_y(series=series_diff, exog=exog_diff)[2])

# y_train_index
print("y_train_index")
display(forecaster.create_train_X_y(series=series_diff, exog=exog_diff)[3])

X_train




Unnamed: 0,lag_1,lag_2,lag_3,level,exog1,exog2
2020-01-04,0.226851,0.286139,0.696469,l1,3,53.0
2020-01-05,0.551315,0.226851,0.286139,l1,4,54.0
2020-01-06,0.719469,0.551315,0.226851,l1,5,55.0
2020-01-07,0.423106,0.719469,0.551315,l1,6,56.0
2020-01-08,0.980764,0.423106,0.719469,l1,7,57.0
2020-01-09,0.68483,0.980764,0.423106,l1,8,58.0
2020-01-10,0.480932,0.68483,0.980764,l1,9,59.0
2020-01-07,0.304121,0.342764,0.545068,l2,106,
2020-01-08,0.417022,0.304121,0.342764,l2,107,
2020-01-09,0.681301,0.417022,0.304121,l2,108,


y_train




2020-01-04    0.551315
2020-01-05    0.719469
2020-01-06    0.423106
2020-01-07    0.980764
2020-01-08    0.684830
2020-01-09    0.480932
2020-01-10    0.392118
2020-01-07    0.417022
2020-01-08    0.681301
2020-01-09    0.875457
2020-01-10    0.510422
Name: y, dtype: float64

y_index




None

y_train_index




array(['2020-01-04T00:00:00.000000000', '2020-01-05T00:00:00.000000000',
       '2020-01-06T00:00:00.000000000', '2020-01-07T00:00:00.000000000',
       '2020-01-08T00:00:00.000000000', '2020-01-09T00:00:00.000000000',
       '2020-01-10T00:00:00.000000000', '2020-01-07T00:00:00.000000000',
       '2020-01-08T00:00:00.000000000', '2020-01-09T00:00:00.000000000',
       '2020-01-10T00:00:00.000000000'], dtype='datetime64[ns]')

In [84]:
# exog is less than series
exog_diff = exog_datetime.copy()
exog_diff['l2'] = pd.Series(np.arange(105, 110), name='exog1')
exog_diff['l2'].index = pd.date_range(start='2020-01-06', periods=len(exog_diff['l2']), freq='D')

In [102]:
# series_dict with exog datetime
# ==============================================================================

# X_train
print("X_train")
display(forecaster.create_train_X_y(series=series_datetime, exog=exog_diff)[0])

# y_train
print("y_train")
display(forecaster.create_train_X_y(series=series_datetime, exog=exog_diff)[1])

# y_index
print("y_index")
display(forecaster.create_train_X_y(series=series_datetime, exog=exog_diff)[2])

# y_train_index
print("y_train_index")
display(forecaster.create_train_X_y(series=series_datetime, exog=exog_diff)[3])

X_train




Unnamed: 0,lag_1,lag_2,lag_3,_l1,_l2,exog1,exog2
2020-01-04,0.226851,0.286139,0.696469,1.0,0.0,3.0,53.0
2020-01-05,0.551315,0.226851,0.286139,1.0,0.0,4.0,54.0
2020-01-06,0.719469,0.551315,0.226851,1.0,0.0,5.0,55.0
2020-01-07,0.423106,0.719469,0.551315,1.0,0.0,6.0,56.0
2020-01-08,0.980764,0.423106,0.719469,1.0,0.0,7.0,57.0
2020-01-09,0.68483,0.980764,0.423106,1.0,0.0,8.0,58.0
2020-01-10,0.480932,0.68483,0.980764,1.0,0.0,9.0,59.0
2020-01-04,0.60306,0.826341,0.120629,0.0,1.0,,
2020-01-05,0.545068,0.60306,0.826341,0.0,1.0,,
2020-01-06,0.342764,0.545068,0.60306,0.0,1.0,105.0,


y_train




2020-01-04    0.551315
2020-01-05    0.719469
2020-01-06    0.423106
2020-01-07    0.980764
2020-01-08    0.684830
2020-01-09    0.480932
2020-01-10    0.392118
2020-01-04    0.545068
2020-01-05    0.342764
2020-01-06    0.304121
2020-01-07    0.417022
2020-01-08    0.681301
2020-01-09    0.875457
2020-01-10    0.510422
Name: y, dtype: float64

y_index




None

y_train_index




array(['2020-01-04T00:00:00.000000000', '2020-01-05T00:00:00.000000000',
       '2020-01-06T00:00:00.000000000', '2020-01-07T00:00:00.000000000',
       '2020-01-08T00:00:00.000000000', '2020-01-09T00:00:00.000000000',
       '2020-01-10T00:00:00.000000000', '2020-01-04T00:00:00.000000000',
       '2020-01-05T00:00:00.000000000', '2020-01-06T00:00:00.000000000',
       '2020-01-07T00:00:00.000000000', '2020-01-08T00:00:00.000000000',
       '2020-01-09T00:00:00.000000000', '2020-01-10T00:00:00.000000000'],
      dtype='datetime64[ns]')