In [15]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'/home/ximo/Documents/GitHub/skforecast'

In [16]:
import numpy as np 
import pandas as pd
from typing import Tuple, Optional, Union
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from sklearn.linear_model import LinearRegression
from skforecast.utils import check_y
from skforecast.utils import transform_series
from skforecast.utils import preprocess_y
import time
import numba as nb

# Create lags

In [23]:
def _create_lags(
    self, 
    y: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """       
    Transforms a 1d array into a 2d array (X) and a 1d array (y). Each row
    in X is associated with a value of y and it represents the lags that
    precede it.
    
    Notice that, the returned matrix X_data, contains the lag 1 in the first
    column, the lag 2 in the second column and so on.
    
    Parameters
    ----------        
    y : 1d numpy ndarray
        Training time series.

    Returns 
    -------
    X_data : 2d numpy ndarray, shape (samples - max(self.lags), len(self.lags))
        2d numpy array with the lagged values (predictors).
    
    y_data : 1d numpy ndarray, shape (samples - max(self.lags),)
        Values of the time series related to each row of `X_data`.
    
    """
        
    n_splits = len(y) - self.max_lag
    if n_splits <= 0:
        raise ValueError(
            f'The maximum lag ({self.max_lag}) must be less than the length '
            f'of the series ({len(y)}).'
        )
    
    X_data = np.full(shape=(n_splits, len(self.lags)), fill_value=np.nan, dtype=float)

    for i, lag in enumerate(self.lags):
        X_data[:, i] = y[self.max_lag - lag: -lag]

    y_data = y[self.max_lag:]
        
    return X_data, y_data



def _create_lags_new(
    self, 
    y: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """       
    Transforms a 1d array into a 2d array (X) and a 1d array (y). Each row
    in X is associated with a value of y and it represents the lags that
    precede it.
    
    Notice that, the returned matrix X_data, contains the lag 1 in the first
    column, the lag 2 in the second column and so on.
    
    Parameters
    ----------        
    y : 1d numpy ndarray
        Training time series.

    Returns 
    -------
    X_data : 2d numpy ndarray, shape (samples - max(self.lags), len(self.lags))
        2d numpy array with the lagged values (predictors).
    
    y_data : 1d numpy ndarray, shape (samples - max(self.lags),)
        Values of the time series related to each row of `X_data`.
    
    """
    
    n_splits = len(y) - self.max_lag
    if n_splits <= 0:
        raise ValueError(
            f'The maximum lag ({self.max_lag}) must be less than the length '
            f'of the series ({len(y)}).'
        )

    index_array = np.arange(self.max_lag)[:, None] - np.arange(self.max_lag - len(y), dtype=int)

    X_data = y[index_array[:-1 * len(self.lags), :len(self.lags)]]
    y_data = y[self.max_lag:]

    return X_data, y_data


In [26]:
lags = 500
self = ForecasterAutoreg(regressor=LinearRegression(), lags=lags)
y = np.random.normal(size=100000)

start = time.time()
X = _create_lags(self, y=y)
print(f"Time taken _create_lags: {time.time() - start}")



start = time.time()
X = _create_lags_new(self, y=y)
print(f"Time taken _create_lags_new: {time.time() - start}")

Time taken _create_lags: 0.6470608711242676
Time taken _create_lags_new: 0.0012028217315673828


# Exog to direc

In [47]:
# Current implementation with numpy
# ==============================================================================
def exog_to_direct_numpy(
    exog: np.ndarray,
    steps: int
)-> np.ndarray:
    """
    Transforms `exog` to `np.ndarray` with the shape needed for direct
    forecasting.
    
    Parameters
    ----------        
    exog : numpy ndarray, shape(samples,)
        Time series values.

    steps : int.
        Number of steps that will be predicted using this exog.

    Returns 
    -------
    exog_transformed : numpy ndarray
        Exog transformed.

    """

    exog_transformed = []

    if exog.ndim < 2:
        exog = exog.reshape(-1, 1)

    for column in range(exog.shape[1]):

        exog_column_transformed = []
        
        for i in range(exog.shape[0] - (steps -1)):
            exog_column_transformed.append(exog[i:i + steps, column])
        
        if len(exog_column_transformed) > 1:
            exog_column_transformed = np.vstack(exog_column_transformed)
        
        exog_transformed.append(exog_column_transformed)

    if len(exog_transformed) > 1:
        exog_transformed = np.hstack(exog_transformed)
    else:
        exog_transformed = exog_column_transformed

    return exog_transformed


# Implementation with numpy suggested by chatgtp
# ==============================================================================
def exog_to_direct_numpy_new(
        exog: np.ndarray,
        steps: int
) -> np.ndarray:
    """
    Transforms `exog` to `numpy.ndarray` with the shape needed for direct forecasting.

    Parameters
    ----------
    exog : numpy ndarray, shape(samples, columns)
        Time series values.

    steps : int.
        Number of steps that will be predicted using this exog.

    Returns
    -------
    exog_transformed : numpy ndarray
        Exog transformed.

    """
    exog_transformed = []
    
    if exog.ndim == 1:
        exog = np.expand_dims(exog, axis=1)

    for i in range(exog.shape[1]):
        exog_column = exog[:, i]
        exog_column_transformed = np.vstack(
            [np.roll(exog_column, j) for j in range(steps)]
        ).T[steps - 1:]
        exog_column_transformed = exog_column_transformed[:, ::-1]
        exog_transformed.append(exog_column_transformed)

    if len(exog_transformed) > 1:
        exog_transformed = np.concatenate(exog_transformed, axis=1)
    else:
        exog_transformed = exog_column_transformed

    return exog_transformed




# Implementation with pandas suggested by chatgtp
# ==============================================================================
def exog_to_direct_pandas(
    exog: Union[np.ndarray, pd.DataFrame],
    steps: int
)-> np.ndarray:
    """
    Transforms `exog` to `pd.DataFrame` with the shape needed for direct
    forecasting.

    Parameters
    ----------
    exog : pandas DataFrame, shape(samples, columns)
        Time series values.

    steps : int.
        Number of steps that will be predicted using this exog.

    Returns
    -------
    exog_transformed : pandas DataFrame
        Exog transformed.

    """

    exog_transformed = []

    if exog.ndim == 1:
        exog = exog.to_frame()

    for column in exog.columns:

        exog_column_transformed = [exog[column].shift(i) for i in range(steps)][::-1]
        exog_column_transformed = pd.concat(exog_column_transformed, axis=1).dropna()
        exog_column_transformed.columns = [f"{column}_step_{i+1}" for i in range(steps)]

        exog_transformed.append(exog_column_transformed)

    if len(exog_transformed) > 1:
        exog_transformed = pd.concat(exog_transformed, axis=1)
    else:
        exog_transformed = exog_column_transformed

    return exog_transformed

In [48]:
lags = 5
steps = 3
exog = np.arange(10000)
print(f"Initial exog shape: {exog.shape}")

print("")
print("Current implementation with numpy")
start = time.time()
X_1 = exog_to_direct_numpy(steps=steps, exog=exog)
print(f"Time taken: {time.time() - start}")
print(X_1.shape)
print("")

print("Implementation with numpy suggested by chatgtp")
start = time.time()
X_2 = exog_to_direct_numpy_new(steps=steps, exog=exog)
print(f"Time taken: {time.time() - start}")
print(X_2.shape)
print("")


print("Implementation with pandas suggested by chatgtp")
exog = pd.Series(exog)
start = time.time()
X_3 = exog_to_direct_pandas(steps=steps, exog=exog)
print(f"Time taken: {time.time() - start}")
print(X_3.shape)
print("")


np.testing.assert_array_equal(X_1, X_2)
np.testing.assert_array_equal(X_1, X_3.to_numpy())

Initial exog shape: (10000,)

Current implementation with numpy
Time taken: 0.023429155349731445
(9998, 3)

Implementation with numpy suggested by chatgtp
Time taken: 0.00039386749267578125
(9998, 3)

Implementation with pandas suggested by chatgtp
Time taken: 0.003916263580322266
(9998, 3)



In [49]:
lags = 5
steps = 3
self = ForecasterAutoregDirect(regressor=LinearRegression(), lags=lags, steps=steps)

exog = np.arange(10000).reshape(-1, 2)
print(f"Initial exog shape: {exog.shape}")
print("")

print("Current implementation with numpy")
start = time.time()
X_1 = exog_to_direct_numpy(steps=steps, exog=exog)
print(f"Time taken: {time.time() - start}")
print(X_1.shape)
print("")

print("Implementation with numpy suggested by chatgtp")
start = time.time()
X_2 = exog_to_direct_numpy_new(steps=steps, exog=exog)
print(f"Time taken: {time.time() - start}")
print(X_2.shape)
print("")


print("Implementation with pandas suggested by chatgtp")
exog = pd.DataFrame(exog)
start = time.time()
X_3 = exog_to_direct_pandas(steps=steps, exog=exog)
print(f"Time taken: {time.time() - start}")
print(X_3.shape)
print("")


np.testing.assert_array_equal(X_1, X_2)
np.testing.assert_array_equal(X_1, X_3.to_numpy())

Initial exog shape: (5000, 2)

Current implementation with numpy
Time taken: 0.012891292572021484
(4998, 6)

Implementation with numpy suggested by chatgtp
Time taken: 0.0002994537353515625
(4998, 6)

Implementation with pandas suggested by chatgtp
Time taken: 0.0060727596282958984
(4998, 6)

