In [23]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'/home/ubuntu/varios/skforecast'

In [24]:
import numpy as np 
import pandas as pd
from typing import Tuple, Optional, Union
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from sklearn.linear_model import LinearRegression
from skforecast.utils import check_y
from skforecast.utils import transform_series
from skforecast.utils import preprocess_y
import time
from numba import njit

In [29]:
def _create_lags(
    self, 
    y: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """       
    Transforms a 1d array into a 2d array (X) and a 1d array (y). Each row
    in X is associated with a value of y and it represents the lags that
    precede it.
    
    Notice that, the returned matrix X_data, contains the lag 1 in the first
    column, the lag 2 in the second column and so on.
    
    Parameters
    ----------        
    y : 1d numpy ndarray
        Training time series.

    Returns 
    -------
    X_data : 2d numpy ndarray, shape (samples - max(self.lags), len(self.lags))
        2d numpy array with the lagged values (predictors).
    
    y_data : 1d numpy ndarray, shape (samples - max(self.lags),)
        Values of the time series related to each row of `X_data`.
    
    """
        
    n_splits = len(y) - self.max_lag
    if n_splits <= 0:
        raise ValueError(
            f'The maximum lag ({self.max_lag}) must be less than the length '
            f'of the series ({len(y)}).'
        )
    
    X_data = np.full(shape=(n_splits, len(self.lags)), fill_value=np.nan, dtype=float)

    for i, lag in enumerate(self.lags):
        X_data[:, i] = y[self.max_lag - lag: -lag]

    y_data = y[self.max_lag:]
        
    return X_data, y_data

@njit
def _create_lags_jit(
    max_lag,
    lags,
    y: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """       
    Transforms a 1d array into a 2d array (X) and a 1d array (y). Each row
    in X is associated with a value of y and it represents the lags that
    precede it.
    
    Notice that, the returned matrix X_data, contains the lag 1 in the first
    column, the lag 2 in the second column and so on.
    
    Parameters
    ----------        
    y : 1d numpy ndarray
        Training time series.

    Returns 
    -------
    X_data : 2d numpy ndarray, shape (samples - max(self.lags), len(self.lags))
        2d numpy array with the lagged values (predictors).
    
    y_data : 1d numpy ndarray, shape (samples - max(self.lags),)
        Values of the time series related to each row of `X_data`.
    
    """
        
    n_splits = len(y) - max_lag    
    X_data = np.empty(shape=(n_splits, len(lags)))

    for i in range(len(lags)):
        X_data[:, i] = y[max_lag - lags[i]: -lags[i]]

    y_data = y[max_lag:]
        
    return X_data, y_data

In [30]:
lags = 500
self = ForecasterAutoreg(regressor=LinearRegression(), lags=lags)
y = pd.Series(np.random.normal(size=100000))

In [31]:
start = time.time()
X = _create_lags(self, y=y.values)
print(f"Time taken: {time.time() - start}")

Time taken: 0.25193142890930176


In [34]:
start = time.time()
X = _create_lags_jit(max_lag = self.max_lag, lags=self.lags, y=y.values)
print(f"Time taken: {time.time() - start}")

Time taken: 0.6670100688934326
