In [1]:
# Reload imported modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Put the parent of the current working directory on the import path
# (presumably the repository root, so the local checkout is imported — confirm).
sys.path.insert(1, str(Path.cwd().parent))
# Display the directory that was just added.
str(Path.cwd().parent)

'c:\\Users\\jaesc2\\GitHub\\skforecast'

In [2]:
import numpy as np
import pandas as pd
from typing import Union

In [3]:
# Toy exogenous data: 10 rows, two float columns (100..109 and 1000..1009).
exog = pd.DataFrame({'exog_1': np.arange(100, 110, dtype=float),
                     'exog_2': np.arange(1000, 1010, dtype=float)})

In [4]:
def exog_to_direct(
    exog: Union[pd.Series, pd.DataFrame],
    steps: int
)-> pd.DataFrame:
    """
    Transforms `exog` to a pandas DataFrame with the shape needed for direct
    forecasting.

    Every output row holds `steps` consecutive rows of `exog` laid out side by
    side: first all columns of the earliest row (suffix `_step_1`), then all
    columns of the next row (suffix `_step_2`), and so on.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables. A Series is converted to a one-column DataFrame.

    steps : int
        Number of consecutive rows of `exog` combined in each output row.
        Must be at least 1 and at most `len(exog) + 1`.

    Returns
    -------
    exog_transformed : pandas DataFrame
        DataFrame with `len(exog) - steps + 1` rows and
        `n_columns * steps` columns named `{column}_step_{i}`. Each row is
        labelled with the index of the last `exog` row it contains, i.e. the
        index is `exog.index[steps - 1:]`. When `steps == len(exog) + 1` the
        result has zero rows.

    Raises
    ------
    TypeError
        If `exog` is not a pandas Series or DataFrame.
    ValueError
        If `steps` is lower than 1 or greater than `len(exog) + 1`.

    """

    if not isinstance(exog, (pd.Series, pd.DataFrame)):
        raise TypeError(f"`exog` must be a pandas Series or DataFrame. Got {type(exog)}.")

    if isinstance(exog, pd.Series):
        exog = exog.to_frame()

    n_rows = len(exog)
    # Number of complete windows of `steps` consecutive rows.
    n_windows = n_rows - steps + 1
    if steps < 1 or n_windows < 0:
        raise ValueError(
            f"`steps` must be an integer between 1 and `len(exog) + 1` "
            f"({n_rows + 1}). Got {steps}."
        )

    exog_idx = exog.index
    exog_cols = exog.columns
    exog_transformed = []

    for i in range(steps):
        exog_step = exog.iloc[i : i + n_windows, ]
        # Positional index so that `concat` aligns the shifted slices row by
        # row instead of by their original labels.
        exog_step.index = pd.RangeIndex(n_windows)
        exog_step.columns = [f"{col}_step_{i + 1}" for col in exog_cols]
        exog_transformed.append(exog_step)

    if len(exog_transformed) > 1:
        exog_transformed = pd.concat(exog_transformed, axis=1, copy=False)
    else:
        exog_transformed = exog_transformed[0]

    # Equivalent to "the last `n_windows` labels", but also correct when
    # `n_windows` is 0 (a `[-0:]` slice would return the whole index).
    exog_transformed.index = exog_idx[steps - 1:]

    return exog_transformed

In [5]:
# Sanity check with steps=2: each output row pairs one row of `exog` with the
# next one, so 10 input rows give 9 output rows.
exog_to_direct(exog, 2)

Unnamed: 0,exog_1_step_1,exog_2_step_1,exog_1_step_2,exog_2_step_2
1,100.0,1000.0,101.0,1001.0
2,101.0,1001.0,102.0,1002.0
3,102.0,1002.0,103.0,1003.0
4,103.0,1003.0,104.0,1004.0
5,104.0,1004.0,105.0,1005.0
6,105.0,1005.0,106.0,1006.0
7,106.0,1006.0,107.0,1007.0
8,107.0,1007.0,108.0,1008.0
9,108.0,1008.0,109.0,1009.0


In [6]:
# The same example data as a plain 2-D numpy array.
exog.to_numpy()

array([[ 100., 1000.],
       [ 101., 1001.],
       [ 102., 1002.],
       [ 103., 1003.],
       [ 104., 1004.],
       [ 105., 1005.],
       [ 106., 1006.],
       [ 107., 1007.],
       [ 108., 1008.],
       [ 109., 1009.]])

In [7]:
# Row slicing on the array: first two rows.
exog.to_numpy()[:2]

array([[ 100., 1000.],
       [ 101., 1001.]])

In [8]:
def exog_to_direct_numpy(
    exog: np.ndarray,
    steps: int
)-> np.ndarray:
    """
    Transforms `exog` to a numpy ndarray with the shape needed for direct
    forecasting.

    Every output row holds `steps` consecutive rows of `exog` laid out side by
    side: first all columns of the earliest row, then all columns of the next
    row, and so on.

    Parameters
    ----------
    exog : numpy ndarray
        Exogenous variables. A 1-D array is treated as a single column.

    steps : int
        Number of consecutive rows of `exog` combined in each output row.
        Must be at least 1 and at most `len(exog) + 1`.

    Returns
    -------
    exog_transformed : numpy ndarray
        For 2-D input, an array with `len(exog) - steps + 1` rows and
        `n_columns * steps` columns. With `steps == 1` the result is a slice
        (view) of the input rather than a new array. When
        `steps == len(exog) + 1` the result has zero rows.

    Raises
    ------
    TypeError
        If `exog` is not a numpy ndarray.
    ValueError
        If `steps` is lower than 1 or greater than `len(exog) + 1`.

    """

    if not isinstance(exog, np.ndarray):
        raise TypeError(f"`exog` must be a numpy ndarray. Got {type(exog)}.")

    if exog.ndim == 1:
        exog = np.expand_dims(exog, axis=1)

    n_rows = len(exog)
    # Number of complete windows of `steps` consecutive rows. Validating it
    # up front keeps the slice stops below from ever becoming negative.
    n_windows = n_rows - steps + 1
    if steps < 1 or n_windows < 0:
        raise ValueError(
            f"`steps` must be an integer between 1 and `len(exog) + 1` "
            f"({n_rows + 1}). Got {steps}."
        )

    exog_transformed = [exog[i : i + n_windows] for i in range(steps)]

    if len(exog_transformed) > 1:
        exog_transformed = np.concatenate(exog_transformed, axis=1)
    else:
        exog_transformed = exog_transformed[0]

    return exog_transformed

In [9]:
# Numpy variant with steps=2: same values as the pandas result, without
# index or column labels.
exog_to_direct_numpy(exog.to_numpy(), 2)

array([[ 100., 1000.,  101., 1001.],
       [ 101., 1001.,  102., 1002.],
       [ 102., 1002.,  103., 1003.],
       [ 103., 1003.,  104., 1004.],
       [ 104., 1004.,  105., 1005.],
       [ 105., 1005.,  106., 1006.],
       [ 106., 1006.,  107., 1007.],
       [ 107., 1007.,  108., 1008.],
       [ 108., 1008.,  109., 1009.]])

In [10]:
# With steps=1 the input comes back unchanged.
exog_to_direct_numpy(exog.to_numpy(), 1)

array([[ 100., 1000.],
       [ 101., 1001.],
       [ 102., 1002.],
       [ 103., 1003.],
       [ 104., 1004.],
       [ 105., 1005.],
       [ 106., 1006.],
       [ 107., 1007.],
       [ 108., 1008.],
       [ 109., 1009.]])

In [11]:
# Profiling `exog_to_direct` (pandas implementation) for different numbers of
# columns and steps
# ======================================================================================
import time 

n_columns = [2, 5, 10, 20, 25]
n_steps = [2, 6, 12, 24, 36]
results = {}

for steps in n_steps:
    execution_time = []

    for n in n_columns:
        # `.head(1000)` keeps the first 1000 rows, so every frame is
        # 1000 rows x `n` columns.
        df = pd.DataFrame(np.arange(100000).reshape(int(100000/n), n)).head(1000)
        # Single wall-clock measurement of one call (no repetitions or warm-up).
        tic = time.perf_counter()
        _ = exog_to_direct(exog=df, steps=steps)
        toc = time.perf_counter()
        execution_time.append(toc-tic)

    results[steps] = execution_time

# Rows: number of columns in the input; columns: number of steps;
# values: elapsed seconds.
results = pd.DataFrame(
              data =  results,
              index = n_columns
          )

results

Unnamed: 0,2,6,12,24,36
2,0.000544,0.000747,0.001503,0.002295,0.003407
5,0.00043,0.000718,0.002308,0.0025,0.003694
10,0.000384,0.000902,0.002028,0.002863,0.00463
20,0.000467,0.000885,0.001836,0.003482,0.004993
25,0.000404,0.000943,0.001969,0.003627,0.005249


In [12]:
# Profiling `exog_to_direct_numpy` for different numbers of columns and steps
# ======================================================================================
import time 

n_columns = [2, 5, 10, 20, 25]
n_steps = [2, 6, 12, 24, 36]
results = {}

for steps in n_steps:
    execution_time = []

    for n in n_columns:
        # 1000 elements in total, so the array has 1000/n rows and `n` columns.
        # NOTE(review): the pandas benchmark feeds 1000 rows regardless of `n`,
        # so the two result tables are not measured on equally sized inputs —
        # confirm whether that is intended.
        df = np.arange(1000).reshape(int(1000/n), n)
        # Single wall-clock measurement of one call (no repetitions or warm-up).
        tic = time.perf_counter()
        _ = exog_to_direct_numpy(exog=df, steps=steps)
        toc = time.perf_counter()
        execution_time.append(toc-tic)

    results[steps] = execution_time

# Rows: number of columns in the input; columns: number of steps;
# values: elapsed seconds.
results = pd.DataFrame(
              data =  results,
              index = n_columns
          )

results

Unnamed: 0,2,6,12,24,36
2,0.000162,1.7e-05,2.9e-05,5.4e-05,7.6e-05
5,8e-06,1e-05,2.7e-05,3e-05,3.9e-05
10,5e-06,8e-06,1.2e-05,2.1e-05,2.9e-05
20,4e-06,7e-06,1e-05,1.7e-05,2.2e-05
25,4e-06,6e-06,1e-05,2e-05,2.1e-05


In [13]:
def exog_to_direct_numpy_old(
    exog: np.ndarray,
    steps: int
)-> np.ndarray:
    """
    Legacy, column-by-column transformation of `exog` into the `np.ndarray`
    layout needed for direct forecasting.

    Parameters
    ----------
    exog : numpy ndarray
        Exogenous variables. A 1-D array is treated as a single column.

    steps : int
        Number of steps that will be predicted using exog.

    Returns
    -------
    exog_transformed : numpy ndarray
        Exogenous variables transformed. Each input column contributes
        `steps` adjacent output columns (its value at the row, at the next
        row, ...), and the groups are placed one after another in the order
        of the input columns.

    """

    if exog.ndim == 1:
        exog = np.expand_dims(exog, axis=1)

    per_column = []

    for col_idx in range(exog.shape[1]):
        values = exog[:, col_idx]
        # Row `shift` of `rolled` is the column rolled forward by `shift`.
        rolled = np.vstack([np.roll(values, shift) for shift in range(steps)])
        # Transpose to (samples, steps), drop the first `steps - 1` rows
        # (they hold values wrapped around by `np.roll`) and reverse the
        # columns so they run from the earliest to the latest step.
        window = rolled.T[steps - 1:][:, ::-1]
        per_column.append(window)

    if len(per_column) <= 1:
        return window

    return np.concatenate(per_column, axis=1)

In [14]:
# Profiling `exog_to_direct_numpy_old` for different numbers of columns and steps
# ======================================================================================
import time 

n_columns = [2, 5, 10, 20, 25]
n_steps = [2, 6, 12, 24, 36]
results = {}

for steps in n_steps:
    execution_time = []

    for n in n_columns:
        # 1000 elements in total, so the array has 1000/n rows and `n` columns.
        df = np.arange(1000).reshape(int(1000/n), n)
        # Single wall-clock measurement of one call (no repetitions or warm-up).
        tic = time.perf_counter()
        _ = exog_to_direct_numpy_old(exog=df, steps=steps)
        toc = time.perf_counter()
        execution_time.append(toc-tic)

    results[steps] = execution_time

# Rows: number of columns in the input; columns: number of steps;
# values: elapsed seconds.
results = pd.DataFrame(
              data =  results,
              index = n_columns
          )

results

Unnamed: 0,2,6,12,24,36
2,0.000149,0.000139,0.000267,0.00045,0.000636
5,0.000169,0.000322,0.000686,0.001002,0.001492
10,0.000379,0.000637,0.001255,0.001936,0.00301
20,0.000532,0.001223,0.002165,0.003904,0.005968
25,0.00056,0.001449,0.0026,0.004808,0.007452


In [23]:
# Show the example frame again before timing the two slicing orders.
exog

Unnamed: 0,exog_1,exog_2
0,100.0,1000.0
1,101.0,1001.0
2,102.0,1002.0
3,103.0,1003.0
4,104.0,1004.0
5,105.0,1005.0
6,106.0,1006.0
7,107.0,1007.0
8,108.0,1008.0
9,109.0,1009.0


In [22]:
%%timeit -r 4 -n 100

# Convert the whole frame to numpy first, then slice the array.
exog.to_numpy()[:5]

1.65 µs ± 193 ns per loop (mean ± std. dev. of 4 runs, 100 loops each)


In [25]:
# Result of converting first and slicing afterwards.
exog.to_numpy()[:5]

array([[ 100., 1000.],
       [ 101., 1001.],
       [ 102., 1002.],
       [ 103., 1003.],
       [ 104., 1004.]])

In [24]:
%%timeit -r 4 -n 100

# Slice with `.iloc` first, then convert the slice to numpy.
exog.iloc[:5,].to_numpy()

32.2 µs ± 1.29 µs per loop (mean ± std. dev. of 4 runs, 100 loops each)


In [26]:
# Same values as `exog.to_numpy()[:5]`; only the order of slicing and
# conversion differs.
exog.iloc[:5,].to_numpy()

array([[ 100., 1000.],
       [ 101., 1001.],
       [ 102., 1002.],
       [ 103., 1003.],
       [ 104., 1004.]])