In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to source files on disk are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Add the parent of the current working directory to the module search path
# (at position 1, i.e. right after the first entry) -- presumably so the
# package under development can be imported from this notebook.
sys.path.insert(1, str(Path.cwd().parent))
# Last expression of the cell: displays the directory that was just added.
str(Path.cwd().parent)

'/home/ubuntu/varios/skforecast'

In [32]:
import warnings
from typing import Union, Tuple

import numpy as np
import pandas as pd

In [33]:
def preprocess_exog(
    exog: Union[pd.Series, pd.DataFrame]
) -> Tuple[np.ndarray, pd.Index, dict]:
    """
    Return values, index and dtypes of a series or data frame separately.
    The index is overwritten according to the following rules:
        If index is of type DatetimeIndex and has frequency, nothing is
        changed.
        If index is of type RangeIndex, nothing is changed.
        If index is of type DatetimeIndex but has no frequency, a
        RangeIndex is created.
        If index is not of type DatetimeIndex, a RangeIndex is created.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables.

    Returns
    -------
    exog_values : numpy ndarray
        Numpy array with values of `exog`.

    exog_index : pandas Index
        Index of `exog` modified according to the rules.

    exog_dtypes : dict
        Dtype of each variable. For a DataFrame, keys are the column names.
        For a Series, the only key is the series name (`None` if the series
        is unnamed).

    Warns
    -----
    UserWarning
        When the index of `exog` is replaced by a RangeIndex.

    """

    original_index = exog.index
    is_datetime = isinstance(original_index, pd.DatetimeIndex)

    if isinstance(original_index, pd.RangeIndex):
        exog_index = original_index
    elif is_datetime and original_index.freq is not None:
        exog_index = original_index
    else:
        # Any other index is discarded. The two messages only differ in the
        # reason: DatetimeIndex without frequency vs. unsupported index type.
        if is_datetime:
            warnings.warn(
                ('`exog` has DatetimeIndex index but no frequency. '
                 'Index is overwritten with a RangeIndex of step 1.')
            )
        else:
            warnings.warn(
                ('`exog` has no DatetimeIndex nor RangeIndex index. '
                 'Index is overwritten with a RangeIndex.')
            )
        exog_index = pd.RangeIndex(
                         start = 0,
                         stop  = len(exog),
                         step  = 1
                     )

    if isinstance(exog, pd.Series):
        # A Series has a single dtype; key it by the series name.
        exog_dtypes = {exog.name: exog.dtypes}
    else:
        exog_dtypes = exog.dtypes.to_dict()

    exog_values = exog.to_numpy()

    return exog_values, exog_index, exog_dtypes

In [34]:
# Example: DataFrame input with pandas' default RangeIndex.
exog = pd.DataFrame(
    data = {'col1': [2, 3, 5], 'col2': [4, 5, 6]}
)
preprocess_exog(exog)

(array([[2, 4],
        [3, 5],
        [5, 6]]),
 RangeIndex(start=0, stop=3, step=1),
 {'col1': dtype('int64'), 'col2': dtype('int64')})

In [36]:
# Example: unnamed float Series, so the dtype dictionary is keyed by None.
exog = pd.Series(data=[1, 2, 3], dtype=float)
preprocess_exog(exog)

(array([1., 2., 3.]),
 RangeIndex(start=0, stop=3, step=1),
 {None: dtype('float64')})