In [None]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Put the parent of the current working directory on the import path
# (at position 1) and echo that path as the cell output.
parent_dir = str(Path.cwd().parent)
sys.path.insert(1, parent_dir)
parent_dir

# Descripción

Propuesta para que los forecasters tengan un argumento `calendar_features` con el que pasar una función o un transformer que extraiga características de un índice datetime. En los ejemplos de este notebook el argumento aparece con el nombre `datetime_features`.

Cuando existen otras exógenas, esto puede hacerse con `transformer_exog`, pero pensando en el usuario, y dado que en series temporales las features de calendario son muy comunes, sería interesante tener un argumento que permita hacerlo de forma más sencilla.

Cuando no hay otras exógenas, no se puede hacer mediante un transformer_exog.

Comparar las posibilidades utilizando sklearn y con feature-engine.

# Aproximación con sklearn + skforecast

In [None]:
import pandas as pd
import numpy as np
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from sklearn.linear_model import Ridge
from skforecast.preprocessing import create_datetime_features
from skforecast.preprocessing import DateTimeFeatureTransformer

# Toy data for the examples below: a 10-day daily index shared by a target
# series `y` and a two-column feature frame `df`.
# A seeded generator replaces the unseeded np.random.rand calls so that the
# displayed values are identical after every "Restart & Run All".
rng = np.random.default_rng(123)
date_range = pd.date_range(start='2021-01-01', periods=10, freq='D')
y = pd.Series(rng.random(10), index=date_range)
df = pd.DataFrame(rng.random((10, 2)), index=date_range, columns=['feature1', 'feature2'])
display(y.head(3))
display(df.head(3))

## Using function

In [None]:
# Function-based approach: request the 'month' and 'day_of_week' features
# with 'cyclic' encoding for the series `y`, then show the result.
calendar_features = create_datetime_features(
    X        = y,
    features = ['month', 'day_of_week'],
    encoding = 'cyclic',
)
calendar_features

## Custom Transformer

In [None]:
# skforecast transformer version of the calendar features
# ======================================================================================
datetime_transformer = DateTimeFeatureTransformer(
    features = ['month', 'day_of_week'],
    encoding = 'cyclic',
)
# transform() is called directly here, without a preceding fit().
datetime_features = datetime_transformer.transform(df)
display(datetime_features)

# Utilizando feature-engine

In [None]:
from feature_engine.datetime import DatetimeFeatures
from feature_engine.creation import CyclicalFeatures
from sklearn.pipeline import Pipeline


# Calendar features with feature-engine: DatetimeFeatures is pointed at the
# DataFrame index (variables="index"); CyclicalFeatures is then applied to
# the extracted columns.
dtf = DatetimeFeatures(
    features_to_extract = ["year", "month", "day_of_month"],
    variables = "index",
    drop_original=True
    )
# NOTE(review): "year" is listed in `variables` but has no entry in
# `max_values` — confirm how CyclicalFeatures handles that combination.
cf = CyclicalFeatures(
    variables=["year", "month", "day_of_month"],
    max_values={"month": 12, "day_of_month": 31},
    drop_original=True
)

# Show the intermediate output of the first step. As a bare expression in the
# middle of the cell its value was computed and then silently discarded.
display(dtf.fit_transform(df))

# pipeline with both transformers
pipe = Pipeline([
    ("dtf", dtf),
    ("cf", cf)
])
pipe.fit(df)
X_transformed = pipe.transform(df)
X_transformed

# Examples

In [None]:
# Longer toy series (n daily observations) for the forecaster examples.
# The values come from a seeded generator instead of the unseeded
# np.random.rand, so they are reproducible across kernel restarts.
n = 100
date_range = pd.date_range(start='2021-01-01', periods=n, freq='D')
y = pd.Series(np.random.default_rng(123).random(n), index=date_range)
display(y.head(3))

In [None]:
# Proposed usage: hand the feature function itself to the forecaster through
# `datetime_features`; only `y` is passed to fit() and create_train_X_y().
forecaster = ForecasterAutoreg(
    regressor         = Ridge(),
    lags              = 3,
    datetime_features = create_datetime_features,
)
forecaster.fit(y)
print(forecaster)
# Inspect the first rows of the training matrix built from `y`.
X_train, y_train = forecaster.create_train_X_y(y)
X_train.head(3)

In [None]:
# Manual alternative: compute the datetime features up front and pass them as
# the second positional argument of fit() and create_train_X_y().
exog = create_datetime_features(X=y)
forecaster = ForecasterAutoreg(regressor=Ridge(), lags=3)
forecaster.fit(y, exog)
print(forecaster)
X_train, y_train = forecaster.create_train_X_y(y, exog)
X_train

In [None]:
# `datetime_features` given as a list holding one (function, kwargs) tuple:
# create_datetime_features with features=["month"] and encoding="cyclic".
forecaster = ForecasterAutoreg(
    regressor         = Ridge(),
    lags              = 3,
    datetime_features = [
        (create_datetime_features, {"features": ["month"], "encoding": "cyclic"}),
    ],
)
forecaster.fit(y=y)
print(forecaster)
X_train, y_train = forecaster.create_train_X_y(y)
X_train

In [None]:
# `datetime_features` given as a list of two (function, kwargs) tuples:
# one requesting "month" and one requesting "day_of_week", both "cyclic".
forecaster = ForecasterAutoreg(
    regressor         = Ridge(),
    lags              = 3,
    datetime_features = [
        (create_datetime_features, {"features": ["month"], "encoding": "cyclic"}),
        (create_datetime_features, {"features": ["day_of_week"], "encoding": "cyclic"}),
    ],
)
forecaster.fit(y=y)
print(forecaster)
X_train, y_train = forecaster.create_train_X_y(y)
X_train

In [None]:
import pandas as pd
import numpy as np
# Scratch check of view semantics: write through a column slice and compare
# the parent frame before and after. Note that `df` is rebound here to a new
# 5x3 frame of random values with a default index.
df = pd.DataFrame(np.random.rand(5, 3), columns=['feature1', 'feature2', 'feature3'])
df_view = df.iloc[:, :-1] # this creates a view (every column except the last)
display(df)
display(df_view)
# NOTE(review): whether this write also shows up in `df` depends on the pandas
# version and its Copy-on-Write setting — confirm on the target version.
df_view.at[0, 'feature1'] = np.nan
display(df)
display(df_view)

In [None]:
# Column-less frame whose index, named 'date', is the daily range built below.
date_rng = pd.date_range(start='1/1/2022', end='1/10/2022', freq='D')
df = pd.DataFrame(date_rng, columns=['date']).set_index('date')

# Request 'year', 'month' and 'weekend' with 'cyclic' encoding and dump the
# result as a plain dict (presumably to copy into an expected value — verify).
result = create_datetime_features(
    df,
    features = ['year', 'month', 'weekend'],
    encoding = 'cyclic',
)
result.to_dict()

In [125]:
 # Build the transformer with an unsupported `encoding` value.
 # NOTE(review): presumably meant to trigger the validation error for a bad
 # encoding; no output is recorded for this cell, so confirm whether the
 # constructor itself raises.
 DateTimeFeatureTransformer(encoding="invalid encoding")

In [131]:
# Error-path demo: an unknown feature name is rejected with a ValueError.
# The exception is caught and printed as "<type>: <message>" so the message
# stays visible while "Run All" can continue past this cell.
df = pd.DataFrame(
    np.random.rand(5, 3),
    columns=["col_1", "col_2", "col_3"],
    index=pd.date_range(start="1/1/2022", end="1/5/2022", freq="D"),
)
try:
    create_datetime_features(df, features=["invalid_feature"])
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Features {'invalid_feature'} are not supported. Supported features are ['year', 'month', 'week', 'day_of_week', 'day_of_year', 'day_of_month', 'weekend', 'hour', 'minute', 'second'].

In [130]:
# Text of the unsupported-feature ValueError, including the "ValueError: "
# prefix, assembled from adjacent string literals to keep the lines short.
error_message = (
    "ValueError: Features {'invalid_feature'} are not supported. "
    "Supported features are ['year', 'month', 'week', 'day_of_week', "
    "'day_of_year', 'day_of_month', 'weekend', 'hour', 'minute', 'second']."
)
error_message

"ValueError: Features {'invalid_feature'} are not supported. Supported features are ['year', 'month', 'week', 'day_of_week', 'day_of_year', 'day_of_month', 'weekend', 'hour', 'minute', 'second']."

In [124]:
# Error-path demo: the frame below has a default integer index rather than a
# datetime one. The resulting ValueError is caught and printed as
# "<type>: <message>" so "Run All" is not interrupted by this cell.
df = pd.DataFrame({"a": [1, 2, 3]})
try:
    DateTimeFeatureTransformer().fit_transform(df)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Input must have a datetime index