In [162]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'/home/ximo/Documents/GitHub/skforecast'

In [163]:
# Libraries
# ======================================================================================
import numpy as np
import pandas as pd
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import grid_search_forecaster
from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression

from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn.preprocessing import OrdinalEncoder

In [164]:
# Data
# ======================================================================================
y = pd.Series(np.random.normal(size=50))
exog_1 = pd.Series([1]*25 + [2]*25, name='exog_1', dtype=int)
exog_2 = pd.Series(['a']*25 + ['b']*25, name='exog_2', dtype='category')
exog   = pd.concat((exog_1, exog_2), axis=1)
#exog.iloc[0,1] = np.nan
exog.head()

Unnamed: 0,exog_1,exog_2
0,1,a
1,1,a
2,1,a
3,1,a
4,1,a


In [180]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline

# Opción A
# ==============================================================================
pipeline = make_pipeline(
                OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1, encoded_missing_value=-1),
                FunctionTransformer(func=lambda x: x.astype('category'), feature_names_out= 'one-to-one')
           )

transformer_exog = make_column_transformer(
                        (
                            pipeline,
                            make_column_selector(dtype_include=["category"]),
                        ),
                        remainder="passthrough",
                        verbose_feature_names_out=False,
                   ).set_output(transform="pandas")

transformer_exog.fit_transform(exog).head(3).dtypes

exog_2    category
exog_1       int64
dtype: object

In [179]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline

# Opción B
# ==============================================================================

transformer_exog = make_pipeline(
                            make_column_transformer(
                                (
                                    OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1, encoded_missing_value=-1),
                                    make_column_selector(dtype_include="category"),
                                ),
                                remainder="passthrough",
                                verbose_feature_names_out=False
                            ),
                            make_column_transformer(
                                (
                                    FunctionTransformer(func=lambda x: x.astype('category'), feature_names_out= 'one-to-one'),
                                    make_column_selector(dtype_include="category"),
                                ),
                                remainder="passthrough",
                                verbose_feature_names_out=False,
                            )
                      ).set_output(transform="pandas")


transformer_exog.fit_transform(exog).dtypes

exog_2    int64
exog_1    int64
dtype: object

In [173]:
forecaster = ForecasterAutoreg(regressor=LGBMRegressor(random_state=12345), lags=5)
#forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=5)

forecaster = ForecasterAutoreg(
                regressor = LinearRegression(),
                lags = 5,
                transformer_exog = transformer_exog
             )
X_train, y_train = forecaster.create_train_X_y(
                        y=y,
                        exog=exog
                   )
print(X_train.dtypes)
X_train.head(3)

lag_1      float64
lag_2      float64
lag_3      float64
lag_4      float64
lag_5      float64
exog_2    category
exog_1       int64
dtype: object


Unnamed: 0,lag_1,lag_2,lag_3,lag_4,lag_5,exog_2,exog_1
5,-1.410557,-1.454777,-0.172647,-1.212063,-0.452238,0,1
6,-0.469285,-1.410557,-1.454777,-0.172647,-1.212063,0,1
7,-0.572758,-0.469285,-1.410557,-1.454777,-0.172647,0,1


In [175]:
forecaster.fit(y=y, exog=exog)

In [176]:
exog_predict = exog.copy()
exog_predict.index = pd.RangeIndex(50, 100)
forecaster.predict(steps=3, exog=exog_predict)

50    0.176889
51    0.326085
52    0.462296
Name: pred, dtype: float64

In [49]:
categorical_cols = exog.select_dtypes(include="category").columns
ordinal_encoder = make_column_transformer(
    (
        OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1),
        make_column_selector(dtype_include="category"),
    ),
    remainder="passthrough",
    verbose_feature_names_out=False,
).set_output(transform="pandas")


exog_encoded = ordinal_encoder.fit_transform(exog)
exog_encoded[categorical_cols] = exog_encoded[categorical_cols].astype('category')

ValueError: There are missing values in features [0]. For OrdinalEncoder to encode missing values with dtype: <class 'int'>, set encoded_missing_value to a non-nan value, or set dtype to a float

In [None]:
X_train, y_train = forecaster.create_train_X_y(
    y=y,
    exog=exog_encoded
)
print(X_train.dtypes)
X_train.head(3)

In [None]:
forecaster.fit(y=y, exog=exog_encoded)
exog_predict = exog_encoded.copy()
exog_predict.index = pd.RangeIndex(50, 100)
forecaster.predict(steps=3, exog=exog_predict)

In [None]:
ordinal_encoder = make_column_transformer(
                        (
                            OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1),
                            make_column_selector(dtype_include=["object", "category"]),
                        ),
                        remainder="passthrough",
                        verbose_feature_names_out=False,
                    ).set_output(transform="pandas")


forecaster = ForecasterAutoreg(
    regressor = LGBMRegressor(),
    lags = 5,
    transformer_exog=OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1)
)

forecaster.fit(y=y, exog=exog_2)
exog_predict = exog_2.copy()
exog_predict.index = pd.RangeIndex(50, 100)
forecaster.predict(steps=3, exog=exog_predict)

50   -0.224048
51    0.325645
52   -0.425305
Name: pred, dtype: float64