In [59]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'/home/ubuntu/varios/skforecast'

In [70]:
# Libraries
# ======================================================================================
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import FunctionTransformer

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import grid_search_forecaster

import xgboost
import lightgbm
import catboost
import sklearn
import skforecast

print(f"xgboost version: {xgboost.__version__}")
print(f"lightgbm version: {lightgbm.__version__}")
print(f"catboost version: {catboost.__version__}")
print(f"sklearn version: {sklearn.__version__}")
print(f"skforecast version: {skforecast.__version__}")

xgboost version: 1.7.4
lightgbm version: 3.3.5
catboost version: 1.1.1
sklearn version: 1.2.1
skforecast version: 0.7.0


In [71]:
# Data
# ======================================================================================
y = pd.Series(np.random.normal(size=50))
exog_1 = pd.Series([1]*25 + [2]*25, name='exog_1', dtype=float)
exog_2 = pd.Series([True]*25 + [False]*25, name='exog_2', dtype=bool)
exog   = pd.concat((exog_1, exog_2), axis=1)
#exog.iloc[0,1] = np.nan
exog.head()

Unnamed: 0,exog_1,exog_2
0,1.0,True
1,1.0,True
2,1.0,True
3,1.0,True
4,1.0,True


In [72]:
# Opción A
# ==============================================================================
pipeline_categorical = make_pipeline(
                                OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1, encoded_missing_value=-1),
                                FunctionTransformer(func=lambda x: x.astype('category'), feature_names_out= 'one-to-one')
                        )

transformer_exog = make_column_transformer(
                        (
                            pipeline_categorical,
                            make_column_selector(dtype_exclude=np.number)
                        ),
                        remainder="passthrough",
                        verbose_feature_names_out=False,
                   ).set_output(transform="pandas")

print(transformer_exog.fit_transform(exog).head(3).dtypes)
transformer_exog.fit_transform(exog).head(3)

exog_2    category
exog_1     float64
dtype: object


Unnamed: 0,exog_2,exog_1
0,1,1.0
1,1,1.0
2,1,1.0


In [63]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline

# Opción B
# ==============================================================================
# categorical_cols = exog.select_dtypes(exclude=np.number).columns
# transformer_exog = make_pipeline(
#                             make_column_transformer(
#                                 (
#                                     OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1, encoded_missing_value=-1),
#                                     categorical_cols,
#                                 ),
#                                 remainder="passthrough",
#                                 verbose_feature_names_out=False
#                             ),
#                             make_column_transformer(
#                                 (
#                                     FunctionTransformer(func=lambda x: x.astype('category'), feature_names_out= 'one-to-one'),
#                                     categorical_cols,
#                                 ),
#                                 remainder="passthrough",
#                                 verbose_feature_names_out=False,
#                             )
#                       ).set_output(transform="pandas")

# transformer_exog.fit_transform(exog).dtypes

In [64]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline

# Opción C: hacer solo el ordinal encoder e indicar en el regressor, qué columnas son categóricas
# ==============================================================================
# categorical_cols = exog.select_dtypes(exclude=np.number).columns.to_list()
# transformer_exog = make_column_transformer(
#                                 (
#                                     OrdinalEncoder(dtype=int, handle_unknown="use_encoded_value", unknown_value=-1, encoded_missing_value=-1),
#                                     categorical_cols,
#                                 ),
#                                 remainder="passthrough",
#                                 verbose_feature_names_out=False
#                             ).set_output(transform="pandas")

# transformer_exog.fit_transform(exog).dtypes

In [68]:
# Lightgbm
# ======================================================================================
forecaster = ForecasterAutoreg(
                regressor = LGBMRegressor(
                              #categorical_feature = f"name:{','.join(categorical_cols)}",
                              #categorical_feature = 'auto',
                              n_estimators=10,
                              random_state=12345
                            ),
                lags = 5,
                transformer_exog = transformer_exog
             )
X_train, y_train = forecaster.create_train_X_y(
                        y=y,
                        exog=exog
                   )
print(X_train.dtypes)
display(X_train.head(3))
forecaster.fit(y=y, exog=exog)

exog_predict = exog.copy()
exog_predict.index = pd.RangeIndex(50, 100)
forecaster.predict(steps=3, exog=exog_predict)

lag_1      float64
lag_2      float64
lag_3      float64
lag_4      float64
lag_5      float64
exog_2    category
exog_1     float64
dtype: object


Unnamed: 0,lag_1,lag_2,lag_3,lag_4,lag_5,exog_2,exog_1
5,-0.633334,-0.786745,0.870672,-0.576013,1.368017,1,1.0
6,-0.524161,-0.633334,-0.786745,0.870672,-0.576013,1,1.0
7,0.174525,-0.524161,-0.633334,-0.786745,0.870672,1,1.0


50    0.337666
51    0.066307
52    0.058130
Name: pred, dtype: float64

In [54]:
# XGBoost
# ======================================================================================
forecaster = ForecasterAutoreg(
                regressor = LGBMRegressor(
                              #categorical_feature = f"name:{','.join(categorical_cols)}",
                              #categorical_feature = 'auto',
                              n_estimators=10,
                              random_state=12345
                            ),
                lags = 5,
                transformer_exog = transformer_exog
             )
X_train, y_train = forecaster.create_train_X_y(
                        y=y,
                        exog=exog
                   )
print(X_train.dtypes)
display(X_train.head(3))
forecaster.fit(y=y, exog=exog)

exog_predict = exog.copy()
exog_predict.index = pd.RangeIndex(50, 100)
forecaster.predict(steps=3, exog=exog_predict)

50   -0.159451
51    0.093426
52    0.093426
Name: pred, dtype: float64