In [None]:
# Load IPython's autoreload extension.
%load_ext autoreload

# Mode 2: reload all imported modules before each cell is executed, so edits
# to the project code are picked up without restarting the kernel.
%autoreload 2

In [None]:
from loguru import logger

# Enable loguru messages coming from the `probafcst` package so they show up
# in the notebook (presumably the library disables them by default — confirm).
logger.enable("probafcst")

In [None]:
import pandas as pd
import seaborn as sns

from probafcst.backtest import backtest
from probafcst.models.xgboost import XGBQuantileForecaster

# Apply seaborn's "ticks" style to the figures created in this notebook.
sns.set_theme(style="ticks")

In [None]:
# Quantile levels to forecast: the median plus the bounds of the central
# 50% (0.25-0.75) and 95% (0.025-0.975) intervals.
quantile_levels = [
    0.025,
    0.25,
    0.5,
    0.75,
    0.975,
]

In [None]:
# Read both datasets, put each one on a regular time grid (daily for the bike
# counts, hourly for the energy load) and drop every row that has a missing
# value, including the gap rows inserted by `asfreq`.
bikes = pd.read_parquet("../data/bikes.parquet").asfreq("D").dropna()
load = pd.read_parquet("../data/energy.parquet").asfreq("h").dropna()

In [None]:
# Show the last rows of the bike data as a quick sanity check.
bikes.tail()

In [None]:
# Keep only the observations from 2021 onwards in both datasets.
bikes, load = bikes.loc["2021":], load.loc["2021":]

## Backtesting


In [None]:
# model = LGBMQuantileForecaster(
#     lags=[1, 2, 3, 4, 5, 6, 7, 14, 21],
#     quantiles=quantile_levels,
#     kwargs={"n_estimators": 100, "random_state": 42, "verbose": -1},
# )
model = XGBQuantileForecaster(
    lags=[24, 48, 72, 96, 120, 144, 168, 336],
    quantiles=quantile_levels,
    include_seasonal_dummies=True,
    cyclical_encodings=True,
    include_rolling_stats=True,
    X_lag_cols=[],
    kwargs={"n_estimators": 100, "random_state": 0},
)

In [None]:
use_bikes = True

if use_bikes:
    X = bikes.drop(columns="bike_count")
    y = bikes["bike_count"]
    DAY_HOURS = 1
else:
    X = load.drop(columns="load")
    y = load["load"]
    DAY_HOURS = 24

In [None]:
# Backtest settings. Durations are written in days and converted to a number
# of observations with DAY_HOURS.
backtest_config = {
    "y": y,
    "X": X,
    "forecast_steps": DAY_HOURS * 7,  # one week per forecast
    "quantiles": quantile_levels,
    "initial_window": DAY_HOURS * 365 * 3,  # three 365-day years
    "step_length": DAY_HOURS * 90,  # 90 days between windows
    "backend": "loky",  # presumably the joblib "loky" backend — confirm
}

# Run the backtest for the configured forecaster and unpack its four outputs.
results, metrics, predictions, add_metrics = backtest(model, **backtest_config)

In [None]:
# Display the metrics returned by the backtest.
metrics

In [None]:
from sktime.utils.plotting import plot_lags, plot_windows

In [None]:
import matplotlib.pyplot as plt
from sktime.split import SlidingWindowSplitter

# Sliding-window splitter for the window illustration: a training window of
# three 365-day years that advances 40 days per split, with forecast steps
# 2 through 7.
# NOTE(review): step 1 is not part of `fh`, and the 40-day stride differs from
# the 90-day stride passed to `backtest` — confirm both are intentional.
cv = SlidingWindowSplitter(
    fh=[2, 3, 4, 5, 6, 7],
    window_length=DAY_HOURS * 365 * 3,
    step_length=DAY_HOURS * 40,
)

In [None]:
# Plot the windows that `cv` generates on the target series.
fig, ax = plot_windows(cv, y)
# Rotate every x-axis tick label by 45 degrees.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
# Save the current figure for the presentation.
plt.savefig("../presentation/cv_window.svg", bbox_inches="tight")

In [None]:
# Plot the target series against its own values 1, 7 and 14 steps back.
plot_lags(y, lags=[1, 7, 14])
# plt.savefig("../presentation/bikes_lags.svg")

In [None]:
# Fit the forecaster on the complete selected series (target first, then the
# feature frame).
model.fit(y, X)

In [None]:
# Pair the fitted model's importances with their feature names and keep the
# ten largest, sorted ascending so the most important bar ends up at the top
# of the horizontal bar chart.
top_importances = (
    pd.Series(model.model.feature_importances_, index=model.feature_names_in_)
    .nlargest(10)
    .sort_values(ascending=True)
)

# Draw on an explicitly created figure so the title and the saved file always
# belong to this chart rather than to whatever figure happens to be current.
fig, ax = plt.subplots()
top_importances.plot.barh(ax=ax)
fig.suptitle("XGBoost Feature Importance")
# NOTE(review): this writes next to the notebook, while the window plot above
# is saved under "../presentation/" — confirm the intended output location.
fig.savefig("presentation_feature_importance.svg", bbox_inches="tight")