In [None]:
# Enable IPython's autoreload extension so that edited modules are re-imported
# automatically (mode 2 reloads all modules before each cell is executed).
%load_ext autoreload

%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.darts import DartsXGBModel
from sktime.split import (
    temporal_train_test_split,
)

from probafcst.backtest import backtest
from probafcst.plotting import plot_quantiles

# Use seaborn's "ticks" theme for the figures produced in this notebook.
sns.set_theme(style="ticks")

In [None]:
# Quantile levels requested from every probabilistic forecast below: the median
# plus the bounds of the central 50% (0.25-0.75) and 95% (0.025-0.975) intervals.
quantile_levels = [0.025, 0.25, 0.5, 0.75, 0.975]

In [None]:
# Read both datasets and conform each index to a fixed frequency:
# daily ("D") for the bikes data, hourly ("h") for the energy load data.
bikes = pd.read_parquet("../data/bikes.parquet").asfreq("D")
load = pd.read_parquet("../data/energy.parquet").asfreq("h")

In [None]:
from sktime.forecasting.compose import (
    RecursiveTabularRegressionForecaster,
    TransformedTargetForecaster,
)
from sktime.transformations.series.fourier import FourierFeatures
from sktime.transformations.series.holiday import CountryHolidaysTransformer
from sktime.transformations.series.impute import Imputer
from sktime.transformations.series.lag import Lag
from xgboost import XGBRegressor

# Transformers used as the leading steps of the pipeline below.
lags = Lag([-30, -7, -1])
ff = FourierFeatures(sp_list=[7, 90, 365], fourier_terms_list=[4, 1, 1])
holidays = CountryHolidaysTransformer(country="DE", subdiv="BW")

# XGBoost regressor configured with the quantile-error objective for the
# levels listed in `quantile_levels`.
model = XGBRegressor(
    quantile_alpha=quantile_levels,
    objective="reg:quantileerror",
    tree_method="hist",
    n_estimators=100,
    n_jobs=-1,
)

# The tag name contains a colon, so it cannot be written as a plain keyword
# argument and has to be passed through dict unpacking.
pred_int_tag = {"capability:pred_int": True}

# Does not work, because predict_quantiles needs to be implemented for this
# model: the tag is forced to True below, but that alone does not provide an
# implementation.
forecaster = RecursiveTabularRegressionForecaster(model, window_length=7)
forecaster.set_tags(**pred_int_tag)

pipeline_steps = [
    ("lags", lags),
    ("holidays", holidays),
    ("fourier", ff),
    ("imputer", Imputer(method="nearest")),
    ("forecaster", forecaster),
]
pipeline = TransformedTargetForecaster(pipeline_steps)
pipeline.set_tags(**pred_int_tag)

In [None]:
from sktime.forecasting.darts import DartsLinearRegressionModel
from sktime.performance_metrics.forecasting.probabilistic import PinballLoss

# Hold out the final 30 observations of the bikes series for evaluation and
# describe them as an absolute forecasting horizon.
y_train, y_test = temporal_train_test_split(bikes, test_size=30)
fh = ForecastingHorizon(y_test.index, is_relative=False)

# Linear regression model from the darts adapter, fitted with a quantile
# likelihood for the levels in `quantile_levels`.
forecaster = DartsLinearRegressionModel(
    likelihood="quantile",
    quantiles=quantile_levels,
    lags=[-7, -30, -365],
    lags_future_covariates=[-7, -30, -365, *range(30)],
    add_encoders={"cyclic": {"future": ["day", "month", "day_of_year", "quarter"]}},
    output_chunk_length=7,
    multi_models=False,
)

# Fit with the horizon expressed relative to the last training timestamp.
train_end = y_train.index[-1]
forecaster.fit(y_train, fh=fh.to_relative(train_end))

# Score the predicted quantiles against the held-out data with pinball loss.
pred_quantiles = forecaster.predict_quantiles(fh, alpha=quantile_levels)
pinball_loss = PinballLoss()
pinball_loss(y_test, pred_quantiles)

In [None]:
# Backtest `pipeline` on the daily `bikes` series for every level in
# `quantile_levels`: forecast_steps=30, an initial window of 365 * 3
# observations, a step length of 90 and the "loky" backend.
# NOTE(review): the meaning of each argument is inferred from its name —
# confirm against `probafcst.backtest.backtest`.
# NOTE(review): `pipeline` is the XGBoost-based pipeline built above, whose
# forecaster is flagged there as lacking `predict_quantiles`; verify that this
# cell actually runs.
backtest(
    pipeline,
    bikes,
    forecast_steps=30,
    quantiles=quantile_levels,
    initial_window=365 * 3,
    step_length=90,
    backend="loky",
)

In [None]:
# Restrict to the most recent 24 * 365 * 3 hourly observations and hold out the
# final 24 * 14 of them (two weeks) as the test set.
recent_load = load.iloc[-24 * 365 * 3 :]
y_train, y_test = temporal_train_test_split(recent_load, test_size=24 * 14)
fh = ForecastingHorizon(y_test.index, is_relative=False)

output_chunk_length = 1  # earlier note on this value: len(splitter.fh)

# XGBoost model from the darts adapter using the values 24 and 24 * 7 hours back
# as lags, fitted with a quantile likelihood.
forecaster = DartsXGBModel(
    output_chunk_length=output_chunk_length,
    quantiles=quantile_levels,
    likelihood="quantile",
    lags=[-24, -24 * 7],
    multi_models=False,
)
forecaster.fit(y_train)

In [None]:
# Predict the requested quantile levels over the horizon `fh` and plot them
# together with the held-out observations `y_test`.
pred_quantiles = forecaster.predict_quantiles(fh, alpha=quantile_levels)
plot_quantiles(y_test, pred_quantiles)

## Backtesting


In [None]:
# Cyclic calendar encoders for future covariates. Currently not passed to the
# model: the corresponding `add_encoders` argument below is disabled.
add_encoders = {"cyclic": {"future": ["hour", "day", "month", "day_of_year"]}}

# One output chunk spans two weeks of hourly values.
output_chunk_length = 24 * 14

forecaster = DartsXGBModel(
    output_chunk_length=output_chunk_length,
    quantiles=quantile_levels,
    likelihood="quantile",
    lags=[-24, -24 * 7],
    multi_models=False,
    kwargs={"n_jobs": 1},
    # Disabled experiments:
    # add_encoders=add_encoders,
    # lags_future_covariates=[-24 * 30, -24 * 7, -24, 0, 24, 24 * 7, 24 * 30],
)

In [None]:
# Inspect the output of the holiday transformer for Germany ("DE") when it is
# fitted on and applied to the hourly `load` data.
# NOTE(review): this displays the complete transformed result.
CountryHolidaysTransformer(country="DE").fit_transform(load)

In [None]:
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.compose import YtoX

# Fourier terms (one per period) for the daily, weekly and yearly cycles of the
# hourly series.
ff = FourierFeatures(
    sp_list=[24, 24 * 7, 24 * 365], fourier_terms_list=[1, 1, 1], freq="h"
)
pipeline = ForecastingPipeline(
    [
        YtoX(),
        ff,
        # CountryHolidaysTransformer(country="DE", subdiv="BW"),
        forecaster,
    ]
)
pipeline.fit(load)

# Forecast one full week ahead, i.e. relative steps 1..168 inclusive.
# np.arange excludes its stop value, so the stop has to be 24 * 7 + 1; the
# previous np.arange(1, 168) ended at step 167 and dropped the last hour.
week_ahead = np.arange(1, 24 * 7 + 1)
y_pred = pipeline.predict_quantiles(week_ahead, alpha=quantile_levels)
plot_quantiles(load.loc["2024-11-01":], y_pred)

In [None]:
# Backtest `pipeline` on the hourly `load` series without exogenous data
# (X=None) for every level in `quantile_levels`: forecast_steps=24 * 14 (two
# weeks of hours), an initial window of 24 * 365 * 3 observations and a step
# length of 24 * 30 * 3. The call returns four objects, unpacked here.
# NOTE(review): the meaning of each argument and of the four return values is
# inferred from their names — confirm against `probafcst.backtest.backtest`.
results, metrics, predictions, add_metrics = backtest(
    pipeline,
    y=load,
    X=None,
    forecast_steps=24 * 14,
    quantiles=quantile_levels,
    initial_window=24 * 365 * 3,
    step_length=24 * 30 * 3,
    # backend="loky",
)

In [None]:
# Display the `results` frame returned by the backtest.
results

In [None]:
# Re-apply the seaborn "ticks" theme before plotting.
# NOTE(review): the same call is already made in the imports cell at the top,
# so this looks redundant — confirm before removing.
sns.set_theme(style="ticks")

In [None]:
# Box plot of the loss per quantile level.
# The per-level columns of `results` are reshaped to long form (a "quantile"
# label column and a "loss" value column) and each label is prefixed with "q".
melted = (
    results[quantile_levels]
    .melt(var_name="quantile", value_name="loss")
    .assign(quantile=lambda frame: frame["quantile"].map("q{}".format))
)
ax = sns.boxplot(data=melted, x="quantile", y="loss", hue="quantile")

In [None]:
# Display the `metrics` object returned by the backtest.
metrics

In [None]:
# Plot the most recent forecast periods (at most three).
# `tail` replaces `iloc[-nrows:]` because a zero count would turn that slice
# into `iloc[-0:]`, which selects every row instead of none. The count is also
# taken from `predictions`, the frame that is actually sliced.
nrows = min(3, len(predictions))
for _, row in predictions.tail(nrows).iterrows():
    plot_quantiles(row.y_test, row.y_pred_quantiles)