In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs so edits to project code are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

# Apply seaborn's "ticks" style as the plotting theme for the notebook.
sns.set_theme(style="ticks")

In [None]:
# Read the energy data and conform its index to an hourly frequency.
energy_path = "../data/energy.parquet"
load = pd.read_parquet(energy_path).asfreq("h")

In [None]:
# Line plot of the whole frame.
sns.lineplot(data=load)

In [None]:
# Restrict the view to rows whose `date_time` falls in 2023.
load_2023 = load.query("date_time.dt.year == 2023")
sns.lineplot(data=load_2023)

## Seasonalities

In [None]:
# Derive calendar columns from the datetime index; they are the grouping
# variables for the seasonal boxplots in this section.
for part in ("month", "day", "hour", "weekday"):
    load[part] = getattr(load.index, part)

# Distribution of load per calendar month.
sns.boxplot(data=load, x="month", y="load")

In [None]:
# Distribution of load per weekday (uses the `weekday` column derived from the index).
sns.boxplot(data=load, x="weekday", y="load")

In [None]:
# Distribution of load per hour of day (uses the `hour` column derived from the index).
sns.boxplot(data=load, x="hour", y="load")

In [None]:
# Rolling mean over 52 weeks of hourly observations (52 * 7 * 24 samples).
window_hours = 52 * 7 * 24
load["load"].rolling(window_hours).mean().plot()

## Removing the seasonality

In [None]:
# Store the natural log of the load as a new column and plot it.
load["logLoad"] = load["load"].pipe(np.log)
load["logLoad"].plot()

In [None]:
import holidays
from sklearn.linear_model import LinearRegression
from sktime.forecasting.compose import (
    ForecastingPipeline,
    TransformedTargetForecaster,
    make_reduction,
)
from sktime.transformations.all import (
    DateTimeFeatures,
    FourierFeatures,
    Logger,
    YtoX,
)
from sktime.transformations.compose import FeatureUnion
from sktime.transformations.series.holiday import (
    HolidayFeatures,
)
from sktime.utils.plotting import plot_series

# Fourier terms for three seasonal periods expressed in hours (24, 24 * 7 and
# 24 * 365), with one Fourier term per period.
fourier_kwargs = {"sp_list": [24, 24 * 7, 24 * 365], "fourier_terms_list": [1, 1, 1]}
# NOTE(review): this logger is created here but is not added to any pipeline
# in this cell — confirm whether it is still needed.
logger = Logger(logger_backend="datalog", logger="features")

# Holiday calendar for country "DE", subdivision "BW". The transformer is asked
# for a holiday indicator only (return_indicator=True), not per-holiday dummies.
calender = holidays.country_holidays("DE", subdiv="BW")
holiday_features = HolidayFeatures(
    calender, return_indicator=True, return_dummies=False
)
# Combine the Fourier, weekend-flag and holiday features into one feature set.
featurizer = FeatureUnion(
    [
        FourierFeatures(**fourier_kwargs),
        DateTimeFeatures(manual_selection=["is_weekend"]),
        holiday_features,
    ]
)
# Linear regression reduced to a recursive forecaster with a 30-step window.
lr = make_reduction(LinearRegression(), window_length=30, strategy="recursive")
model = ForecastingPipeline([YtoX(), featurizer, lr])

# Fit on the load series from 2022 onwards.
y = load["load"].loc["2022":]
model.fit(y)
# Forecast a full four weeks of hourly steps. np.arange excludes its stop
# value, so the stop has to be horizon + 1 for the final hour to be included.
fh = np.arange(1, 24 * 7 * 4 + 1)
y_pred = model.predict(fh)
# Show the forecast next to the last two weeks of observed load.
plot_series(y.iloc[-24 * 7 * 2 :], y_pred)

## Modelling

In [None]:
# Modelling target: the `load` column from 2022 onwards.
y = load.loc["2022":, "load"]

In [None]:
from sktime.transformations.series.boxcox import LogTransformer

from probafcst.models.darts import get_xgboost_model

# from sktime.transformations.series.detrend import Deseasonalizer


# Quantile levels to forecast: the median plus the bounds of the central
# 50% and 95% intervals.
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

# Build the quantile forecaster with the project helper.
# NOTE(review): presumably an XGBoost-based model predicting 24 steps per
# output chunk — confirm against probafcst.models.darts.
model = get_xgboost_model(
    freq="h",
    quantiles=quantiles,
    output_chunk_length=24,
    xgb_kwargs={"n_jobs": 1},
)

# Log-transform the target before it reaches the model.
model_steps = [LogTransformer(), model]
forecaster = TransformedTargetForecaster(model_steps)
forecaster

In [None]:
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.compose import YtoX

# Full pipeline: copy y into X, add the holiday features, then run the
# log-transformed quantile forecaster.
pipeline_steps = [YtoX(), holiday_features, forecaster]
pipeline = ForecastingPipeline(steps=pipeline_steps)

In [None]:
from probafcst.backtest import backtest, get_window_params

# Window settings for the backtest: one year of initial data, a 90-day step
# between folds and a 7-day forecast, on hourly data.
wdw = get_window_params(
    freq="h",
    n_years_initial_window=1,
    step_length_days=90,
    forecast_steps_days=7,
)
# Backtest the pipeline on y with a sliding splitter and the "loky" backend.
result = backtest(
    pipeline,
    y,
    quantiles=quantiles,
    splitter_type="sliding",
    backend="loky",
    **wdw,
)

In [None]:
# Display the evaluation results stored on the backtest result.
result.eval_results

In [None]:
# Index labels of the two folds with the highest and the two with the lowest
# pinball loss.
pinball_loss = result.eval_results["test_PinballLoss"]
worst_preds = pinball_loss.nlargest(2).index
best_preds = pinball_loss.nsmallest(2).index
worst_preds

In [None]:
from probafcst.plotting import plot_quantiles

# Plot the quantile forecasts of the folds with the highest pinball loss.
# NOTE(review): `worst_preds` holds index labels but is used positionally via
# .iloc — this assumes labels and positions coincide; confirm.
worst_rows = result[2].iloc[worst_preds]
for _row_label, row in worst_rows.iterrows():
    # Each row unpacks into three values; the first one is not used here.
    _, y_test, y_pred_quantiles = row
    plot_quantiles(y_test, y_pred_quantiles)

In [None]:
# Plot the quantile forecasts of the folds with the lowest pinball loss.
# NOTE(review): `best_preds` holds index labels but is used positionally via
# .iloc — this assumes labels and positions coincide; confirm.
best_rows = result[2].iloc[best_preds]
for _row_label, row in best_rows.iterrows():
    # Each row unpacks into three values; the first one is not used here.
    _, y_test, y_pred_quantiles = row
    plot_quantiles(y_test, y_pred_quantiles)