In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell execution so edits to imported source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import seaborn as sns

# Apply seaborn's "ticks" style to every plot drawn in this notebook.
sns.set_theme(style="ticks")

In [None]:
# Read the energy data set and conform it to a regular hourly grid.
# `asfreq("h")` requires a datetime-like index; timestamps that are absent
# from the file show up as rows filled with NaN.
load = (
    pd.read_parquet("../data/energy.parquet")
    .asfreq("h")
)

In [None]:
# Wide-form line plot of the whole frame: one line per column, index on x.
sns.lineplot(data=load)

In [None]:
# Zoom in on the observations from calendar year 2023.
# `date_time` is resolved by `query` against the frame -- presumably it is the
# name of the datetime index; confirm against the parquet schema.
load_2023 = load.query("date_time.dt.year == 2023")
sns.lineplot(data=load_2023)

## Seasonalities

In [None]:
# Derive calendar features from the datetime index and store them as columns
# of `load` (in place). `month`, `weekday` and `hour` serve as the grouping
# variable of the box plots in this section.
for calendar_part in ("month", "day", "hour", "weekday"):
    load[calendar_part] = getattr(load.index, calendar_part)

# Distribution of the load for each calendar month.
sns.boxplot(data=load, x="month", y="load")

In [None]:
# Distribution of the load per day of the week (pandas: 0 = Monday ... 6 = Sunday).
sns.boxplot(data=load, x="weekday", y="load")

In [None]:
# Distribution of the load per hour of the day (0-23).
sns.boxplot(data=load, x="hour", y="load")

In [None]:
# Rolling mean over 52 weeks of hourly observations (52 * 7 * 24 rows).
# The first `window_rows - 1` values are NaN because the window is not yet full.
window_rows = 52 * 7 * 24
load["load"].rolling(window_rows).mean().plot()

## Modelling

In [None]:
# Modelling target: the `load` column from the start of 2022 onwards
# (partial-string slicing on the datetime index).
y = load.loc["2022":, "load"]

In [None]:
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.boxcox import LogTransformer

from probafcst.models.xgboost import XGBQuantileForecaster

# Quantile levels to forecast: the median plus the bounds of the central
# 50% and 95% intervals.
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

# Lags of 24, 48, ..., 168 steps, i.e. the same hour on each of the previous
# seven days for hourly data.
daily_lags = list(range(24, 8 * 24, 24))

model = XGBQuantileForecaster(
    lags=daily_lags,
    quantiles=quantiles,
    # Single-threaded XGBoost -- presumably to avoid oversubscription when the
    # backtest runs folds in parallel; confirm.
    xgb_kwargs={"n_jobs": 1},
)

# Pipeline: log-transform the target, then fit the quantile model on it.
# An optional detrending step (Detrender with a degree-2
# PolynomialTrendForecaster) is currently disabled.
pipeline_steps = [LogTransformer(), model]
forecaster = TransformedTargetForecaster(pipeline_steps)
forecaster

In [None]:
from probafcst.backtest import backtest, get_window_params

# Window configuration for the backtest, requested as: one year of initial
# data, a step of 30 days between windows and a 7-day forecast, at hourly
# frequency. The return value is unpacked with `**` below, so it must be a
# mapping of keyword arguments accepted by `backtest`.
wdw = get_window_params(
    n_years_initial_window=1,
    step_length_days=30,
    forecast_steps_days=7,
    freq="h",
)

# Run the backtest of `forecaster` on `y` using the "sliding" splitter type
# and the "loky" backend.
result = backtest(
    forecaster,
    y,
    quantiles=quantiles,
    backend="loky",
    splitter_type="sliding",
    **wdw,
)

In [None]:
# Display the evaluation table produced by the backtest (it contains, among
# others, the `test_PinballLoss` column used in the next cell).
result.eval_results

In [None]:
# Rank the backtest windows by pinball loss and keep the two worst and the two
# best ones.
#
# The plotting cells select rows with `.iloc`, which is positional. Taking
# `.index` of the raw column would yield index *labels*, which only coincide
# with positions when `eval_results` has a default RangeIndex. Resetting the
# index first guarantees that the values below are row positions; for a
# default RangeIndex the result is unchanged.
pinball_loss = result.eval_results["test_PinballLoss"].reset_index(drop=True)
worst_preds = pinball_loss.nlargest(2).index
best_preds = pinball_loss.nsmallest(2).index
worst_preds

In [None]:
from probafcst.plotting import plot_quantiles

# Plot the windows with the largest pinball loss.
# `.iloc` selects rows by position, so `worst_preds` must hold row positions.
# Each selected row of `result[2]` unpacks into three values; the last two
# (observed values and quantile predictions) are passed to `plot_quantiles`.
worst_windows = result[2].iloc[worst_preds]
for row_label, (_, y_test, y_pred_quantiles) in worst_windows.iterrows():
    plot_quantiles(y_test, y_pred_quantiles)

In [None]:
# Plot the windows with the smallest pinball loss.
# `.iloc` selects rows by position, so `best_preds` must hold row positions.
best_windows = result[2].iloc[best_preds]
for row_label, (_, y_test, y_pred_quantiles) in best_windows.iterrows():
    plot_quantiles(y_test, y_pred_quantiles)