In [None]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

# Use seaborn's "ticks" style as the global plotting theme.
sns.set_theme(style="ticks")

In [None]:
# Read the energy data and put it on a regular hourly index.
# NOTE(review): asfreq adds NaN rows for timestamps missing from the file;
# confirm that downstream steps (log transform, model fit) tolerate them.
load = (
    pd.read_parquet("../data/energy.parquet")
    .asfreq("h")
)

In [None]:
# Overview: line plot of the complete hourly frame.
sns.lineplot(data=load)

In [None]:
# Zoom in on the rows whose index year is 2023.
load_2023 = load.query("date_time.dt.year == 2023")
sns.lineplot(data=load_2023)

## Seasonalities

In [None]:
# Derive calendar columns from the datetime index (reused by the boxplots
# in the following cells), then show the load distribution per month.
for calendar_part in ("month", "day", "hour", "weekday"):
    load[calendar_part] = getattr(load.index, calendar_part)
sns.boxplot(data=load, x="month", y="load")

In [None]:
# Load distribution per day of the week.
sns.boxplot(data=load, x="weekday", y="load")

In [None]:
# Load distribution per hour of the day.
sns.boxplot(data=load, x="hour", y="load")

In [None]:
# Rolling mean over a 52-week window (expressed in hourly steps).
hours_in_52_weeks = 52 * 7 * 24
load["load"].rolling(hours_in_52_weeks).mean().plot()

## Removing the yearly seasonality

In [None]:
# Store the natural logarithm of the load as a new column and plot it.
load["logLoad"] = load["load"].pipe(np.log)
load["logLoad"].plot()

In [None]:
from sktime.forecasting.all import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Detrender

# Degree-2 polynomial trend forecaster, wrapped in a Detrender and fitted
# on the log-load column.
model = PolynomialTrendForecaster(degree=2)
detrender = Detrender(forecaster=model)

log_load = load["logLoad"]
detrender.fit(log_load)

In [None]:
# Plot the log-load after passing it through the fitted detrender.
detrended_log_load = detrender.transform(load["logLoad"])
detrended_log_load.plot()

In [None]:
# Forecasting target: the untransformed load column.
y = load.loc[:, "load"]

In [None]:
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.forecasting.darts import DartsLinearRegressionModel
from sktime.transformations.series.boxcox import LogTransformer

# from sktime.transformations.series.detrend import Deseasonalizer


# Quantile levels requested from the probabilistic forecaster.
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

# Linear regression with a quantile likelihood, via sktime's darts adapter.
model = DartsLinearRegressionModel(
    lags=24,
    output_chunk_length=6,
    likelihood="quantile",
    quantiles=quantiles,
    multi_models=True,
    kwargs={"solver": "highs-ipm"},
)
# Alternative model, currently inactive:
# model = DartsXGBModel(
#         lags=24*7,
#         output_chunk_length=6,
#         likelihood="quantile",
#         quantiles=quantiles,
#         multi_models=False,
#         kwargs=dict(n_jobs=1)
# )

# Target pipeline: log-transform step followed by the model.
forecaster = TransformedTargetForecaster(
    steps=[
        LogTransformer(),
        # Detrender(PolynomialTrendForecaster(degree=2)),
        model,
    ]
)
forecaster

In [None]:
import holidays
from sktime.transformations.series.holiday import (
    HolidayFeatures,
)

# Holiday calendar for country "DE", subdivision "BW", turned into a single
# indicator feature (no per-holiday dummies) and previewed on the target.
calender = holidays.country_holidays("DE", subdiv="BW")
holiday_options = dict(return_indicator=True, return_dummies=False)
holiday_features = HolidayFeatures(calender, **holiday_options)
holiday_features.fit_transform(y)

In [None]:
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.compose import DataLog, FeatureUnion, Logger, YtoX
from sktime.transformations.series.date import DateTimeFeatures
from sktime.transformations.series.fourier import FourierFeatures

In [None]:
def get_featurizer(
    fourier_kwargs: dict,
    include_holidays=True,
    country: str = "DE",
    subdiv: str = "BW",
):
    """Build the feature union of Fourier, weekend and holiday features.

    Parameters
    ----------
    fourier_kwargs : dict
        Keyword arguments forwarded unchanged to ``FourierFeatures``.
    include_holidays : bool, default=True
        If True, append a holiday indicator feature built from the
        ``holidays`` calendar selected by ``country`` and ``subdiv``.
    country : str, default="DE"
        Country code passed to ``holidays.country_holidays``. Only used when
        ``include_holidays`` is True.
    subdiv : str, default="BW"
        Subdivision code passed to ``holidays.country_holidays``. Only used
        when ``include_holidays`` is True.

    Returns
    -------
    FeatureUnion
        Union of the named transformers "fourier", "is_weekend" and,
        optionally, "holidays".
    """
    transformers = [
        ("fourier", FourierFeatures(**fourier_kwargs)),
        ("is_weekend", DateTimeFeatures(manual_selection=["is_weekend"])),
    ]
    if include_holidays:
        holiday_calendar = holidays.country_holidays(country, subdiv=subdiv)
        # One indicator column instead of one dummy column per holiday.
        holiday_features = HolidayFeatures(
            holiday_calendar, return_indicator=True, return_dummies=False
        )
        transformers.append(("holidays", holiday_features))
    return FeatureUnion(transformers)


# Seasonal periods in hourly steps (24, 24*7, 24*7*52) with 10, 5 and 2
# Fourier terms respectively.
# NOTE(review): 24 * 7 * 52 is 364 days, slightly short of a calendar
# year; confirm this approximation is intended.
fourier_kwargs = {
    "sp_list": [24, 24 * 7, 24 * 7 * 52],
    "fourier_terms_list": [10, 5, 2],
}
get_featurizer(fourier_kwargs)

In [None]:
from sktime.transformations.all import WindowSummarizer

# Rolling median and standard deviation over one week of hourly values.
# For every key other than "lag", WindowSummarizer expects a list of
# [lag, window_length] pairs, so each statistic gets a single window that
# starts one step back and spans 24 * 7 observations.
# NOTE(review): a one-week window is the assumed intent; adjust the pair if
# a different window was meant.
hours_per_week = 24 * 7
lag_feature = {
    "median": [[1, hours_per_week]],
    "std": [[1, hours_per_week]],
}
ws = WindowSummarizer(truncate="bfill", lag_feature=lag_feature)

In [None]:
# Feature set used by the pipeline below, with the holiday feature enabled.
featurizer = get_featurizer(fourier_kwargs=fourier_kwargs, include_holidays=True)


def get_pipeline(forecaster, featurizer, logger_name=None):
    """Assemble the forecasting pipeline.

    The steps are, in order: "y_to_x" (``YtoX``), "featurizer", an optional
    "logger" step, and finally "forecaster".

    Parameters
    ----------
    forecaster
        Final pipeline step.
    featurizer
        Transformer placed directly after the ``YtoX`` step.
    logger_name : str, optional
        If given, a ``Logger`` step using the "datalog" backend and this
        logger name is inserted just before the forecaster.

    Returns
    -------
    ForecastingPipeline
    """
    logging_steps = []
    if logger_name is not None:
        logging_steps.append(
            ("logger", Logger(logger=logger_name, logger_backend="datalog"))
        )

    return ForecastingPipeline(
        steps=[
            ("y_to_x", YtoX()),
            ("featurizer", featurizer),
            *logging_steps,
            ("forecaster", forecaster),
        ]
    )


# Build the pipeline with a data logger named "test" and fit it on the
# observations from 2022 onwards.
train_y = y.loc["2022":]
pipe = get_pipeline(forecaster, featurizer, logger_name="test")
pipe.fit(train_y)

In [None]:
# Inspect the "X" item stored in the most recent entry of the "test" log.
log = DataLog("test").get_log()
latest_entry = log[-1]
latest_entry[1]["X"]

In [None]:
from probafcst.plotting import plot_quantiles

# Forecast a full day ahead and plot it next to the last week of
# observations. np.arange excludes its stop value, so the horizon
# 1..forecast_steps needs a stop of forecast_steps + 1.
forecast_steps = 24
y_pred = pipe.predict_quantiles(np.arange(1, forecast_steps + 1), alpha=quantiles)
plot_quantiles(y.iloc[-24 * 7 :], y_pred)

In [None]:
from probafcst.backtest import backtest, get_window_params

# Window configuration for hourly data: 2-year initial window, 90-day
# step, 1-day forecast horizon.
wdw = get_window_params(
    freq="h",
    n_years_initial_window=2,
    step_length_days=90,
    forecast_steps_days=1,
)
# Run the backtest with a sliding splitter on the "loky" backend.
backtest_options = dict(
    quantiles=quantiles,
    backend="loky",
    splitter_type="sliding",
)
result = backtest(pipe, y, **wdw, **backtest_options)

In [None]:
# Display the evaluation results attached to the backtest result.
result.eval_results

In [None]:
# Plot the test series against the predicted quantiles for every fourth row
# of the third element of the backtest result.
every_fourth_row = result[2].iloc[::4]
for row_label, (_, y_test, y_pred_quantiles) in every_fourth_row.iterrows():
    plot_quantiles(y_test, y_pred_quantiles)