In [None]:
# Load IPython's autoreload extension; mode 2 reloads imported modules before
# each cell runs, so edits to project code are picked up without a restart.
%load_ext autoreload

%autoreload 2

In [None]:
from loguru import logger

# Turn on loguru output for records emitted under the "probafcst" name.
logger.enable("probafcst")

In [None]:
import pandas as pd
import seaborn as sns

from probafcst.backtest import backtest

# Use seaborn's "ticks" style as the plotting theme for this notebook.
sns.set_theme(style="ticks")

In [None]:
# Quantile levels to forecast: the median plus two symmetric pairs, spanning
# the central 50% (0.25-0.75) and central 95% (0.025-0.975) intervals.
quantile_levels = [0.025, 0.25, 0.5, 0.75, 0.975]

In [None]:
# Read the raw parquet files first, then put each frame on its regular time
# grid (hourly for load, daily for bikes) and drop rows with missing values.
bikes_raw = pd.read_parquet("../data/bikes.parquet")
load_raw = pd.read_parquet("../data/energy.parquet")

load = load_raw.asfreq("h").dropna()
bikes = bikes_raw.asfreq("D").dropna()

In [None]:
# Preview the first rows of the hourly load frame.
load.head()

In [None]:
# Preview the most recent rows of the daily bikes frame.
bikes.tail()

In [None]:
# Keep only observations from the start of 2021 onwards in both frames.
first_year = "2021"
bikes, load = bikes.loc[first_year:], load.loc[first_year:]

In [None]:
# Forecasts are issued on Wednesdays; this date starts the evaluation period.
eval_start = "2024-10-23"

# Training-window length = number of rows up to eval_start.
# Label-based slicing with .loc includes the end label, so every row stamped
# on eval_start itself is counted as part of the window.
n_train_bikes = len(bikes.loc[:eval_start])
n_train_energy = len(load.loc[:eval_start])
n_train_energy

## Backtesting


In [None]:
import dvc.api
from omegaconf import OmegaConf

from probafcst.models import get_model

# Name of the model configuration to select. It is kept separate from `model`,
# which holds only the estimator object built at the end of this cell.
model_name = "lgbm"
target = "energy"

# Start from the pipeline parameters reported by DVC and take the training
# section of the chosen target. `quantile_levels` is the list defined near the
# top of the notebook, so there is a single source of truth for it.
pipe_params = dvc.api.params_show()
model_params = OmegaConf.create(pipe_params).train[target]

# Notebook-only overrides of the LightGBM keyword arguments; they are applied
# on top of whatever the pipeline parameters already contain.
lgbm_overrides = {
    "colsample_bytree": 0.8,
    "learning_rate": 0.1,
    "max_depth": 7,
    "n_estimators": 250,
    "reg_lambda": 1e-1,
    "min_child_samples": 200,
    "verbose": -1,
    "min_split_gain": 1e-2,
    "boosting_type": "goss",
    "subsample": 1,
    "top_rate": 0.4,
}
for key, value in lgbm_overrides.items():
    model_params.lgbm.kwargs[key] = value

model_params.selected = model_name
model = get_model(model_params, quantiles=quantile_levels)
model

In [None]:
use_bikes = target == "bikes"

# Choose the source frame, its target column, the training-window length and
# the number of time steps per day (1 for daily bikes, 24 for hourly load).
source, target_col, initial_window, DAY_HOURS = (
    (bikes, "bike_count", n_train_bikes, 1)
    if use_bikes
    else (load, "load", n_train_energy, 24)
)

# Features are every column except the target; the target is the series itself.
X = source.drop(columns=target_col)
y = source[target_col]

In [None]:
# Fit once on the complete target series and feature frame selected above.
# NOTE(review): whether `backtest` reuses this fit or refits per window is not
# visible from this notebook - confirm.
model.fit(y, X)

In [None]:
# Both the forecast horizon and the step between successive backtest windows
# are one week, expressed in time steps of the selected series.
one_week = DAY_HOURS * 7

results, metrics, predictions, add_metrics = backtest(
    model,
    y=y,
    X=X,
    forecast_steps=one_week,
    quantiles=quantile_levels,
    initial_window=initial_window,
    step_length=one_week,
    backend="loky",
)

In [None]:
# Display the metrics object returned by the backtest above.
metrics

In [None]:
from probafcst.plotting import plot_quantiles

# visualize some forecasts: the first four rows, the middle row and the last
# row of `predictions`. Positions are derived from `predictions` itself (the
# frame being indexed) rather than from `results`, limited to valid positions
# and de-duplicated, so a short backtest neither raises IndexError nor plots
# the same forecast twice.
n_forecasts = len(predictions)
idx = list(
    dict.fromkeys(
        pos
        for pos in (0, 1, 2, 3, n_forecasts // 2, n_forecasts - 1)
        if 0 <= pos < n_forecasts
    )
)
for _, row in predictions.iloc[idx].iterrows():
    fig, _ = plot_quantiles(row.y_test, row.y_pred_quantiles)