In [None]:
# Load IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload

# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2

In [None]:
from loguru import logger

# Turn on loguru output for the `probafcst` package
# (presumably the library disables its own logger by default — verify).
logger.enable("probafcst")

In [None]:
import pandas as pd
import seaborn as sns

from probafcst.backtest import backtest
from probafcst.models.lgbm import LGBMQuantileForecaster
from probafcst.plotting import plot_quantiles

# Apply seaborn's "ticks" style to every figure created from here on.
sns.set_theme(style="ticks")

In [None]:
# Quantile levels used both to configure the forecaster and to score the
# backtest: the median, the quartiles (central 50% band) and the
# 2.5% / 97.5% levels (central 95% band).
quantile_levels = [
    0.025,
    0.25,
    0.5,
    0.75,
    0.975,
]

In [None]:
# Read each dataset, conform it to a regular time grid (daily for bikes,
# hourly for load), then drop every row containing a missing value. This
# also removes the empty rows `asfreq` inserts for absent timestamps.
# Assumes both parquet files carry a datetime index — TODO confirm.
bikes = pd.read_parquet("../data/bikes.parquet").asfreq("D").dropna()
load = pd.read_parquet("../data/energy.parquet").asfreq("h").dropna()

In [None]:
# Restrict both frames to rows labelled 2021 or later
# (label slice on the index; assumes a sorted datetime index — TODO confirm).
bikes, load = (frame.loc["2021":] for frame in (bikes, load))

## Backtesting


In [None]:
# Lag offsets: each of the previous seven steps, plus 14 and 21 steps back.
# NOTE(review): presumably lags are counted in time steps, so on the hourly
# load data these reach back hours rather than days — confirm this is intended.
lag_steps = [*range(1, 8), 14, 21]

# Passed to the forecaster as `kwargs`
# (presumably forwarded to the underlying LightGBM estimator — verify).
lgbm_params = dict(n_estimators=100, random_state=42, verbose=-1)

model = LGBMQuantileForecaster(
    lags=lag_steps,
    quantiles=quantile_levels,
    kwargs=lgbm_params,
)

In [None]:
# Toggle between the two datasets: True -> bikes, False -> energy load.
use_bikes = False

# Pick the source frame, its target column, and the number of time steps
# that make up one day at that frame's frequency (1 for the daily bikes
# data, 24 for the hourly load data).
if use_bikes:
    dataset, target_col, DAY_HOURS = bikes, "bike_count", 1
else:
    dataset, target_col, DAY_HOURS = load, "load", 24

# Every column except the target becomes a feature; the target column is y.
X = dataset.drop(columns=target_col)
y = dataset[target_col]

In [None]:
# Window sizes expressed in time steps via DAY_HOURS (steps per day).
# One week's worth of steps is used for both `forecast_steps` and
# `step_length`; three 365-day years' worth is used for `initial_window`.
week_steps = 7 * DAY_HOURS
initial_steps = 3 * 365 * DAY_HOURS

results, metrics, predictions, add_metrics = backtest(
    model,
    X=X,
    y=y,
    quantiles=quantile_levels,
    initial_window=initial_steps,
    forecast_steps=week_steps,
    step_length=week_steps,
    backend="loky",
)

In [None]:
# Re-apply the "ticks" theme before plotting.
# NOTE(review): the same call is already made in the import cell, so this
# looks redundant unless something in between resets the style — confirm.
sns.set_theme(style="ticks")

In [None]:
# Box plot of the losses stored under each quantile level in `results`.
# Reshape to long form first: one row per (quantile level, loss value) pair.
loss_columns = results[quantile_levels]
melted = loss_columns.melt(var_name="quantile", value_name="loss")
# Prefix each level with "q" so the axis shows string category labels.
melted["quantile"] = melted["quantile"].map("q{}".format)
ax = sns.boxplot(x="quantile", y="loss", hue="quantile", data=melted)

In [None]:
# Display the second object returned by `backtest`.
metrics

In [None]:
# Display the first rows of the backtest results frame.
results.head()

In [None]:
# Display the last rows of the backtest results frame.
results.tail()

In [None]:
# Plot the most recent forecast periods (at most `nrows` of them).
# The row count is tied to `predictions` itself rather than to the length of
# `results`. `tail` clamps to the rows available and yields an empty frame
# when there is nothing to plot, which avoids the `iloc[-0:]` pitfall of
# selecting every row.
nrows = 3
for _, row in predictions.tail(nrows).iterrows():
    plot_quantiles(row.y_test, row.y_pred_quantiles)