## Hourly historical weather data

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd

In [None]:
# Read the bikes parquet file; the path is resolved relative to the
# notebook's working directory.
data = pd.read_parquet(path="../data/bikes.parquet")

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts).
data.info()

In [None]:
# Keep only complete rows, then separate the "bike_count" target from the
# remaining feature columns. `Xy` and `X` are independent copies of the
# feature frame, so modifying one does not affect the other.
complete_rows = data.dropna()
y = complete_rows["bike_count"].copy()
Xy = complete_rows.drop(columns="bike_count")
X = Xy.copy()

In [None]:
# Sanity check: features and target should have the same number of rows.
X.shape, y.shape

In [None]:
from sktime.forecasting.base import ForecastingHorizon
from sktime.split import temporal_train_test_split

from probafcst.models.xgboost import XGBQuantileForecaster

# Hold out the last 24 observations of the target and the features as the
# test window; everything before that is used for training.
y_train, y_test, X_train, X_test = temporal_train_test_split(
    y=y,
    X=X,
    test_size=24,
)

# Quantile levels to forecast: the median plus the bounds of the central
# 50% and 95% intervals.
quantiles = [
    0.025,
    0.25,
    0.5,
    0.75,
    0.975,
]

In [None]:
# Lags of 24, 48 and 168 steps -- presumably one day, two days and one week
# of hourly observations; TODO confirm the sampling frequency of the data.
target_lags = [24, 48, 168]

model = XGBQuantileForecaster(
    lags=target_lags,
    quantiles=quantiles,
    include_seasonal_dummies=True,
    X_lag_cols=None,
    # n_jobs=-1 is forwarded via xgb_kwargs -- presumably to let XGBoost use
    # all available cores for this single fit.
    xgb_kwargs={"n_jobs": -1},
)
model.fit(y_train, X=X_train)

In [None]:
# Label the fitted model's importance scores with the forecaster's input
# feature names and show them as a horizontal bar chart, sorted ascending.
feature_importances = pd.Series(
    data=model.model.feature_importances_,
    index=model.feature_names_in_,
)
feature_importances.sort_values().plot.barh()

In [None]:
from sktime.performance_metrics.forecasting.probabilistic import PinballLoss

from probafcst.plotting import plot_quantiles

# Re-assemble the full feature frame (train rows followed by test rows) so
# the forecaster has exogenous data covering the whole forecast horizon.
X = pd.concat([X_train, X_test])

# Absolute horizon: forecast exactly the timestamps of the hold-out window.
fh = ForecastingHorizon(y_test.index, is_relative=False)
y_pred = model.predict_quantiles(fh=fh, X=X, alpha=quantiles)

# score_average=False reports the pinball loss per quantile level instead
# of a single averaged score.
pinball_loss = PinballLoss(score_average=False)
print(pinball_loss(y_test, y_pred))

plot_quantiles(y_test, pred_quantiles=y_pred)

In [None]:
from probafcst.backtest import backtest, get_window_params

# Restrict XGBoost to a single thread -- presumably so the "loky" backend
# used by `backtest` below does not oversubscribe the CPU.
model.set_params(xgb_kwargs={"n_jobs": 1})

# NOTE(review): freq="D" looks inconsistent with the 24/48/168 lags and the
# 24-step hold-out used earlier, which suggest hourly data -- confirm that
# `get_window_params` produces window sizes in the right unit for this series.
window_params = get_window_params(
    n_years_initial_window=3,
    step_length_days=90,
    forecast_steps_days=7,
    freq="D",
)

backtest_results = backtest(
    model,
    y,
    **window_params,
    quantiles=quantiles,
    X=X,
    backend="loky",
)

In [None]:
# Display the `metrics` attribute of the backtest result.
backtest_results.metrics

In [None]:
# Unpack the predictions and the evaluation results from the backtest result.
predictions = backtest_results.predictions
results = backtest_results.eval_results

In [None]:
# Plot the quantile forecasts of the most recent backtest folds (at most 3).
nrows = min(3, len(results))

# `tail(nrows)` is used instead of `iloc[-nrows:]` because `iloc[-0:]` is
# `iloc[0:]` and would select *every* row when there are no results;
# `tail(0)` correctly yields an empty frame.
for _, row in predictions.tail(nrows).iterrows():
    plot_quantiles(row["y_test"], row["y_pred_quantiles"])