In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from darts import TimeSeries
from darts.models.forecasting.xgboost import XGBModel
from darts.metrics.metrics import rmse
from darts.utils.model_selection import train_test_split

In [7]:
from japr_data.twinplant import horsens
from japr_data.climatedata import dmi_data

In [16]:
horsens.index.name = "time"
data = (
    horsens.merge(dmi_data("horsens"), left_index=True, right_index=True, how="inner")
    .rename(columns={"FB20F11_81": "flow"})
    .iloc[2:,]
    .resample("1D", closed="right")
    .agg(
        {
            "flow": "mean",
            "acc_precip": "sum",
            "mean_radiation": "sum",
            "mean_temp": "mean",
        }
    )
    .dropna()
)
data.head()

Unnamed: 0_level_0,flow,acc_precip,mean_radiation,mean_temp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-11-07 00:00:00+00:00,2537.061,4.6,937.0,8.366667
2023-11-08 00:00:00+00:00,1988.173274,1.8,0.0,8.285714
2023-11-09 00:00:00+00:00,3005.526184,5.8,337.0,8.53
2023-11-10 00:00:00+00:00,3490.331014,4.2,689.0,7.275
2023-11-14 00:00:00+00:00,3194.873297,2.8,266.7,5.277778


In [20]:
ts = TimeSeries.from_dataframe(data, freq="D").longest_contiguous_slice()

flow = ts["flow"]
ambient_covariates = ts[["acc_precip", "mean_temp", "mean_radiation"]]

train, test = train_test_split(flow)
test_idx = test.time_index
train_idx = train.time_index

The provided DatetimeIndex was associated with a timezone, which is currently not supported by xarray. To avoid unexpected behaviour, the tz information was removed. Consider calling `ts.time_index.tz_localize(UTC)` when exporting the results.To plot the series with the right time steps, consider setting the matplotlib.pyplot `rcParams['timezone']` parameter to automatically convert the time axis back to the original timezone.


In [70]:
encoders = {
    "datetime_attribute": {"future": "dayofweek"},
    "cyclic": {"future": "month"},
}
n_lags = 7
horizon = 10
xgb = XGBModel(
    lags=7,
    lags_past_covariates=n_lags,
    lags_future_covariates=(0, 1),
    output_chunk_length=horizon,
    add_encoders=encoders,
).fit(train, past_covariates=ambient_covariates)

The selected dates should be between:

In [None]:
test.time_index[0], test.time_index[-1]

(Timestamp('2025-01-02 00:00:00'), Timestamp('2025-02-17 00:00:00'))

In [None]:
from ipywidgets import DatePicker, Checkbox, interact
from datetime import date

date_picker = DatePicker(
    value=date(2025, 2, 4),
    description="Start date",
    tooltip="Starting date of the forecast",
)
fix_axis = Checkbox(value=False, description="Fix axes")


@interact(date=date_picker, fixed_axis=fix_axis)
def plot_prediction_comparison(date, fixed_axis):
    date = pd.Timestamp(date)
    prior_series, _ = ts["flow"].split_before(date)

    actual = ts.slice(
        date - pd.Timedelta(5, unit="D"), date + pd.Timedelta(horizon, unit="D")
    )
    predicted = xgb.predict(
        horizon, series=prior_series, past_covariates=ambient_covariates
    )

    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    actual["flow"].plot(label="Actual", ax=ax)
    predicted.plot(label="predicted", ax=ax)
    if fixed_axis:
        ax.set_xlim(pd.Timestamp("01-31-2025"), pd.Timestamp("02-13-2025"))
        ax.set_ylim(800, 2000)
        ax.axvline(x=pd.Timestamp("02-07-2025"), c="orange")
    plt.legend(frameon=False)
    plt.show()

interactive(children=(DatePicker(value=datetime.date(2025, 2, 4), description='Start date', step=1, tooltip='S…