# Baseline

This notebook sets a simple baseline for a forecasting model with a 12-hour horizon.

In [2]:
import pandas as pd

from typing import List

from darts import TimeSeries
from darts.models import NaiveMovingAverage

from utils import compute_errors, get_rmse, get_mape

In [3]:
def assemble_comparison(candidates: List[pd.DataFrame]) -> pd.DataFrame:
    """Stack per-model metric tables and reshape them into long format.

    Args:
        candidates: Metric tables to compare, one per model. After the index
            is reset, each table must expose a "lead_time" and a "model"
            column (presumably "lead_time" is the index name of every table);
            all remaining columns are treated as metrics.

    Returns:
        A long-format frame with the columns "lead_time", "model", "metric"
        (the name of the original metric column) and "value".
    """
    # Row-wise stack of all candidate tables.
    stacked = pd.concat(candidates, axis=0)
    # Promote the index to a regular column so it can serve as an id variable.
    flattened = stacked.reset_index()
    # Unpivot every non-id column into (metric, value) pairs.
    long_format = flattened.melt(id_vars=["lead_time", "model"], var_name="metric")
    return long_format

In [5]:
# Execute the data-split notebook inside this kernel so that the names it
# defines become available to the cells below.
%run 1_data_split.ipynb
# Re-bind the two names used below with explicit annotations. `data` and
# `test_mask` are presumably created by the %run above (hence the
# type-checker and linter suppressions) — confirm against 1_data_split.ipynb.
data: pd.DataFrame = data  # type: ignore # noqa: F821
test_mask: pd.Series = test_mask  # type: ignore # noqa: F821

In [6]:
# Keep only the rows selected by the test mask; .copy() yields an independent frame.
test_data = data[test_mask].copy()
# Wrap the "flow" column in a darts TimeSeries to use as the forecasting target.
test_target = TimeSeries.from_series(test_data["flow"])

In [7]:
output_chunk_length = 12  # horizon of our forecast

# Number of most recent observations handed to the baseline model.
n_lags = 6
# Moving-average baseline from darts; per the darts documentation,
# input_chunk_length is the size of the sliding window that is averaged.
naive = NaiveMovingAverage(input_chunk_length=n_lags)

- The model is retrained at every prediction step, using the last 6 observations (`n_lags = 6`).

In [8]:
# Evaluate the baseline on the test series at the configured forecast horizon.
# compute_errors comes from the local utils module.
# NOTE(review): is_naive=True presumably selects the code path for naive
# models that are re-fit at each step — confirm in utils.
naive_errors = compute_errors(test_target, naive, output_chunk_length, is_naive=True)

In [9]:
# Summarise the baseline errors in one table: one column per metric, then a
# constant "model" label so the rows stay identifiable once this table is
# combined with the results of other models.
df_baseline = pd.DataFrame(
    {"RMSE": get_rmse(naive_errors), "MAPE": get_mape(naive_errors)}
).assign(model="baseline")