In [8]:
import polars as pl
from pathlib import Path

In [9]:
# Folder holding the raw CSV inputs, relative to the notebook's working directory.
DATA_PATH = Path("../dataset")

# Read the daily price file (letting polars parse date-like columns) and order
# the rows by the "Date" column, all in a single chained expression.
df = pl.read_csv(
    DATA_PATH / "germany_electricity_price_daily.csv", try_parse_dates=True
).sort("Date")

# Preview the first rows.
df.head()

Date,Electricity_Price_EUR_per_MWh
date,f64
2015-01-01,22.34
2015-01-02,22.34
2015-01-03,22.34
2015-01-04,22.34
2015-01-05,36.18


In [10]:
# Switch to short working column names ("date" and "y") and cast "date" to
# pl.Date, expressed as one chain instead of two reassignments.
df = (
    df
    .rename({"Date": "date", "Electricity_Price_EUR_per_MWh": "y"})
    .with_columns(pl.col("date").cast(pl.Date))
)

In [11]:
# First day of the hold-out period; rows dated earlier form the training set.
TEST_START = pl.date(2024, 1, 1)

# Split on the cutoff: strictly before -> train, cutoff onwards -> test.
train_df = df.filter(pl.col("date").lt(TEST_START))
test_df = df.filter(pl.col("date").ge(TEST_START))

# Row counts of the two partitions.
train_df.height, test_df.height

(3287, 725)

In [12]:
# Naive baseline: each row's forecast is the "y" value of the row one position
# earlier (the previous day's price, assuming the frame is sorted by date with
# no missing days). The first row has no predecessor and gets a null.
df = df.with_columns(naive=pl.col("y").shift(1))

In [13]:
# Seasonal naive baseline: each row's forecast is the "y" value seven rows
# earlier (the same weekday of the previous week, assuming the frame is sorted
# by date with no missing days). The first seven rows get nulls.
df = df.with_columns(seasonal_naive_7=pl.col("y").shift(7))

In [16]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Score the baselines on the hold-out window only. drop_nulls() removes every
# row that contains a null in any column.
eval_df = df.filter(pl.col("date").ge(TEST_START)).drop_nulls()

# Actuals and both baseline forecasts as NumPy arrays.
y_true = eval_df.get_column("y").to_numpy()
y_naive = eval_df.get_column("naive").to_numpy()
y_seasonal = eval_df.get_column("seasonal_naive_7").to_numpy()

# NOTE: MAPE divides by the actual price, so it is undefined when y_true == 0
# and becomes very large when y_true is close to zero.

# Lag-1 naive baseline.
mae_naive = mean_absolute_error(y_true, y_naive)
rmse_naive = np.sqrt(mean_squared_error(y_true, y_naive))
mape_naive = np.abs((y_true - y_naive) / y_true).mean() * 100

# Lag-7 seasonal naive baseline.
mae_seasonal = mean_absolute_error(y_true, y_seasonal)
rmse_seasonal = np.sqrt(mean_squared_error(y_true, y_seasonal))
mape_seasonal = np.abs((y_true - y_seasonal) / y_true).mean() * 100

# Display order: MAE (naive, seasonal), RMSE (naive, seasonal), MAPE (naive, seasonal).
(
    mae_naive,
    mae_seasonal,
    rmse_naive,
    rmse_seasonal,
    mape_naive,
    mape_seasonal,
)

(20.872344827586208,
 27.09655172413793,
 np.float64(28.61233419349075),
 np.float64(38.9049402553951),
 np.float64(123.34692938456922),
 np.float64(193.8722594247381))

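- Naive (lag-1) performs better than seasonal naive (lag-7) on every metric (MAE 20.87 vs 27.10, RMSE 28.61 vs 38.90, MAPE 123.3% vs 193.9%), so on this test period day-to-day persistence is a stronger baseline than weekly seasonality alone.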
- These baselines will be used as a reference for more complex models.