In [8]:
import polars as pl
from pathlib import Path

In [9]:
# Directory that holds the per-country daily price CSVs (relative to the notebook).
DATA_PATH = Path("../dataset")

# Read the German series with automatic date parsing and put it in
# chronological order in a single chained expression.
df = (
    pl.read_csv(DATA_PATH / "germany_electricity_price_daily.csv", try_parse_dates=True)
    .sort("Date")
)

df.head()

Date,Electricity_Price_EUR_per_MWh
date,f64
2015-01-01,22.34
2015-01-02,22.34
2015-01-03,22.34
2015-01-04,22.34
2015-01-05,36.18


In [10]:
# Switch to the short `date` / `y` column names used by the rest of the
# notebook, then make sure `date` carries the polars Date dtype.
df = df.rename(
    {"Date": "date", "Electricity_Price_EUR_per_MWh": "y"}
).with_columns(pl.col("date").cast(pl.Date))

In [11]:
# First day of the hold-out period: rows strictly before it are used for
# training, rows on or after it for testing.
TEST_START = pl.date(2024, 1, 1)

train_df = df.filter(pl.col("date").lt(TEST_START))
test_df = df.filter(pl.col("date").ge(TEST_START))

# Row counts of the two partitions.
train_df.height, test_df.height

(3287, 725)

In [12]:
# Naive baseline: the forecast for each row is the previous row's price.
df = df.with_columns(naive=pl.col("y").shift(1))

In [13]:
# Seasonal naive baseline: the forecast for each row is the price seven rows
# earlier (one week back when the series has one row per day).
df = df.with_columns(seasonal_naive_7=pl.col("y").shift(7))

In [16]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Score both baselines on the hold-out period only. Rows with a missing actual
# or a missing lagged forecast are dropped so the three arrays stay aligned.
eval_df = df.filter(pl.col("date") >= TEST_START).drop_nulls(
    subset=["y", "naive", "seasonal_naive_7"]
)

y_true = eval_df["y"].to_numpy()
y_naive = eval_df["naive"].to_numpy()
y_seasonal = eval_df["seasonal_naive_7"].to_numpy()

mae_naive = mean_absolute_error(y_true, y_naive)
mae_seasonal = mean_absolute_error(y_true, y_seasonal)

rmse_naive = np.sqrt(mean_squared_error(y_true, y_naive))
rmse_seasonal = np.sqrt(mean_squared_error(y_true, y_seasonal))

# MAPE divides by the actual price, so a single day with y == 0 would turn the
# whole mean into inf/nan. Such days are excluded from the percentage error
# only; MAE and RMSE above still use every row. With no zero actuals the
# result is identical to the plain formula.
nonzero = y_true != 0
mape_naive = np.mean(np.abs((y_true[nonzero] - y_naive[nonzero]) / y_true[nonzero])) * 100
mape_seasonal = np.mean(np.abs((y_true[nonzero] - y_seasonal[nonzero]) / y_true[nonzero])) * 100

mae_naive, mae_seasonal, rmse_naive, rmse_seasonal, mape_naive, mape_seasonal

(20.872344827586208,
 27.09655172413793,
 np.float64(28.61233419349075),
 np.float64(38.9049402553951),
 np.float64(123.34692938456922),
 np.float64(193.8722594247381))

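- Naive (lag-1) performs better than seasonal naive (lag-7) on every metric for Germany (MAE 20.87 vs 27.10, RMSE 28.61 vs 38.90, MAPE 123% vs 194%), so yesterday's price is a stronger predictor than the price one week earlier. MAPE values above 100% suggest that some test-period prices are close to zero, so MAPE should be read with caution here.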
- These baselines will be used as reference for more complex models.

In [25]:
# Country label -> CSV file name inside DATA_PATH. Every file follows the
# same "<country>_electricity_price_daily.csv" naming pattern.
countries = {
    name: f"{name}_electricity_price_daily.csv"
    for name in ("austria", "germany", "switzerland", "france", "poland", "estonia")
}

In [26]:
def evaluate_baselines(df, test_start):
    """Score the lag-1 and lag-7 naive forecasts on the hold-out period.

    Parameters
    ----------
    df : pl.DataFrame
        Frame with a ``date`` column and a numeric ``y`` column. The lags are
        positional (``shift``), so the frame must already be sorted by
        ``date``; lag 7 only means "one week earlier" when there is exactly
        one row per day.
    test_start
        Inclusive lower bound of the evaluation window; compared against the
        ``date`` column.

    Returns
    -------
    dict
        ``mae_*``, ``rmse_*`` and ``mape_*`` (in percent) for the ``naive``
        and ``seasonal`` baselines.
    """
    df = df.with_columns([
        pl.col("y").shift(1).alias("naive"),
        pl.col("y").shift(7).alias("seasonal_naive_7"),
    ])

    # Only drop rows where the actual or one of the forecasts is missing; a
    # null in some unrelated column must not remove an otherwise valid row.
    eval_df = df.filter(pl.col("date") >= test_start).drop_nulls(
        subset=["y", "naive", "seasonal_naive_7"]
    )

    y_true = eval_df["y"].to_numpy()
    y_naive = eval_df["naive"].to_numpy()
    y_seasonal = eval_df["seasonal_naive_7"].to_numpy()

    # MAPE divides by the actual value, so rows with y == 0 would make the
    # mean inf/nan. They are excluded from MAPE only; MAE and RMSE use all rows.
    nonzero = y_true != 0

    def _mape(y_hat):
        # Mean absolute percentage error over the rows with a non-zero actual.
        return np.mean(np.abs((y_true[nonzero] - y_hat[nonzero]) / y_true[nonzero])) * 100

    return {
        "mae_naive": mean_absolute_error(y_true, y_naive),
        "mae_seasonal": mean_absolute_error(y_true, y_seasonal),
        "rmse_naive": np.sqrt(mean_squared_error(y_true, y_naive)),
        "rmse_seasonal": np.sqrt(mean_squared_error(y_true, y_seasonal)),
        "mape_naive": _mape(y_naive),
        "mape_seasonal": _mape(y_seasonal),
    }

In [27]:
# Evaluate both baselines for every country and collect one metrics dict each.
results = []

for country, file in countries.items():
    # Use a loop-local frame name: rebinding `df` here would silently replace
    # the Germany frame built in the earlier cells with the last country's
    # data, so re-running those cells would score the wrong series.
    country_df = (
        pl.read_csv(DATA_PATH / file, try_parse_dates=True)
        .rename({"Date": "date", "Electricity_Price_EUR_per_MWh": "y"})
        .sort("date")  # the lag features in evaluate_baselines are positional
    )

    metrics = evaluate_baselines(country_df, TEST_START)
    metrics["country"] = country
    results.append(metrics)

In [28]:
# One row per country, one column per metric, built from the collected dicts.
baseline_results = pl.DataFrame(data=results)

baseline_results

mae_naive,mae_seasonal,rmse_naive,rmse_seasonal,mape_naive,mape_seasonal,country
f64,f64,f64,f64,f64,f64,str
16.1364,18.7068,22.61294,26.828859,25.454243,29.138782,"""austria"""
20.872345,27.096552,28.612334,38.90494,123.346929,193.872259,"""germany"""
13.008905,14.955368,18.391042,20.611866,35.823119,37.006315,"""switzerland"""
15.371462,25.347269,20.590942,34.040016,140.557778,84.570847,"""france"""
18.272028,22.906538,24.944024,30.345585,48.481325,78.520151,"""poland"""
31.931379,41.822469,55.1607,68.179462,71.786547,105.516075,"""estonia"""


In [29]:
# Same hold-out cutoff (2024-01-01) that the train/test split cell already
# assigned; this re-assignment leaves the value unchanged.
TEST_START = pl.date(2024, 1, 1)