In [88]:
import polars as pl

In [89]:
# Read the daily price CSV and convert the "Date" strings (YYYY-MM-DD) into a
# real Date column. The expression keeps the column name "Date", so the
# parsed values replace the original strings in place.
electricity_df = (
    pl.read_csv("../data/eu_electricity_daily.csv")
    .with_columns(pl.col("Date").str.strptime(pl.Date, format="%Y-%m-%d"))
)

### Only use data for Austria:

In [90]:
# Keep only the rows whose ISO3 country code is "AUT". The predicate is an
# expression evaluated against the frame being filtered rather than a
# Series pulled out of it beforehand.
is_austria = pl.col("ISO3 Code") == "AUT"
electricity_df = electricity_df.filter(is_austria)
electricity_df


Country,ISO3 Code,Date,Price (EUR/MWhe)
str,str,date,f64
"""Austria""","""AUT""",2015-01-01,22.34
"""Austria""","""AUT""",2015-01-02,22.34
"""Austria""","""AUT""",2015-01-03,22.34
"""Austria""","""AUT""",2015-01-04,22.34
"""Austria""","""AUT""",2015-01-05,36.18
…,…,…,…
"""Austria""","""AUT""",2025-12-18,135.16
"""Austria""","""AUT""",2025-12-19,126.37
"""Austria""","""AUT""",2025-12-20,111.08
"""Austria""","""AUT""",2025-12-21,106.41


### Create daily dataset:

In [91]:
# Two-column daily series: "ds" holds the date and "y" the price, ordered
# chronologically by "ds".
daily_df = electricity_df.select(
    ds=pl.col("Date"),
    y=pl.col("Price (EUR/MWhe)"),
).sort("ds")
daily_df

ds,y
date,f64
2015-01-01,22.34
2015-01-02,22.34
2015-01-03,22.34
2015-01-04,22.34
2015-01-05,36.18
…,…
2025-12-18,135.16
2025-12-19,126.37
2025-12-20,111.08
2025-12-21,106.41


### Create monthly dataset:

In [92]:
# Tag every daily row with the first day of its calendar month
# ("month_first"), obtained by truncating the date to a 1-month boundary.
month_start = pl.col("Date").dt.truncate("1mo")
monthly_df = electricity_df.with_columns(month_first=month_start)
monthly_df

Country,ISO3 Code,Date,Price (EUR/MWhe),month_first
str,str,date,f64,date
"""Austria""","""AUT""",2015-01-01,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-02,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-03,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-04,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-05,36.18,2015-01-01
…,…,…,…,…
"""Austria""","""AUT""",2025-12-18,135.16,2025-12-01
"""Austria""","""AUT""",2025-12-19,126.37,2025-12-01
"""Austria""","""AUT""",2025-12-20,111.08,2025-12-01
"""Austria""","""AUT""",2025-12-21,106.41,2025-12-01


In [93]:
# Monthly mean price, one row per calendar month, sorted chronologically.
#
# The aggregate is derived from `electricity_df` (the daily frame) instead of
# from `monthly_df` itself: a self-referential `monthly_df = monthly_df...`
# only works the first time the cell runs, because afterwards the price
# column no longer exists in `monthly_df`. Computing the month key inside
# `group_by` yields the same "month_first" / "mean_price" columns and lets
# this cell be re-run safely.
#
# Each mean is taken over the daily rows present for that month, so a month
# with missing days is averaged over fewer observations.
monthly_df = (
    electricity_df
    .group_by(pl.col("Date").dt.truncate("1mo").alias("month_first"))
    .agg(pl.col("Price (EUR/MWhe)").mean().alias("mean_price"))
    .sort("month_first")
)

In [94]:
# Give the monthly frame the same "ds" / "y" column names as the daily one.
monthly_df = monthly_df.rename(dict(month_first="ds", mean_price="y"))
monthly_df

ds,y
date,f64
2015-01-01,29.935161
2015-02-01,36.695
2015-03-01,31.297419
2015-04-01,29.778333
2015-05-01,25.329677
…,…
2025-08-01,74.744194
2025-09-01,92.587667
2025-10-01,108.226452
2025-11-01,116.564


### Split datasets:

In [None]:
# Cutoff timestamps for the chronological split, applied to the "ds" column.
VALIDATION_START = pl.datetime(2023, 1, 1)
TEST_START = pl.datetime(2024, 1, 1)

# Everything strictly before the validation cutoff.
TRAIN_FILTER = pl.col("ds") < VALIDATION_START
# NOTE: open-ended. This predicate has no upper bound, so it also matches
# every row that TEST_FILTER matches (2024-01-01 onwards).
VALIDATION_FILTER = pl.col("ds") >= VALIDATION_START
# Everything from the test cutoff onwards.
TEST_FILTER = pl.col("ds") >= TEST_START

In [None]:
# Two-way split of the daily series.
# NOTE(review): daily_test is selected with VALIDATION_FILTER, not
# TEST_FILTER -- confirm that this is the intended hold-out period.
daily_train, daily_test = (
    daily_df.filter(split) for split in (TRAIN_FILTER, VALIDATION_FILTER)
)
# NOTE(review): same selection as `month_train` defined further down;
# presumably one of the two names is redundant -- verify before removing.
monthly_train = monthly_df.filter(TRAIN_FILTER)

In [None]:
# Three-way chronological split of the monthly series:
#   month_train -- rows matching TRAIN_FILTER
#   month_val   -- rows matching VALIDATION_FILTER but not TEST_FILTER
#   month_test  -- rows matching TEST_FILTER
#
# VALIDATION_FILTER on its own is open-ended (`ds >= 2023-01-01`) and would
# therefore include every row of the test period as well. Excluding
# TEST_FILTER keeps the validation and test sets disjoint, so test data
# cannot leak into model selection.
month_train = monthly_df.filter(TRAIN_FILTER)
month_val = monthly_df.filter(VALIDATION_FILTER & ~TEST_FILTER)
month_test = monthly_df.filter(TEST_FILTER)