In [75]:
#%pip install statsforecast pandas polars pyarrow

In [76]:
import polars as pl
import pandas as pd

from statsforecast import StatsForecast
from statsforecast.models import SeasonalNaive, HistoricAverage, RandomWalkWithDrift

In [77]:
electricity_df = pl.read_csv("../data/eu_electricity_daily.csv")
electricity_df = electricity_df.with_columns(pl.col("Date").str.strptime(pl.Date, format="%Y-%m-%d").alias("Date"))

### Only use data for austria:

In [78]:
electricity_df = electricity_df.filter(electricity_df["ISO3 Code"] == "AUT")
electricity_df


Country,ISO3 Code,Date,Price (EUR/MWhe)
str,str,date,f64
"""Austria""","""AUT""",2015-01-01,22.34
"""Austria""","""AUT""",2015-01-02,22.34
"""Austria""","""AUT""",2015-01-03,22.34
"""Austria""","""AUT""",2015-01-04,22.34
"""Austria""","""AUT""",2015-01-05,36.18
…,…,…,…
"""Austria""","""AUT""",2025-12-18,135.16
"""Austria""","""AUT""",2025-12-19,126.37
"""Austria""","""AUT""",2025-12-20,111.08
"""Austria""","""AUT""",2025-12-21,106.41


In [79]:
electricity_df.null_count() # Data contains no null values

Country,ISO3 Code,Date,Price (EUR/MWhe)
u32,u32,u32,u32
0,0,0,0


### Create daily dataset:

In [80]:
daily_df = electricity_df.select([
    pl.col("Country").alias("unique_id"),
    pl.col("Date").alias("ds"),
    pl.col("Price (EUR/MWhe)").alias("y")
]).sort("ds")
daily_df

unique_id,ds,y
str,date,f64
"""Austria""",2015-01-01,22.34
"""Austria""",2015-01-02,22.34
"""Austria""",2015-01-03,22.34
"""Austria""",2015-01-04,22.34
"""Austria""",2015-01-05,36.18
…,…,…
"""Austria""",2025-12-18,135.16
"""Austria""",2025-12-19,126.37
"""Austria""",2025-12-20,111.08
"""Austria""",2025-12-21,106.41


### Create montly dataset:

In [81]:
monthly_df = electricity_df.with_columns(pl.col("Date").dt.truncate("1mo").alias("month_first"))
monthly_df

Country,ISO3 Code,Date,Price (EUR/MWhe),month_first
str,str,date,f64,date
"""Austria""","""AUT""",2015-01-01,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-02,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-03,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-04,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-05,36.18,2015-01-01
…,…,…,…,…
"""Austria""","""AUT""",2025-12-18,135.16,2025-12-01
"""Austria""","""AUT""",2025-12-19,126.37,2025-12-01
"""Austria""","""AUT""",2025-12-20,111.08,2025-12-01
"""Austria""","""AUT""",2025-12-21,106.41,2025-12-01


In [82]:
monthly_df = monthly_df.group_by(["Country", "month_first"]).agg(pl.col("Price (EUR/MWhe)").mean().alias("mean_price")).sort("month_first")

In [83]:
monthly_df = monthly_df.rename({
    "Country": "unique_id",
    "month_first": "ds",
    "mean_price":"y"
})
monthly_df

unique_id,ds,y
str,date,f64
"""Austria""",2015-01-01,29.935161
"""Austria""",2015-02-01,36.695
"""Austria""",2015-03-01,31.297419
"""Austria""",2015-04-01,29.778333
"""Austria""",2015-05-01,25.329677
…,…,…
"""Austria""",2025-08-01,74.744194
"""Austria""",2025-09-01,92.587667
"""Austria""",2025-10-01,108.226452
"""Austria""",2025-11-01,116.564


### Split datasets:

In [84]:
daily_df = daily_df.to_pandas()
monthly_df = monthly_df.to_pandas()

In [None]:
daily_train = daily_df[daily_df["ds"] < pd.to_datetime("2024-01-01")]
daily_val = daily_df[(daily_df["ds"] >= pd.to_datetime("2025-01-01")) & (daily_df["ds"] < pd.to_datetime("2026-01-01"))]
daily_test = None

In [None]:
monthly_train = monthly_df[monthly_df["ds"] < pd.to_datetime("2024-01-01")]
monthly_val = monthly_df[(monthly_df["ds"] >= pd.to_datetime("2025-01-01")) & (monthly_df["ds"] < pd.to_datetime("2026-01-01"))]
monthly_test = None

### Forecasts:

In [87]:
HORIZON_DAILY = 365 # days
HORIZON_MONTHLY = 12 # months

In [88]:
sf = StatsForecast(
    models=[
        SeasonalNaive(season_length=HORIZON_DAILY),
        RandomWalkWithDrift(),
        HistoricAverage(),
    ],
    freq="MS",
    n_jobs=1
)

stat_daily_val = sf.forecast(df=daily_train, h=HORIZON_DAILY, )
stat_daily_test = sf.forecast(df=pd.concat([daily_train, daily_val]), h=HORIZON_DAILY)

for forecast in [stat_daily_val, stat_daily_test]:
    forecast["Structural"] = (forecast["SeasonalNaive"] + forecast["RWD"]) / 2

stat_daily_val = stat_daily_val.merge(daily_val, on=["unique_id", "ds"])
stat_daily_test = stat_daily_test.merge(daily_test, on=["unique_id", "ds"])

In [89]:
sf = StatsForecast(
    models=[
        SeasonalNaive(season_length=HORIZON_MONTHLY),
        RandomWalkWithDrift(),
        HistoricAverage(),
    ],
    freq="MS",
    n_jobs=1
)

stat_monthly_val = sf.forecast(df=monthly_train, h=HORIZON_MONTHLY, )
stat_monthly_test = sf.forecast(df=pd.concat([monthly_train, monthly_val]), h=HORIZON_MONTHLY)

for forecast in [stat_monthly_val, stat_monthly_test]:
    forecast["Structural"] = (forecast["SeasonalNaive"] + forecast["RWD"]) / 2

stat_monthly_val = stat_monthly_val.merge(monthly_val, on=["unique_id", "ds"])
stat_monthly_test = stat_monthly_test.merge(monthly_test, on=["unique_id", "ds"])

In [90]:
stat_daily_val

Unnamed: 0,unique_id,ds,SeasonalNaive,RWD,HistoricAverage,Structural,y
0,Austria,2024-01-01,21.8,12.056872,75.806936,16.928436,16.19
1,Austria,2024-02-01,110.92,12.053743,75.806936,61.486872,82.16
2,Austria,2024-03-01,146.03,12.050615,75.806936,79.040307,70.05
3,Austria,2024-04-01,134.93,12.047486,75.806936,73.488743,19.1
4,Austria,2024-05-01,127.38,12.044358,75.806936,69.712179,4.06
5,Austria,2024-06-01,124.55,12.041229,75.806936,68.295615,57.64
6,Austria,2024-07-01,136.93,12.038101,75.806936,74.484051,82.61
7,Austria,2024-08-01,104.7,12.034973,75.806936,58.367486,89.16
8,Austria,2024-09-01,139.22,12.031844,75.806936,75.625922,71.21
9,Austria,2024-10-01,135.51,12.028716,75.806936,73.769358,75.6
