In [55]:
#%pip install statsforecast pandas polars pyarrow

In [56]:
import polars as pl
import pandas as pd

from statsforecast import StatsForecast
from statsforecast.models import SeasonalNaive, HistoricAverage, RandomWalkWithDrift

In [57]:
# Read the raw price table and parse the "Date" strings (YYYY-MM-DD) into a Date column.
# The parsed expression keeps the column name "Date", so it replaces the string column.
electricity_df = pl.read_csv("../data/eu_electricity_daily.csv").with_columns(
    pl.col("Date").str.strptime(pl.Date, format="%Y-%m-%d")
)

### Only use data for Austria:

In [58]:
# Keep only the rows whose ISO3 code is "AUT"; the filtered frame is the cell output.
electricity_df = electricity_df.filter(pl.col("ISO3 Code") == "AUT")
electricity_df


Country,ISO3 Code,Date,Price (EUR/MWhe)
str,str,date,f64
"""Austria""","""AUT""",2015-01-01,22.34
"""Austria""","""AUT""",2015-01-02,22.34
"""Austria""","""AUT""",2015-01-03,22.34
"""Austria""","""AUT""",2015-01-04,22.34
"""Austria""","""AUT""",2015-01-05,36.18
…,…,…,…
"""Austria""","""AUT""",2025-12-18,135.16
"""Austria""","""AUT""",2025-12-19,126.37
"""Austria""","""AUT""",2025-12-20,111.08
"""Austria""","""AUT""",2025-12-21,106.41


In [59]:
electricity_df.null_count() # Per-column null counts; the recorded output shows 0 for every column

Country,ISO3 Code,Date,Price (EUR/MWhe)
u32,u32,u32,u32
0,0,0,0


### Create daily dataset:

In [60]:
# Reduce the frame to the three columns unique_id / ds / y (series id, date, value)
# and order the rows chronologically.
daily_df = electricity_df.select(
    unique_id=pl.col("Country"),
    ds=pl.col("Date"),
    y=pl.col("Price (EUR/MWhe)"),
).sort("ds")
daily_df

unique_id,ds,y
str,date,f64
"""Austria""",2015-01-01,22.34
"""Austria""",2015-01-02,22.34
"""Austria""",2015-01-03,22.34
"""Austria""",2015-01-04,22.34
"""Austria""",2015-01-05,36.18
…,…,…
"""Austria""",2025-12-18,135.16
"""Austria""",2025-12-19,126.37
"""Austria""",2025-12-20,111.08
"""Austria""",2025-12-21,106.41


### Create monthly dataset:

In [61]:
# Tag every daily row with the first day of its month; this is the grouping key
# for the monthly aggregation.
monthly_df = electricity_df.with_columns(
    month_first=pl.col("Date").dt.truncate("1mo")
)
monthly_df

Country,ISO3 Code,Date,Price (EUR/MWhe),month_first
str,str,date,f64,date
"""Austria""","""AUT""",2015-01-01,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-02,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-03,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-04,22.34,2015-01-01
"""Austria""","""AUT""",2015-01-05,36.18,2015-01-01
…,…,…,…,…
"""Austria""","""AUT""",2025-12-18,135.16,2025-12-01
"""Austria""","""AUT""",2025-12-19,126.37,2025-12-01
"""Austria""","""AUT""",2025-12-20,111.08,2025-12-01
"""Austria""","""AUT""",2025-12-21,106.41,2025-12-01


In [62]:
# Average the daily prices within each (country, month) group and order by month.
monthly_df = (
    monthly_df
    .group_by(["Country", "month_first"])
    .agg(mean_price=pl.col("Price (EUR/MWhe)").mean())
    .sort("month_first")
)

In [63]:
# Rename to the same unique_id / ds / y column layout as the daily frame.
monthly_df = monthly_df.rename(
    {"Country": "unique_id", "month_first": "ds", "mean_price": "y"}
)
monthly_df

unique_id,ds,y
str,date,f64
"""Austria""",2015-01-01,29.935161
"""Austria""",2015-02-01,36.695
"""Austria""",2015-03-01,31.297419
"""Austria""",2015-04-01,29.778333
"""Austria""",2015-05-01,25.329677
…,…,…
"""Austria""",2025-08-01,74.744194
"""Austria""",2025-09-01,92.587667
"""Austria""",2025-10-01,108.226452
"""Austria""",2025-11-01,116.564


### Split datasets:

In [64]:
# Convert the polars frames to pandas; the cells below index them with pandas
# boolean masks and pass them to StatsForecast.
# The isinstance guards make this cell safe to re-run: once a frame has been
# converted it is a pandas DataFrame, which has no .to_pandas() method, so an
# unguarded second execution would raise AttributeError.
if isinstance(daily_df, pl.DataFrame):
    daily_df = daily_df.to_pandas()
if isinstance(monthly_df, pl.DataFrame):
    monthly_df = monthly_df.to_pandas()

In [65]:
# Chronological split of the daily series:
#   train      -> everything before 2024-01-01
#   validation -> 2024-01-01 (inclusive) up to 2025-01-01 (exclusive)
#   test       -> 2025-01-01 onwards
daily_train = daily_df.loc[daily_df["ds"] < pd.to_datetime("2024-01-01")]
daily_val = daily_df.loc[
    (daily_df["ds"] >= pd.to_datetime("2024-01-01"))
    & (daily_df["ds"] < pd.to_datetime("2025-01-01"))
]
daily_test = daily_df.loc[daily_df["ds"] >= pd.to_datetime("2025-01-01")]

In [66]:
# Chronological split of the monthly series:
#   train      -> everything before 2024-01-01
#   validation -> 2024-01-01 (inclusive) up to 2025-01-01 (exclusive)
#   test       -> 2025-01-01 onwards
month_train = monthly_df.loc[monthly_df["ds"] < pd.to_datetime("2024-01-01")]
month_val = monthly_df.loc[
    (monthly_df["ds"] >= pd.to_datetime("2024-01-01"))
    & (monthly_df["ds"] < pd.to_datetime("2025-01-01"))
]
month_test = monthly_df.loc[monthly_df["ds"] >= pd.to_datetime("2025-01-01")]

### Forecasts:

In [67]:
# Forecast horizons, expressed as a number of steps of the corresponding series.
# HORIZON_MONTHLY is the `h` passed to the monthly StatsForecast calls.
HORIZON_DAILY = 365 # days
HORIZON_MONTHLY = 12 # months

In [68]:
# Baseline forecasters for the monthly series (month-start frequency, 12-step season).
sf = StatsForecast(
    models=[SeasonalNaive(season_length=12), RandomWalkWithDrift(), HistoricAverage()],
    freq="MS",
    n_jobs=1,
)

# Validation forecast: fit on the training months only.
stat_monthly_val = sf.forecast(h=HORIZON_MONTHLY, df=month_train)
# Test forecast: fit on training + validation months.
stat_monthly_test = sf.forecast(
    h=HORIZON_MONTHLY,
    df=pd.concat([month_train, month_val]),
)

In [69]:
def run_baseline_forecast(text: str, fold_data: dict, display: bool = False, forecaster=None):
    """Forecast one fold with the baseline models and join the results with the actuals.

    Two forecasts are produced: one fitted on the training data and evaluated
    against the validation period, and one fitted on training + validation and
    evaluated against the test period. A "Structural" column (the mean of the
    SeasonalNaive and RWD forecasts) is added to both.

    Parameters
    ----------
    text : str
        Label printed ahead of the tables when ``display`` is true.
    fold_data : dict
        Must contain the keys ``"train"``, ``"validate"`` and ``"test"``. The
        validation and test frames need ``unique_id``, ``ds`` and ``y`` columns;
        the training frame is passed to the forecaster as-is.
    display : bool, default False
        When true, print ``text`` and show both evaluation tables.
    forecaster : object, optional
        Any object exposing ``forecast(df=..., h=...)`` whose result contains
        ``SeasonalNaive`` and ``RWD`` columns. Defaults to the notebook-level
        ``sf`` instance.

    Returns
    -------
    dict
        ``"validation"`` and ``"test"``: forecasts left-joined with the actual
        ``y`` values, with missing values replaced by 0.
        ``"validation_additional"`` and ``"test_additional"``: always ``None``.
    """
    import builtins

    train = fold_data["train"]
    validation = fold_data["validate"]
    test = fold_data["test"]

    # NOTE(review): the original body used fc_val / fc_test without ever assigning
    # them. They are computed here the same way the monthly cell does it (fit on
    # train, then on train + validation) -- confirm this matches the intended setup.
    model = sf if forecaster is None else forecaster

    # One forecast step per distinct timestamp in the period being predicted.
    fc_val = model.forecast(df=train, h=validation["ds"].nunique())
    fc_test = model.forecast(df=pd.concat([train, validation]), h=test["ds"].nunique())

    # Structural Model (average of Seasonal Naive + RWD)
    for fc in (fc_val, fc_test):
        fc["Structural"] = (fc["SeasonalNaive"] + fc["RWD"]) / 2

    validation_evaluation = fc_val.merge(
        validation[["unique_id", "ds", "y"]], on=["unique_id", "ds"], how="left"
    )
    test_evaluation = fc_test.merge(
        test[["unique_id", "ds", "y"]], on=["unique_id", "ds"], how="left"
    )

    if display:
        # The boolean `display` parameter shadows IPython's display() inside this
        # function, so calling display(...) here would raise TypeError. IPython
        # registers display() in builtins; fall back to print() elsewhere.
        show = getattr(builtins, "display", print)
        print(text)
        print("Validation Forecasts:")
        show(validation_evaluation)
        print("Test Forecasts:")
        show(test_evaluation)

    # NOTE(review): fillna(0) also zero-fills `y` for forecast dates that have no
    # actual value after the left join -- confirm this is intended before
    # computing error metrics on these frames.
    return {
        "validation": validation_evaluation.fillna(0),
        "test": test_evaluation.fillna(0),
        "validation_additional": None,
        "test_additional": None
    }