# Joining the individual datasets

In [1]:
import pandas as pd
import polars as pl
import polars.selectors as cs
import numpy as np
import plotnine as pn
from mizani.formatters import comma_format, custom_format, currency_format, percent_format
from IPython.display import clear_output, display
import os
import glob
import requests
from datetime import datetime, timedelta
import pytz
from tqdm.notebook import tqdm

jama_colour = [
    "#374e55",
    "#df8f44",
    "#00a1d5",
    "#b24745",
    "#79af97",
]

pd.set_option("display.max.columns", 500)
pd.set_option("display.max.columns", 500)
background_colour = "#F2F2F2"
pn.theme_set(
    pn.theme_classic()
    + pn.theme(
        figure_size=[7, 7/1.618],
        text=pn.element_text(family="monospace"),
        plot_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        panel_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        legend_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
    )
)


%load_ext blackcellmagic
%matplotlib inline

### Auction Prices

In [3]:
jao_chde = (
    pl.from_pandas(pd.read_csv("jao_prices_CH-DE.csv"))
    .select(pl.exclude(["date", "corridor", "hour"]))
    .with_columns(
        pl.col("delivery_begin_time_ch")
        .str.to_datetime()
        .dt.convert_time_zone("Europe/Zurich")
        .alias("date")
    )
    .with_columns(pl.col("date").dt.date().alias("day"))
    .drop("delivery_begin_time_ch")
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "auction_price"})
)

jao_dech = (
    pl.from_pandas(pd.read_csv("jao_prices_DE-CH.csv"))
    .select(pl.exclude(["date", "corridor", "hour"]))
    .with_columns(
        pl.col("delivery_begin_time_ch")
        .str.to_datetime()
        .dt.convert_time_zone("Europe/Zurich")
        .alias("date")
    )
    .with_columns(pl.col("date").dt.date().alias("day"))
    .drop("delivery_begin_time_ch")
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "auction_price"})
)

jao_chde

auction_price,allocatedCapacity,ATC,date,day,dst
f64,i64,i64,str,date,i16
0.0,4232,4233,"""2019-01-01 03:…",2019-01-01,1
0.0,4232,4233,"""2019-01-01 09:…",2019-01-01,1
0.0,4233,4233,"""2019-01-02 12:…",2019-01-02,1
0.0,4230,4233,"""2019-01-02 20:…",2019-01-02,1
0.0,4232,4233,"""2019-01-03 08:…",2019-01-03,1
0.0,4232,4233,"""2019-01-03 12:…",2019-01-03,1
0.0,4233,4233,"""2019-01-04 08:…",2019-01-04,1
0.0,4233,4233,"""2019-01-04 18:…",2019-01-04,1
0.0,4230,4233,"""2019-01-05 01:…",2019-01-05,1
0.0,4230,4233,"""2019-01-05 18:…",2019-01-05,1


In [5]:
jao_chde.filter(pl.col("date") == "2019-10-27 02:00")

auction_price,allocatedCapacity,ATC,date,day,dst
f64,i64,i64,str,date,i16
0.0,3857,3863,"""2019-10-27 02:…",2019-10-27,1
0.0,3863,3863,"""2019-10-27 02:…",2019-10-27,0


In [212]:
jao_chde["date"].value_counts(sort=True).filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:00 ┆ 2     │
│ 2022-10-30 02:00 ┆ 2     │
│ 2023-10-29 02:00 ┆ 2     │
└──────────────────┴───────┘


In [213]:
jao_dech["date"].value_counts(sort=True).filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:00 ┆ 2     │
│ 2022-10-30 02:00 ┆ 2     │
│ 2023-10-29 02:00 ┆ 2     │
└──────────────────┴───────┘


Now the observations are uniquely identifiable.

### Day-Ahead Prices 

- Aggregation: These values are already in hourly format
- Join ID: date

In [214]:
da_at = (
    pl.read_csv("day_ahead_prices_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_at"})
)

da_at.pipe(print)
da_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_at ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2019-01-01 03:00 ┆ 27.87              ┆ 1   │
│ 2019-01-01 11:00 ┆ 9.2                ┆ 1   │
│ 2019-01-01 23:00 ┆ -4.95              ┆ 1   │
│ 2019-01-02 00:00 ┆ -4.98              ┆ 1   │
│ 2019-01-02 10:00 ┆ 50.91              ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2024-01-30 14:00 ┆ 71.93              ┆ 1   │
│ 2024-01-30 22:00 ┆ 77.86              ┆ 1   │
│ 2024-01-31 07:00 ┆ 104.94             ┆ 1   │
│ 2024-01-31 10:00 ┆ 82.98              ┆ 1   │
│ 2024-01-31 18:00 ┆ 101.1              ┆ 1   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [215]:
da_ch = (
    pl.read_csv("day_ahead_prices_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_ch"})
)

da_ch.pipe(print)
da_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_ch ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2019-01-01 08:00 ┆ 28.7               ┆ 1   │
│ 2019-01-01 09:00 ┆ 35.7               ┆ 1   │
│ 2019-01-01 13:00 ┆ 43.07              ┆ 1   │
│ 2019-01-02 05:00 ┆ 49.21              ┆ 1   │
│ 2019-01-02 09:00 ┆ 64.83              ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2024-01-30 16:00 ┆ 83.89              ┆ 1   │
│ 2024-01-30 19:00 ┆ 92.01              ┆ 1   │
│ 2024-01-30 23:00 ┆ 76.09              ┆ 1   │
│ 2024-01-31 00:00 ┆ 74.03              ┆ 1   │
│ 2024-01-31 04:00 ┆ 68.6               ┆ 1   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [216]:
da_delu = (
    pl.read_csv("day_ahead_prices_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_de"})
)

da_delu.pipe(print)
da_delu["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_de ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 28.32              ┆ 1   │
│ 2019-01-01 01:00 ┆ 10.07              ┆ 1   │
│ 2019-01-01 02:00 ┆ -4.08              ┆ 1   │
│ 2019-01-01 16:00 ┆ 1.97               ┆ 1   │
│ 2019-01-01 21:00 ┆ -24.93             ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2024-01-30 17:00 ┆ 88.17              ┆ 1   │
│ 2024-01-30 19:00 ┆ 91.91              ┆ 1   │
│ 2024-01-31 12:00 ┆ 74.91              ┆ 1   │
│ 2024-01-31 16:00 ┆ 81.62              ┆ 1   │
│ 2024-02-01 00:00 ┆ 47.06              ┆ 1   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [217]:
da_fr = (
    pl.read_csv("day_ahead_prices_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_fr"})
)

da_fr.pipe(print)
da_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_fr ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2019-01-01 09:00 ┆ 25.0               ┆ 1   │
│ 2019-01-01 18:00 ┆ 62.65              ┆ 1   │
│ 2019-01-02 13:00 ┆ 58.65              ┆ 1   │
│ 2019-01-02 22:00 ┆ 55.97              ┆ 1   │
│ 2019-01-02 23:00 ┆ 54.31              ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2024-01-29 08:00 ┆ 99.26              ┆ 1   │
│ 2024-01-29 10:00 ┆ 78.9               ┆ 1   │
│ 2024-01-29 23:00 ┆ 80.29              ┆ 1   │
│ 2024-01-30 13:00 ┆ 68.23              ┆ 1   │
│ 2024-01-30 20:00 ┆ 88.32              ┆ 1   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

### Actual Load

In [272]:
load_at = (
    pl.read_csv("actual_load_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_at"})
)

load_at.pipe(print)
load_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_at ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 6075.0         ┆ 1   │
│ 2019-01-01 01:00 ┆ 5852.75        ┆ 1   │
│ 2019-01-01 02:00 ┆ 5619.25        ┆ 1   │
│ 2019-01-01 03:00 ┆ 5324.0         ┆ 1   │
│ 2019-01-01 04:00 ┆ 5273.5         ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 8433.0         ┆ 1   │
│ 2024-01-31 20:00 ┆ 7892.75        ┆ 1   │
│ 2024-01-31 21:00 ┆ 7312.0         ┆ 1   │
│ 2024-01-31 22:00 ┆ 7065.0         ┆ 1   │
│ 2024-01-31 23:00 ┆ 6610.25        ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

In [274]:
load_ch = (
    pl.read_csv("actual_load_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_ch"})
)

load_ch.pipe(print)
load_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_ch ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 7037.0         ┆ 1   │
│ 2019-01-01 01:00 ┆ 7096.0         ┆ 1   │
│ 2019-01-01 02:00 ┆ 7244.0         ┆ 1   │
│ 2019-01-01 03:00 ┆ 7443.0         ┆ 1   │
│ 2019-01-01 04:00 ┆ 7353.0         ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 8055.0         ┆ 1   │
│ 2024-01-31 20:00 ┆ 7774.0         ┆ 1   │
│ 2024-01-31 21:00 ┆ 7417.0         ┆ 1   │
│ 2024-01-31 22:00 ┆ 6997.0         ┆ 1   │
│ 2024-01-31 23:00 ┆ 6821.0         ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

In [276]:
load_de = (
    pl.read_csv("actual_load_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_de"})
)

load_de.pipe(print)
load_de["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_de ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 43713.5        ┆ 1   │
│ 2019-01-01 01:00 ┆ 42091.0        ┆ 1   │
│ 2019-01-01 02:00 ┆ 40537.0        ┆ 1   │
│ 2019-01-01 03:00 ┆ 39725.75       ┆ 1   │
│ 2019-01-01 04:00 ┆ 39502.0        ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 69814.25       ┆ 1   │
│ 2024-01-31 20:00 ┆ 66137.75       ┆ 1   │
│ 2024-01-31 21:00 ┆ 62751.0        ┆ 1   │
│ 2024-01-31 22:00 ┆ 60155.25       ┆ 1   │
│ 2024-01-31 23:00 ┆ 56119.75       ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

In [277]:
load_fr = (
    pl.read_csv("actual_load_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_fr"})
)

load_fr.pipe(print)
load_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_518, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_fr ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 62176.0        ┆ 1   │
│ 2019-01-01 01:00 ┆ 60301.0        ┆ 1   │
│ 2019-01-01 02:00 ┆ 58540.0        ┆ 1   │
│ 2019-01-01 03:00 ┆ 55144.0        ┆ 1   │
│ 2019-01-01 04:00 ┆ 52978.0        ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 63343.0        ┆ 1   │
│ 2024-01-31 20:00 ┆ 64827.0        ┆ 1   │
│ 2024-01-31 21:00 ┆ 61646.0        ┆ 1   │
│ 2024-01-31 22:00 ┆ 58398.0        ┆ 1   │
│ 2024-01-31 23:00 ┆ 58517.0        ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (2, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2022-10-30 02:00 ┆ 2     │
│ 2023-10-29 02:00 ┆ 2     │
└───────────────

In [278]:
load_it = (
    pl.read_csv("actual_load_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_it"})
)

load_it.pipe(print)
load_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_it ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 23644.0        ┆ 1   │
│ 2019-01-01 01:00 ┆ 22850.0        ┆ 1   │
│ 2019-01-01 02:00 ┆ 21600.0        ┆ 1   │
│ 2019-01-01 03:00 ┆ 20255.0        ┆ 1   │
│ 2019-01-01 04:00 ┆ 19459.0        ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 43670.0        ┆ 1   │
│ 2024-01-31 20:00 ┆ 41362.0        ┆ 1   │
│ 2024-01-31 21:00 ┆ 38122.0        ┆ 1   │
│ 2024-01-31 22:00 ┆ 34165.0        ┆ 1   │
│ 2024-01-31 23:00 ┆ 30578.0        ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

### Forecast Wind and Solar

In [296]:
wind_solar_forecast_at = (
    pl.read_csv("wind_solar_forecast_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_at",
            "wind_onshore_forecast": "wind_onshore_forecast_at",
        }
    )
)

wind_solar_forecast_at.pipe(print)
wind_solar_forecast_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 4)
┌──────────────────┬───────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_at ┆ wind_onshore_forecast_at ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ i16 │
╞══════════════════╪═══════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 487.0                    ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 398.0                    ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 338.0                    ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 309.0                    ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0               ┆ 320.0                    ┆ 1   │
│ …                ┆ …                 ┆ …                        ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0               ┆ 131.0                    ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0               ┆ 152.0                    ┆ 1   │
│ 2024-01-31 21:00 

In [302]:
wind_solar_forecast_ch = (
    pl.read_csv("wind_solar_forecast_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_ch",
            "wind_onshore_forecast": "wind_onshore_forecast_ch",
        }
    )
)

wind_solar_forecast_ch.pipe(print)
wind_solar_forecast_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 4)
┌──────────────────┬───────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_ch ┆ wind_onshore_forecast_ch ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ i16 │
╞══════════════════╪═══════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ …                ┆ …                 ┆ …                        ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0               ┆ 0.0                      ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0               ┆ 0.0                      ┆ 1   │
│ 2024-01-31 21:00 

In [305]:
wind_solar_forecast_de = (
    pl.read_csv("wind_solar_forecast_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
        pl.mean("wind_offshore_forecast")
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_de",
            "wind_offshore_forecast": "wind_offshore_forecast_de",
            "wind_onshore_forecast": "wind_onshore_forecast_de",
        }
    )
)

wind_solar_forecast_de.pipe(print)
wind_solar_forecast_de["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_546, 5)
┌──────────────────┬───────────────────┬──────────────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_de ┆ wind_onshore_forecast_de ┆ wind_offshore_forecast_d ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ e                        ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ ---                      ┆ i16 │
│                  ┆                   ┆                          ┆ f64                      ┆     │
╞══════════════════╪═══════════════════╪══════════════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 18879.75                 ┆ 5069.0                   ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 20626.5                  ┆ 5042.25                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 22355.5                  ┆ 5028.5                   ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 24032.5                  ┆ 4977

In [307]:
wind_solar_forecast_fr = (
    pl.read_csv("wind_solar_forecast_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
        pl.mean("wind_offshore_forecast")
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_fr",
            "wind_offshore_forecast": "wind_offshore_forecast_fr",
            "wind_onshore_forecast": "wind_onshore_forecast_fr",
        }
    )
)

wind_solar_forecast_fr.pipe(print)
wind_solar_forecast_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_344, 5)
┌──────────────────┬───────────────────┬──────────────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_fr ┆ wind_onshore_forecast_fr ┆ wind_offshore_forecast_f ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ r                        ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ ---                      ┆ i16 │
│                  ┆                   ┆                          ┆ str                      ┆     │
╞══════════════════╪═══════════════════╪══════════════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 1698.0                   ┆ null                     ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 1680.0                   ┆ null                     ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 1675.0                   ┆ null                     ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 1670.0                   ┆ null

In [309]:
wind_solar_forecast_it = (
    pl.read_csv("wind_solar_forecast_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_it",
            "wind_onshore_forecast": "wind_onshore_forecast_it",
        }
    )
)

wind_solar_forecast_it.pipe(print)
wind_solar_forecast_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (43_314, 4)
┌──────────────────┬───────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_it ┆ wind_onshore_forecast_it ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ --- │
│ str              ┆ f64               ┆ str                      ┆ i16 │
╞══════════════════╪═══════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ …                ┆ …                 ┆ …                        ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2024-01-31 21:00 

### Generation Actuals (All Types)

#### Austria

In [409]:
generation_at = pl.read_csv("generation_actual_all_AT.csv", has_header=False)
generation_at.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26,column_27
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Biomass""","""Fossil Gas""","""Fossil Gas""","""Fossil Hard co…","""Fossil Hard co…","""Fossil Oil""","""Fossil Oil""","""Geothermal""","""Geothermal""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Run-of-r…","""Hydro Water Re…","""Hydro Water Re…","""Other""","""Other""","""Other renewabl…","""Other renewabl…","""Solar""","""Solar""","""Waste""","""Waste""","""Wind Onshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…"
"""2019-01-01 00:…","""304.0""","""0.0""","""1339.0""","""0.0""","""154.0""","""0.0""","""0.0""","""0.0""","""0.0""","""0.0""","""0.0""","""1577.0""","""2699.0""","""0.0""","""140.0""","""0.0""","""22.0""","""0.0""","""0.0""","""0.0""","""0.0""","""0.0""","""100.0""","""0.0""","""288.0""","""0.0"""


In [410]:
column_names = (
    generation_at.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("AT")], separator=" ").str.to_lowercase().str.replace_all(" ", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_at',
 'biomass_actual_consumption_at',
 'fossil_gas_actual_aggregated_at',
 'fossil_gas_actual_consumption_at',
 'fossil_hard_coal_actual_aggregated_at',
 'fossil_hard_coal_actual_consumption_at',
 'fossil_oil_actual_aggregated_at',
 'fossil_oil_actual_consumption_at',
 'geothermal_actual_aggregated_at',
 'geothermal_actual_consumption_at',
 'hydro_pumped_storage_actual_aggregated_at',
 'hydro_pumped_storage_actual_consumption_at',
 'hydro_run-of-river_and_poundage_actual_aggregated_at',
 'hydro_run-of-river_and_poundage_actual_consumption_at',
 'hydro_water_reservoir_actual_aggregated_at',
 'hydro_water_reservoir_actual_consumption_at',
 'other_actual_aggregated_at',
 'other_actual_consumption_at',
 'other_renewable_actual_aggregated_at',
 'other_renewable_actual_consumption_at',
 'solar_actual_aggregated_at',
 'solar_actual_consumption_at',
 'waste_actual_aggregated_at',
 'waste_actual_consumption_at',
 'wind_onshore_actual_aggregated_at',
 'wind_

In [411]:
generation_at = (
    generation_at.tail(-2)
    .rename(dict(zip(generation_at.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    .group_by("date")
    .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [413]:
generation_at.pipe(print)
generation_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 28)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ biomass_ac ┆ fossil_gas ┆ … ┆ waste_actu ┆ wind_onsh ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ tual_consu ┆ _actual_ag ┆   ┆ al_consump ┆ ore_actua ┆ ore_actua ┆ --- │
│ str        ┆ gated_at   ┆ mption_at  ┆ gregated_a ┆   ┆ tion_at    ┆ l_aggrega ┆ l_consump ┆ i16 │
│            ┆ ---        ┆ ---        ┆ t          ┆   ┆ ---        ┆ ted_a…    ┆ tion_…    ┆     │
│            ┆ f64        ┆ f64        ┆ ---        ┆   ┆ f64        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆            ┆ f64        ┆   ┆            ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 304.0      ┆ 0.0        ┆ 1282.75    ┆ … ┆ 0.0        ┆ 248.0     ┆ 0.0       ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

#### Switzerland

In [454]:
generation_ch = pl.read_csv("generation_actual_all_CH.csv", has_header=False)
generation_ch.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7
str,str,str,str,str,str,str
"""date""","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Nuclear""","""Solar""","""Wind Onshore"""
"""2019-01-01 00:…","""81.0""","""123.0""","""481.0""","""3243.0""","""0.0""","""1.0"""
"""2019-01-01 01:…","""76.0""","""124.0""","""393.0""","""3243.0""","""0.0""","""0.0"""


In [455]:
column_names = (
    generation_ch.head(1)
    .transpose()
    .with_columns(pl.col("column_0").str.strip_chars())
    .with_columns(pl.col("column_0").str.to_lowercase().str.replace_all(" ", "_"))
    .with_columns(pl.concat_str([pl.col("column_0"), pl.lit("_ch")], separator=""))[
        "column_0"
    ]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'hydro_pumped_storage_ch',
 'hydro_run-of-river_and_poundage_ch',
 'hydro_water_reservoir_ch',
 'nuclear_ch',
 'solar_ch',
 'wind_onshore_ch']

In [456]:
generation_ch = (
    generation_ch.tail(-1)
    .rename(dict(zip(generation_ch.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [457]:
generation_ch.pipe(print)
generation_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 8)
┌─────────────┬─────────────┬─────────────┬─────────────┬────────────┬──────────┬────────────┬─────┐
│ date        ┆ hydro_pumpe ┆ hydro_run-o ┆ hydro_water ┆ nuclear_ch ┆ solar_ch ┆ wind_onsho ┆ dst │
│ ---         ┆ d_storage_c ┆ f-river_and ┆ _reservoir_ ┆ ---        ┆ ---      ┆ re_ch      ┆ --- │
│ str         ┆ h           ┆ _poundage_… ┆ ch          ┆ f64        ┆ f64      ┆ ---        ┆ i16 │
│             ┆ ---         ┆ ---         ┆ ---         ┆            ┆          ┆ f64        ┆     │
│             ┆ f64         ┆ f64         ┆ f64         ┆            ┆          ┆            ┆     │
╞═════════════╪═════════════╪═════════════╪═════════════╪════════════╪══════════╪════════════╪═════╡
│ 2019-01-01  ┆ 81.0        ┆ 123.0       ┆ 481.0       ┆ 3243.0     ┆ 0.0      ┆ 1.0        ┆ 1   │
│ 00:00       ┆             ┆             ┆             ┆            ┆          ┆            ┆     │
│ 2019-01-01  ┆ 76.0        ┆ 124.0       ┆ 393.0       ┆ 3243.0     ┆ 0

#### Germany

In [468]:
generation_de = pl.read_csv("generation_actual_all_DE_LU.csv", has_header=False)
generation_de.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Fossil Brown c…","""Fossil Coal-de…","""Fossil Gas""","""Fossil Gas""","""Fossil Hard co…","""Fossil Oil""","""Fossil Oil""","""Geothermal""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Hydro Water Re…","""Nuclear""","""Nuclear""","""Other""","""Other renewabl…","""Other renewabl…","""Solar""","""Solar""","""Waste""","""Wind Offshore""","""Wind Onshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…"
"""2019-01-01 00:…","""4812.0""","""6932.0""","""273.0""","""3410.0""","""1.0""","""3345.0""","""482.0""",,"""19.0""","""350.0""","""1827.0""","""1485.0""","""132.0""",,"""9002.0""",,"""475.0""","""107.0""",,"""0.0""",,"""783.0""","""3177.0""","""19366.0""",


In [469]:
column_names = (
    generation_de.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("DE")], separator=" ").str.to_lowercase().str.replace_all(" ", "_").str.replace_all("/", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_de',
 'fossil_brown_coal_lignite_actual_aggregated_de',
 'fossil_coal-derived_gas_actual_aggregated_de',
 'fossil_gas_actual_aggregated_de',
 'fossil_gas_actual_consumption_de',
 'fossil_hard_coal_actual_aggregated_de',
 'fossil_oil_actual_aggregated_de',
 'fossil_oil_actual_consumption_de',
 'geothermal_actual_aggregated_de',
 'hydro_pumped_storage_actual_aggregated_de',
 'hydro_pumped_storage_actual_consumption_de',
 'hydro_run-of-river_and_poundage_actual_aggregated_de',
 'hydro_water_reservoir_actual_aggregated_de',
 'hydro_water_reservoir_actual_consumption_de',
 'nuclear_actual_aggregated_de',
 'nuclear_actual_consumption_de',
 'other_actual_aggregated_de',
 'other_renewable_actual_aggregated_de',
 'other_renewable_actual_consumption_de',
 'solar_actual_aggregated_de',
 'solar_actual_consumption_de',
 'waste_actual_aggregated_de',
 'wind_offshore_actual_aggregated_de',
 'wind_onshore_actual_aggregated_de',
 'wind_onshore_actual_consumption_de'

In [470]:
generation_de = (
    generation_de.tail(-2)
    .rename(dict(zip(generation_de.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    .group_by("date")
    .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [471]:
generation_de.pipe(print)
generation_de["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 27)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ fossil_bro ┆ fossil_coa ┆ … ┆ wind_offsh ┆ wind_onsh ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ wn_coal_li ┆ l-derived_ ┆   ┆ ore_actual ┆ ore_actua ┆ ore_actua ┆ --- │
│ str        ┆ gated_de   ┆ gnite_actu ┆ gas_actual ┆   ┆ _aggregate ┆ l_aggrega ┆ l_consump ┆ i16 │
│            ┆ ---        ┆ al…        ┆ _a…        ┆   ┆ d_…        ┆ ted_d…    ┆ tion_…    ┆     │
│            ┆ f64        ┆ ---        ┆ ---        ┆   ┆ ---        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆ f64        ┆ f64        ┆   ┆ f64        ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 4831.25    ┆ 6335.0     ┆ 429.0      ┆ … ┆ 3134.0     ┆ 20401.5   ┆ null      ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

#### France

In [2]:
generation_fr = pl.read_csv("generation_actual_all_FR.csv", has_header=False)
generation_fr.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Fossil Gas""","""Fossil Hard co…","""Fossil Hard co…","""Fossil Oil""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Hydro Water Re…","""Nuclear""","""Solar""","""Waste""","""Wind Offshore""","""Wind Offshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…"
"""2019-01-01 00:…","""351.0""","""2722.0""","""0.0""",,"""207.0""",,"""1377.0""","""3552.0""","""1054.0""",,"""55627.0""","""0.0""","""253.0""",,,"""1622.0"""


In [3]:
column_names = (
    generation_fr.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("FR")], separator=" ").str.to_lowercase().str.replace_all(" ", "_").str.replace_all("/", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_fr',
 'fossil_gas_actual_aggregated_fr',
 'fossil_hard_coal_actual_aggregated_fr',
 'fossil_hard_coal_actual_consumption_fr',
 'fossil_oil_actual_aggregated_fr',
 'hydro_pumped_storage_actual_aggregated_fr',
 'hydro_pumped_storage_actual_consumption_fr',
 'hydro_run-of-river_and_poundage_actual_aggregated_fr',
 'hydro_water_reservoir_actual_aggregated_fr',
 'hydro_water_reservoir_actual_consumption_fr',
 'nuclear_actual_aggregated_fr',
 'solar_actual_aggregated_fr',
 'waste_actual_aggregated_fr',
 'wind_offshore_actual_aggregated_fr',
 'wind_offshore_actual_consumption_fr',
 'wind_onshore_actual_aggregated_fr']

In [4]:
generation_fr = (
    generation_fr.tail(-2)
    .rename(dict(zip(generation_fr.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    # .group_by("date")
    # .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [5]:
generation_fr.pipe(print)
generation_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_555, 18)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ fossil_gas ┆ fossil_har ┆ … ┆ wind_offsh ┆ wind_offs ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ _actual_ag ┆ d_coal_act ┆   ┆ ore_actual ┆ hore_actu ┆ ore_actua ┆ --- │
│ str        ┆ gated_fr   ┆ gregated_f ┆ ual_aggreg ┆   ┆ _aggregate ┆ al_consum ┆ l_aggrega ┆ i16 │
│            ┆ ---        ┆ r          ┆ at…        ┆   ┆ d_…        ┆ ption…    ┆ ted_f…    ┆     │
│            ┆ f64        ┆ ---        ┆ ---        ┆   ┆ ---        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆ f64        ┆ f64        ┆   ┆ f64        ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 351.0      ┆ 2722.0     ┆ 0.0        ┆ … ┆ null       ┆ null      ┆ 1622.0    ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

#### Italy

In [483]:
generation_it = pl.read_csv("generation_actual_all_IT.csv", has_header=False)
generation_it.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Fossil Coal-de…","""Fossil Gas""","""Fossil Hard co…","""Fossil Hard co…","""Fossil Oil""","""Fossil Oil""","""Geothermal""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Other""","""Other""","""Solar""","""Solar""","""Waste""","""Wind Offshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…"
"""2019-01-01 00:…","""495.0""","""776.0""","""8053.0""","""1938.0""",,"""8.0""",,"""674.0""","""1.0""","""29.0""","""1891.0""","""289.0""","""3070.0""",,"""0.0""",,"""37.0""",,"""5076.0"""


In [485]:
column_names = (
    generation_it.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("IT")], separator=" ").str.to_lowercase().str.replace_all(" ", "_").str.replace_all("/", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_it',
 'fossil_coal-derived_gas_actual_aggregated_it',
 'fossil_gas_actual_aggregated_it',
 'fossil_hard_coal_actual_aggregated_it',
 'fossil_hard_coal_actual_consumption_it',
 'fossil_oil_actual_aggregated_it',
 'fossil_oil_actual_consumption_it',
 'geothermal_actual_aggregated_it',
 'hydro_pumped_storage_actual_aggregated_it',
 'hydro_pumped_storage_actual_consumption_it',
 'hydro_run-of-river_and_poundage_actual_aggregated_it',
 'hydro_water_reservoir_actual_aggregated_it',
 'other_actual_aggregated_it',
 'other_actual_consumption_it',
 'solar_actual_aggregated_it',
 'solar_actual_consumption_it',
 'waste_actual_aggregated_it',
 'wind_offshore_actual_aggregated_it',
 'wind_onshore_actual_aggregated_it']

In [486]:
generation_it = (
    generation_it.tail(-2)
    .rename(dict(zip(generation_it.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    # .group_by("date")
    # .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [487]:
generation_it.pipe(print)
generation_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 21)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ fossil_coa ┆ fossil_gas ┆ … ┆ waste_actu ┆ wind_offs ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ l-derived_ ┆ _actual_ag ┆   ┆ al_aggrega ┆ hore_actu ┆ ore_actua ┆ --- │
│ str        ┆ gated_it   ┆ gas_actual ┆ gregated_i ┆   ┆ ted_it     ┆ al_aggreg ┆ l_aggrega ┆ i16 │
│            ┆ ---        ┆ _a…        ┆ t          ┆   ┆ ---        ┆ ated_…    ┆ ted_i…    ┆     │
│            ┆ f64        ┆ ---        ┆ ---        ┆   ┆ f64        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆ f64        ┆ f64        ┆   ┆            ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 495.0      ┆ 776.0      ┆ 8053.0     ┆ … ┆ 37.0       ┆ null      ┆ 5076.0    ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

### Water Reservoirs

In [539]:
hydro_storage_at = (
    pl.read_csv("hydro_reservoir_storage_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_at", "date": "week_start"})
)

hydro_storage_at.pipe(print)

shape: (265, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_at │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 1.412194e6                 │
│ 2019-01-07 ┆ 1.372937e6                 │
│ 2019-01-14 ┆ 1.326312e6                 │
│ 2019-01-21 ┆ 1.176602e6                 │
│ 2019-01-28 ┆ 1.077808e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 1.543144e6                 │
│ 2024-01-08 ┆ 1.423063e6                 │
│ 2024-01-15 ┆ 1.26075e6                  │
│ 2024-01-22 ┆ 1.157852e6                 │
│ 2024-01-29 ┆ 1.073592e6                 │
└────────────┴────────────────────────────┘


In [540]:
hydro_storage_ch = (
    pl.read_csv("hydro_reservoir_storage_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_ch", "date": "week_start"})
)

hydro_storage_ch.pipe(print)

shape: (265, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_ch │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 4.656491e6                 │
│ 2019-01-07 ┆ 4.420007e6                 │
│ 2019-01-14 ┆ 4.154307e6                 │
│ 2019-01-21 ┆ 3.693327e6                 │
│ 2019-01-28 ┆ 3.288999e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 4.253319e6                 │
│ 2024-01-08 ┆ 4.031253e6                 │
│ 2024-01-15 ┆ 2.996454e6                 │
│ 2024-01-22 ┆ 3.326345e6                 │
│ 2024-01-29 ┆ 2.47213e6                  │
└────────────┴────────────────────────────┘


In [541]:
hydro_storage_fr = (
    pl.read_csv("hydro_reservoir_storage_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_fr", "date": "week_start"})
)

hydro_storage_fr.pipe(print)

shape: (263, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_fr │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 2.186488e6                 │
│ 2019-01-07 ┆ 2.055622e6                 │
│ 2019-01-14 ┆ 1.931241e6                 │
│ 2019-01-21 ┆ 1.750281e6                 │
│ 2019-01-28 ┆ 1.591274e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 2.795844e6                 │
│ 2024-01-08 ┆ 2.368286e6                 │
│ 2024-01-15 ┆ 2.201546e6                 │
│ 2024-01-22 ┆ 2.146234e6                 │
│ 2024-01-29 ┆ 2.010901e6                 │
└────────────┴────────────────────────────┘


In [542]:
hydro_storage_it = (
    pl.read_csv("hydro_reservoir_storage_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_it", "date": "week_start"})
)

hydro_storage_it.pipe(print)

shape: (264, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_it │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 3.036299e6                 │
│ 2019-01-07 ┆ 2.704739e6                 │
│ 2019-01-14 ┆ 2.593794e6                 │
│ 2019-01-21 ┆ 2.49019e6                  │
│ 2019-01-28 ┆ 2.473671e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 3.280575e6                 │
│ 2024-01-08 ┆ 3.053817e6                 │
│ 2024-01-15 ┆ 2.926584e6                 │
│ 2024-01-22 ┆ 2.778409e6                 │
│ 2024-01-29 ┆ 2.619505e6                 │
└────────────┴────────────────────────────┘


### Crossborder Flows

In [582]:
filenames = [filename for filename in os.listdir() if "physical_flow" in filename]
filenames

['crossborder_physical_flow_mw_AT_CH.csv',
 'crossborder_physical_flow_mw_CH_AT.csv',
 'crossborder_physical_flow_mw_CH_DE_LU.csv',
 'crossborder_physical_flow_mw_CH_FR.csv',
 'crossborder_physical_flow_mw_CH_IT.csv',
 'crossborder_physical_flow_mw_DE_LU_CH.csv',
 'crossborder_physical_flow_mw_FR_CH.csv',
 'crossborder_physical_flow_mw_IT_CH.csv']

In [583]:
flow_at_ch = (
    pl.read_csv("crossborder_physical_flow_mw_AT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_at_ch"})
    .sort("date")
)

flow_at_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_at_ch ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 763.0                         ┆ 1   │
│ 2019-01-01 01:00 ┆ 497.0                         ┆ 1   │
│ 2019-01-01 02:00 ┆ 427.0                         ┆ 1   │
│ 2019-01-01 03:00 ┆ 452.0                         ┆ 1   │
│ 2019-01-01 04:00 ┆ 603.0                         ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 194.0                         ┆ 1   │
│ 2024-01-31 20:00 ┆ 504.0                         ┆ 1   │
│ 2024-01-31 21:00 ┆ 678.0                         ┆ 1   │
│ 2024-01-31 22:00 ┆ 726.0                         ┆ 1   │
│ 2024-01-31 23:00 ┆ 952.0                         ┆ 1   │
└──────────────────┴─────────────────

In [584]:
flow_ch_at = (
    pl.read_csv("crossborder_physical_flow_mw_CH_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_at"})
    .sort("date")
)

flow_ch_at.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_at ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0                           ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 21:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 22:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                           ┆ 1   │
└──────────────────┴─────────────────

In [585]:
flow_ch_de_lu = (
    pl.read_csv("crossborder_physical_flow_mw_CH_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_de_lu"})
    .sort("date")
)

flow_ch_de_lu.pipe(print)

shape: (110_368, 3)
┌──────────────────┬──────────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_de_lu ┆ dst │
│ ---              ┆ ---                              ┆ --- │
│ str              ┆ f64                              ┆ i16 │
╞══════════════════╪══════════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 595.0                            ┆ 1   │
│ 2019-01-01 01:00 ┆ 502.0                            ┆ 1   │
│ 2019-01-01 02:00 ┆ 512.0                            ┆ 1   │
│ 2019-01-01 03:00 ┆ 544.0                            ┆ 1   │
│ 2019-01-01 04:00 ┆ 451.0                            ┆ 1   │
│ …                ┆ …                                ┆ …   │
│ 2024-01-31 22:45 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:15 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:30 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:45 ┆ 0.0                          

In [586]:
flow_ch_fr = (
    pl.read_csv("crossborder_physical_flow_mw_CH_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_fr"})
    .sort("date")
)

flow_ch_fr.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_fr ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 553.0                         ┆ 1   │
│ 2019-01-01 01:00 ┆ 233.0                         ┆ 1   │
│ 2019-01-01 02:00 ┆ 280.0                         ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 56.0                          ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 21:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 22:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                           ┆ 1   │
└──────────────────┴─────────────────

In [587]:
flow_ch_it = (
    pl.read_csv("crossborder_physical_flow_mw_CH_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_it"})
    .sort("date")
)

flow_ch_it.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_it ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1468.0                        ┆ 1   │
│ 2019-01-01 01:00 ┆ 1162.0                        ┆ 1   │
│ 2019-01-01 02:00 ┆ 931.0                         ┆ 1   │
│ 2019-01-01 03:00 ┆ 1303.0                        ┆ 1   │
│ 2019-01-01 04:00 ┆ 1225.0                        ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 4691.0                        ┆ 1   │
│ 2024-01-31 20:00 ┆ 4682.0                        ┆ 1   │
│ 2024-01-31 21:00 ┆ 4496.0                        ┆ 1   │
│ 2024-01-31 22:00 ┆ 4545.0                        ┆ 1   │
│ 2024-01-31 23:00 ┆ 4645.0                        ┆ 1   │
└──────────────────┴─────────────────

In [588]:
flow_de_lu_ch = (
    pl.read_csv("crossborder_physical_flow_mw_DE_LU_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_de_lu_ch"})
    .sort("date")
)

flow_de_lu_ch.pipe(print)

shape: (110_368, 3)
┌──────────────────┬──────────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_de_lu_ch ┆ dst │
│ ---              ┆ ---                              ┆ --- │
│ str              ┆ f64                              ┆ i16 │
╞══════════════════╪══════════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 3652.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 3536.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 3677.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 3714.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 3427.0                           ┆ 1   │
│ …                ┆ …                                ┆ …   │
│ 2024-01-31 22:45 ┆ 2845.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 3305.0                           ┆ 1   │
│ 2024-01-31 23:15 ┆ 3449.0                           ┆ 1   │
│ 2024-01-31 23:30 ┆ 3209.0                           ┆ 1   │
│ 2024-01-31 23:45 ┆ 3010.0                       

In [589]:
flow_fr_ch = (
    pl.read_csv("crossborder_physical_flow_mw_FR_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_fr_ch"})
    .sort("date")
)

flow_fr_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_fr_ch ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 115.0                         ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0                           ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 1139.0                        ┆ 1   │
│ 2024-01-31 20:00 ┆ 1154.0                        ┆ 1   │
│ 2024-01-31 21:00 ┆ 1546.0                        ┆ 1   │
│ 2024-01-31 22:00 ┆ 1693.0                        ┆ 1   │
│ 2024-01-31 23:00 ┆ 1905.0                        ┆ 1   │
└──────────────────┴─────────────────

In [590]:
flow_it_ch = (
    pl.read_csv("crossborder_physical_flow_mw_IT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_it_ch"})
    .sort("date")
)

flow_it_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_it_ch ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0                           ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 21:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 22:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                           ┆ 1   │
└──────────────────┴─────────────────

### Cross Border Capacity Day Ahead Forecast

In [591]:
capacity_forecast_at_ch = (
    pl.read_csv("crossborder_capacity_forecast_AT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_at_ch"})
    .sort("date")
)

capacity_forecast_at_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_at_ch ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 1200.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1200.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [592]:
capacity_forecast_ch_at = (
    pl.read_csv("crossborder_capacity_forecast_CH_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_at"})
    .sort("date")
)

capacity_forecast_ch_at.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_at ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 01:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 02:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 03:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 04:00 ┆ 700.0                   ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1200.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [593]:
capacity_forecast_ch_de_lu = (
    pl.read_csv("crossborder_capacity_forecast_CH_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_de_lu"})
    .sort("date")
)

capacity_forecast_ch_de_lu.pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_de_lu ┆ dst │
│ ---              ┆ ---                        ┆ --- │
│ str              ┆ f64                        ┆ i16 │
╞══════════════════╪════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 01:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 02:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 03:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 04:00 ┆ 4000.0                     ┆ 1   │
│ …                ┆ …                          ┆ …   │
│ 2024-01-31 19:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 20:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 21:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 22:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 23:00 ┆ 4000.0                     ┆ 1   │
└──────────────────┴────────────────────────────┴─────┘


In [594]:
capacity_forecast_ch_fr = (
    pl.read_csv("crossborder_capacity_forecast_CH_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_fr"})
    .sort("date")
)

capacity_forecast_ch_fr.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_fr ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 1200.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1300.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [595]:
capacity_forecast_ch_it = (
    pl.read_csv("crossborder_capacity_forecast_CH_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_it"})
    .sort("date")
)

capacity_forecast_ch_it.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_it ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 2513.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 4322.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 4322.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 4137.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 4137.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 3620.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [596]:
capacity_forecast_de_lu_ch = (
    pl.read_csv("crossborder_capacity_forecast_DE_LU_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_de_lu_ch"})
    .sort("date")
)

capacity_forecast_de_lu_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────────────────┬─────┐
│ date             ┆ capacity_forecast_de_lu_ch ┆ dst │
│ ---              ┆ ---                        ┆ --- │
│ str              ┆ f64                        ┆ i16 │
╞══════════════════╪════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 01:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 02:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 03:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 04:00 ┆ 800.0                      ┆ 1   │
│ …                ┆ …                          ┆ …   │
│ 2024-01-31 19:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 20:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 21:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 22:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 23:00 ┆ 800.0                      ┆ 1   │
└──────────────────┴────────────────────────────┴─────┘


In [598]:
capacity_forecast_fr_ch = (
    pl.read_csv("crossborder_capacity_forecast_FR_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_FR_CH"})
    .sort("date")
)

capacity_forecast_fr_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_FR_CH ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 3000.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 3200.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [599]:
capacity_forecast_it_ch = (
    pl.read_csv("crossborder_capacity_forecast_IT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_it_ch"})
    .sort("date")
)

capacity_forecast_it_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_it_ch ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 1910.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1910.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


### Joining everything

In [607]:
df_final = (
    jao_chde.drop("day")
    .with_columns(
        pl.col("date")
        .str.to_datetime()
        .dt.truncate("1w")
        .dt.date()
        .dt.strftime("%Y-%m-%d")
        .alias("week_start")
    )
    .select(["date", "week_start", "dst", "auction_price", "allocatedCapacity", "ATC"])
    # Day Ahead Prices
    .join(da_at, how="left", on=["date", "dst"])
    .join(da_ch, how="left", on=["date", "dst"])
    .join(da_delu, how="left", on=["date", "dst"])
    .join(da_fr, how="left", on=["date", "dst"])
    # Actual Load
    .join(load_at, how="left", on=["date", "dst"])
    .join(load_ch, how="left", on=["date", "dst"])
    .join(load_de, how="left", on=["date", "dst"])
    .join(load_fr, how="left", on=["date", "dst"])
    .join(load_it, how="left", on=["date", "dst"])
    # Forecast Wind and Solar
    .join(wind_solar_forecast_at, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_ch, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_de, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_fr, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_it, how="left", on=["date", "dst"])
    # Actual Generation All Types
    .join(generation_at, how="left", on=["date", "dst"])
    .join(generation_ch, how="left", on=["date", "dst"])
    .join(generation_de, how="left", on=["date", "dst"])
    .join(generation_fr, how="left", on=["date", "dst"])
    .join(generation_it, how="left", on=["date", "dst"])
    # Hydro Reservoir Storage
    .join(hydro_storage_at, how="left", on=["week_start"])
    .join(hydro_storage_ch, how="left", on=["week_start"])
    .join(hydro_storage_fr, how="left", on=["week_start"])
    .join(hydro_storage_it, how="left", on=["week_start"])
    # Cross Border Physical Flow
    .join(flow_at_ch, how="left", on=["date", "dst"])
    .join(flow_ch_at, how="left", on=["date", "dst"])
    .join(flow_ch_de_lu, how="left", on=["date", "dst"])
    .join(flow_ch_fr, how="left", on=["date", "dst"])
    .join(flow_ch_it, how="left", on=["date", "dst"])
    .join(flow_de_lu_ch, how="left", on=["date", "dst"])
    .join(flow_fr_ch, how="left", on=["date", "dst"])
    .join(flow_it_ch, how="left", on=["date", "dst"])
    # Cross Border Capacities Forecast
    .join(capacity_forecast_at_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_at, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_de_lu, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_fr, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_it, how="left", on=["date", "dst"])
    .join(capacity_forecast_de_lu_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_fr_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_it_ch, how="left", on=["date", "dst"])
    # Drop
    .drop(["week_start"])
)

df_final

date,dst,auction_price,allocatedCapacity,ATC,day_ahead_price_at,day_ahead_price_ch,day_ahead_price_de,day_ahead_price_fr,actual_load_at,actual_load_ch,actual_load_de,actual_load_fr,actual_load_it,solar_forecast_at,wind_onshore_forecast_at,solar_forecast_ch,wind_onshore_forecast_ch,solar_forecast_de,wind_onshore_forecast_de,wind_offshore_forecast_de,solar_forecast_fr,wind_onshore_forecast_fr,wind_offshore_forecast_fr,solar_forecast_it,wind_onshore_forecast_it,biomass_actual_aggregated_at,biomass_actual_consumption_at,fossil_gas_actual_aggregated_at,fossil_gas_actual_consumption_at,fossil_hard_coal_actual_aggregated_at,fossil_hard_coal_actual_consumption_at,fossil_oil_actual_aggregated_at,fossil_oil_actual_consumption_at,geothermal_actual_aggregated_at,geothermal_actual_consumption_at,hydro_pumped_storage_actual_aggregated_at,…,fossil_gas_actual_aggregated_it,fossil_hard_coal_actual_aggregated_it,fossil_hard_coal_actual_consumption_it,fossil_oil_actual_aggregated_it,fossil_oil_actual_consumption_it,geothermal_actual_aggregated_it,hydro_pumped_storage_actual_aggregated_it,hydro_pumped_storage_actual_consumption_it,hydro_run-of-river_and_poundage_actual_aggregated_it,hydro_water_reservoir_actual_aggregated_it,other_actual_aggregated_it,other_actual_consumption_it,solar_actual_aggregated_it,solar_actual_consumption_it,waste_actual_aggregated_it,wind_offshore_actual_aggregated_it,wind_onshore_actual_aggregated_it,hydro_reservoir_storage_at,hydro_reservoir_storage_ch,hydro_reservoir_storage_fr,hydro_reservoir_storage_it,crossborder_actual_flow_at_ch,crossborder_actual_flow_ch_at,crossborder_actual_flow_ch_de_lu,crossborder_actual_flow_ch_fr,crossborder_actual_flow_ch_it,crossborder_actual_flow_de_lu_ch,crossborder_actual_flow_fr_ch,crossborder_actual_flow_it_ch,capacity_forecast_at_ch,capacity_forecast_ch_at,capacity_forecast_ch_de_lu,capacity_forecast_ch_fr,capacity_forecast_ch_it,capacity_forecast_de_lu_ch,capacity_forecast_FR_CH,capacity_forecast_it_ch
str,i16,f64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""2019-01-01 02:…",1,0.0,4232,4233,39.78,47.24,-4.08,39.78,5619.25,7244.0,40537.0,58540.0,21600.0,0.0,338.0,0.0,2.0,0.0,22355.5,5028.5,0.0,1675.0,,0.0,,304.0,0.0,618.75,0.0,155.5,0.0,0.0,0.0,0.0,0.0,0.0,…,7090.0,2177.0,,8.0,,673.0,0.0,138.0,1613.0,164.0,2859.0,,0.0,,36.0,,5086.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,427.0,0.0,512.0,280.0,931.0,3677.0,0.0,0.0,1200.0,700.0,4000.0,1200.0,2513.0,800.0,3000.0,1910.0
"""2019-01-01 08:…",1,0.0,4232,4233,1.53,28.7,-4.93,25.0,6203.75,7160.0,40120.5,52929.0,21199.0,22.0,634.0,3.0,2.0,79.25,29267.0,4907.75,0.0,1840.0,,745.0,,304.0,0.0,446.5,0.0,155.75,0.0,0.0,0.0,0.0,0.0,0.0,…,5174.0,1706.0,,8.0,,672.0,0.0,878.0,1877.0,272.0,2283.0,,1052.0,,33.0,,4210.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,759.0,0.0,567.0,0.0,2431.0,4408.0,57.0,0.0,1200.0,700.0,4000.0,1200.0,2513.0,800.0,3000.0,1910.0
"""2019-01-01 10:…",1,0.0,4232,4233,14.34,37.53,-4.93,34.64,6884.0,7354.0,45375.5,55677.0,23695.0,94.0,845.0,35.0,2.0,1460.75,30461.5,4923.5,924.0,2047.0,,3866.0,,304.0,0.0,504.5,0.0,154.75,0.0,0.0,0.0,0.0,0.0,0.0,…,5172.0,1777.0,,8.0,,673.0,170.0,930.0,2028.0,259.0,2108.0,,4601.0,,37.0,,3262.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,801.0,0.0,545.0,0.0,2417.0,4365.0,164.0,0.0,1200.0,700.0,4000.0,1200.0,2513.0,800.0,3000.0,1910.0
"""2019-01-01 20:…",1,0.0,4232,4233,32.63,55.05,-6.98,61.27,7030.75,7104.0,51846.5,62360.0,29874.0,0.0,2359.0,0.0,8.0,0.0,36429.75,3924.75,0.0,3332.0,,0.0,,304.0,0.0,617.75,0.0,153.5,0.0,0.0,0.0,0.0,0.0,0.0,…,9757.0,1804.0,,67.0,,669.0,1217.0,1.0,3217.0,1122.0,2886.0,,0.0,,39.0,,3091.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,1098.0,0.0,572.0,0.0,2992.0,4105.0,26.0,0.0,1200.0,700.0,4000.0,1200.0,2321.0,800.0,3000.0,1910.0
"""2019-01-02 14:…",1,0.0,4233,4233,50.85,59.4,49.64,55.64,8775.75,7372.0,64764.25,68572.0,35263.0,119.0,2854.0,33.0,10.0,3676.75,24910.25,3716.75,2384.0,4113.0,,2537.0,,300.0,0.0,2214.5,0.0,153.75,0.0,0.0,0.0,0.0,0.0,78.0,…,11538.0,2422.0,,67.0,,659.0,0.0,2.0,1913.0,330.0,3164.0,,3738.0,,39.0,,4997.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,1292.0,0.0,629.0,0.0,3156.0,4107.0,228.0,0.0,1200.0,700.0,4000.0,1200.0,2814.0,800.0,3100.0,1810.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""2024-02-03 01:…",1,0.0,4344,4350,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,1.073592e6,2.47213e6,2.010901e6,2.619505e6,,,,,,,,,,,,,,,,
"""2024-02-03 03:…",1,0.0,4346,4350,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,1.073592e6,2.47213e6,2.010901e6,2.619505e6,,,,,,,,,,,,,,,,
"""2024-02-03 05:…",1,0.0,4346,4350,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,1.073592e6,2.47213e6,2.010901e6,2.619505e6,,,,,,,,,,,,,,,,
"""2024-02-03 20:…",1,0.0,4335,4350,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,1.073592e6,2.47213e6,2.010901e6,2.619505e6,,,,,,,,,,,,,,,,


In [609]:
df_final.to_pandas().to_csv("0_df_final_ch-de.csv", index=False)

In [611]:
df_final = (
    jao_dech.drop("day")
    .with_columns(
        pl.col("date")
        .str.to_datetime()
        .dt.truncate("1w")
        .dt.date()
        .dt.strftime("%Y-%m-%d")
        .alias("week_start")
    )
    .select(["date", "week_start", "dst", "auction_price", "allocatedCapacity", "ATC"])
    # Day Ahead Prices
    .join(da_at, how="left", on=["date", "dst"])
    .join(da_ch, how="left", on=["date", "dst"])
    .join(da_delu, how="left", on=["date", "dst"])
    .join(da_fr, how="left", on=["date", "dst"])
    # Actual Load
    .join(load_at, how="left", on=["date", "dst"])
    .join(load_ch, how="left", on=["date", "dst"])
    .join(load_de, how="left", on=["date", "dst"])
    .join(load_fr, how="left", on=["date", "dst"])
    .join(load_it, how="left", on=["date", "dst"])
    # Forecast Wind and Solar
    .join(wind_solar_forecast_at, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_ch, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_de, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_fr, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_it, how="left", on=["date", "dst"])
    # Actual Generation All Types
    .join(generation_at, how="left", on=["date", "dst"])
    .join(generation_ch, how="left", on=["date", "dst"])
    .join(generation_de, how="left", on=["date", "dst"])
    .join(generation_fr, how="left", on=["date", "dst"])
    .join(generation_it, how="left", on=["date", "dst"])
    # Hydro Reservoir Storage
    .join(hydro_storage_at, how="left", on=["week_start"])
    .join(hydro_storage_ch, how="left", on=["week_start"])
    .join(hydro_storage_fr, how="left", on=["week_start"])
    .join(hydro_storage_it, how="left", on=["week_start"])
    # Cross Border Physical Flow
    .join(flow_at_ch, how="left", on=["date", "dst"])
    .join(flow_ch_at, how="left", on=["date", "dst"])
    .join(flow_ch_de_lu, how="left", on=["date", "dst"])
    .join(flow_ch_fr, how="left", on=["date", "dst"])
    .join(flow_ch_it, how="left", on=["date", "dst"])
    .join(flow_de_lu_ch, how="left", on=["date", "dst"])
    .join(flow_fr_ch, how="left", on=["date", "dst"])
    .join(flow_it_ch, how="left", on=["date", "dst"])
    # Cross Border Capacities Forecast
    .join(capacity_forecast_at_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_at, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_de_lu, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_fr, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_it, how="left", on=["date", "dst"])
    .join(capacity_forecast_de_lu_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_fr_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_it_ch, how="left", on=["date", "dst"])
    # Drop
    .drop(["week_start"])
)

df_final

date,dst,auction_price,allocatedCapacity,ATC,day_ahead_price_at,day_ahead_price_ch,day_ahead_price_de,day_ahead_price_fr,actual_load_at,actual_load_ch,actual_load_de,actual_load_fr,actual_load_it,solar_forecast_at,wind_onshore_forecast_at,solar_forecast_ch,wind_onshore_forecast_ch,solar_forecast_de,wind_onshore_forecast_de,wind_offshore_forecast_de,solar_forecast_fr,wind_onshore_forecast_fr,wind_offshore_forecast_fr,solar_forecast_it,wind_onshore_forecast_it,biomass_actual_aggregated_at,biomass_actual_consumption_at,fossil_gas_actual_aggregated_at,fossil_gas_actual_consumption_at,fossil_hard_coal_actual_aggregated_at,fossil_hard_coal_actual_consumption_at,fossil_oil_actual_aggregated_at,fossil_oil_actual_consumption_at,geothermal_actual_aggregated_at,geothermal_actual_consumption_at,hydro_pumped_storage_actual_aggregated_at,…,fossil_gas_actual_aggregated_it,fossil_hard_coal_actual_aggregated_it,fossil_hard_coal_actual_consumption_it,fossil_oil_actual_aggregated_it,fossil_oil_actual_consumption_it,geothermal_actual_aggregated_it,hydro_pumped_storage_actual_aggregated_it,hydro_pumped_storage_actual_consumption_it,hydro_run-of-river_and_poundage_actual_aggregated_it,hydro_water_reservoir_actual_aggregated_it,other_actual_aggregated_it,other_actual_consumption_it,solar_actual_aggregated_it,solar_actual_consumption_it,waste_actual_aggregated_it,wind_offshore_actual_aggregated_it,wind_onshore_actual_aggregated_it,hydro_reservoir_storage_at,hydro_reservoir_storage_ch,hydro_reservoir_storage_fr,hydro_reservoir_storage_it,crossborder_actual_flow_at_ch,crossborder_actual_flow_ch_at,crossborder_actual_flow_ch_de_lu,crossborder_actual_flow_ch_fr,crossborder_actual_flow_ch_it,crossborder_actual_flow_de_lu_ch,crossborder_actual_flow_fr_ch,crossborder_actual_flow_it_ch,capacity_forecast_at_ch,capacity_forecast_ch_at,capacity_forecast_ch_de_lu,capacity_forecast_ch_fr,capacity_forecast_ch_it,capacity_forecast_de_lu_ch,capacity_forecast_FR_CH,capacity_forecast_it_ch
str,i16,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""2019-01-01 09:…",1,36.29,567.0,567.0,0.41,35.7,-6.33,25.0,6544.5,7185.0,42666.75,53830.0,22341.0,52.0,746.0,17.0,2.0,655.25,29854.0,4922.5,151.0,1943.0,,2325.0,,300.0,0.0,458.75,0.0,155.0,0.0,0.0,0.0,0.0,0.0,0.0,…,5209.0,1725.0,,8.0,,673.0,0.0,1281.0,2030.0,257.0,2169.0,,2840.0,,34.0,,3656.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,803.0,0.0,584.0,0.0,2623.0,4322.0,461.0,0.0,1200.0,700.0,4000.0,1200.0,2513.0,800.0,3000.0,1910.0
"""2019-01-01 13:…",1,35.46,567.0,567.0,8.85,43.07,-0.02,41.06,7040.75,7726.0,49276.5,60029.0,23970.0,156.0,1220.0,45.0,4.0,1915.75,32344.0,4416.75,2602.0,2408.0,,4317.0,,304.0,0.0,452.25,0.0,154.75,0.0,0.0,0.0,0.0,0.0,0.0,…,5802.0,1729.0,,8.0,,669.0,198.0,143.0,1726.0,178.0,2374.0,,4925.0,,40.0,,3460.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,559.0,0.0,626.0,276.0,1317.0,4092.0,0.0,0.0,1200.0,700.0,4000.0,1200.0,2513.0,800.0,3000.0,1910.0
"""2019-01-01 23:…",1,47.78,567.0,567.0,-4.95,55.11,-28.93,54.91,6225.25,6992.0,46421.5,61763.0,23904.0,0.0,2533.0,0.0,8.0,0.0,35527.25,4000.75,0.0,3707.0,,0.0,,304.0,0.0,383.25,0.0,153.25,0.0,0.0,0.0,0.0,0.0,0.0,…,8930.0,1751.0,,66.0,,668.0,0.0,3.0,1994.0,189.0,2644.0,,0.0,,38.0,,2362.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,869.0,0.0,612.0,0.0,2522.0,4231.0,214.0,0.0,1200.0,700.0,4000.0,1200.0,3008.0,800.0,3000.0,1910.0
"""2019-01-02 10:…",1,9.88,567.0,567.0,50.91,64.8,47.57,61.09,8747.5,7168.0,65119.5,69164.0,36056.0,95.0,2712.0,36.0,7.0,3934.25,27793.5,4824.75,963.0,4307.0,,3282.0,,300.0,0.0,2160.25,0.0,153.25,0.0,0.0,0.0,0.0,0.0,339.25,…,13377.0,2789.0,,69.0,,665.0,453.0,1.0,2814.0,755.0,3458.0,,3273.0,,37.0,,1735.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,1057.0,0.0,609.0,3.0,3326.0,3865.0,0.0,0.0,1200.0,700.0,4000.0,1200.0,2814.0,800.0,3100.0,1810.0
"""2019-01-02 11:…",1,8.0,567.0,567.0,48.78,64.29,43.94,63.63,8830.5,7271.0,66463.25,69929.0,36123.0,145.0,2765.0,56.0,7.0,5856.0,27971.5,4751.5,1905.0,4280.0,,3908.0,,300.0,0.0,2406.0,0.0,153.5,0.0,0.0,0.0,0.0,0.0,0.0,…,13739.0,2779.0,,74.0,,667.0,31.0,1.0,2600.0,700.0,3387.0,,3921.0,,39.0,,1718.0,1.412194e6,4.656491e6,2.186488e6,3.036299e6,961.0,0.0,631.0,0.0,3232.0,3826.0,137.0,0.0,1200.0,700.0,4000.0,1200.0,2814.0,800.0,3100.0,1810.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""2024-02-01 22:…",1,2.44,508.0,508.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,1.073592e6,2.47213e6,2.010901e6,2.619505e6,,,,,,,,,,,,,,,,
"""2024-02-02 02:…",1,12.6,508.0,508.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,1.073592e6,2.47213e6,2.010901e6,2.619505e6,,,,,,,,,,,,,,,,
"""2024-02-02 06:…",1,13.25,787.0,787.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,1.073592e6,2.47213e6,2.010901e6,2.619505e6,,,,,,,,,,,,,,,,
"""2024-02-02 17:…",1,16.81,508.0,508.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,1.073592e6,2.47213e6,2.010901e6,2.619505e6,,,,,,,,,,,,,,,,


In [612]:
df_final.to_pandas().to_csv("0_df_final_de-ch.csv", index=False)

### Converting to UTC

In [61]:
(
    pl.read_csv("../00_Data Retrieval and Cleaning/0_df_final_ch-de.csv")
    .sort("date")
    .with_columns(pl.col("date").str.to_datetime().dt.replace_time_zone("Europe/Zurich", ambiguous="earliest"))
    .with_columns(pl.col("date").is_duplicated().alias("tmp"))
    .with_columns(
        pl.when((pl.col("tmp") == True) & (pl.col("dst") == 0))
        .then(pl.col("date") + pl.duration(hours=1))
        .otherwise(pl.col("date"))
        .alias("date")
    )
    .with_columns(pl.col("date").dt.convert_time_zone("UTC"))
    .drop("tmp")
    .to_pandas()
    .to_csv("0_df_final_ch-de_UTC.csv", index=False)
)

In [None]:
(
    pl.read_csv("../00_Data Retrieval and Cleaning/0_df_final_de-ch.csv")
    .sort("date")
    .with_columns(pl.col("date").str.to_datetime().dt.replace_time_zone("Europe/Zurich", ambiguous="earliest"))
    .with_columns(pl.col("date").is_duplicated().alias("tmp"))
    .with_columns(
        pl.when((pl.col("tmp") == True) & (pl.col("dst") == 0))
        .then(pl.col("date") + pl.duration(hours=1))
        .otherwise(pl.col("date"))
        .alias("date")
    )
    .with_columns(pl.col("date").dt.convert_time_zone("UTC"))
    .drop("tmp")
    .to_pandas()
    .to_csv("0_df_final_de-ch_UTC.csv", index=False)
)