# Joining the individual datasets

In [242]:
import pandas as pd
import polars as pl
import polars.selectors as cs
import numpy as np
import plotnine as pn
from mizani.formatters import comma_format, custom_format, currency_format, percent_format
from IPython.display import clear_output, display
import os
import glob
import requests
from datetime import datetime, timedelta
import pytz
from tqdm.notebook import tqdm

jama_colour = [
    "#374e55",
    "#df8f44",
    "#00a1d5",
    "#b24745",
    "#79af97",
]

pd.set_option("display.max.columns", 500)
pd.set_option("display.max.columns", 500)
background_colour = "#F2F2F2"
pn.theme_set(
    pn.theme_classic()
    + pn.theme(
        figure_size=[7, 7/1.618],
        text=pn.element_text(family="monospace"),
        plot_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        panel_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        legend_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
    )
)


%load_ext blackcellmagic
%matplotlib inline

The blackcellmagic extension is already loaded. To reload it, use:
  %reload_ext blackcellmagic


### Auction Prices

In [243]:
jao_chde = (
    pl.from_pandas(pd.read_csv("./Raw Data/jao_prices_CH-DE.csv"))
    .select(pl.exclude(["date", "corridor", "hour"]))
    .with_columns(
        pl.col("delivery_begin_time_ch")
        .str.to_datetime()
        .dt.convert_time_zone("Europe/Zurich")
        .alias("date")
    )
    .with_columns(pl.col("date").dt.date().alias("day"))
    .drop("delivery_begin_time_ch")
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "auction_price"})
)

jao_dech = (
    pl.from_pandas(pd.read_csv("./Raw Data/jao_prices_DE-CH.csv"))
    .select(pl.exclude(["date", "corridor", "hour"]))
    .with_columns(
        pl.col("delivery_begin_time_ch")
        .str.to_datetime()
        .dt.convert_time_zone("Europe/Zurich")
        .alias("date")
    )
    .with_columns(pl.col("date").dt.date().alias("day"))
    .drop("delivery_begin_time_ch")
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "auction_price"})
)

jao_chde

auction_price,allocatedCapacity,ATC,date,day,dst
f64,i64,i64,str,date,i16
0.01,4222,4222,"""2023-03-06 11:…",2023-03-06,1
0.08,3005,3005,"""2019-07-18 15:…",2019-07-18,0
0.0,4082,4085,"""2019-05-12 22:…",2019-05-12,0
0.0,4408,4410,"""2021-01-06 00:…",2021-01-06,1
0.03,4101,4103,"""2021-04-13 19:…",2021-04-13,0
…,…,…,…,…,…
0.03,3139,3140,"""2019-09-23 13:…",2019-09-23,0
0.05,4035,4035,"""2019-05-25 05:…",2019-05-25,0
0.05,3412,3413,"""2021-05-14 15:…",2021-05-14,0
0.0,4213,4223,"""2023-04-09 10:…",2023-04-09,0


In [244]:
jao_chde.filter(pl.col("date") == "2019-10-27 02:00")

auction_price,allocatedCapacity,ATC,date,day,dst
f64,i64,i64,str,date,i16
0.0,3863,3863,"""2019-10-27 02:…",2019-10-27,0
0.0,3857,3863,"""2019-10-27 02:…",2019-10-27,1


In [245]:
jao_chde["date"].value_counts(sort=True).filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:00 ┆ 2     │
│ 2022-10-30 02:00 ┆ 2     │
│ 2023-10-29 02:00 ┆ 2     │
└──────────────────┴───────┘


In [246]:
jao_dech["date"].value_counts(sort=True).filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:00 ┆ 2     │
│ 2022-10-30 02:00 ┆ 2     │
│ 2023-10-29 02:00 ┆ 2     │
└──────────────────┴───────┘


Now the observations are uniquely identifiable.

### Day-Ahead Prices 

- Aggregation: These values are already in hourly format
- Join ID: date

In [247]:
da_at = (
    pl.read_csv("./Raw Data/day_ahead_prices_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_at"})
)

da_at.pipe(print)
da_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_at ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2020-11-21 05:00 ┆ 31.03              ┆ 1   │
│ 2020-03-07 06:00 ┆ 26.25              ┆ 1   │
│ 2022-06-07 18:00 ┆ 200.0              ┆ 0   │
│ 2021-06-26 02:00 ┆ 84.33              ┆ 0   │
│ 2019-02-28 03:00 ┆ 28.02              ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2020-06-26 21:00 ┆ 39.9               ┆ 0   │
│ 2021-11-06 16:00 ┆ 167.34             ┆ 1   │
│ 2022-05-05 07:00 ┆ 275.06             ┆ 0   │
│ 2022-10-24 12:00 ┆ 125.05             ┆ 0   │
│ 2023-01-16 23:00 ┆ 110.64             ┆ 1   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [248]:
da_ch = (
    pl.read_csv("./Raw Data/day_ahead_prices_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_ch"})
)

da_ch.pipe(print)
da_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_ch ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2019-11-21 11:00 ┆ 59.05              ┆ 1   │
│ 2023-03-10 20:00 ┆ 135.22             ┆ 1   │
│ 2019-03-02 23:00 ┆ 32.87              ┆ 1   │
│ 2023-07-24 02:00 ┆ 73.85              ┆ 0   │
│ 2020-07-02 09:00 ┆ 42.98              ┆ 0   │
│ …                ┆ …                  ┆ …   │
│ 2019-03-17 17:00 ┆ 13.0               ┆ 1   │
│ 2022-01-31 11:00 ┆ 255.08             ┆ 1   │
│ 2019-12-24 14:00 ┆ 31.98              ┆ 1   │
│ 2023-06-01 18:00 ┆ 82.64              ┆ 0   │
│ 2019-07-11 23:00 ┆ 41.49              ┆ 0   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [249]:
da_delu = (
    pl.read_csv("./Raw Data/day_ahead_prices_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_de"})
)

da_delu.pipe(print)
da_delu["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_de ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2019-03-30 08:00 ┆ 34.93              ┆ 1   │
│ 2020-01-30 22:00 ┆ 27.3               ┆ 1   │
│ 2022-10-26 10:00 ┆ 133.31             ┆ 0   │
│ 2024-01-02 19:00 ┆ 63.18              ┆ 1   │
│ 2019-05-19 00:00 ┆ 39.66              ┆ 0   │
│ …                ┆ …                  ┆ …   │
│ 2023-05-29 07:00 ┆ 58.71              ┆ 0   │
│ 2020-11-12 14:00 ┆ 44.04              ┆ 1   │
│ 2021-09-29 19:00 ┆ 188.12             ┆ 0   │
│ 2023-06-30 02:00 ┆ 96.56              ┆ 0   │
│ 2023-11-12 10:00 ┆ 88.61              ┆ 1   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [250]:
da_fr = (
    pl.read_csv("./Raw Data/day_ahead_prices_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_fr"})
)

da_fr.pipe(print)
da_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_fr ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2019-09-20 03:00 ┆ 20.84              ┆ 0   │
│ 2020-09-20 19:00 ┆ 51.7               ┆ 0   │
│ 2019-02-24 08:00 ┆ 39.11              ┆ 1   │
│ 2023-02-19 12:00 ┆ 110.9              ┆ 1   │
│ 2019-01-13 04:00 ┆ 27.08              ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2021-01-16 13:00 ┆ 57.14              ┆ 1   │
│ 2021-04-12 04:00 ┆ 53.95              ┆ 0   │
│ 2021-01-02 20:00 ┆ 60.03              ┆ 1   │
│ 2019-12-20 00:00 ┆ 26.56              ┆ 1   │
│ 2021-10-30 00:00 ┆ 100.0              ┆ 0   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [251]:
da_it = (
    pl.read_csv("./Raw Data/day_ahead_prices_IT_NORD.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_it"})
)

da_it.pipe(print)
da_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_it ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2020-12-11 00:00 ┆ 41.05              ┆ 1   │
│ 2021-07-22 01:00 ┆ 91.52              ┆ 0   │
│ 2022-12-25 03:00 ┆ 126.75             ┆ 1   │
│ 2020-12-23 13:00 ┆ 55.69              ┆ 1   │
│ 2022-02-10 04:00 ┆ 188.08             ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2019-02-27 15:00 ┆ 55.53              ┆ 1   │
│ 2023-05-15 03:00 ┆ 101.32             ┆ 0   │
│ 2023-11-10 08:00 ┆ 165.95             ┆ 1   │
│ 2024-01-05 23:00 ┆ 86.8               ┆ 1   │
│ 2021-03-05 00:00 ┆ 50.33              ┆ 1   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

### Actual Load

In [252]:
load_at = (
    pl.read_csv("./Raw Data/actual_load_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_at"})
)

load_at.pipe(print)
load_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_at ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 6075.0         ┆ 1   │
│ 2019-01-01 01:00 ┆ 5852.75        ┆ 1   │
│ 2019-01-01 02:00 ┆ 5619.25        ┆ 1   │
│ 2019-01-01 03:00 ┆ 5324.0         ┆ 1   │
│ 2019-01-01 04:00 ┆ 5273.5         ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 8433.0         ┆ 1   │
│ 2024-01-31 20:00 ┆ 7892.75        ┆ 1   │
│ 2024-01-31 21:00 ┆ 7312.0         ┆ 1   │
│ 2024-01-31 22:00 ┆ 7065.0         ┆ 1   │
│ 2024-01-31 23:00 ┆ 6610.25        ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

In [253]:
load_ch = (
    pl.read_csv("./Raw Data/actual_load_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_ch"})
)

load_ch.pipe(print)
load_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_ch ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 7037.0         ┆ 1   │
│ 2019-01-01 01:00 ┆ 7096.0         ┆ 1   │
│ 2019-01-01 02:00 ┆ 7244.0         ┆ 1   │
│ 2019-01-01 03:00 ┆ 7443.0         ┆ 1   │
│ 2019-01-01 04:00 ┆ 7353.0         ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 8055.0         ┆ 1   │
│ 2024-01-31 20:00 ┆ 7774.0         ┆ 1   │
│ 2024-01-31 21:00 ┆ 7417.0         ┆ 1   │
│ 2024-01-31 22:00 ┆ 6997.0         ┆ 1   │
│ 2024-01-31 23:00 ┆ 6821.0         ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

In [254]:
load_de = (
    pl.read_csv("./Raw Data/actual_load_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_de"})
)

load_de.pipe(print)
load_de["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_de ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 43713.5        ┆ 1   │
│ 2019-01-01 01:00 ┆ 42091.0        ┆ 1   │
│ 2019-01-01 02:00 ┆ 40537.0        ┆ 1   │
│ 2019-01-01 03:00 ┆ 39725.75       ┆ 1   │
│ 2019-01-01 04:00 ┆ 39502.0        ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 69814.25       ┆ 1   │
│ 2024-01-31 20:00 ┆ 66137.75       ┆ 1   │
│ 2024-01-31 21:00 ┆ 62751.0        ┆ 1   │
│ 2024-01-31 22:00 ┆ 60155.25       ┆ 1   │
│ 2024-01-31 23:00 ┆ 56119.75       ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

In [255]:
load_fr = (
    pl.read_csv("./Raw Data/actual_load_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_fr"})
)

load_fr.pipe(print)
load_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_518, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_fr ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 62176.0        ┆ 1   │
│ 2019-01-01 01:00 ┆ 60301.0        ┆ 1   │
│ 2019-01-01 02:00 ┆ 58540.0        ┆ 1   │
│ 2019-01-01 03:00 ┆ 55144.0        ┆ 1   │
│ 2019-01-01 04:00 ┆ 52978.0        ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 63343.0        ┆ 1   │
│ 2024-01-31 20:00 ┆ 64827.0        ┆ 1   │
│ 2024-01-31 21:00 ┆ 61646.0        ┆ 1   │
│ 2024-01-31 22:00 ┆ 58398.0        ┆ 1   │
│ 2024-01-31 23:00 ┆ 58517.0        ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (2, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2022-10-30 02:00 ┆ 2     │
│ 2023-10-29 02:00 ┆ 2     │
└───────────────

In [256]:
load_it = (
    pl.read_csv("./Raw Data/actual_load_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_it"})
)

load_it.pipe(print)
load_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_it ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 23644.0        ┆ 1   │
│ 2019-01-01 01:00 ┆ 22850.0        ┆ 1   │
│ 2019-01-01 02:00 ┆ 21600.0        ┆ 1   │
│ 2019-01-01 03:00 ┆ 20255.0        ┆ 1   │
│ 2019-01-01 04:00 ┆ 19459.0        ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 43670.0        ┆ 1   │
│ 2024-01-31 20:00 ┆ 41362.0        ┆ 1   │
│ 2024-01-31 21:00 ┆ 38122.0        ┆ 1   │
│ 2024-01-31 22:00 ┆ 34165.0        ┆ 1   │
│ 2024-01-31 23:00 ┆ 30578.0        ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

### Forecast Wind and Solar

In [257]:
wind_solar_forecast_at = (
    pl.read_csv("./Raw Data/wind_solar_forecast_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_at",
            "wind_onshore_forecast": "wind_onshore_forecast_at",
        }
    )
)

wind_solar_forecast_at.pipe(print)
wind_solar_forecast_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 4)
┌──────────────────┬───────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_at ┆ wind_onshore_forecast_at ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ i16 │
╞══════════════════╪═══════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 487.0                    ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 398.0                    ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 338.0                    ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 309.0                    ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0               ┆ 320.0                    ┆ 1   │
│ …                ┆ …                 ┆ …                        ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0               ┆ 131.0                    ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0               ┆ 152.0                    ┆ 1   │
│ 2024-01-31 21:00 

In [258]:
wind_solar_forecast_ch = (
    pl.read_csv("./Raw Data/wind_solar_forecast_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_ch",
            "wind_onshore_forecast": "wind_onshore_forecast_ch",
        }
    )
)

wind_solar_forecast_ch.pipe(print)
wind_solar_forecast_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 4)
┌──────────────────┬───────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_ch ┆ wind_onshore_forecast_ch ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ i16 │
╞══════════════════╪═══════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ …                ┆ …                 ┆ …                        ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0               ┆ 0.0                      ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0               ┆ 0.0                      ┆ 1   │
│ 2024-01-31 21:00 

In [259]:
wind_solar_forecast_de = (
    pl.read_csv("./Raw Data/wind_solar_forecast_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
        pl.mean("wind_offshore_forecast")
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_de",
            "wind_offshore_forecast": "wind_offshore_forecast_de",
            "wind_onshore_forecast": "wind_onshore_forecast_de",
        }
    )
)

wind_solar_forecast_de.pipe(print)
wind_solar_forecast_de["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_546, 5)
┌──────────────────┬───────────────────┬──────────────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_de ┆ wind_onshore_forecast_de ┆ wind_offshore_forecast_d ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ e                        ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ ---                      ┆ i16 │
│                  ┆                   ┆                          ┆ f64                      ┆     │
╞══════════════════╪═══════════════════╪══════════════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 18879.75                 ┆ 5069.0                   ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 20626.5                  ┆ 5042.25                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 22355.5                  ┆ 5028.5                   ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 24032.5                  ┆ 4977

In [260]:
wind_solar_forecast_fr = (
    pl.read_csv("./Raw Data/wind_solar_forecast_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
        pl.mean("wind_offshore_forecast")
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_fr",
            "wind_offshore_forecast": "wind_offshore_forecast_fr",
            "wind_onshore_forecast": "wind_onshore_forecast_fr",
        }
    )
)

wind_solar_forecast_fr.pipe(print)
wind_solar_forecast_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_344, 5)
┌──────────────────┬───────────────────┬──────────────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_fr ┆ wind_onshore_forecast_fr ┆ wind_offshore_forecast_f ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ r                        ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ ---                      ┆ i16 │
│                  ┆                   ┆                          ┆ str                      ┆     │
╞══════════════════╪═══════════════════╪══════════════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 1698.0                   ┆ null                     ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 1680.0                   ┆ null                     ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 1675.0                   ┆ null                     ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 1670.0                   ┆ null

In [261]:
wind_solar_forecast_it = (
    pl.read_csv("./Raw Data/wind_solar_forecast_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_it",
            "wind_onshore_forecast": "wind_onshore_forecast_it",
        }
    )
)

wind_solar_forecast_it.pipe(print)
wind_solar_forecast_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (43_314, 4)
┌──────────────────┬───────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_it ┆ wind_onshore_forecast_it ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ --- │
│ str              ┆ f64               ┆ str                      ┆ i16 │
╞══════════════════╪═══════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ …                ┆ …                 ┆ …                        ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2024-01-31 21:00 

### Generation Actuals (All Types)

#### Austria

In [262]:
generation_at = pl.read_csv("./Raw Data/generation_actual_all_AT.csv", has_header=False)
generation_at.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26,column_27
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Biomass""","""Fossil Gas""","""Fossil Gas""","""Fossil Hard co…","""Fossil Hard co…","""Fossil Oil""","""Fossil Oil""","""Geothermal""","""Geothermal""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Run-of-r…","""Hydro Water Re…","""Hydro Water Re…","""Other""","""Other""","""Other renewabl…","""Other renewabl…","""Solar""","""Solar""","""Waste""","""Waste""","""Wind Onshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…"
"""2019-01-01 00:…","""304.0""","""0.0""","""1339.0""","""0.0""","""154.0""","""0.0""","""0.0""","""0.0""","""0.0""","""0.0""","""0.0""","""1577.0""","""2699.0""","""0.0""","""140.0""","""0.0""","""22.0""","""0.0""","""0.0""","""0.0""","""0.0""","""0.0""","""100.0""","""0.0""","""288.0""","""0.0"""


In [263]:
column_names = (
    generation_at.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("AT")], separator=" ").str.to_lowercase().str.replace_all(" ", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_at',
 'biomass_actual_consumption_at',
 'fossil_gas_actual_aggregated_at',
 'fossil_gas_actual_consumption_at',
 'fossil_hard_coal_actual_aggregated_at',
 'fossil_hard_coal_actual_consumption_at',
 'fossil_oil_actual_aggregated_at',
 'fossil_oil_actual_consumption_at',
 'geothermal_actual_aggregated_at',
 'geothermal_actual_consumption_at',
 'hydro_pumped_storage_actual_aggregated_at',
 'hydro_pumped_storage_actual_consumption_at',
 'hydro_run-of-river_and_poundage_actual_aggregated_at',
 'hydro_run-of-river_and_poundage_actual_consumption_at',
 'hydro_water_reservoir_actual_aggregated_at',
 'hydro_water_reservoir_actual_consumption_at',
 'other_actual_aggregated_at',
 'other_actual_consumption_at',
 'other_renewable_actual_aggregated_at',
 'other_renewable_actual_consumption_at',
 'solar_actual_aggregated_at',
 'solar_actual_consumption_at',
 'waste_actual_aggregated_at',
 'waste_actual_consumption_at',
 'wind_onshore_actual_aggregated_at',
 'wind_

In [264]:
generation_at = (
    generation_at.tail(-2)
    .rename(dict(zip(generation_at.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    .group_by("date")
    .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [265]:
generation_at.pipe(print)
generation_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 28)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ biomass_ac ┆ fossil_gas ┆ … ┆ waste_actu ┆ wind_onsh ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ tual_consu ┆ _actual_ag ┆   ┆ al_consump ┆ ore_actua ┆ ore_actua ┆ --- │
│ str        ┆ gated_at   ┆ mption_at  ┆ gregated_a ┆   ┆ tion_at    ┆ l_aggrega ┆ l_consump ┆ i16 │
│            ┆ ---        ┆ ---        ┆ t          ┆   ┆ ---        ┆ ted_a…    ┆ tion_…    ┆     │
│            ┆ f64        ┆ f64        ┆ ---        ┆   ┆ f64        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆            ┆ f64        ┆   ┆            ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 304.0      ┆ 0.0        ┆ 1282.75    ┆ … ┆ 0.0        ┆ 248.0     ┆ 0.0       ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

#### Switzerland

In [266]:
generation_ch = pl.read_csv("./Raw Data/generation_actual_all_CH.csv", has_header=False)
generation_ch.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7
str,str,str,str,str,str,str
"""date""","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Nuclear""","""Solar""","""Wind Onshore"""
"""2019-01-01 00:…","""81.0""","""123.0""","""481.0""","""3243.0""","""0.0""","""1.0"""
"""2019-01-01 01:…","""76.0""","""124.0""","""393.0""","""3243.0""","""0.0""","""0.0"""


In [267]:
column_names = (
    generation_ch.head(1)
    .transpose()
    .with_columns(pl.col("column_0").str.strip_chars())
    .with_columns(pl.col("column_0").str.to_lowercase().str.replace_all(" ", "_"))
    .with_columns(pl.concat_str([pl.col("column_0"), pl.lit("_ch")], separator=""))[
        "column_0"
    ]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'hydro_pumped_storage_ch',
 'hydro_run-of-river_and_poundage_ch',
 'hydro_water_reservoir_ch',
 'nuclear_ch',
 'solar_ch',
 'wind_onshore_ch']

In [268]:
generation_ch = (
    generation_ch.tail(-1)
    .rename(dict(zip(generation_ch.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [269]:
generation_ch.pipe(print)
generation_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 8)
┌─────────────┬─────────────┬─────────────┬─────────────┬────────────┬──────────┬────────────┬─────┐
│ date        ┆ hydro_pumpe ┆ hydro_run-o ┆ hydro_water ┆ nuclear_ch ┆ solar_ch ┆ wind_onsho ┆ dst │
│ ---         ┆ d_storage_c ┆ f-river_and ┆ _reservoir_ ┆ ---        ┆ ---      ┆ re_ch      ┆ --- │
│ str         ┆ h           ┆ _poundage_… ┆ ch          ┆ f64        ┆ f64      ┆ ---        ┆ i16 │
│             ┆ ---         ┆ ---         ┆ ---         ┆            ┆          ┆ f64        ┆     │
│             ┆ f64         ┆ f64         ┆ f64         ┆            ┆          ┆            ┆     │
╞═════════════╪═════════════╪═════════════╪═════════════╪════════════╪══════════╪════════════╪═════╡
│ 2019-01-01  ┆ 81.0        ┆ 123.0       ┆ 481.0       ┆ 3243.0     ┆ 0.0      ┆ 1.0        ┆ 1   │
│ 00:00       ┆             ┆             ┆             ┆            ┆          ┆            ┆     │
│ 2019-01-01  ┆ 76.0        ┆ 124.0       ┆ 393.0       ┆ 3243.0     ┆ 0

#### Germany

In [270]:
generation_de = pl.read_csv("./Raw Data/generation_actual_all_DE_LU.csv", has_header=False)
generation_de.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Fossil Brown c…","""Fossil Coal-de…","""Fossil Gas""","""Fossil Gas""","""Fossil Hard co…","""Fossil Oil""","""Fossil Oil""","""Geothermal""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Hydro Water Re…","""Nuclear""","""Nuclear""","""Other""","""Other renewabl…","""Other renewabl…","""Solar""","""Solar""","""Waste""","""Wind Offshore""","""Wind Onshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…"
"""2019-01-01 00:…","""4812.0""","""6932.0""","""273.0""","""3410.0""","""1.0""","""3345.0""","""482.0""",,"""19.0""","""350.0""","""1827.0""","""1485.0""","""132.0""",,"""9002.0""",,"""475.0""","""107.0""",,"""0.0""",,"""783.0""","""3177.0""","""19366.0""",


In [271]:
column_names = (
    generation_de.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("DE")], separator=" ").str.to_lowercase().str.replace_all(" ", "_").str.replace_all("/", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_de',
 'fossil_brown_coal_lignite_actual_aggregated_de',
 'fossil_coal-derived_gas_actual_aggregated_de',
 'fossil_gas_actual_aggregated_de',
 'fossil_gas_actual_consumption_de',
 'fossil_hard_coal_actual_aggregated_de',
 'fossil_oil_actual_aggregated_de',
 'fossil_oil_actual_consumption_de',
 'geothermal_actual_aggregated_de',
 'hydro_pumped_storage_actual_aggregated_de',
 'hydro_pumped_storage_actual_consumption_de',
 'hydro_run-of-river_and_poundage_actual_aggregated_de',
 'hydro_water_reservoir_actual_aggregated_de',
 'hydro_water_reservoir_actual_consumption_de',
 'nuclear_actual_aggregated_de',
 'nuclear_actual_consumption_de',
 'other_actual_aggregated_de',
 'other_renewable_actual_aggregated_de',
 'other_renewable_actual_consumption_de',
 'solar_actual_aggregated_de',
 'solar_actual_consumption_de',
 'waste_actual_aggregated_de',
 'wind_offshore_actual_aggregated_de',
 'wind_onshore_actual_aggregated_de',
 'wind_onshore_actual_consumption_de'

In [272]:
generation_de = (
    generation_de.tail(-2)
    .rename(dict(zip(generation_de.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    .group_by("date")
    .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [273]:
generation_de.pipe(print)
generation_de["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 27)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ fossil_bro ┆ fossil_coa ┆ … ┆ wind_offsh ┆ wind_onsh ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ wn_coal_li ┆ l-derived_ ┆   ┆ ore_actual ┆ ore_actua ┆ ore_actua ┆ --- │
│ str        ┆ gated_de   ┆ gnite_actu ┆ gas_actual ┆   ┆ _aggregate ┆ l_aggrega ┆ l_consump ┆ i16 │
│            ┆ ---        ┆ al…        ┆ _a…        ┆   ┆ d_…        ┆ ted_d…    ┆ tion_…    ┆     │
│            ┆ f64        ┆ ---        ┆ ---        ┆   ┆ ---        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆ f64        ┆ f64        ┆   ┆ f64        ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 4831.25    ┆ 6335.0     ┆ 429.0      ┆ … ┆ 3134.0     ┆ 20401.5   ┆ null      ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

#### France

In [274]:
generation_fr = pl.read_csv("./Raw Data/generation_actual_all_FR.csv", has_header=False)
generation_fr.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Fossil Gas""","""Fossil Hard co…","""Fossil Hard co…","""Fossil Oil""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Hydro Water Re…","""Nuclear""","""Solar""","""Waste""","""Wind Offshore""","""Wind Offshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…"
"""2019-01-01 00:…","""351.0""","""2722.0""","""0.0""",,"""207.0""",,"""1377.0""","""3552.0""","""1054.0""",,"""55627.0""","""0.0""","""253.0""",,,"""1622.0"""


In [275]:
column_names = (
    generation_fr.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("FR")], separator=" ").str.to_lowercase().str.replace_all(" ", "_").str.replace_all("/", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_fr',
 'fossil_gas_actual_aggregated_fr',
 'fossil_hard_coal_actual_aggregated_fr',
 'fossil_hard_coal_actual_consumption_fr',
 'fossil_oil_actual_aggregated_fr',
 'hydro_pumped_storage_actual_aggregated_fr',
 'hydro_pumped_storage_actual_consumption_fr',
 'hydro_run-of-river_and_poundage_actual_aggregated_fr',
 'hydro_water_reservoir_actual_aggregated_fr',
 'hydro_water_reservoir_actual_consumption_fr',
 'nuclear_actual_aggregated_fr',
 'solar_actual_aggregated_fr',
 'waste_actual_aggregated_fr',
 'wind_offshore_actual_aggregated_fr',
 'wind_offshore_actual_consumption_fr',
 'wind_onshore_actual_aggregated_fr']

In [276]:
generation_fr = (
    generation_fr.tail(-2)
    .rename(dict(zip(generation_fr.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    # .group_by("date")
    # .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [277]:
generation_fr.pipe(print)
generation_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_555, 18)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ fossil_gas ┆ fossil_har ┆ … ┆ wind_offsh ┆ wind_offs ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ _actual_ag ┆ d_coal_act ┆   ┆ ore_actual ┆ hore_actu ┆ ore_actua ┆ --- │
│ str        ┆ gated_fr   ┆ gregated_f ┆ ual_aggreg ┆   ┆ _aggregate ┆ al_consum ┆ l_aggrega ┆ i16 │
│            ┆ ---        ┆ r          ┆ at…        ┆   ┆ d_…        ┆ ption…    ┆ ted_f…    ┆     │
│            ┆ f64        ┆ ---        ┆ ---        ┆   ┆ ---        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆ f64        ┆ f64        ┆   ┆ f64        ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 351.0      ┆ 2722.0     ┆ 0.0        ┆ … ┆ null       ┆ null      ┆ 1622.0    ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

#### Italy

In [278]:
generation_it = pl.read_csv("./Raw Data/generation_actual_all_IT.csv", has_header=False)
generation_it.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Fossil Coal-de…","""Fossil Gas""","""Fossil Hard co…","""Fossil Hard co…","""Fossil Oil""","""Fossil Oil""","""Geothermal""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Other""","""Other""","""Solar""","""Solar""","""Waste""","""Wind Offshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…"
"""2019-01-01 00:…","""495.0""","""776.0""","""8053.0""","""1938.0""",,"""8.0""",,"""674.0""","""1.0""","""29.0""","""1891.0""","""289.0""","""3070.0""",,"""0.0""",,"""37.0""",,"""5076.0"""


In [279]:
column_names = (
    generation_it.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("IT")], separator=" ").str.to_lowercase().str.replace_all(" ", "_").str.replace_all("/", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_it',
 'fossil_coal-derived_gas_actual_aggregated_it',
 'fossil_gas_actual_aggregated_it',
 'fossil_hard_coal_actual_aggregated_it',
 'fossil_hard_coal_actual_consumption_it',
 'fossil_oil_actual_aggregated_it',
 'fossil_oil_actual_consumption_it',
 'geothermal_actual_aggregated_it',
 'hydro_pumped_storage_actual_aggregated_it',
 'hydro_pumped_storage_actual_consumption_it',
 'hydro_run-of-river_and_poundage_actual_aggregated_it',
 'hydro_water_reservoir_actual_aggregated_it',
 'other_actual_aggregated_it',
 'other_actual_consumption_it',
 'solar_actual_aggregated_it',
 'solar_actual_consumption_it',
 'waste_actual_aggregated_it',
 'wind_offshore_actual_aggregated_it',
 'wind_onshore_actual_aggregated_it']

In [280]:
generation_it = (
    generation_it.tail(-2)
    .rename(dict(zip(generation_it.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    # .group_by("date")
    # .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [281]:
generation_it.pipe(print)
generation_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 21)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ fossil_coa ┆ fossil_gas ┆ … ┆ waste_actu ┆ wind_offs ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ l-derived_ ┆ _actual_ag ┆   ┆ al_aggrega ┆ hore_actu ┆ ore_actua ┆ --- │
│ str        ┆ gated_it   ┆ gas_actual ┆ gregated_i ┆   ┆ ted_it     ┆ al_aggreg ┆ l_aggrega ┆ i16 │
│            ┆ ---        ┆ _a…        ┆ t          ┆   ┆ ---        ┆ ated_…    ┆ ted_i…    ┆     │
│            ┆ f64        ┆ ---        ┆ ---        ┆   ┆ f64        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆ f64        ┆ f64        ┆   ┆            ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 495.0      ┆ 776.0      ┆ 8053.0     ┆ … ┆ 37.0       ┆ null      ┆ 5076.0    ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

### Water Reservoirs

In [282]:
hydro_storage_at = (
    pl.read_csv("./Raw Data/hydro_reservoir_storage_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_at", "date": "week_start"})
)

hydro_storage_at.pipe(print)

shape: (265, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_at │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 1.412194e6                 │
│ 2019-01-07 ┆ 1.372937e6                 │
│ 2019-01-14 ┆ 1.326312e6                 │
│ 2019-01-21 ┆ 1.176602e6                 │
│ 2019-01-28 ┆ 1.077808e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 1.543144e6                 │
│ 2024-01-08 ┆ 1.423063e6                 │
│ 2024-01-15 ┆ 1.26075e6                  │
│ 2024-01-22 ┆ 1.157852e6                 │
│ 2024-01-29 ┆ 1.073592e6                 │
└────────────┴────────────────────────────┘


In [283]:
hydro_storage_ch = (
    pl.read_csv("./Raw Data/hydro_reservoir_storage_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_ch", "date": "week_start"})
)

hydro_storage_ch.pipe(print)

shape: (265, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_ch │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 4.656491e6                 │
│ 2019-01-07 ┆ 4.420007e6                 │
│ 2019-01-14 ┆ 4.154307e6                 │
│ 2019-01-21 ┆ 3.693327e6                 │
│ 2019-01-28 ┆ 3.288999e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 4.253319e6                 │
│ 2024-01-08 ┆ 4.031253e6                 │
│ 2024-01-15 ┆ 2.996454e6                 │
│ 2024-01-22 ┆ 3.326345e6                 │
│ 2024-01-29 ┆ 2.47213e6                  │
└────────────┴────────────────────────────┘


In [284]:
hydro_storage_fr = (
    pl.read_csv("./Raw Data/hydro_reservoir_storage_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_fr", "date": "week_start"})
)

hydro_storage_fr.pipe(print)

shape: (263, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_fr │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 2.186488e6                 │
│ 2019-01-07 ┆ 2.055622e6                 │
│ 2019-01-14 ┆ 1.931241e6                 │
│ 2019-01-21 ┆ 1.750281e6                 │
│ 2019-01-28 ┆ 1.591274e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 2.795844e6                 │
│ 2024-01-08 ┆ 2.368286e6                 │
│ 2024-01-15 ┆ 2.201546e6                 │
│ 2024-01-22 ┆ 2.146234e6                 │
│ 2024-01-29 ┆ 2.010901e6                 │
└────────────┴────────────────────────────┘


In [285]:
hydro_storage_it = (
    pl.read_csv("./Raw Data/hydro_reservoir_storage_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_it", "date": "week_start"})
)

hydro_storage_it.pipe(print)

shape: (264, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_it │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 3.036299e6                 │
│ 2019-01-07 ┆ 2.704739e6                 │
│ 2019-01-14 ┆ 2.593794e6                 │
│ 2019-01-21 ┆ 2.49019e6                  │
│ 2019-01-28 ┆ 2.473671e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 3.280575e6                 │
│ 2024-01-08 ┆ 3.053817e6                 │
│ 2024-01-15 ┆ 2.926584e6                 │
│ 2024-01-22 ┆ 2.778409e6                 │
│ 2024-01-29 ┆ 2.619505e6                 │
└────────────┴────────────────────────────┘


### Crossborder Flows

In [286]:
filenames = [filename for filename in os.listdir("./Raw Data/") if "physical_flow" in filename]
filenames

['crossborder_physical_flow_mw_AT_CH.csv',
 'crossborder_physical_flow_mw_CH_AT.csv',
 'crossborder_physical_flow_mw_CH_DE_LU.csv',
 'crossborder_physical_flow_mw_CH_FR.csv',
 'crossborder_physical_flow_mw_CH_IT.csv',
 'crossborder_physical_flow_mw_DE_LU_CH.csv',
 'crossborder_physical_flow_mw_FR_CH.csv',
 'crossborder_physical_flow_mw_IT_CH.csv']

In [287]:
flow_at_ch = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_AT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_at_ch"})
    .sort("date")
)

flow_at_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_at_ch ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 763.0                         ┆ 1   │
│ 2019-01-01 01:00 ┆ 497.0                         ┆ 1   │
│ 2019-01-01 02:00 ┆ 427.0                         ┆ 1   │
│ 2019-01-01 03:00 ┆ 452.0                         ┆ 1   │
│ 2019-01-01 04:00 ┆ 603.0                         ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 194.0                         ┆ 1   │
│ 2024-01-31 20:00 ┆ 504.0                         ┆ 1   │
│ 2024-01-31 21:00 ┆ 678.0                         ┆ 1   │
│ 2024-01-31 22:00 ┆ 726.0                         ┆ 1   │
│ 2024-01-31 23:00 ┆ 952.0                         ┆ 1   │
└──────────────────┴─────────────────

In [288]:
flow_ch_at = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_CH_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_at"})
    .sort("date")
)

flow_ch_at.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_at ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0                           ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 21:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 22:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                           ┆ 1   │
└──────────────────┴─────────────────

In [289]:
flow_ch_de_lu = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_CH_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_de_lu"})
    .sort("date")
)

flow_ch_de_lu.pipe(print)

shape: (110_368, 3)
┌──────────────────┬──────────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_de_lu ┆ dst │
│ ---              ┆ ---                              ┆ --- │
│ str              ┆ f64                              ┆ i16 │
╞══════════════════╪══════════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 595.0                            ┆ 1   │
│ 2019-01-01 01:00 ┆ 502.0                            ┆ 1   │
│ 2019-01-01 02:00 ┆ 512.0                            ┆ 1   │
│ 2019-01-01 03:00 ┆ 544.0                            ┆ 1   │
│ 2019-01-01 04:00 ┆ 451.0                            ┆ 1   │
│ …                ┆ …                                ┆ …   │
│ 2024-01-31 22:45 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:15 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:30 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:45 ┆ 0.0                          

In [290]:
flow_ch_fr = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_CH_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_fr"})
    .sort("date")
)

flow_ch_fr.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_fr ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 553.0                         ┆ 1   │
│ 2019-01-01 01:00 ┆ 233.0                         ┆ 1   │
│ 2019-01-01 02:00 ┆ 280.0                         ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 56.0                          ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 21:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 22:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                           ┆ 1   │
└──────────────────┴─────────────────

In [291]:
flow_ch_it = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_CH_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_it"})
    .sort("date")
)

flow_ch_it.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_it ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1468.0                        ┆ 1   │
│ 2019-01-01 01:00 ┆ 1162.0                        ┆ 1   │
│ 2019-01-01 02:00 ┆ 931.0                         ┆ 1   │
│ 2019-01-01 03:00 ┆ 1303.0                        ┆ 1   │
│ 2019-01-01 04:00 ┆ 1225.0                        ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 4691.0                        ┆ 1   │
│ 2024-01-31 20:00 ┆ 4682.0                        ┆ 1   │
│ 2024-01-31 21:00 ┆ 4496.0                        ┆ 1   │
│ 2024-01-31 22:00 ┆ 4545.0                        ┆ 1   │
│ 2024-01-31 23:00 ┆ 4645.0                        ┆ 1   │
└──────────────────┴─────────────────

In [292]:
flow_de_lu_ch = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_DE_LU_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_de_lu_ch"})
    .sort("date")
)

flow_de_lu_ch.pipe(print)

shape: (110_368, 3)
┌──────────────────┬──────────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_de_lu_ch ┆ dst │
│ ---              ┆ ---                              ┆ --- │
│ str              ┆ f64                              ┆ i16 │
╞══════════════════╪══════════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 3652.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 3536.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 3677.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 3714.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 3427.0                           ┆ 1   │
│ …                ┆ …                                ┆ …   │
│ 2024-01-31 22:45 ┆ 2845.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 3305.0                           ┆ 1   │
│ 2024-01-31 23:15 ┆ 3449.0                           ┆ 1   │
│ 2024-01-31 23:30 ┆ 3209.0                           ┆ 1   │
│ 2024-01-31 23:45 ┆ 3010.0                       

In [293]:
flow_fr_ch = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_FR_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_fr_ch"})
    .sort("date")
)

flow_fr_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_fr_ch ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 115.0                         ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0                           ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 1139.0                        ┆ 1   │
│ 2024-01-31 20:00 ┆ 1154.0                        ┆ 1   │
│ 2024-01-31 21:00 ┆ 1546.0                        ┆ 1   │
│ 2024-01-31 22:00 ┆ 1693.0                        ┆ 1   │
│ 2024-01-31 23:00 ┆ 1905.0                        ┆ 1   │
└──────────────────┴─────────────────

In [294]:
flow_it_ch = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_IT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_it_ch"})
    .sort("date")
)

flow_it_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_it_ch ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0                           ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 21:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 22:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                           ┆ 1   │
└──────────────────┴─────────────────

### Cross Border Capacity Day Ahead Forecast

In [295]:
capacity_forecast_at_ch = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_AT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_at_ch"})
    .sort("date")
)

capacity_forecast_at_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_at_ch ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 1200.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1200.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [296]:
capacity_forecast_ch_at = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_CH_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_at"})
    .sort("date")
)

capacity_forecast_ch_at.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_at ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 01:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 02:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 03:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 04:00 ┆ 700.0                   ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1200.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [297]:
capacity_forecast_ch_de_lu = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_CH_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_de_lu"})
    .sort("date")
)

capacity_forecast_ch_de_lu.pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_de_lu ┆ dst │
│ ---              ┆ ---                        ┆ --- │
│ str              ┆ f64                        ┆ i16 │
╞══════════════════╪════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 01:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 02:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 03:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 04:00 ┆ 4000.0                     ┆ 1   │
│ …                ┆ …                          ┆ …   │
│ 2024-01-31 19:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 20:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 21:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 22:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 23:00 ┆ 4000.0                     ┆ 1   │
└──────────────────┴────────────────────────────┴─────┘


In [298]:
capacity_forecast_ch_fr = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_CH_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_fr"})
    .sort("date")
)

capacity_forecast_ch_fr.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_fr ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 1200.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1300.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [299]:
capacity_forecast_ch_it = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_CH_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_it"})
    .sort("date")
)

capacity_forecast_ch_it.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_it ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 2513.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 4322.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 4322.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 4137.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 4137.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 3620.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [300]:
capacity_forecast_de_lu_ch = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_DE_LU_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_de_lu_ch"})
    .sort("date")
)

capacity_forecast_de_lu_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────────────────┬─────┐
│ date             ┆ capacity_forecast_de_lu_ch ┆ dst │
│ ---              ┆ ---                        ┆ --- │
│ str              ┆ f64                        ┆ i16 │
╞══════════════════╪════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 01:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 02:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 03:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 04:00 ┆ 800.0                      ┆ 1   │
│ …                ┆ …                          ┆ …   │
│ 2024-01-31 19:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 20:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 21:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 22:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 23:00 ┆ 800.0                      ┆ 1   │
└──────────────────┴────────────────────────────┴─────┘




In [301]:
capacity_forecast_fr_ch = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_FR_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_FR_CH"})
    .sort("date")
)

capacity_forecast_fr_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_FR_CH ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 3000.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 3200.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [302]:
capacity_forecast_it_ch = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_IT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_it_ch"})
    .sort("date")
)

capacity_forecast_it_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_it_ch ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 1910.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1910.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


### Joining everything

In [303]:
df_final = (
    jao_chde.drop("day")
    .with_columns(
        pl.col("date")
        .str.to_datetime()
        .dt.truncate("1w")
        .dt.date()
        .dt.strftime("%Y-%m-%d")
        .alias("week_start")
    )
    .select(["date", "week_start", "dst", "auction_price", "allocatedCapacity", "ATC"])
    # Day Ahead Prices
    .join(da_at, how="left", on=["date", "dst"])
    .join(da_ch, how="left", on=["date", "dst"])
    .join(da_delu, how="left", on=["date", "dst"])
    .join(da_fr, how="left", on=["date", "dst"])
    .join(da_it, how="left", on=["date", "dst"])
    # Actual Load
    .join(load_at, how="left", on=["date", "dst"])
    .join(load_ch, how="left", on=["date", "dst"])
    .join(load_de, how="left", on=["date", "dst"])
    .join(load_fr, how="left", on=["date", "dst"])
    .join(load_it, how="left", on=["date", "dst"])
    # Forecast Wind and Solar
    .join(wind_solar_forecast_at, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_ch, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_de, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_fr, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_it, how="left", on=["date", "dst"])
    # Actual Generation All Types
    .join(generation_at, how="left", on=["date", "dst"])
    .join(generation_ch, how="left", on=["date", "dst"])
    .join(generation_de, how="left", on=["date", "dst"])
    .join(generation_fr, how="left", on=["date", "dst"])
    .join(generation_it, how="left", on=["date", "dst"])
    # Hydro Reservoir Storage
    .join(hydro_storage_at, how="left", on=["week_start"])
    .join(hydro_storage_ch, how="left", on=["week_start"])
    .join(hydro_storage_fr, how="left", on=["week_start"])
    .join(hydro_storage_it, how="left", on=["week_start"])
    # Cross Border Physical Flow
    .join(flow_at_ch, how="left", on=["date", "dst"])
    .join(flow_ch_at, how="left", on=["date", "dst"])
    .join(flow_ch_de_lu, how="left", on=["date", "dst"])
    .join(flow_ch_fr, how="left", on=["date", "dst"])
    .join(flow_ch_it, how="left", on=["date", "dst"])
    .join(flow_de_lu_ch, how="left", on=["date", "dst"])
    .join(flow_fr_ch, how="left", on=["date", "dst"])
    .join(flow_it_ch, how="left", on=["date", "dst"])
    # Cross Border Capacities Forecast
    .join(capacity_forecast_at_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_at, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_de_lu, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_fr, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_it, how="left", on=["date", "dst"])
    .join(capacity_forecast_de_lu_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_fr_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_it_ch, how="left", on=["date", "dst"])
    # Drop
    .drop(["week_start"])
)

df_final

date,dst,auction_price,allocatedCapacity,ATC,day_ahead_price_at,day_ahead_price_ch,day_ahead_price_de,day_ahead_price_fr,day_ahead_price_it,actual_load_at,actual_load_ch,actual_load_de,actual_load_fr,actual_load_it,solar_forecast_at,wind_onshore_forecast_at,solar_forecast_ch,wind_onshore_forecast_ch,solar_forecast_de,wind_onshore_forecast_de,wind_offshore_forecast_de,solar_forecast_fr,wind_onshore_forecast_fr,wind_offshore_forecast_fr,solar_forecast_it,wind_onshore_forecast_it,biomass_actual_aggregated_at,biomass_actual_consumption_at,fossil_gas_actual_aggregated_at,fossil_gas_actual_consumption_at,fossil_hard_coal_actual_aggregated_at,fossil_hard_coal_actual_consumption_at,fossil_oil_actual_aggregated_at,fossil_oil_actual_consumption_at,geothermal_actual_aggregated_at,geothermal_actual_consumption_at,…,fossil_gas_actual_aggregated_it,fossil_hard_coal_actual_aggregated_it,fossil_hard_coal_actual_consumption_it,fossil_oil_actual_aggregated_it,fossil_oil_actual_consumption_it,geothermal_actual_aggregated_it,hydro_pumped_storage_actual_aggregated_it,hydro_pumped_storage_actual_consumption_it,hydro_run-of-river_and_poundage_actual_aggregated_it,hydro_water_reservoir_actual_aggregated_it,other_actual_aggregated_it,other_actual_consumption_it,solar_actual_aggregated_it,solar_actual_consumption_it,waste_actual_aggregated_it,wind_offshore_actual_aggregated_it,wind_onshore_actual_aggregated_it,hydro_reservoir_storage_at,hydro_reservoir_storage_ch,hydro_reservoir_storage_fr,hydro_reservoir_storage_it,crossborder_actual_flow_at_ch,crossborder_actual_flow_ch_at,crossborder_actual_flow_ch_de_lu,crossborder_actual_flow_ch_fr,crossborder_actual_flow_ch_it,crossborder_actual_flow_de_lu_ch,crossborder_actual_flow_fr_ch,crossborder_actual_flow_it_ch,capacity_forecast_at_ch,capacity_forecast_ch_at,capacity_forecast_ch_de_lu,capacity_forecast_ch_fr,capacity_forecast_ch_it,capacity_forecast_de_lu_ch,capacity_forecast_FR_CH,capacity_forecast_it_ch
str,i16,f64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""2023-03-06 11:…",1,0.01,4222,4222,160.84,160.97,160.84,160.84,160.84,8524.75,8504.0,66896.5,68300.0,39427.0,376.0,165.0,86.0,0.0,9417.5,5918.75,470.25,7609.0,2503.0,,7107.0,,276.0,0.0,2323.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,14617.0,2957.0,,373.0,,585.0,205.0,293.0,1854.0,293.0,2406.0,,6626.0,,27.0,3.0,5399.0,818682.0,2.245255e6,1.35172e6,1.841496e6,1101.0,0.0,553.0,1596.0,2030.0,1016.0,0.0,0.0,900.0,1200.0,4000.0,1200.0,2582.0,1400.0,3200.0,1810.0
"""2019-07-18 15:…",0,0.08,3005,3005,39.72,39.82,39.72,39.72,50.0,7803.5,7908.0,64492.5,52212.0,44676.0,592.0,50.0,245.0,2.0,18735.25,1762.5,1165.25,5247.0,3074.0,,7007.0,,240.0,0.0,2191.75,0.0,268.25,0.0,0.0,0.0,0.0,0.0,…,19083.0,874.0,,79.0,,658.0,341.0,,4722.0,1550.0,3694.0,,7427.0,,47.0,,884.0,1.525238e6,5.059643e6,2.690219e6,4.119959e6,400.0,0.0,360.0,0.0,2613.0,196.0,807.0,0.0,786.0,650.0,3000.0,1100.0,2492.0,2000.0,2300.0,1440.0
"""2019-05-12 22:…",0,0.0,4082,4085,41.6,38.01,41.92,42.71,52.39,6213.75,6290.0,49110.75,47510.0,27096.0,0.0,2582.0,0.0,13.0,0.0,13510.75,2659.25,0.0,4701.0,,0.0,,274.0,0.0,61.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,7844.0,2502.0,,20.0,,660.0,448.0,,5211.0,921.0,2709.0,,0.0,,25.0,,1345.0,420343.0,1.415078e6,1.130279e6,2.415405e6,599.0,0.0,268.0,0.0,1927.0,1313.0,655.0,0.0,1200.0,650.0,4000.0,1200.0,1530.0,1200.0,2700.0,1660.0
"""2021-01-06 00:…",1,0.0,4408,4410,46.28,55.23,44.41,53.73,53.73,6452.5,7673.0,53866.5,72722.0,24100.0,0.0,75.0,0.0,0.0,0.0,11868.25,4413.5,0.0,2180.0,,0.0,,174.0,0.0,1849.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,11638.0,1358.0,,82.0,,624.0,80.0,441.0,3028.0,602.0,1357.0,,0.0,,30.0,,4177.0,1.117055e6,3.816687e6,2.407326e6,3.208963e6,1207.0,0.0,328.0,1179.0,0.0,2608.0,0.0,54.0,900.0,1200.0,4000.0,1400.0,975.0,1200.0,3700.0,1910.0
"""2021-04-13 19:…",0,0.03,4101,4103,79.41,85.7,77.26,85.85,85.85,8585.25,7926.0,67159.75,58127.0,37897.0,12.0,2800.0,15.0,0.0,1312.0,11069.25,3107.5,3512.0,2433.0,,196.0,,168.0,0.0,2095.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,17667.0,1432.0,,32.0,,627.0,743.0,,5470.0,1268.0,2570.0,,147.0,,38.0,,3494.0,275505.0,544032.0,917246.0,2.138508e6,205.0,0.0,298.0,21.0,1312.0,1516.0,0.0,0.0,150.0,150.0,4000.0,1200.0,2953.0,1200.0,2385.0,1810.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""2019-09-23 13:…",0,0.03,3139,3140,49.97,48.79,48.27,45.77,60.0,8612.0,7687.0,68279.5,50764.0,39887.0,432.0,98.0,155.0,9.0,10112.5,5540.75,1228.75,4627.0,2539.0,,5450.0,,220.0,0.0,2232.25,0.0,150.75,0.0,0.0,0.0,0.0,0.0,…,13647.0,1760.0,,90.0,,645.0,417.0,,3571.0,1084.0,3520.0,,5357.0,,39.0,,3987.0,1.850987e6,6.196739e6,2.650572e6,3.496676e6,0.0,639.0,290.0,0.0,2637.0,635.0,1379.0,0.0,942.0,600.0,3000.0,700.0,2906.0,1800.0,2294.0,1440.0
"""2019-05-25 05:…",0,0.05,4035,4035,33.09,33.62,33.09,33.09,40.0,5160.5,5743.0,41676.75,35681.0,23047.0,43.0,165.0,0.0,1.0,185.0,4780.75,1689.75,0.0,1914.0,,0.0,,260.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,6917.0,2074.0,,158.0,,665.0,65.0,79.0,4578.0,668.0,2011.0,,4.0,,23.0,,455.0,425504.0,1.309645e6,1.324928e6,2.59274e6,158.0,0.0,760.0,0.0,1815.0,389.0,1856.0,0.0,680.0,650.0,4000.0,1200.0,2130.0,1756.0,2500.0,1660.0
"""2021-05-14 15:…",0,0.05,3412,3413,60.0,54.04,60.0,60.0,75.05,6794.5,6579.0,55697.75,45276.0,36256.0,513.0,384.0,137.0,0.0,18778.5,5847.5,623.25,6100.0,4230.0,,5674.0,,172.0,0.0,430.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,8483.0,959.0,,20.0,,607.0,2040.0,151.0,5354.0,820.0,2422.0,,4138.0,,35.0,,3770.0,354573.0,593045.0,1.231647e6,2.369558e6,343.0,0.0,50.0,0.0,2160.0,46.0,1823.0,0.0,200.0,450.0,4000.0,1100.0,1468.0,1800.0,2900.0,1440.0
"""2023-04-09 10:…",0,0.0,4213,4223,100.0,94.54,100.0,100.0,139.31,6345.0,6874.0,41927.0,45739.0,23182.0,662.0,201.0,55.0,0.0,23105.25,559.5,495.5,6301.0,1473.0,,6792.0,,180.0,0.0,969.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,7433.0,121.0,,29.0,,615.0,4.0,104.0,1171.0,29.0,1170.0,,7904.0,,328.0,0.0,2098.0,592454.0,1.842572e6,1.512237e6,1.97461e6,364.0,0.0,374.0,0.0,0.0,894.0,67.0,818.0,600.0,1200.0,4000.0,1200.0,1479.0,2000.0,2385.0,1910.0


In [304]:
df_final.to_pandas().to_csv("0_df_final_ch-de.csv", index=False)

In [305]:
df_final = (
    jao_dech.drop("day")
    .with_columns(
        pl.col("date")
        .str.to_datetime()
        .dt.truncate("1w")
        .dt.date()
        .dt.strftime("%Y-%m-%d")
        .alias("week_start")
    )
    .select(["date", "week_start", "dst", "auction_price", "allocatedCapacity", "ATC"])
    # Day Ahead Prices
    .join(da_at, how="left", on=["date", "dst"])
    .join(da_ch, how="left", on=["date", "dst"])
    .join(da_delu, how="left", on=["date", "dst"])
    .join(da_fr, how="left", on=["date", "dst"])
    .join(da_it, how="left", on=["date", "dst"])
    # Actual Load
    .join(load_at, how="left", on=["date", "dst"])
    .join(load_ch, how="left", on=["date", "dst"])
    .join(load_de, how="left", on=["date", "dst"])
    .join(load_fr, how="left", on=["date", "dst"])
    .join(load_it, how="left", on=["date", "dst"])
    # Forecast Wind and Solar
    .join(wind_solar_forecast_at, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_ch, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_de, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_fr, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_it, how="left", on=["date", "dst"])
    # Actual Generation All Types
    .join(generation_at, how="left", on=["date", "dst"])
    .join(generation_ch, how="left", on=["date", "dst"])
    .join(generation_de, how="left", on=["date", "dst"])
    .join(generation_fr, how="left", on=["date", "dst"])
    .join(generation_it, how="left", on=["date", "dst"])
    # Hydro Reservoir Storage
    .join(hydro_storage_at, how="left", on=["week_start"])
    .join(hydro_storage_ch, how="left", on=["week_start"])
    .join(hydro_storage_fr, how="left", on=["week_start"])
    .join(hydro_storage_it, how="left", on=["week_start"])
    # Cross Border Physical Flow
    .join(flow_at_ch, how="left", on=["date", "dst"])
    .join(flow_ch_at, how="left", on=["date", "dst"])
    .join(flow_ch_de_lu, how="left", on=["date", "dst"])
    .join(flow_ch_fr, how="left", on=["date", "dst"])
    .join(flow_ch_it, how="left", on=["date", "dst"])
    .join(flow_de_lu_ch, how="left", on=["date", "dst"])
    .join(flow_fr_ch, how="left", on=["date", "dst"])
    .join(flow_it_ch, how="left", on=["date", "dst"])
    # Cross Border Capacities Forecast
    .join(capacity_forecast_at_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_at, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_de_lu, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_fr, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_it, how="left", on=["date", "dst"])
    .join(capacity_forecast_de_lu_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_fr_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_it_ch, how="left", on=["date", "dst"])
    # Drop
    .drop(["week_start"])
)

df_final

date,dst,auction_price,allocatedCapacity,ATC,day_ahead_price_at,day_ahead_price_ch,day_ahead_price_de,day_ahead_price_fr,day_ahead_price_it,actual_load_at,actual_load_ch,actual_load_de,actual_load_fr,actual_load_it,solar_forecast_at,wind_onshore_forecast_at,solar_forecast_ch,wind_onshore_forecast_ch,solar_forecast_de,wind_onshore_forecast_de,wind_offshore_forecast_de,solar_forecast_fr,wind_onshore_forecast_fr,wind_offshore_forecast_fr,solar_forecast_it,wind_onshore_forecast_it,biomass_actual_aggregated_at,biomass_actual_consumption_at,fossil_gas_actual_aggregated_at,fossil_gas_actual_consumption_at,fossil_hard_coal_actual_aggregated_at,fossil_hard_coal_actual_consumption_at,fossil_oil_actual_aggregated_at,fossil_oil_actual_consumption_at,geothermal_actual_aggregated_at,geothermal_actual_consumption_at,…,fossil_gas_actual_aggregated_it,fossil_hard_coal_actual_aggregated_it,fossil_hard_coal_actual_consumption_it,fossil_oil_actual_aggregated_it,fossil_oil_actual_consumption_it,geothermal_actual_aggregated_it,hydro_pumped_storage_actual_aggregated_it,hydro_pumped_storage_actual_consumption_it,hydro_run-of-river_and_poundage_actual_aggregated_it,hydro_water_reservoir_actual_aggregated_it,other_actual_aggregated_it,other_actual_consumption_it,solar_actual_aggregated_it,solar_actual_consumption_it,waste_actual_aggregated_it,wind_offshore_actual_aggregated_it,wind_onshore_actual_aggregated_it,hydro_reservoir_storage_at,hydro_reservoir_storage_ch,hydro_reservoir_storage_fr,hydro_reservoir_storage_it,crossborder_actual_flow_at_ch,crossborder_actual_flow_ch_at,crossborder_actual_flow_ch_de_lu,crossborder_actual_flow_ch_fr,crossborder_actual_flow_ch_it,crossborder_actual_flow_de_lu_ch,crossborder_actual_flow_fr_ch,crossborder_actual_flow_it_ch,capacity_forecast_at_ch,capacity_forecast_ch_at,capacity_forecast_ch_de_lu,capacity_forecast_ch_fr,capacity_forecast_ch_it,capacity_forecast_de_lu_ch,capacity_forecast_FR_CH,capacity_forecast_it_ch
str,i16,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""2020-01-30 13:…",1,5.86,539.0,539.0,42.98,44.05,39.79,40.08,42.1,9180.25,8859.0,74548.5,70738.0,39504.0,343.0,1062.0,74.0,0.0,6491.25,27775.25,5498.0,2211.0,9147.0,,,,208.0,0.0,3642.5,0.0,149.75,0.0,0.0,0.0,0.0,0.0,…,18661.0,1524.0,,116.0,,636.0,73.0,1.0,2403.0,503.0,2836.0,,6829.0,,35.0,,1010.0,1.156977e6,3.522049e6,2.194236e6,2.761989e6,695.0,0.0,397.0,0.0,2639.0,3022.0,1509.0,0.0,1200.0,600.0,4000.0,1200.0,3975.0,800.0,3200.0,1810.0
"""2021-12-06 20:…",1,39.76,377.0,377.0,237.22,291.19,225.8,273.61,273.61,8851.0,9201.0,70598.25,72236.0,42279.0,0.0,1449.0,0.0,0.0,0.0,11901.5,5789.75,0.0,8327.0,,0.0,,136.0,0.0,3606.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,22455.0,1852.0,,207.0,,640.0,376.0,,3346.0,738.0,2673.0,,0.0,,41.0,,6222.0,1.402617e6,3.851389e6,2.059628e6,2.83142e6,40.0,0.0,0.0,0.0,833.0,2255.0,999.0,0.0,900.0,900.0,4000.0,1350.0,3701.0,800.0,3200.0,1810.0
"""2021-12-22 09:…",1,1.97,1692.0,1692.0,517.5,506.23,506.93,555.0,555.0,10083.0,9403.0,69780.25,82404.0,44695.0,140.0,68.0,16.0,0.0,3888.25,4041.25,1174.0,212.0,3092.0,,1826.0,,152.0,0.0,2511.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,26838.0,3070.0,,445.0,,640.0,1123.0,,4640.0,973.0,2276.0,,1463.0,,36.0,,182.0,1.274576e6,3.393697e6,1.92819e6,2.807977e6,629.0,0.0,679.0,2672.0,1822.0,2035.0,0.0,0.0,1200.0,1200.0,4000.0,1400.0,3510.0,2000.0,3200.0,1810.0
"""2023-03-28 06:…",0,9.48,493.0,493.0,129.92,137.07,130.05,130.0,132.8,8070.75,7514.0,59108.75,55786.0,30309.0,6.0,2620.0,1.0,0.0,1.5,15587.5,2358.0,0.0,4318.0,,0.0,,227.0,0.0,914.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,10086.0,2629.0,,521.0,,616.0,140.0,147.0,1537.0,198.0,2201.0,,5.0,1.0,43.0,21.0,5948.0,641040.0,1.943866e6,1.508365e6,1.935619e6,1677.0,0.0,325.0,893.0,1943.0,1549.0,0.0,0.0,600.0,1200.0,4000.0,1200.0,2185.0,800.0,2385.0,1910.0
"""2023-09-14 01:…",0,0.21,1878.0,1878.0,94.65,98.94,94.88,93.21,105.0,5168.75,4942.0,43556.75,40410.0,27371.0,0.0,1062.0,0.0,0.0,0.0,4377.5,590.75,0.0,1504.0,,0.0,,176.0,0.0,441.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,11857.0,1068.0,,15.0,,604.0,195.0,151.0,2993.0,431.0,2017.0,,0.0,,298.0,0.0,1558.0,1.901084e6,4.808295e6,2.818485e6,3.365511e6,81.0,0.0,496.0,0.0,1583.0,217.0,1187.0,0.0,400.0,1200.0,4000.0,1100.0,2591.0,1450.0,2227.0,1660.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""2020-10-30 11:…",1,3.82,494.0,494.0,36.29,40.77,35.33,37.86,44.46,9149.5,7803.0,71559.75,55570.0,38733.0,232.0,747.0,96.0,0.0,3931.0,25757.5,4758.75,3796.0,5447.0,,,,192.0,0.0,1597.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,13151.0,2019.0,,94.0,,626.0,449.0,151.0,4468.0,512.0,2353.0,,7090.0,,31.0,,723.0,1.87501e6,5.829148e6,3.041148e6,3.640905e6,369.0,0.0,0.0,0.0,2559.0,1091.0,1154.0,0.0,1200.0,1200.0,4000.0,1200.0,2505.0,800.0,3200.0,1810.0
"""2020-05-23 02:…",0,0.21,1440.0,1440.0,10.87,13.19,10.87,10.87,10.87,4532.75,6340.0,38650.25,35874.0,22353.0,0.0,223.0,0.0,0.0,0.0,10896.75,4733.5,0.0,1776.0,,0.0,,212.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,6227.0,1284.0,,26.0,,651.0,1.0,165.0,6057.0,658.0,1603.0,,0.0,,42.0,,715.0,844063.0,2.495955e6,2.355412e6,3.293225e6,388.0,0.0,208.0,0.0,1821.0,553.0,1465.0,0.0,900.0,1200.0,4000.0,1200.0,3020.0,1200.0,2755.0,1660.0
"""2023-10-11 23:…",0,26.1,540.0,540.0,94.89,102.31,89.92,103.42,135.0,5804.0,6111.0,48255.25,43021.0,29133.0,0.0,807.0,0.0,0.0,0.0,24316.25,4236.25,0.0,4229.0,,0.0,,145.0,0.0,711.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,15733.0,303.0,,25.0,,627.0,240.0,553.0,2422.0,421.0,1724.0,,0.0,,379.0,0.0,157.0,1.799814e6,4.89927e6,2.613447e6,3.276952e6,460.0,0.0,0.0,0.0,2725.0,1622.0,1852.0,0.0,450.0,450.0,4000.0,1200.0,2428.0,800.0,2385.0,1910.0
"""2023-12-01 20:…",1,0.21,1785.0,1785.0,142.17,141.9,142.37,142.16,139.98,7889.5,7349.0,63722.25,68116.0,37930.0,0.0,436.0,0.0,0.0,0.0,2948.25,1395.25,0.0,3540.0,,0.0,,136.0,0.0,3174.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,19515.0,1087.0,,15.0,,631.0,369.0,36.0,4662.0,507.0,2559.0,,0.0,,327.0,25.0,6739.0,1.707104e6,4.743491e6,2.883329e6,3.539528e6,0.0,142.0,1984.0,1403.0,0.0,0.0,0.0,1039.0,450.0,1200.0,4000.0,1200.0,3926.0,1800.0,2385.0,1810.0


In [306]:
df_final.to_pandas().to_csv("0_df_final_de-ch.csv", index=False)

### Converting to UTC

In [307]:
(
    pl.read_csv("./0_df_final_ch-de.csv")
    .sort("date")
    .with_columns(pl.col("date").str.to_datetime().dt.replace_time_zone("Europe/Zurich", ambiguous="earliest"))
    .with_columns(pl.col("date").is_duplicated().alias("tmp"))
    .with_columns(
        pl.when((pl.col("tmp") == True) & (pl.col("dst") == 0))
        .then(pl.col("date") + pl.duration(hours=1))
        .otherwise(pl.col("date"))
        .alias("date")
    )
    .with_columns(pl.col("date").dt.convert_time_zone("UTC"))
    .drop("tmp")
    .to_pandas()
    .to_csv("0_df_final_ch-de_UTC.csv", index=False)
)

In [308]:
(
    pl.read_csv("0_df_final_de-ch.csv")
    .sort("date")
    .with_columns(pl.col("date").str.to_datetime().dt.replace_time_zone("Europe/Zurich", ambiguous="earliest"))
    .with_columns(pl.col("date").is_duplicated().alias("tmp"))
    .with_columns(
        pl.when((pl.col("tmp") == True) & (pl.col("dst") == 0))
        .then(pl.col("date") + pl.duration(hours=1))
        .otherwise(pl.col("date"))
        .alias("date")
    )
    .with_columns(pl.col("date").dt.convert_time_zone("UTC"))
    .drop("tmp")
    .to_pandas()
    .to_csv("0_df_final_de-ch_UTC.csv", index=False)
)