# Joining the individual datasets

In [175]:
import pandas as pd
import polars as pl
import polars.selectors as cs
import numpy as np
import plotnine as pn
from mizani.formatters import comma_format, custom_format, currency_format, percent_format
from IPython.display import clear_output, display
import os
import glob
import requests
from datetime import datetime, timedelta
import pytz
from tqdm.notebook import tqdm

jama_colour = [
    "#374e55",
    "#df8f44",
    "#00a1d5",
    "#b24745",
    "#79af97",
]

pd.set_option("display.max.columns", 500)
pd.set_option("display.max.columns", 500)
background_colour = "#F2F2F2"
pn.theme_set(
    pn.theme_classic()
    + pn.theme(
        figure_size=[7, 7/1.618],
        text=pn.element_text(family="monospace"),
        plot_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        panel_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        legend_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
    )
)


%load_ext blackcellmagic
%matplotlib inline

The blackcellmagic extension is already loaded. To reload it, use:
  %reload_ext blackcellmagic


### Auction Prices

In [176]:
jao_chde = (
    pl.from_pandas(pd.read_csv("./Raw Data/jao_prices_CH-DE.csv"))
    .select(pl.exclude(["date", "corridor", "hour"]))
    .with_columns(
        pl.col("delivery_begin_time_ch")
        .str.to_datetime()
        .dt.convert_time_zone("Europe/Zurich")
        .alias("date")
    )
    .with_columns(pl.col("date").dt.date().alias("day"))
    .drop("delivery_begin_time_ch")
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "auction_price"})
)

jao_dech = (
    pl.from_pandas(pd.read_csv("./Raw Data/jao_prices_DE-CH.csv"))
    .select(pl.exclude(["date", "corridor", "hour"]))
    .with_columns(
        pl.col("delivery_begin_time_ch")
        .str.to_datetime()
        .dt.convert_time_zone("Europe/Zurich")
        .alias("date")
    )
    .with_columns(pl.col("date").dt.date().alias("day"))
    .drop("delivery_begin_time_ch")
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "auction_price"})
)

jao_chde

auction_price,allocatedCapacity,ATC,date,day,dst
f64,i64,i64,str,date,i16
0.0,4335,4336,"""2021-03-27 19:…",2021-03-27,1
0.0,4060,4070,"""2022-08-03 11:…",2022-08-03,0
0.57,3670,3670,"""2023-07-05 07:…",2023-07-05,0
12.49,3561,3561,"""2023-08-05 21:…",2023-08-05,0
0.06,4094,4095,"""2023-10-07 08:…",2023-10-07,0
…,…,…,…,…,…
0.88,3713,3714,"""2022-12-16 11:…",2022-12-16,1
0.0,4332,4335,"""2022-01-26 02:…",2022-01-26,1
0.09,3364,3365,"""2023-11-06 23:…",2023-11-06,1
0.25,4284,4284,"""2024-01-06 15:…",2024-01-06,1


In [177]:
jao_chde.filter(pl.col("date") == "2019-10-27 02:00")

auction_price,allocatedCapacity,ATC,date,day,dst
f64,i64,i64,str,date,i16
0.0,3857,3863,"""2019-10-27 02:…",2019-10-27,1
0.0,3863,3863,"""2019-10-27 02:…",2019-10-27,0


In [178]:
jao_chde["date"].value_counts(sort=True).filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:00 ┆ 2     │
│ 2022-10-30 02:00 ┆ 2     │
│ 2023-10-29 02:00 ┆ 2     │
└──────────────────┴───────┘


In [179]:
jao_dech["date"].value_counts(sort=True).filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:00 ┆ 2     │
│ 2022-10-30 02:00 ┆ 2     │
│ 2023-10-29 02:00 ┆ 2     │
└──────────────────┴───────┘


Now the observations are uniquely identifiable.

### Day-Ahead Prices 

- Aggregation: These values are already in hourly format
- Join ID: date

In [180]:
da_at = (
    pl.read_csv("./Raw Data/day_ahead_prices_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_at"})
)

da_at.pipe(print)
da_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_at ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2021-11-02 01:00 ┆ 131.95             ┆ 1   │
│ 2019-01-11 13:00 ┆ 66.01              ┆ 1   │
│ 2019-10-06 00:00 ┆ 34.22              ┆ 0   │
│ 2021-11-25 21:00 ┆ 215.08             ┆ 1   │
│ 2021-03-15 16:00 ┆ 60.99              ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2022-01-06 00:00 ┆ 131.92             ┆ 1   │
│ 2022-09-05 07:00 ┆ 506.5              ┆ 0   │
│ 2021-01-01 13:00 ┆ 45.49              ┆ 1   │
│ 2021-12-23 06:00 ┆ 391.62             ┆ 1   │
│ 2020-07-11 18:00 ┆ 30.42              ┆ 0   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [181]:
da_ch = (
    pl.read_csv("./Raw Data/day_ahead_prices_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_ch"})
)

da_ch.pipe(print)
da_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_ch ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2022-06-03 16:00 ┆ 175.97             ┆ 0   │
│ 2019-08-02 03:00 ┆ 38.16              ┆ 0   │
│ 2019-08-11 10:00 ┆ 11.72              ┆ 0   │
│ 2020-01-14 04:00 ┆ 39.98              ┆ 1   │
│ 2020-01-23 23:00 ┆ 42.63              ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2022-08-08 19:00 ┆ 456.02             ┆ 0   │
│ 2022-11-03 00:00 ┆ 115.03             ┆ 1   │
│ 2023-04-11 19:00 ┆ 152.61             ┆ 0   │
│ 2023-05-06 16:00 ┆ 71.17              ┆ 0   │
│ 2023-05-28 12:00 ┆ -24.57             ┆ 0   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [182]:
da_delu = (
    pl.read_csv("./Raw Data/day_ahead_prices_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_de"})
)

da_delu.pipe(print)
da_delu["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_de ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2019-03-11 15:00 ┆ 37.11              ┆ 1   │
│ 2020-06-17 08:00 ┆ 56.52              ┆ 0   │
│ 2023-09-29 04:00 ┆ 94.03              ┆ 0   │
│ 2021-05-11 17:00 ┆ 69.43              ┆ 0   │
│ 2023-06-08 22:00 ┆ 96.58              ┆ 0   │
│ …                ┆ …                  ┆ …   │
│ 2022-06-22 09:00 ┆ 339.79             ┆ 0   │
│ 2022-05-08 21:00 ┆ 238.35             ┆ 0   │
│ 2023-05-28 07:00 ┆ 64.84              ┆ 0   │
│ 2023-07-13 12:00 ┆ 77.3               ┆ 0   │
│ 2023-12-11 02:00 ┆ 12.72              ┆ 1   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [183]:
da_fr = (
    pl.read_csv("./Raw Data/day_ahead_prices_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_fr"})
)

da_fr.pipe(print)
da_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_fr ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2020-09-27 20:00 ┆ 50.0               ┆ 0   │
│ 2019-10-26 03:00 ┆ 18.67              ┆ 0   │
│ 2020-01-23 23:00 ┆ 42.6               ┆ 1   │
│ 2023-11-02 19:00 ┆ 96.55              ┆ 1   │
│ 2022-01-01 13:00 ┆ 96.8               ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2019-03-01 02:00 ┆ 35.78              ┆ 1   │
│ 2023-05-30 08:00 ┆ 108.64             ┆ 0   │
│ 2019-09-12 19:00 ┆ 44.27              ┆ 0   │
│ 2022-06-29 14:00 ┆ 325.0              ┆ 0   │
│ 2023-06-17 05:00 ┆ 110.0              ┆ 0   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

In [184]:
da_it = (
    pl.read_csv("./Raw Data/day_ahead_prices_IT_NORD.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"price": "day_ahead_price_ch"})
)

da_it.pipe(print)
da_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_569, 3)
┌──────────────────┬────────────────────┬─────┐
│ date             ┆ day_ahead_price_ch ┆ dst │
│ ---              ┆ ---                ┆ --- │
│ str              ┆ f64                ┆ i16 │
╞══════════════════╪════════════════════╪═════╡
│ 2023-11-22 07:00 ┆ 157.81             ┆ 1   │
│ 2019-03-01 11:00 ┆ 50.97              ┆ 1   │
│ 2019-10-27 09:00 ┆ 35.24              ┆ 1   │
│ 2021-09-18 01:00 ┆ 149.97             ┆ 0   │
│ 2022-11-24 19:00 ┆ 370.0              ┆ 1   │
│ …                ┆ …                  ┆ …   │
│ 2022-03-30 03:00 ┆ 235.92             ┆ 0   │
│ 2022-04-07 20:00 ┆ 277.5              ┆ 0   │
│ 2022-06-10 17:00 ┆ 200.59             ┆ 0   │
│ 2023-02-06 06:00 ┆ 175.64             ┆ 1   │
│ 2023-09-14 04:00 ┆ 100.0              ┆ 0   │
└──────────────────┴────────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019

### Actual Load

In [185]:
load_at = (
    pl.read_csv("./Raw Data/actual_load_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_at"})
)

load_at.pipe(print)
load_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_at ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 6075.0         ┆ 1   │
│ 2019-01-01 01:00 ┆ 5852.75        ┆ 1   │
│ 2019-01-01 02:00 ┆ 5619.25        ┆ 1   │
│ 2019-01-01 03:00 ┆ 5324.0         ┆ 1   │
│ 2019-01-01 04:00 ┆ 5273.5         ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 8433.0         ┆ 1   │
│ 2024-01-31 20:00 ┆ 7892.75        ┆ 1   │
│ 2024-01-31 21:00 ┆ 7312.0         ┆ 1   │
│ 2024-01-31 22:00 ┆ 7065.0         ┆ 1   │
│ 2024-01-31 23:00 ┆ 6610.25        ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

In [186]:
load_ch = (
    pl.read_csv("./Raw Data/actual_load_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_ch"})
)

load_ch.pipe(print)
load_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_ch ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 7037.0         ┆ 1   │
│ 2019-01-01 01:00 ┆ 7096.0         ┆ 1   │
│ 2019-01-01 02:00 ┆ 7244.0         ┆ 1   │
│ 2019-01-01 03:00 ┆ 7443.0         ┆ 1   │
│ 2019-01-01 04:00 ┆ 7353.0         ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 8055.0         ┆ 1   │
│ 2024-01-31 20:00 ┆ 7774.0         ┆ 1   │
│ 2024-01-31 21:00 ┆ 7417.0         ┆ 1   │
│ 2024-01-31 22:00 ┆ 6997.0         ┆ 1   │
│ 2024-01-31 23:00 ┆ 6821.0         ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

In [187]:
load_de = (
    pl.read_csv("./Raw Data/actual_load_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_de"})
)

load_de.pipe(print)
load_de["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_de ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 43713.5        ┆ 1   │
│ 2019-01-01 01:00 ┆ 42091.0        ┆ 1   │
│ 2019-01-01 02:00 ┆ 40537.0        ┆ 1   │
│ 2019-01-01 03:00 ┆ 39725.75       ┆ 1   │
│ 2019-01-01 04:00 ┆ 39502.0        ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 69814.25       ┆ 1   │
│ 2024-01-31 20:00 ┆ 66137.75       ┆ 1   │
│ 2024-01-31 21:00 ┆ 62751.0        ┆ 1   │
│ 2024-01-31 22:00 ┆ 60155.25       ┆ 1   │
│ 2024-01-31 23:00 ┆ 56119.75       ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

In [188]:
load_fr = (
    pl.read_csv("./Raw Data/actual_load_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_fr"})
)

load_fr.pipe(print)
load_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_518, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_fr ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 62176.0        ┆ 1   │
│ 2019-01-01 01:00 ┆ 60301.0        ┆ 1   │
│ 2019-01-01 02:00 ┆ 58540.0        ┆ 1   │
│ 2019-01-01 03:00 ┆ 55144.0        ┆ 1   │
│ 2019-01-01 04:00 ┆ 52978.0        ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 63343.0        ┆ 1   │
│ 2024-01-31 20:00 ┆ 64827.0        ┆ 1   │
│ 2024-01-31 21:00 ┆ 61646.0        ┆ 1   │
│ 2024-01-31 22:00 ┆ 58398.0        ┆ 1   │
│ 2024-01-31 23:00 ┆ 58517.0        ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (2, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2022-10-30 02:00 ┆ 2     │
│ 2023-10-29 02:00 ┆ 2     │
└───────────────

In [189]:
load_it = (
    pl.read_csv("./Raw Data/actual_load_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(pl.mean("actual_load"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename({"actual_load": "actual_load_it"})
)

load_it.pipe(print)
load_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────┬─────┐
│ date             ┆ actual_load_it ┆ dst │
│ ---              ┆ ---            ┆ --- │
│ str              ┆ f64            ┆ i16 │
╞══════════════════╪════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 23644.0        ┆ 1   │
│ 2019-01-01 01:00 ┆ 22850.0        ┆ 1   │
│ 2019-01-01 02:00 ┆ 21600.0        ┆ 1   │
│ 2019-01-01 03:00 ┆ 20255.0        ┆ 1   │
│ 2019-01-01 04:00 ┆ 19459.0        ┆ 1   │
│ …                ┆ …              ┆ …   │
│ 2024-01-31 19:00 ┆ 43670.0        ┆ 1   │
│ 2024-01-31 20:00 ┆ 41362.0        ┆ 1   │
│ 2024-01-31 21:00 ┆ 38122.0        ┆ 1   │
│ 2024-01-31 22:00 ┆ 34165.0        ┆ 1   │
│ 2024-01-31 23:00 ┆ 30578.0        ┆ 1   │
└──────────────────┴────────────────┴─────┘
shape: (5, 2)
┌──────────────────┬───────┐
│ date             ┆ count │
│ ---              ┆ ---   │
│ str              ┆ u32   │
╞══════════════════╪═══════╡
│ 2019-10-27 02:00 ┆ 2     │
│ 2020-10-25 02:00 ┆ 2     │
│ 2021-10-31 02:

### Forecast Wind and Solar

In [190]:
wind_solar_forecast_at = (
    pl.read_csv("./Raw Data/wind_solar_forecast_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_at",
            "wind_onshore_forecast": "wind_onshore_forecast_at",
        }
    )
)

wind_solar_forecast_at.pipe(print)
wind_solar_forecast_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 4)
┌──────────────────┬───────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_at ┆ wind_onshore_forecast_at ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ i16 │
╞══════════════════╪═══════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 487.0                    ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 398.0                    ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 338.0                    ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 309.0                    ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0               ┆ 320.0                    ┆ 1   │
│ …                ┆ …                 ┆ …                        ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0               ┆ 131.0                    ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0               ┆ 152.0                    ┆ 1   │
│ 2024-01-31 21:00 

In [191]:
wind_solar_forecast_ch = (
    pl.read_csv("./Raw Data/wind_solar_forecast_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_ch",
            "wind_onshore_forecast": "wind_onshore_forecast_ch",
        }
    )
)

wind_solar_forecast_ch.pipe(print)
wind_solar_forecast_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 4)
┌──────────────────┬───────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_ch ┆ wind_onshore_forecast_ch ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ i16 │
╞══════════════════╪═══════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0               ┆ 2.0                      ┆ 1   │
│ …                ┆ …                 ┆ …                        ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0               ┆ 0.0                      ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0               ┆ 0.0                      ┆ 1   │
│ 2024-01-31 21:00 

In [192]:
wind_solar_forecast_de = (
    pl.read_csv("./Raw Data/wind_solar_forecast_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
        pl.mean("wind_offshore_forecast")
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_de",
            "wind_offshore_forecast": "wind_offshore_forecast_de",
            "wind_onshore_forecast": "wind_onshore_forecast_de",
        }
    )
)

wind_solar_forecast_de.pipe(print)
wind_solar_forecast_de["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_546, 5)
┌──────────────────┬───────────────────┬──────────────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_de ┆ wind_onshore_forecast_de ┆ wind_offshore_forecast_d ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ e                        ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ ---                      ┆ i16 │
│                  ┆                   ┆                          ┆ f64                      ┆     │
╞══════════════════╪═══════════════════╪══════════════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 18879.75                 ┆ 5069.0                   ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 20626.5                  ┆ 5042.25                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 22355.5                  ┆ 5028.5                   ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 24032.5                  ┆ 4977

In [193]:
wind_solar_forecast_fr = (
    pl.read_csv("./Raw Data/wind_solar_forecast_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
        pl.mean("wind_offshore_forecast")
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_fr",
            "wind_offshore_forecast": "wind_offshore_forecast_fr",
            "wind_onshore_forecast": "wind_onshore_forecast_fr",
        }
    )
)

wind_solar_forecast_fr.pipe(print)
wind_solar_forecast_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_344, 5)
┌──────────────────┬───────────────────┬──────────────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_fr ┆ wind_onshore_forecast_fr ┆ wind_offshore_forecast_f ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ r                        ┆ --- │
│ str              ┆ f64               ┆ f64                      ┆ ---                      ┆ i16 │
│                  ┆                   ┆                          ┆ str                      ┆     │
╞══════════════════╪═══════════════════╪══════════════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ 1698.0                   ┆ null                     ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ 1680.0                   ┆ null                     ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ 1675.0                   ┆ null                     ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ 1670.0                   ┆ null

In [194]:
wind_solar_forecast_it = (
    pl.read_csv("./Raw Data/wind_solar_forecast_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h"))
    .group_by("date")
    .agg(
        pl.mean("solar_forecast"),
        pl.mean("wind_onshore_forecast"),
    )
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
    .rename(
        {
            "solar_forecast": "solar_forecast_it",
            "wind_onshore_forecast": "wind_onshore_forecast_it",
        }
    )
)

wind_solar_forecast_it.pipe(print)
wind_solar_forecast_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (43_314, 4)
┌──────────────────┬───────────────────┬──────────────────────────┬─────┐
│ date             ┆ solar_forecast_it ┆ wind_onshore_forecast_it ┆ dst │
│ ---              ┆ ---               ┆ ---                      ┆ --- │
│ str              ┆ f64               ┆ str                      ┆ i16 │
╞══════════════════╪═══════════════════╪══════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ …                ┆ …                 ┆ …                        ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0               ┆ null                     ┆ 1   │
│ 2024-01-31 21:00 

### Generation Actuals (All Types)

#### Austria

In [195]:
generation_at = pl.read_csv("./Raw Data/generation_actual_all_AT.csv", has_header=False)
generation_at.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26,column_27
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Biomass""","""Fossil Gas""","""Fossil Gas""","""Fossil Hard co…","""Fossil Hard co…","""Fossil Oil""","""Fossil Oil""","""Geothermal""","""Geothermal""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Run-of-r…","""Hydro Water Re…","""Hydro Water Re…","""Other""","""Other""","""Other renewabl…","""Other renewabl…","""Solar""","""Solar""","""Waste""","""Waste""","""Wind Onshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…"
"""2019-01-01 00:…","""304.0""","""0.0""","""1339.0""","""0.0""","""154.0""","""0.0""","""0.0""","""0.0""","""0.0""","""0.0""","""0.0""","""1577.0""","""2699.0""","""0.0""","""140.0""","""0.0""","""22.0""","""0.0""","""0.0""","""0.0""","""0.0""","""0.0""","""100.0""","""0.0""","""288.0""","""0.0"""


In [196]:
column_names = (
    generation_at.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("AT")], separator=" ").str.to_lowercase().str.replace_all(" ", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_at',
 'biomass_actual_consumption_at',
 'fossil_gas_actual_aggregated_at',
 'fossil_gas_actual_consumption_at',
 'fossil_hard_coal_actual_aggregated_at',
 'fossil_hard_coal_actual_consumption_at',
 'fossil_oil_actual_aggregated_at',
 'fossil_oil_actual_consumption_at',
 'geothermal_actual_aggregated_at',
 'geothermal_actual_consumption_at',
 'hydro_pumped_storage_actual_aggregated_at',
 'hydro_pumped_storage_actual_consumption_at',
 'hydro_run-of-river_and_poundage_actual_aggregated_at',
 'hydro_run-of-river_and_poundage_actual_consumption_at',
 'hydro_water_reservoir_actual_aggregated_at',
 'hydro_water_reservoir_actual_consumption_at',
 'other_actual_aggregated_at',
 'other_actual_consumption_at',
 'other_renewable_actual_aggregated_at',
 'other_renewable_actual_consumption_at',
 'solar_actual_aggregated_at',
 'solar_actual_consumption_at',
 'waste_actual_aggregated_at',
 'waste_actual_consumption_at',
 'wind_onshore_actual_aggregated_at',
 'wind_

In [197]:
generation_at = (
    generation_at.tail(-2)
    .rename(dict(zip(generation_at.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    .group_by("date")
    .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [198]:
generation_at.pipe(print)
generation_at["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 28)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ biomass_ac ┆ fossil_gas ┆ … ┆ waste_actu ┆ wind_onsh ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ tual_consu ┆ _actual_ag ┆   ┆ al_consump ┆ ore_actua ┆ ore_actua ┆ --- │
│ str        ┆ gated_at   ┆ mption_at  ┆ gregated_a ┆   ┆ tion_at    ┆ l_aggrega ┆ l_consump ┆ i16 │
│            ┆ ---        ┆ ---        ┆ t          ┆   ┆ ---        ┆ ted_a…    ┆ tion_…    ┆     │
│            ┆ f64        ┆ f64        ┆ ---        ┆   ┆ f64        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆            ┆ f64        ┆   ┆            ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 304.0      ┆ 0.0        ┆ 1282.75    ┆ … ┆ 0.0        ┆ 248.0     ┆ 0.0       ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

#### Switzerland

In [199]:
generation_ch = pl.read_csv("./Raw Data/generation_actual_all_CH.csv", has_header=False)
generation_ch.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7
str,str,str,str,str,str,str
"""date""","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Nuclear""","""Solar""","""Wind Onshore"""
"""2019-01-01 00:…","""81.0""","""123.0""","""481.0""","""3243.0""","""0.0""","""1.0"""
"""2019-01-01 01:…","""76.0""","""124.0""","""393.0""","""3243.0""","""0.0""","""0.0"""


In [200]:
column_names = (
    generation_ch.head(1)
    .transpose()
    .with_columns(pl.col("column_0").str.strip_chars())
    .with_columns(pl.col("column_0").str.to_lowercase().str.replace_all(" ", "_"))
    .with_columns(pl.concat_str([pl.col("column_0"), pl.lit("_ch")], separator=""))[
        "column_0"
    ]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'hydro_pumped_storage_ch',
 'hydro_run-of-river_and_poundage_ch',
 'hydro_water_reservoir_ch',
 'nuclear_ch',
 'solar_ch',
 'wind_onshore_ch']

In [201]:
generation_ch = (
    generation_ch.tail(-1)
    .rename(dict(zip(generation_ch.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [202]:
generation_ch.pipe(print)
generation_ch["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 8)
┌─────────────┬─────────────┬─────────────┬─────────────┬────────────┬──────────┬────────────┬─────┐
│ date        ┆ hydro_pumpe ┆ hydro_run-o ┆ hydro_water ┆ nuclear_ch ┆ solar_ch ┆ wind_onsho ┆ dst │
│ ---         ┆ d_storage_c ┆ f-river_and ┆ _reservoir_ ┆ ---        ┆ ---      ┆ re_ch      ┆ --- │
│ str         ┆ h           ┆ _poundage_… ┆ ch          ┆ f64        ┆ f64      ┆ ---        ┆ i16 │
│             ┆ ---         ┆ ---         ┆ ---         ┆            ┆          ┆ f64        ┆     │
│             ┆ f64         ┆ f64         ┆ f64         ┆            ┆          ┆            ┆     │
╞═════════════╪═════════════╪═════════════╪═════════════╪════════════╪══════════╪════════════╪═════╡
│ 2019-01-01  ┆ 81.0        ┆ 123.0       ┆ 481.0       ┆ 3243.0     ┆ 0.0      ┆ 1.0        ┆ 1   │
│ 00:00       ┆             ┆             ┆             ┆            ┆          ┆            ┆     │
│ 2019-01-01  ┆ 76.0        ┆ 124.0       ┆ 393.0       ┆ 3243.0     ┆ 0

#### Germany

In [203]:
generation_de = pl.read_csv("./Raw Data/generation_actual_all_DE_LU.csv", has_header=False)
generation_de.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Fossil Brown c…","""Fossil Coal-de…","""Fossil Gas""","""Fossil Gas""","""Fossil Hard co…","""Fossil Oil""","""Fossil Oil""","""Geothermal""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Hydro Water Re…","""Nuclear""","""Nuclear""","""Other""","""Other renewabl…","""Other renewabl…","""Solar""","""Solar""","""Waste""","""Wind Offshore""","""Wind Onshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…"
"""2019-01-01 00:…","""4812.0""","""6932.0""","""273.0""","""3410.0""","""1.0""","""3345.0""","""482.0""",,"""19.0""","""350.0""","""1827.0""","""1485.0""","""132.0""",,"""9002.0""",,"""475.0""","""107.0""",,"""0.0""",,"""783.0""","""3177.0""","""19366.0""",


In [204]:
column_names = (
    generation_de.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("DE")], separator=" ").str.to_lowercase().str.replace_all(" ", "_").str.replace_all("/", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_de',
 'fossil_brown_coal_lignite_actual_aggregated_de',
 'fossil_coal-derived_gas_actual_aggregated_de',
 'fossil_gas_actual_aggregated_de',
 'fossil_gas_actual_consumption_de',
 'fossil_hard_coal_actual_aggregated_de',
 'fossil_oil_actual_aggregated_de',
 'fossil_oil_actual_consumption_de',
 'geothermal_actual_aggregated_de',
 'hydro_pumped_storage_actual_aggregated_de',
 'hydro_pumped_storage_actual_consumption_de',
 'hydro_run-of-river_and_poundage_actual_aggregated_de',
 'hydro_water_reservoir_actual_aggregated_de',
 'hydro_water_reservoir_actual_consumption_de',
 'nuclear_actual_aggregated_de',
 'nuclear_actual_consumption_de',
 'other_actual_aggregated_de',
 'other_renewable_actual_aggregated_de',
 'other_renewable_actual_consumption_de',
 'solar_actual_aggregated_de',
 'solar_actual_consumption_de',
 'waste_actual_aggregated_de',
 'wind_offshore_actual_aggregated_de',
 'wind_onshore_actual_aggregated_de',
 'wind_onshore_actual_consumption_de'

In [205]:
generation_de = (
    generation_de.tail(-2)
    .rename(dict(zip(generation_de.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    .group_by("date")
    .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [206]:
generation_de.pipe(print)
generation_de["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_568, 27)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ fossil_bro ┆ fossil_coa ┆ … ┆ wind_offsh ┆ wind_onsh ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ wn_coal_li ┆ l-derived_ ┆   ┆ ore_actual ┆ ore_actua ┆ ore_actua ┆ --- │
│ str        ┆ gated_de   ┆ gnite_actu ┆ gas_actual ┆   ┆ _aggregate ┆ l_aggrega ┆ l_consump ┆ i16 │
│            ┆ ---        ┆ al…        ┆ _a…        ┆   ┆ d_…        ┆ ted_d…    ┆ tion_…    ┆     │
│            ┆ f64        ┆ ---        ┆ ---        ┆   ┆ ---        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆ f64        ┆ f64        ┆   ┆ f64        ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 4831.25    ┆ 6335.0     ┆ 429.0      ┆ … ┆ 3134.0     ┆ 20401.5   ┆ null      ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

#### France

In [207]:
generation_fr = pl.read_csv("./Raw Data/generation_actual_all_FR.csv", has_header=False)
generation_fr.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Fossil Gas""","""Fossil Hard co…","""Fossil Hard co…","""Fossil Oil""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Hydro Water Re…","""Nuclear""","""Solar""","""Waste""","""Wind Offshore""","""Wind Offshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…"
"""2019-01-01 00:…","""351.0""","""2722.0""","""0.0""",,"""207.0""",,"""1377.0""","""3552.0""","""1054.0""",,"""55627.0""","""0.0""","""253.0""",,,"""1622.0"""


In [208]:
column_names = (
    generation_fr.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("FR")], separator=" ").str.to_lowercase().str.replace_all(" ", "_").str.replace_all("/", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_fr',
 'fossil_gas_actual_aggregated_fr',
 'fossil_hard_coal_actual_aggregated_fr',
 'fossil_hard_coal_actual_consumption_fr',
 'fossil_oil_actual_aggregated_fr',
 'hydro_pumped_storage_actual_aggregated_fr',
 'hydro_pumped_storage_actual_consumption_fr',
 'hydro_run-of-river_and_poundage_actual_aggregated_fr',
 'hydro_water_reservoir_actual_aggregated_fr',
 'hydro_water_reservoir_actual_consumption_fr',
 'nuclear_actual_aggregated_fr',
 'solar_actual_aggregated_fr',
 'waste_actual_aggregated_fr',
 'wind_offshore_actual_aggregated_fr',
 'wind_offshore_actual_consumption_fr',
 'wind_onshore_actual_aggregated_fr']

In [209]:
generation_fr = (
    generation_fr.tail(-2)
    .rename(dict(zip(generation_fr.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    # .group_by("date")
    # .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [210]:
generation_fr.pipe(print)
generation_fr["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_555, 18)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ fossil_gas ┆ fossil_har ┆ … ┆ wind_offsh ┆ wind_offs ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ _actual_ag ┆ d_coal_act ┆   ┆ ore_actual ┆ hore_actu ┆ ore_actua ┆ --- │
│ str        ┆ gated_fr   ┆ gregated_f ┆ ual_aggreg ┆   ┆ _aggregate ┆ al_consum ┆ l_aggrega ┆ i16 │
│            ┆ ---        ┆ r          ┆ at…        ┆   ┆ d_…        ┆ ption…    ┆ ted_f…    ┆     │
│            ┆ f64        ┆ ---        ┆ ---        ┆   ┆ ---        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆ f64        ┆ f64        ┆   ┆ f64        ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 351.0      ┆ 2722.0     ┆ 0.0        ┆ … ┆ null       ┆ null      ┆ 1622.0    ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

#### Italy

In [211]:
generation_it = pl.read_csv("./Raw Data/generation_actual_all_IT.csv", has_header=False)
generation_it.head(3)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""date""","""Biomass""","""Fossil Coal-de…","""Fossil Gas""","""Fossil Hard co…","""Fossil Hard co…","""Fossil Oil""","""Fossil Oil""","""Geothermal""","""Hydro Pumped S…","""Hydro Pumped S…","""Hydro Run-of-r…","""Hydro Water Re…","""Other""","""Other""","""Solar""","""Solar""","""Waste""","""Wind Offshore""","""Wind Onshore"""
,"""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Consump…","""Actual Aggrega…","""Actual Aggrega…","""Actual Aggrega…"
"""2019-01-01 00:…","""495.0""","""776.0""","""8053.0""","""1938.0""",,"""8.0""",,"""674.0""","""1.0""","""29.0""","""1891.0""","""289.0""","""3070.0""",,"""0.0""",,"""37.0""",,"""5076.0"""


In [212]:
column_names = (
    generation_it.head(2)
    .transpose()
    .with_columns(pl.col("column_1").fill_null(value=""))
    .with_columns(
        pl.concat_str([pl.col("column_0"), pl.col("column_1")], separator=" ")
        .str.strip_chars()
        .alias("header")
    )
    .with_columns(pl.concat_str(["header", pl.lit("IT")], separator=" ").str.to_lowercase().str.replace_all(" ", "_").str.replace_all("/", "_"))
    ["header"]
    .to_list()
)

column_names[0] = "date"

column_names

['date',
 'biomass_actual_aggregated_it',
 'fossil_coal-derived_gas_actual_aggregated_it',
 'fossil_gas_actual_aggregated_it',
 'fossil_hard_coal_actual_aggregated_it',
 'fossil_hard_coal_actual_consumption_it',
 'fossil_oil_actual_aggregated_it',
 'fossil_oil_actual_consumption_it',
 'geothermal_actual_aggregated_it',
 'hydro_pumped_storage_actual_aggregated_it',
 'hydro_pumped_storage_actual_consumption_it',
 'hydro_run-of-river_and_poundage_actual_aggregated_it',
 'hydro_water_reservoir_actual_aggregated_it',
 'other_actual_aggregated_it',
 'other_actual_consumption_it',
 'solar_actual_aggregated_it',
 'solar_actual_consumption_it',
 'waste_actual_aggregated_it',
 'wind_offshore_actual_aggregated_it',
 'wind_onshore_actual_aggregated_it']

In [213]:
generation_it = (
    generation_it.tail(-2)
    .rename(dict(zip(generation_it.columns, column_names)))
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1h")
    )
    .with_columns(cs.string().cast(pl.Float64))
    # .group_by("date")
    # .agg(pl.all().mean())
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .sort("date")
)

In [214]:
generation_it.pipe(print)
generation_it["date"].value_counts().filter(pl.col("count") != 1).sort("date").pipe(print)

shape: (44_563, 21)
┌────────────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬─────┐
│ date       ┆ biomass_ac ┆ fossil_coa ┆ fossil_gas ┆ … ┆ waste_actu ┆ wind_offs ┆ wind_onsh ┆ dst │
│ ---        ┆ tual_aggre ┆ l-derived_ ┆ _actual_ag ┆   ┆ al_aggrega ┆ hore_actu ┆ ore_actua ┆ --- │
│ str        ┆ gated_it   ┆ gas_actual ┆ gregated_i ┆   ┆ ted_it     ┆ al_aggreg ┆ l_aggrega ┆ i16 │
│            ┆ ---        ┆ _a…        ┆ t          ┆   ┆ ---        ┆ ated_…    ┆ ted_i…    ┆     │
│            ┆ f64        ┆ ---        ┆ ---        ┆   ┆ f64        ┆ ---       ┆ ---       ┆     │
│            ┆            ┆ f64        ┆ f64        ┆   ┆            ┆ f64       ┆ f64       ┆     │
╞════════════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═════╡
│ 2019-01-01 ┆ 495.0      ┆ 776.0      ┆ 8053.0     ┆ … ┆ 37.0       ┆ null      ┆ 5076.0    ┆ 1   │
│ 00:00      ┆            ┆            ┆            ┆   ┆            ┆ 

### Water Reservoirs

In [215]:
hydro_storage_at = (
    pl.read_csv("./Raw Data/hydro_reservoir_storage_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_at", "date": "week_start"})
)

hydro_storage_at.pipe(print)

shape: (265, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_at │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 1.412194e6                 │
│ 2019-01-07 ┆ 1.372937e6                 │
│ 2019-01-14 ┆ 1.326312e6                 │
│ 2019-01-21 ┆ 1.176602e6                 │
│ 2019-01-28 ┆ 1.077808e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 1.543144e6                 │
│ 2024-01-08 ┆ 1.423063e6                 │
│ 2024-01-15 ┆ 1.26075e6                  │
│ 2024-01-22 ┆ 1.157852e6                 │
│ 2024-01-29 ┆ 1.073592e6                 │
└────────────┴────────────────────────────┘


In [216]:
hydro_storage_ch = (
    pl.read_csv("./Raw Data/hydro_reservoir_storage_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_ch", "date": "week_start"})
)

hydro_storage_ch.pipe(print)

shape: (265, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_ch │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 4.656491e6                 │
│ 2019-01-07 ┆ 4.420007e6                 │
│ 2019-01-14 ┆ 4.154307e6                 │
│ 2019-01-21 ┆ 3.693327e6                 │
│ 2019-01-28 ┆ 3.288999e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 4.253319e6                 │
│ 2024-01-08 ┆ 4.031253e6                 │
│ 2024-01-15 ┆ 2.996454e6                 │
│ 2024-01-22 ┆ 3.326345e6                 │
│ 2024-01-29 ┆ 2.47213e6                  │
└────────────┴────────────────────────────┘


In [217]:
hydro_storage_fr = (
    pl.read_csv("./Raw Data/hydro_reservoir_storage_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_fr", "date": "week_start"})
)

hydro_storage_fr.pipe(print)

shape: (263, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_fr │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 2.186488e6                 │
│ 2019-01-07 ┆ 2.055622e6                 │
│ 2019-01-14 ┆ 1.931241e6                 │
│ 2019-01-21 ┆ 1.750281e6                 │
│ 2019-01-28 ┆ 1.591274e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 2.795844e6                 │
│ 2024-01-08 ┆ 2.368286e6                 │
│ 2024-01-15 ┆ 2.201546e6                 │
│ 2024-01-22 ┆ 2.146234e6                 │
│ 2024-01-29 ┆ 2.010901e6                 │
└────────────┴────────────────────────────┘


In [218]:
hydro_storage_it = (
    pl.read_csv("./Raw Data/hydro_reservoir_storage_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(
        pl.col("date").dt.convert_time_zone("Europe/Zurich").dt.truncate("1w")
    )
    .sort("date")
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d"))
    .unique()
    .sort("date")
    .rename({"hydro_reservoir_storage_mwh": "hydro_reservoir_storage_it", "date": "week_start"})
)

hydro_storage_it.pipe(print)

shape: (264, 2)
┌────────────┬────────────────────────────┐
│ week_start ┆ hydro_reservoir_storage_it │
│ ---        ┆ ---                        │
│ str        ┆ f64                        │
╞════════════╪════════════════════════════╡
│ 2018-12-31 ┆ 3.036299e6                 │
│ 2019-01-07 ┆ 2.704739e6                 │
│ 2019-01-14 ┆ 2.593794e6                 │
│ 2019-01-21 ┆ 2.49019e6                  │
│ 2019-01-28 ┆ 2.473671e6                 │
│ …          ┆ …                          │
│ 2023-12-25 ┆ 3.280575e6                 │
│ 2024-01-08 ┆ 3.053817e6                 │
│ 2024-01-15 ┆ 2.926584e6                 │
│ 2024-01-22 ┆ 2.778409e6                 │
│ 2024-01-29 ┆ 2.619505e6                 │
└────────────┴────────────────────────────┘


### Crossborder Flows

In [219]:
filenames = [filename for filename in os.listdir("./Raw Data/") if "physical_flow" in filename]
filenames

['crossborder_physical_flow_mw_AT_CH.csv',
 'crossborder_physical_flow_mw_CH_AT.csv',
 'crossborder_physical_flow_mw_CH_DE_LU.csv',
 'crossborder_physical_flow_mw_CH_FR.csv',
 'crossborder_physical_flow_mw_CH_IT.csv',
 'crossborder_physical_flow_mw_DE_LU_CH.csv',
 'crossborder_physical_flow_mw_FR_CH.csv',
 'crossborder_physical_flow_mw_IT_CH.csv']

In [220]:
flow_at_ch = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_AT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_at_ch"})
    .sort("date")
)

flow_at_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_at_ch ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 763.0                         ┆ 1   │
│ 2019-01-01 01:00 ┆ 497.0                         ┆ 1   │
│ 2019-01-01 02:00 ┆ 427.0                         ┆ 1   │
│ 2019-01-01 03:00 ┆ 452.0                         ┆ 1   │
│ 2019-01-01 04:00 ┆ 603.0                         ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 194.0                         ┆ 1   │
│ 2024-01-31 20:00 ┆ 504.0                         ┆ 1   │
│ 2024-01-31 21:00 ┆ 678.0                         ┆ 1   │
│ 2024-01-31 22:00 ┆ 726.0                         ┆ 1   │
│ 2024-01-31 23:00 ┆ 952.0                         ┆ 1   │
└──────────────────┴─────────────────

In [221]:
flow_ch_at = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_CH_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_at"})
    .sort("date")
)

flow_ch_at.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_at ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0                           ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 21:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 22:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                           ┆ 1   │
└──────────────────┴─────────────────

In [222]:
flow_ch_de_lu = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_CH_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_de_lu"})
    .sort("date")
)

flow_ch_de_lu.pipe(print)

shape: (110_368, 3)
┌──────────────────┬──────────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_de_lu ┆ dst │
│ ---              ┆ ---                              ┆ --- │
│ str              ┆ f64                              ┆ i16 │
╞══════════════════╪══════════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 595.0                            ┆ 1   │
│ 2019-01-01 01:00 ┆ 502.0                            ┆ 1   │
│ 2019-01-01 02:00 ┆ 512.0                            ┆ 1   │
│ 2019-01-01 03:00 ┆ 544.0                            ┆ 1   │
│ 2019-01-01 04:00 ┆ 451.0                            ┆ 1   │
│ …                ┆ …                                ┆ …   │
│ 2024-01-31 22:45 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:15 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:30 ┆ 0.0                              ┆ 1   │
│ 2024-01-31 23:45 ┆ 0.0                          

In [223]:
flow_ch_fr = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_CH_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_fr"})
    .sort("date")
)

flow_ch_fr.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_fr ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 553.0                         ┆ 1   │
│ 2019-01-01 01:00 ┆ 233.0                         ┆ 1   │
│ 2019-01-01 02:00 ┆ 280.0                         ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 56.0                          ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 21:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 22:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                           ┆ 1   │
└──────────────────┴─────────────────

In [224]:
flow_ch_it = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_CH_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_ch_it"})
    .sort("date")
)

flow_ch_it.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_ch_it ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1468.0                        ┆ 1   │
│ 2019-01-01 01:00 ┆ 1162.0                        ┆ 1   │
│ 2019-01-01 02:00 ┆ 931.0                         ┆ 1   │
│ 2019-01-01 03:00 ┆ 1303.0                        ┆ 1   │
│ 2019-01-01 04:00 ┆ 1225.0                        ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 4691.0                        ┆ 1   │
│ 2024-01-31 20:00 ┆ 4682.0                        ┆ 1   │
│ 2024-01-31 21:00 ┆ 4496.0                        ┆ 1   │
│ 2024-01-31 22:00 ┆ 4545.0                        ┆ 1   │
│ 2024-01-31 23:00 ┆ 4645.0                        ┆ 1   │
└──────────────────┴─────────────────

In [225]:
flow_de_lu_ch = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_DE_LU_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_de_lu_ch"})
    .sort("date")
)

flow_de_lu_ch.pipe(print)

shape: (110_368, 3)
┌──────────────────┬──────────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_de_lu_ch ┆ dst │
│ ---              ┆ ---                              ┆ --- │
│ str              ┆ f64                              ┆ i16 │
╞══════════════════╪══════════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 3652.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 3536.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 3677.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 3714.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 3427.0                           ┆ 1   │
│ …                ┆ …                                ┆ …   │
│ 2024-01-31 22:45 ┆ 2845.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 3305.0                           ┆ 1   │
│ 2024-01-31 23:15 ┆ 3449.0                           ┆ 1   │
│ 2024-01-31 23:30 ┆ 3209.0                           ┆ 1   │
│ 2024-01-31 23:45 ┆ 3010.0                       

In [226]:
flow_fr_ch = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_FR_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_fr_ch"})
    .sort("date")
)

flow_fr_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_fr_ch ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 115.0                         ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0                           ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 1139.0                        ┆ 1   │
│ 2024-01-31 20:00 ┆ 1154.0                        ┆ 1   │
│ 2024-01-31 21:00 ┆ 1546.0                        ┆ 1   │
│ 2024-01-31 22:00 ┆ 1693.0                        ┆ 1   │
│ 2024-01-31 23:00 ┆ 1905.0                        ┆ 1   │
└──────────────────┴─────────────────

In [227]:
flow_it_ch = (
    pl.read_csv("./Raw Data/crossborder_physical_flow_mw_IT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_physical_flow_mw": "crossborder_actual_flow_it_ch"})
    .sort("date")
)

flow_it_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬───────────────────────────────┬─────┐
│ date             ┆ crossborder_actual_flow_it_ch ┆ dst │
│ ---              ┆ ---                           ┆ --- │
│ str              ┆ f64                           ┆ i16 │
╞══════════════════╪═══════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 01:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 02:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 03:00 ┆ 0.0                           ┆ 1   │
│ 2019-01-01 04:00 ┆ 0.0                           ┆ 1   │
│ …                ┆ …                             ┆ …   │
│ 2024-01-31 19:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 20:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 21:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 22:00 ┆ 0.0                           ┆ 1   │
│ 2024-01-31 23:00 ┆ 0.0                           ┆ 1   │
└──────────────────┴─────────────────

### Cross Border Capacity Day Ahead Forecast

In [228]:
capacity_forecast_at_ch = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_AT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_at_ch"})
    .sort("date")
)

capacity_forecast_at_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_at_ch ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 1200.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1200.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [229]:
capacity_forecast_ch_at = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_CH_AT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_at"})
    .sort("date")
)

capacity_forecast_ch_at.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_at ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 01:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 02:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 03:00 ┆ 700.0                   ┆ 1   │
│ 2019-01-01 04:00 ┆ 700.0                   ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1200.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1200.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [230]:
capacity_forecast_ch_de_lu = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_CH_DE_LU.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_de_lu"})
    .sort("date")
)

capacity_forecast_ch_de_lu.pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_de_lu ┆ dst │
│ ---              ┆ ---                        ┆ --- │
│ str              ┆ f64                        ┆ i16 │
╞══════════════════╪════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 01:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 02:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 03:00 ┆ 4000.0                     ┆ 1   │
│ 2019-01-01 04:00 ┆ 4000.0                     ┆ 1   │
│ …                ┆ …                          ┆ …   │
│ 2024-01-31 19:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 20:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 21:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 22:00 ┆ 4000.0                     ┆ 1   │
│ 2024-01-31 23:00 ┆ 4000.0                     ┆ 1   │
└──────────────────┴────────────────────────────┴─────┘


In [231]:
capacity_forecast_ch_fr = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_CH_FR.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_fr"})
    .sort("date")
)

capacity_forecast_ch_fr.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_fr ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 1200.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 1200.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1300.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1300.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [232]:
capacity_forecast_ch_it = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_CH_IT.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_ch_it"})
    .sort("date")
)

capacity_forecast_ch_it.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_ch_it ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 2513.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 2513.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 4322.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 4322.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 4137.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 4137.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 3620.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [233]:
capacity_forecast_de_lu_ch = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_DE_LU_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_de_lu_ch"})
    .sort("date")
)

capacity_forecast_de_lu_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬────────────────────────────┬─────┐
│ date             ┆ capacity_forecast_de_lu_ch ┆ dst │
│ ---              ┆ ---                        ┆ --- │
│ str              ┆ f64                        ┆ i16 │
╞══════════════════╪════════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 01:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 02:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 03:00 ┆ 800.0                      ┆ 1   │
│ 2019-01-01 04:00 ┆ 800.0                      ┆ 1   │
│ …                ┆ …                          ┆ …   │
│ 2024-01-31 19:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 20:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 21:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 22:00 ┆ 800.0                      ┆ 1   │
│ 2024-01-31 23:00 ┆ 800.0                      ┆ 1   │
└──────────────────┴────────────────────────────┴─────┘


In [234]:
capacity_forecast_fr_ch = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_FR_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_FR_CH"})
    .sort("date")
)

capacity_forecast_fr_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_FR_CH ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 3000.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 3000.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 3200.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 3200.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


In [235]:
capacity_forecast_it_ch = (
    pl.read_csv("./Raw Data/crossborder_capacity_forecast_IT_CH.csv")
    .with_columns(pl.col("date").str.to_datetime())
    .with_columns(pl.col("date").dt.convert_time_zone("Europe/Zurich"))
    .with_columns((pl.col("date").dt.dst_offset() == 0).cast(pl.Int16).alias("dst"))
    .with_columns(pl.col("date").dt.strftime("%Y-%m-%d %H:%M"))
    .unique(subset=["date", "dst"])
    .rename({"crossborder_capacity_forecast_day_ahead_mw": "capacity_forecast_it_ch"})
    .sort("date")
)

capacity_forecast_it_ch.pipe(print)

shape: (44_563, 3)
┌──────────────────┬─────────────────────────┬─────┐
│ date             ┆ capacity_forecast_it_ch ┆ dst │
│ ---              ┆ ---                     ┆ --- │
│ str              ┆ f64                     ┆ i16 │
╞══════════════════╪═════════════════════════╪═════╡
│ 2019-01-01 00:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 01:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 02:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 03:00 ┆ 1910.0                  ┆ 1   │
│ 2019-01-01 04:00 ┆ 1910.0                  ┆ 1   │
│ …                ┆ …                       ┆ …   │
│ 2024-01-31 19:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 20:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 21:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 22:00 ┆ 1810.0                  ┆ 1   │
│ 2024-01-31 23:00 ┆ 1910.0                  ┆ 1   │
└──────────────────┴─────────────────────────┴─────┘


### Joining everything

In [236]:
df_final = (
    jao_chde.drop("day")
    .with_columns(
        pl.col("date")
        .str.to_datetime()
        .dt.truncate("1w")
        .dt.date()
        .dt.strftime("%Y-%m-%d")
        .alias("week_start")
    )
    .select(["date", "week_start", "dst", "auction_price", "allocatedCapacity", "ATC"])
    # Day Ahead Prices
    .join(da_at, how="left", on=["date", "dst"])
    .join(da_ch, how="left", on=["date", "dst"])
    .join(da_delu, how="left", on=["date", "dst"])
    .join(da_fr, how="left", on=["date", "dst"])
    .join(da_it, how="left", on=["date", "dst"])
    # Actual Load
    .join(load_at, how="left", on=["date", "dst"])
    .join(load_ch, how="left", on=["date", "dst"])
    .join(load_de, how="left", on=["date", "dst"])
    .join(load_fr, how="left", on=["date", "dst"])
    .join(load_it, how="left", on=["date", "dst"])
    # Forecast Wind and Solar
    .join(wind_solar_forecast_at, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_ch, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_de, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_fr, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_it, how="left", on=["date", "dst"])
    # Actual Generation All Types
    .join(generation_at, how="left", on=["date", "dst"])
    .join(generation_ch, how="left", on=["date", "dst"])
    .join(generation_de, how="left", on=["date", "dst"])
    .join(generation_fr, how="left", on=["date", "dst"])
    .join(generation_it, how="left", on=["date", "dst"])
    # Hydro Reservoir Storage
    .join(hydro_storage_at, how="left", on=["week_start"])
    .join(hydro_storage_ch, how="left", on=["week_start"])
    .join(hydro_storage_fr, how="left", on=["week_start"])
    .join(hydro_storage_it, how="left", on=["week_start"])
    # Cross Border Physical Flow
    .join(flow_at_ch, how="left", on=["date", "dst"])
    .join(flow_ch_at, how="left", on=["date", "dst"])
    .join(flow_ch_de_lu, how="left", on=["date", "dst"])
    .join(flow_ch_fr, how="left", on=["date", "dst"])
    .join(flow_ch_it, how="left", on=["date", "dst"])
    .join(flow_de_lu_ch, how="left", on=["date", "dst"])
    .join(flow_fr_ch, how="left", on=["date", "dst"])
    .join(flow_it_ch, how="left", on=["date", "dst"])
    # Cross Border Capacities Forecast
    .join(capacity_forecast_at_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_at, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_de_lu, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_fr, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_it, how="left", on=["date", "dst"])
    .join(capacity_forecast_de_lu_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_fr_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_it_ch, how="left", on=["date", "dst"])
    # Drop
    .drop(["week_start"])
)

df_final

date,dst,auction_price,allocatedCapacity,ATC,day_ahead_price_at,day_ahead_price_ch,day_ahead_price_de,day_ahead_price_fr,day_ahead_price_ch_right,actual_load_at,actual_load_ch,actual_load_de,actual_load_fr,actual_load_it,solar_forecast_at,wind_onshore_forecast_at,solar_forecast_ch,wind_onshore_forecast_ch,solar_forecast_de,wind_onshore_forecast_de,wind_offshore_forecast_de,solar_forecast_fr,wind_onshore_forecast_fr,wind_offshore_forecast_fr,solar_forecast_it,wind_onshore_forecast_it,biomass_actual_aggregated_at,biomass_actual_consumption_at,fossil_gas_actual_aggregated_at,fossil_gas_actual_consumption_at,fossil_hard_coal_actual_aggregated_at,fossil_hard_coal_actual_consumption_at,fossil_oil_actual_aggregated_at,fossil_oil_actual_consumption_at,geothermal_actual_aggregated_at,geothermal_actual_consumption_at,…,fossil_gas_actual_aggregated_it,fossil_hard_coal_actual_aggregated_it,fossil_hard_coal_actual_consumption_it,fossil_oil_actual_aggregated_it,fossil_oil_actual_consumption_it,geothermal_actual_aggregated_it,hydro_pumped_storage_actual_aggregated_it,hydro_pumped_storage_actual_consumption_it,hydro_run-of-river_and_poundage_actual_aggregated_it,hydro_water_reservoir_actual_aggregated_it,other_actual_aggregated_it,other_actual_consumption_it,solar_actual_aggregated_it,solar_actual_consumption_it,waste_actual_aggregated_it,wind_offshore_actual_aggregated_it,wind_onshore_actual_aggregated_it,hydro_reservoir_storage_at,hydro_reservoir_storage_ch,hydro_reservoir_storage_fr,hydro_reservoir_storage_it,crossborder_actual_flow_at_ch,crossborder_actual_flow_ch_at,crossborder_actual_flow_ch_de_lu,crossborder_actual_flow_ch_fr,crossborder_actual_flow_ch_it,crossborder_actual_flow_de_lu_ch,crossborder_actual_flow_fr_ch,crossborder_actual_flow_it_ch,capacity_forecast_at_ch,capacity_forecast_ch_at,capacity_forecast_ch_de_lu,capacity_forecast_ch_fr,capacity_forecast_ch_it,capacity_forecast_de_lu_ch,capacity_forecast_FR_CH,capacity_forecast_it_ch
str,i16,f64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""2021-03-27 19:…",1,0.0,4335,4336,66.1,63.19,66.1,66.1,66.1,7110.25,6453.0,60467.5,55907.0,34913.0,0.0,1814.0,0.0,0.0,0.0,20533.0,5122.5,15.0,2114.0,,0.0,,180.0,0.0,1304.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,13242.0,864.0,,93.0,,632.0,1974.0,,4709.0,1208.0,2248.0,,0.0,,39.0,,1179.0,349156.0,915140.0,994654.0,2.205432e6,657.0,0.0,258.0,168.0,3634.0,825.0,0.0,0.0,900.0,1200.0,4000.0,1400.0,4090.0,800.0,2385.0,1810.0
"""2022-08-03 11:…",0,0.0,4060,4070,385.06,440.13,244.61,497.13,497.13,7388.0,6702.0,64852.75,51032.0,43532.0,658.0,24.0,58.0,0.0,33008.5,3039.0,3266.75,8339.0,454.0,,9718.0,,308.0,0.0,693.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,17813.0,2058.0,,362.0,,605.0,280.0,,2265.0,264.0,1824.0,,9842.0,,24.0,2.0,987.0,1.715317e6,4.630849e6,2.274044e6,2.361321e6,824.0,0.0,316.0,1186.0,2735.0,1431.0,0.0,0.0,900.0,1200.0,3700.0,900.0,3397.0,1800.0,2700.0,1440.0
"""2023-07-05 07:…",0,0.57,3670,3670,98.55,98.52,92.59,95.07,124.32,6975.0,6654.0,58007.75,42295.0,35988.0,232.0,126.0,26.0,0.0,5378.0,26798.75,4205.25,717.0,7110.0,,2289.0,,98.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,14713.0,1939.0,,22.0,,613.0,379.0,83.0,5141.0,694.0,2033.0,,2774.0,,348.0,1.0,855.0,1.236938e6,3.409793e6,2.965488e6,3.726987e6,226.0,0.0,70.0,0.0,2139.0,283.0,1218.0,0.0,900.0,800.0,3600.0,1200.0,2947.0,800.0,3000.0,1440.0
"""2023-08-05 21:…",0,12.49,3561,3561,103.91,92.95,109.23,90.0,103.91,5272.75,5332.0,42471.5,36934.0,28750.0,0.0,2301.0,0.0,0.0,7.25,4656.25,520.0,39.0,12246.0,,0.0,,102.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,8855.0,191.0,,15.0,,626.0,1955.0,2.0,5444.0,982.0,1777.0,,0.0,,358.0,12.0,3120.0,1.69321e6,4.21398e6,2.985265e6,3.645141e6,120.0,0.0,2052.0,241.0,2159.0,134.0,0.0,0.0,800.0,1200.0,4000.0,800.0,2981.0,1400.0,2227.0,1440.0
"""2023-10-07 08:…",0,0.06,4094,4095,78.02,86.96,70.82,72.94,140.0,6013.0,5015.0,48890.75,36560.0,29757.0,158.0,855.0,13.0,0.0,3211.0,24084.25,2200.5,0.0,2379.0,,2598.0,,164.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,10575.0,355.0,,16.0,,624.0,364.0,,4298.0,687.0,1173.0,,3057.0,,373.0,1.0,443.0,1.861453e6,4.689763e6,2.705939e6,3.336157e6,155.0,0.0,0.0,0.0,2806.0,1952.0,1238.0,0.0,450.0,450.0,4000.0,1200.0,3070.0,800.0,2385.0,1810.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""2022-12-16 11:…",1,0.88,3713,3714,538.45,500.99,540.83,537.39,537.39,9809.0,9687.0,70394.0,75647.0,41763.0,114.0,972.0,25.0,0.0,5047.5,1943.0,299.0,3274.0,3186.0,,2849.0,,276.0,0.0,2913.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,19163.0,3143.0,,319.0,,618.0,454.0,,3429.0,651.0,3053.0,,2614.0,,32.0,29.0,4739.0,1.677478e6,4.827689e6,2.190805e6,2.239061e6,0.0,507.0,1654.0,1378.0,183.0,0.0,0.0,0.0,1000.0,1200.0,4000.0,1200.0,3978.0,2000.0,3700.0,1810.0
"""2022-01-26 02:…",1,0.0,4332,4335,219.47,219.62,215.41,231.86,231.86,6406.75,7604.0,54782.0,70905.0,26833.0,0.0,1332.0,0.0,0.0,0.0,6918.25,3525.25,0.0,480.0,,0.0,,316.0,0.0,2785.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,14629.0,2114.0,,233.0,,640.0,,1046.0,1894.0,178.0,1791.0,,0.0,,33.0,,2494.0,1.037006e6,2.740467e6,1.817029e6,2.195808e6,1054.0,0.0,126.0,284.0,811.0,2920.0,0.0,0.0,1200.0,1200.0,4000.0,1400.0,2977.0,1800.0,3200.0,1910.0
"""2023-11-06 23:…",1,0.09,3364,3365,89.46,77.41,87.74,89.36,117.01,6152.25,6499.0,52596.75,51502.0,27526.0,0.0,277.0,0.0,0.0,0.0,26985.75,4550.5,0.0,7971.0,,0.0,,162.0,0.0,284.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,7378.0,803.0,,28.0,,608.0,568.0,,5194.0,562.0,1628.0,,0.0,,350.0,0.0,2253.0,1.819247e6,5.093846e6,2.847567e6,3.598085e6,739.0,0.0,0.0,0.0,3595.0,1277.0,1638.0,0.0,1200.0,1200.0,4000.0,1200.0,3662.0,800.0,2385.0,1910.0
"""2024-01-06 15:…",1,0.25,4284,4284,94.22,91.31,95.09,87.3,103.0,6485.25,6476.0,55069.5,57933.0,26175.0,150.0,604.0,10.0,0.0,587.0,11904.5,5277.75,2917.0,6883.0,,486.0,,158.0,0.0,1154.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,8367.0,492.0,,21.0,,627.0,446.0,2.0,2841.0,424.0,1719.0,,462.0,,342.0,14.0,3263.0,,,,,0.0,444.0,1651.0,0.0,1553.0,331.0,2316.0,0.0,850.0,1200.0,4000.0,1200.0,2538.0,800.0,3200.0,1910.0


In [237]:
df_final.to_pandas().to_csv("0_df_final_ch-de.csv", index=False)

In [238]:
df_final = (
    jao_dech.drop("day")
    .with_columns(
        pl.col("date")
        .str.to_datetime()
        .dt.truncate("1w")
        .dt.date()
        .dt.strftime("%Y-%m-%d")
        .alias("week_start")
    )
    .select(["date", "week_start", "dst", "auction_price", "allocatedCapacity", "ATC"])
    # Day Ahead Prices
    .join(da_at, how="left", on=["date", "dst"])
    .join(da_ch, how="left", on=["date", "dst"])
    .join(da_delu, how="left", on=["date", "dst"])
    .join(da_fr, how="left", on=["date", "dst"])
    .join(da_it, how="left", on=["date", "dst"])
    # Actual Load
    .join(load_at, how="left", on=["date", "dst"])
    .join(load_ch, how="left", on=["date", "dst"])
    .join(load_de, how="left", on=["date", "dst"])
    .join(load_fr, how="left", on=["date", "dst"])
    .join(load_it, how="left", on=["date", "dst"])
    # Forecast Wind and Solar
    .join(wind_solar_forecast_at, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_ch, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_de, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_fr, how="left", on=["date", "dst"])
    .join(wind_solar_forecast_it, how="left", on=["date", "dst"])
    # Actual Generation All Types
    .join(generation_at, how="left", on=["date", "dst"])
    .join(generation_ch, how="left", on=["date", "dst"])
    .join(generation_de, how="left", on=["date", "dst"])
    .join(generation_fr, how="left", on=["date", "dst"])
    .join(generation_it, how="left", on=["date", "dst"])
    # Hydro Reservoir Storage
    .join(hydro_storage_at, how="left", on=["week_start"])
    .join(hydro_storage_ch, how="left", on=["week_start"])
    .join(hydro_storage_fr, how="left", on=["week_start"])
    .join(hydro_storage_it, how="left", on=["week_start"])
    # Cross Border Physical Flow
    .join(flow_at_ch, how="left", on=["date", "dst"])
    .join(flow_ch_at, how="left", on=["date", "dst"])
    .join(flow_ch_de_lu, how="left", on=["date", "dst"])
    .join(flow_ch_fr, how="left", on=["date", "dst"])
    .join(flow_ch_it, how="left", on=["date", "dst"])
    .join(flow_de_lu_ch, how="left", on=["date", "dst"])
    .join(flow_fr_ch, how="left", on=["date", "dst"])
    .join(flow_it_ch, how="left", on=["date", "dst"])
    # Cross Border Capacities Forecast
    .join(capacity_forecast_at_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_at, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_de_lu, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_fr, how="left", on=["date", "dst"])
    .join(capacity_forecast_ch_it, how="left", on=["date", "dst"])
    .join(capacity_forecast_de_lu_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_fr_ch, how="left", on=["date", "dst"])
    .join(capacity_forecast_it_ch, how="left", on=["date", "dst"])
    # Drop
    .drop(["week_start"])
)

df_final

date,dst,auction_price,allocatedCapacity,ATC,day_ahead_price_at,day_ahead_price_ch,day_ahead_price_de,day_ahead_price_fr,day_ahead_price_ch_right,actual_load_at,actual_load_ch,actual_load_de,actual_load_fr,actual_load_it,solar_forecast_at,wind_onshore_forecast_at,solar_forecast_ch,wind_onshore_forecast_ch,solar_forecast_de,wind_onshore_forecast_de,wind_offshore_forecast_de,solar_forecast_fr,wind_onshore_forecast_fr,wind_offshore_forecast_fr,solar_forecast_it,wind_onshore_forecast_it,biomass_actual_aggregated_at,biomass_actual_consumption_at,fossil_gas_actual_aggregated_at,fossil_gas_actual_consumption_at,fossil_hard_coal_actual_aggregated_at,fossil_hard_coal_actual_consumption_at,fossil_oil_actual_aggregated_at,fossil_oil_actual_consumption_at,geothermal_actual_aggregated_at,geothermal_actual_consumption_at,…,fossil_gas_actual_aggregated_it,fossil_hard_coal_actual_aggregated_it,fossil_hard_coal_actual_consumption_it,fossil_oil_actual_aggregated_it,fossil_oil_actual_consumption_it,geothermal_actual_aggregated_it,hydro_pumped_storage_actual_aggregated_it,hydro_pumped_storage_actual_consumption_it,hydro_run-of-river_and_poundage_actual_aggregated_it,hydro_water_reservoir_actual_aggregated_it,other_actual_aggregated_it,other_actual_consumption_it,solar_actual_aggregated_it,solar_actual_consumption_it,waste_actual_aggregated_it,wind_offshore_actual_aggregated_it,wind_onshore_actual_aggregated_it,hydro_reservoir_storage_at,hydro_reservoir_storage_ch,hydro_reservoir_storage_fr,hydro_reservoir_storage_it,crossborder_actual_flow_at_ch,crossborder_actual_flow_ch_at,crossborder_actual_flow_ch_de_lu,crossborder_actual_flow_ch_fr,crossborder_actual_flow_ch_it,crossborder_actual_flow_de_lu_ch,crossborder_actual_flow_fr_ch,crossborder_actual_flow_it_ch,capacity_forecast_at_ch,capacity_forecast_ch_at,capacity_forecast_ch_de_lu,capacity_forecast_ch_fr,capacity_forecast_ch_it,capacity_forecast_de_lu_ch,capacity_forecast_FR_CH,capacity_forecast_it_ch
str,i16,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""2020-11-22 00:…",1,24.56,475.0,475.0,16.48,38.43,10.04,35.44,35.44,6586.5,7078.0,48375.75,57884.0,23617.0,0.0,748.0,0.0,0.0,0.0,24856.75,5575.75,0.0,2354.0,,0.0,,192.0,0.0,1165.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,9299.0,1223.0,,146.0,,651.0,8.0,68.0,2270.0,100.0,1609.0,,0.0,,28.0,,4012.0,1.771305e6,5.552431e6,3.012185e6,3.41511e6,1045.0,0.0,35.0,229.0,1823.0,3345.0,0.0,0.0,1200.0,1200.0,4000.0,1200.0,3037.0,800.0,3400.0,1910.0
"""2020-06-17 22:…",0,0.01,2103.0,2104.0,36.59,32.79,36.59,36.59,31.0,6286.75,6285.0,51403.0,44451.0,31906.0,0.0,663.0,0.0,0.0,0.0,4186.25,1225.25,0.0,1351.0,,0.0,,204.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,16336.0,1620.0,,162.0,,624.0,516.0,,7183.0,1436.0,2434.0,,0.0,,22.0,,3327.0,1.122407e6,3.566746e6,2.837466e6,3.756357e6,0.0,708.0,3045.0,649.0,0.0,0.0,0.0,1061.0,900.0,900.0,2400.0,800.0,2426.0,2000.0,3000.0,1440.0
"""2019-03-20 16:…",1,0.53,1191.0,1191.0,42.1,41.82,42.1,42.1,57.89,8453.5,7777.0,66298.5,56741.0,41382.0,234.0,296.0,117.0,5.0,6936.0,6612.5,2364.5,3121.0,1427.0,,2717.0,,308.0,0.0,1403.25,0.0,442.25,0.0,0.0,0.0,0.0,0.0,…,17464.0,2106.0,,283.0,,663.0,1.0,149.0,1634.0,442.0,4542.0,,3203.0,,37.0,,4960.0,651259.0,2.154299e6,1.13505e6,2.223185e6,931.0,0.0,210.0,0.0,2575.0,1445.0,1453.0,0.0,1200.0,650.0,4000.0,1200.0,2674.0,1400.0,2850.0,1810.0
"""2020-08-03 17:…",0,0.04,2589.0,2589.0,39.67,39.12,39.67,39.67,42.31,7425.25,6952.0,57627.0,43141.0,42562.0,134.0,724.0,23.0,0.0,7928.75,2958.75,308.0,4292.0,3855.0,,3781.0,,184.0,0.0,427.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,18266.0,1464.0,,384.0,,599.0,1151.0,232.0,5611.0,1616.0,2502.0,,3245.0,,37.0,,4010.0,1.728887e6,5.2889e6,3.062108e6,3.846485e6,0.0,422.0,2032.0,0.0,770.0,291.0,787.0,0.0,450.0,450.0,3000.0,500.0,1277.0,2000.0,2157.0,1440.0
"""2019-02-14 00:…",1,3.2,943.0,944.0,41.56,44.18,40.74,44.51,47.67,7016.25,7584.0,56211.25,68390.0,29126.0,0.0,1109.0,0.0,3.0,0.0,9600.5,3857.0,0.0,3316.0,,0.0,,312.0,0.0,1842.5,0.0,319.75,0.0,0.0,0.0,0.0,0.0,…,9953.0,1806.0,,22.0,,665.0,88.0,317.0,1811.0,292.0,2920.0,,0.0,,47.0,,5536.0,929553.0,2.785098e6,1.446619e6,2.469445e6,1274.0,0.0,570.0,0.0,2639.0,3654.0,667.0,0.0,1200.0,650.0,4000.0,1200.0,2301.0,1200.0,3000.0,1910.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""2022-05-18 01:…",0,2.71,988.0,988.0,187.89,184.81,187.89,187.89,230.24,5527.5,6019.0,47351.75,40439.0,26282.0,0.0,724.0,0.0,0.0,0.0,8648.0,2424.25,0.0,3012.0,,0.0,,280.0,0.0,983.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,9963.0,1718.0,,27.0,,614.0,137.0,42.0,3675.0,174.0,1872.0,,0.0,,19.0,,865.0,521892.0,1.627931e6,,2.419503e6,203.0,0.0,97.0,0.0,2771.0,1588.0,78.0,0.0,450.0,450.0,4000.0,900.0,2244.0,1200.0,2900.0,1660.0
"""2022-11-25 14:…",1,9.09,1063.0,1063.0,290.9,307.77,290.17,290.12,290.12,8216.0,8683.0,63331.75,55930.0,37994.0,235.0,52.0,28.0,0.0,3682.5,5841.0,2173.75,4121.0,3914.0,,2680.0,,228.0,0.0,2055.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,16887.0,3122.0,,390.0,,620.0,0.0,162.0,2201.0,498.0,2405.0,,2405.0,,37.0,2.0,764.0,1.874133e6,5.962618e6,2.51892e6,2.297352e6,1215.0,0.0,0.0,0.0,2988.0,2723.0,2183.0,0.0,1200.0,1200.0,4000.0,1200.0,4341.0,1400.0,3700.0,1810.0
"""2023-04-08 21:…",0,0.35,1587.0,1587.0,137.31,140.85,137.31,137.31,180.0,6085.0,6223.0,45094.25,45574.0,28535.0,0.0,199.0,0.0,0.0,0.0,4639.0,1167.25,0.0,2844.0,,0.0,,192.0,0.0,1394.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,11737.0,155.0,,19.0,,614.0,446.0,,2415.0,662.0,1900.0,,0.0,1.0,338.0,2.0,805.0,592454.0,1.842572e6,1.512237e6,1.97461e6,793.0,0.0,264.0,289.0,4238.0,136.0,0.0,0.0,600.0,1200.0,4000.0,1200.0,4365.0,2000.0,2385.0,1810.0
"""2023-05-01 07:…",0,3.55,1031.0,1031.0,86.46,91.85,84.2,89.56,110.06,4915.0,6289.0,37746.0,33336.0,18828.0,145.0,295.0,10.0,0.0,5041.5,7489.75,1430.75,97.0,2623.0,,704.0,,112.0,0.0,391.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,4018.0,307.0,,18.0,,622.0,38.0,57.0,3043.0,158.0,1513.0,,732.0,,345.0,1.0,1877.0,462364.0,1.575017e6,1.73774e6,2.358161e6,231.0,0.0,0.0,0.0,1371.0,1029.0,1928.0,0.0,450.0,450.0,4000.0,1200.0,1840.0,1150.0,2227.0,1660.0


In [239]:
df_final.to_pandas().to_csv("0_df_final_de-ch.csv", index=False)

### Converting to UTC

In [None]:
(
    pl.read_csv("./0_df_final_ch-de.csv")
    .sort("date")
    .with_columns(pl.col("date").str.to_datetime().dt.replace_time_zone("Europe/Zurich", ambiguous="earliest"))
    .with_columns(pl.col("date").is_duplicated().alias("tmp"))
    .with_columns(
        pl.when((pl.col("tmp") == True) & (pl.col("dst") == 0))
        .then(pl.col("date") + pl.duration(hours=1))
        .otherwise(pl.col("date"))
        .alias("date")
    )
    .with_columns(pl.col("date").dt.convert_time_zone("UTC"))
    .drop("tmp")
    .to_pandas()
    .to_csv("0_df_final_ch-de_UTC.csv", index=False)
)

In [None]:
(
    pl.read_csv("0_df_final_de-ch.csv")
    .sort("date")
    .with_columns(pl.col("date").str.to_datetime().dt.replace_time_zone("Europe/Zurich", ambiguous="earliest"))
    .with_columns(pl.col("date").is_duplicated().alias("tmp"))
    .with_columns(
        pl.when((pl.col("tmp") == True) & (pl.col("dst") == 0))
        .then(pl.col("date") + pl.duration(hours=1))
        .otherwise(pl.col("date"))
        .alias("date")
    )
    .with_columns(pl.col("date").dt.convert_time_zone("UTC"))
    .drop("tmp")
    .to_pandas()
    .to_csv("0_df_final_de-ch_UTC.csv", index=False)
)

FileNotFoundError: The system cannot find the path specified. (os error 3): ../00_Data Retrieval and Cleaning/0_df_final_de-ch.csv