# Retrieving Data from the ENTSO-E Transparency Platform

In [12]:
import pandas as pd
import polars as pl
import polars.selectors as cs
import numpy as np
import plotnine as pn
from mizani.formatters import comma_format, custom_format, currency_format, percent_format
from IPython.display import clear_output, display
import os
import glob
from entsoe import EntsoePandasClient, EntsoeRawClient
from tqdm.notebook import tqdm
import itertools
import yaml

# Five hex colour codes kept as a reusable palette for plots.
jama_colour = [
    "#374e55",
    "#df8f44",
    "#00a1d5",
    "#b24745",
    "#79af97",
]

# Show up to 500 columns when a DataFrame is displayed. The canonical option
# name is used (instead of relying on pandas' regex matching of option keys),
# and the option is set once rather than twice.
pd.set_option("display.max_columns", 500)

# Single background colour reused by the plot theme configured below.
background_colour = "#F2F2F2"
# Global plotnine theme: classic base, 7 x 7/1.618 figure (presumably a
# golden-ratio aspect), monospace text, and the same flat fill/outline colour
# behind the plot, the panel and the legend.
pn.theme_set(
    pn.theme_classic()
    + pn.theme(
        figure_size=[7, 7/1.618],
        text=pn.element_text(family="monospace"),
        # One fresh element_rect per themed region, all using background_colour.
        **{
            region: pn.element_rect(fill=background_colour, colour=background_colour)
            for region in ("plot_background", "panel_background", "legend_background")
        },
    )
)


%load_ext blackcellmagic
%matplotlib inline

The blackcellmagic extension is already loaded. To reload it, use:
  %reload_ext blackcellmagic


### Establishing Connection to ENTSO-E API

In [2]:
# Load the project configuration from the YAML file two directories up.
with open("../../config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# pandas-returning ENTSO-E client, authenticated with the key stored under
# the "entsoe" entry of the config.
client = EntsoePandasClient(api_key=config["entsoe"])

# Query window used by the download cells below.
# Timestamps stay in the home timezone rather than UTC; converting to UTC
# later is always possible.
start, end = (
    pd.Timestamp(day, tz='Europe/Brussels') for day in ("20190101", "20240201")
)

### Day-Ahead Prices

In [3]:
# Area codes passed to the ENTSO-E client, one request per code.
country_codes = ["DE_LU", "CH", "FR", "IT", "AT"]

# Best-effort download: each successful query is written to its own CSV in the
# current working directory; a failing country does not stop the others.
for country_code in tqdm(country_codes):
    try:
        data = client.query_day_ahead_prices(
            country_code=country_code, start=start, end=end, resolution="60min"
        )
        # Expose the index as a "date" column next to the "price" values.
        df_out = pd.DataFrame(data, columns=["price"]).reset_index(names="date")
        df_out.to_csv(f"day_ahead_prices_{country_code}.csv", index=False)
    except Exception as exc:
        # Report the failure instead of swallowing it silently. Catching
        # Exception (not a bare except) also keeps KeyboardInterrupt usable
        # for aborting a long download.
        print(f"Day-ahead prices for {country_code} could not be saved: {exc!r}")

  0%|          | 0/5 [00:00<?, ?it/s]

### Load

In [4]:
# Area codes passed to the ENTSO-E client, one request per code.
country_codes = ["DE_LU", "CH", "FR", "IT", "AT"]

# Best-effort download: each successful query is written to its own CSV in the
# current working directory; a failing country does not stop the others.
for country_code in tqdm(country_codes):
    try:
        data = client.query_load(country_code=country_code, start=start, end=end)
        # reset_index() exposes the index as a column; the rename assumes that
        # column is called "index" (i.e. the returned index is unnamed).
        df_out = (
            pd.DataFrame(data)
            .reset_index()
            .rename(columns={"index": "date", "Actual Load": "actual_load"})
        )
        df_out.to_csv(f"actual_load_{country_code}.csv", index=False)
    except Exception as exc:
        # Report the failure instead of swallowing it silently. Catching
        # Exception (not a bare except) also keeps KeyboardInterrupt usable
        # for aborting a long download.
        print(f"Actual load for {country_code} could not be saved: {exc!r}")

  0%|          | 0/5 [00:00<?, ?it/s]

### Generation: Forecast wind and solar

In [5]:
# Area codes passed to the ENTSO-E client, one request per code.
country_codes = ["DE_LU", "CH", "FR", "IT", "AT"]

# Best-effort download: each successful query is written to its own CSV in the
# current working directory; a failing country does not stop the others.
for country_code in tqdm(country_codes):
    try:
        data = client.query_wind_and_solar_forecast(
            country_code=country_code, start=start, end=end
        )
        # Columns missing from the response are simply not renamed;
        # the rename assumes the reset index column is called "index".
        df_out = (
            pd.DataFrame(data)
            .reset_index()
            .rename(
                columns={
                    "index": "date",
                    "Solar": "solar_forecast",
                    "Wind Onshore": "wind_onshore_forecast",
                    "Wind Offshore": "wind_offshore_forecast",
                }
            )
        )
        df_out.to_csv(f"wind_solar_forecast_{country_code}.csv", index=False)
    except Exception as exc:
        # Report the failure instead of swallowing it silently. Catching
        # Exception (not a bare except) also keeps KeyboardInterrupt usable
        # for aborting a long download.
        print(f"Wind/solar forecast for {country_code} could not be saved: {exc!r}")

  0%|          | 0/5 [00:00<?, ?it/s]

### Generation: Actual values all types

In [6]:
# Area codes passed to the ENTSO-E client, one request per code.
country_codes = ["DE_LU", "CH", "FR", "IT", "AT"]

# Best-effort download: each successful query is written to its own CSV in the
# current working directory; a failing country does not stop the others.
for country_code in tqdm(country_codes):
    try:
        data = client.query_generation(
            country_code=country_code, start=start, end=end
        )
        # reset_index() exposes the index as a column; the rename assumes that
        # column is called "index" (i.e. the returned index is unnamed).
        df_out = pd.DataFrame(data).reset_index().rename(columns={"index": "date"})
        df_out.to_csv(f"generation_actual_all_{country_code}.csv", index=False)
    except Exception as exc:
        # Report the failure instead of swallowing it silently. Catching
        # Exception (not a bare except) also keeps KeyboardInterrupt usable
        # for aborting a long download.
        print(f"Actual generation for {country_code} could not be saved: {exc!r}")

  0%|          | 0/5 [00:00<?, ?it/s]

### Water Reservoirs

In [7]:
# Area codes passed to the ENTSO-E client, one request per code.
country_codes = ["DE_LU", "CH", "FR", "IT", "AT"]

# Best-effort download: each successful query is written to its own CSV in the
# current working directory; a failing country does not stop the others.
for country_code in tqdm(country_codes):
    try:
        data = client.query_aggregate_water_reservoirs_and_hydro_storage(
            country_code=country_code, start=start, end=end
        )
        # The rename assumes an unnamed index ("index" after reset_index) and a
        # single unnamed value column (labelled 0 by pandas).
        df_out = (
            pd.DataFrame(data)
            .reset_index()
            .rename(columns={"index": "date", 0: "hydro_reservoir_storage_mwh"})
        )
        df_out.to_csv(f"hydro_reservoir_storage_{country_code}.csv", index=False)
    except Exception as exc:
        # Report the failure instead of swallowing it silently. Catching
        # Exception (not a bare except) also keeps KeyboardInterrupt usable
        # for aborting a long download.
        print(f"Hydro reservoir storage for {country_code} could not be saved: {exc!r}")

  0%|          | 0/5 [00:00<?, ?it/s]

### Crossborder Flows (real, physical)

In [8]:
country_codes = ["DE_LU", "CH", "FR", "IT", "AT"]

# Directed (from, to) combinations of two different codes in which "CH" is one
# of the two ends, kept in itertools.product order.
country_pairs = pd.DataFrame(
    [
        (origin, destination)
        for origin, destination in itertools.product(country_codes, repeat=2)
        if origin != destination and "CH" in (origin, destination)
    ],
    columns=["from_country", "to_country"],
)

country_pairs

Unnamed: 0,from_country,to_country
0,DE_LU,CH
1,CH,DE_LU
2,CH,FR
3,CH,IT
4,CH,AT
5,FR,CH
6,IT,CH
7,AT,CH


In [9]:
# Best-effort download: one query and one CSV (in the current working
# directory) per row of country_pairs; a failing pair does not stop the others.
for idx in tqdm(country_pairs.index):
    # Pulled into plain variables so the file name below does not nest double
    # quotes inside a double-quoted f-string (a SyntaxError before Python 3.12).
    from_country = country_pairs.loc[idx, "from_country"]
    to_country = country_pairs.loc[idx, "to_country"]
    try:
        data = client.query_crossborder_flows(
            country_code_from=from_country,
            country_code_to=to_country,
            start=start,
            end=end,
        )
        # The rename assumes an unnamed index ("index" after reset_index) and a
        # single unnamed value column (labelled 0 by pandas).
        df_out = (
            pd.DataFrame(data)
            .reset_index()
            .rename(columns={"index": "date", 0: "crossborder_physical_flow_mw"})
        )
        df_out.to_csv(
            f"crossborder_physical_flow_mw_{from_country}_{to_country}.csv",
            index=False,
        )
    except Exception as exc:
        # Report the failure instead of swallowing it silently. Catching
        # Exception (not a bare except) also keeps KeyboardInterrupt usable
        # for aborting a long download.
        print(
            f"Crossborder flow {from_country} -> {to_country} could not be saved: {exc!r}"
        )

  0%|          | 0/8 [00:00<?, ?it/s]

### Crossborder Capacities (DA Forecast)

In [10]:
# Display the from/to pairs that the capacity download below iterates over.
country_pairs

Unnamed: 0,from_country,to_country
0,DE_LU,CH
1,CH,DE_LU
2,CH,FR
3,CH,IT
4,CH,AT
5,FR,CH
6,IT,CH
7,AT,CH


In [11]:
# Best-effort download: one query and one CSV (in the current working
# directory) per row of country_pairs; a failing pair does not stop the others.
for idx in tqdm(country_pairs.index):
    # Pulled into plain variables so the file name below does not nest double
    # quotes inside a double-quoted f-string (a SyntaxError before Python 3.12).
    from_country = country_pairs.loc[idx, "from_country"]
    to_country = country_pairs.loc[idx, "to_country"]
    try:
        data = client.query_net_transfer_capacity_dayahead(
            country_code_from=from_country,
            country_code_to=to_country,
            start=start,
            end=end,
        )
        # The rename assumes an unnamed index ("index" after reset_index) and a
        # single unnamed value column (labelled 0 by pandas).
        df_out = (
            pd.DataFrame(data)
            .reset_index()
            .rename(
                columns={
                    "index": "date",
                    0: "crossborder_capacity_forecast_day_ahead_mw",
                }
            )
        )
        df_out.to_csv(
            f"crossborder_capacity_forecast_{from_country}_{to_country}.csv",
            index=False,
        )
    except Exception as exc:
        # Report the failure instead of swallowing it silently. Catching
        # Exception (not a bare except) also keeps KeyboardInterrupt usable
        # for aborting a long download.
        print(
            f"Crossborder capacity {from_country} -> {to_country} could not be saved: {exc!r}"
        )

  0%|          | 0/8 [00:00<?, ?it/s]