

- How to deal with different columns in generation data for old (`DE_AT_LU` until 2018/09/30) and new bidding zone (`DE_LU` since 2018/10/01)? Old data contains all columns from new data but also additional columns, mostly about `'Actual Consumption'`, and one extra category `'Fossil Coal-derived gas Actual Aggregated'`.
- Which time span to include in general for training data?

## Data-loading playground with `entsoe-py`

In [1]:
import pandas as pd
#import plotly.express as px
#import matplotlib.pyplot as plt

In [44]:
def load_data(start_date, end_date, time_zone, api_key, country_code):
    """
    Load data for requested time interval.
    
    Data contains actual aggregated generation per production type and actual total load for Germany.
    
    Params
    ------
    start_date : str
                 start date as "yyyymmdd"
    end_date : str
               end date as "yyyymmdd"
    time_zone : str
                time zone as string, e.g. "Europe/Berlin"
    api_key : str
              RESTful API web key
    """
    from entsoe import EntsoePandasClient
    # Initialize client and settings.
    client = EntsoePandasClient(api_key=api_key)
    start = pd.Timestamp(start_date, tz=time_zone)
    end = pd.Timestamp(end_date, tz=time_zone)
    # Query data and save to dataframe.
    df_load = client.query_load(country_code, start=start, end=end)
    df_gen = client.query_generation(country_code, start=start, end=end, psr_type=None)
    df_gen.columns = [" ".join(a) for a in df_gen.columns.to_flat_index()]

    df_final = pd.concat([df_load, df_gen])
    
    return df_final

In [45]:
# SETTINGS
api_key = "6e68642c-8403-4caa-af31-bda40b8c67f6"
#country_code_from = 'FR'  # France
#country_code_to = 'DE_LU' # Germany-Luxembourg
time_zone = "Europe/Berlin"

In [46]:
# Check data for new vs. old bidding zone.
start_date = "20201231"
end_date = "20210101"
country_code = "10Y1001A1001A83F"# Germany (new bidding zone, valid since 2018/10/01)
BZ_code = "DE_LU"
#country_code_old = "DE_AT_LU" # Germany (old bidding zone, valid until 2018/09/30)
#start_date_old = "20150101"
#end_date_old = "20150201"

In [47]:
df_cc = load_data(start_date, end_date, time_zone, api_key, country_code)
df_bz = load_data(start_date, end_date, time_zone, api_key, BZ_code)

In [48]:
print(df_cc.iloc[0], df_bz.iloc[0])

Actual Load                                          45993.0
Biomass Actual Aggregated                                NaN
Fossil Brown coal/Lignite Actual Aggregated              NaN
Fossil Gas Actual Aggregated                             NaN
Fossil Hard coal Actual Aggregated                       NaN
Fossil Oil Actual Aggregated                             NaN
Fossil Oil Actual Consumption                            NaN
Geothermal Actual Aggregated                             NaN
Hydro Pumped Storage Actual Aggregated                   NaN
Hydro Pumped Storage Actual Consumption                  NaN
Hydro Run-of-river and poundage Actual Aggregated        NaN
Hydro Water Reservoir Actual Aggregated                  NaN
Nuclear Actual Aggregated                                NaN
Other Actual Aggregated                                  NaN
Other renewable Actual Aggregated                        NaN
Solar Actual Aggregated                                  NaN
Waste Actual Aggregated 

In [24]:
df_old = load_data(start_date_old, end_date_old, time_zone, api_key, country_code)

In [25]:
print(f"New: Columns {df_new.columns} and shape {df_new.shape}.")
print(f"Old: Columns {df_old.columns} and shape {df_old.shape}.")
columns_intersect = df_new.columns.intersection(df_old.columns)
print(columns_intersect, columns_intersect.shape)
print(df_new.columns == columns_intersect)
columns_diff = df_old.columns.difference(df_new.columns)
print(columns_diff, columns_diff.shape)

New: Columns Index(['Actual Load', 'Biomass Actual Aggregated',
       'Fossil Brown coal/Lignite Actual Aggregated',
       'Fossil Gas Actual Aggregated', 'Fossil Hard coal Actual Aggregated',
       'Fossil Oil Actual Aggregated', 'Fossil Oil Actual Consumption',
       'Geothermal Actual Aggregated',
       'Hydro Pumped Storage Actual Aggregated',
       'Hydro Pumped Storage Actual Consumption',
       'Hydro Run-of-river and poundage Actual Aggregated',
       'Hydro Water Reservoir Actual Aggregated', 'Nuclear Actual Aggregated',
       'Other Actual Aggregated', 'Other renewable Actual Aggregated',
       'Solar Actual Aggregated', 'Waste Actual Aggregated',
       'Wind Offshore Actual Aggregated', 'Wind Onshore Actual Aggregated'],
      dtype='object') and shape (192, 19).
Old: Columns Index(['Actual Load', 'Biomass Actual Aggregated',
       'Biomass Actual Consumption',
       'Fossil Brown coal/Lignite Actual Aggregated',
       'Fossil Brown coal/Lignite Actual Consumpt

In [15]:
# Check data-loading for time spans larger that 1y
start_date = "20190101"
end_date_1y = "20200101"
end_date_2y = "20210101"
country_code = "DE_LU"  # Germany (new bidding zone, valid since 2018/10/01)
df_1y = load_data(start_date, end_date_1y, time_zone, api_key, country_code)
df_2y = load_data(start_date, end_date_2y, time_zone, api_key, country_code)

In [18]:
#print(df_1y.shape, df_1y.columns)
#print(df_2y.shape, df_2y.columns)
columns_intersect = df_new.columns.intersection(df_1y.columns)
print(columns_intersect, columns_intersect.shape)
#print(df_new.columns == columns_intersect)
columns_diff = df_1y.columns.difference(df_new.columns)
print(columns_diff, columns_diff.shape)

Index(['Actual Load', 'Biomass Actual Aggregated',
       'Fossil Brown coal/Lignite Actual Aggregated',
       'Fossil Gas Actual Aggregated', 'Fossil Hard coal Actual Aggregated',
       'Fossil Oil Actual Aggregated', 'Fossil Oil Actual Consumption',
       'Geothermal Actual Aggregated',
       'Hydro Pumped Storage Actual Aggregated',
       'Hydro Pumped Storage Actual Consumption',
       'Hydro Run-of-river and poundage Actual Aggregated',
       'Hydro Water Reservoir Actual Aggregated', 'Nuclear Actual Aggregated',
       'Other Actual Aggregated', 'Other renewable Actual Aggregated',
       'Solar Actual Aggregated', 'Waste Actual Aggregated',
       'Wind Offshore Actual Aggregated', 'Wind Onshore Actual Aggregated'],
      dtype='object') (19,)
Index(['Fossil Coal-derived gas Actual Aggregated',
       'Fossil Gas Actual Consumption',
       'Hydro Water Reservoir Actual Consumption',
       'Nuclear Actual Consumption'],
      dtype='object') (4,)


### Complete parameter list
https://transparency.entsoe.eu/content/static_content/Static%20content/web%20api/Guide.html#_complete_parameter_list

### Queries returning Pandas Series

`client.query_day_ahead_prices(country_code, start=start,end=end)` <br>
`client.query_net_position(country_code, start=start, end=end, dayahead=True)` <br>
`client.query_crossborder_flows(country_code_from, country_code_to, start, end)` <br>
`client.query_scheduled_exchanges(country_code_from, country_code_to, start, end, dayahead=False)` <br>
`client.query_net_transfer_capacity_dayahead(country_code_from, country_code_to, start, end)` <br>
`client.query_net_transfer_capacity_weekahead(country_code_from, country_code_to, start, end)` <br>
`client.query_net_transfer_capacity_monthahead(country_code_from, country_code_to, start, end)` <br>
`client.query_net_transfer_capacity_yearahead(country_code_from, country_code_to, start, end)` <br>
`client.query_intraday_offered_capacity(country_code_from, country_code_to, start, end,implicit=True)` <br>
`client.query_offered_capacity(country_code_from, country_code_to, start, end, contract_marketagreement_type, implicit=True)` <br>
`client.query_aggregate_water_reservoirs_and_hydro_storage(country_code, start, end)`

### Queries returning Pandas DataFrames

`client.query_load(country_code, start=start,end=end)` <br>
`client.query_load_forecast(country_code, start=start,end=end)` <br>
`client.query_load_and_forecast(country_code, start=start, end=end)` <br>
`client.query_generation_forecast(country_code, start=start,end=end)` <br>
`client.query_wind_and_solar_forecast(country_code, start=start,end=end, psr_type=None)` <br>
`client.query_generation(country_code, start=start,end=end, psr_type=None)` <br>
`client.query_generation_per_plant(country_code, start=start,end=end, psr_type=None)` <br>
`client.query_installed_generation_capacity(country_code, start=start,end=end, psr_type=None)` <br>
`client.query_installed_generation_capacity_per_unit(country_code, start=start,end=end, psr_type=None)` <br>
`client.query_imbalance_prices(country_code, start=start,end=end, psr_type=None)` <br>
`client.query_contracted_reserve_prices(country_code, start, end, type_marketagreement_type, psr_type=None)` <br>
`client.query_contracted_reserve_amount(country_code, start, end, type_marketagreement_type, psr_type=None)` <br>
`client.query_unavailability_of_generation_units(country_code, start=start,end=end, docstatus=None, periodstartupdate=None, periodendupdate=None)` <br>
`client.query_unavailability_of_production_units(country_code, start, end, docstatus=None, periodstartupdate=None, periodendupdate=None)` <br>
`client.query_unavailability_transmission(country_code_from, country_code_to, start, end, docstatus=None, periodstartupdate=None, periodendupdate=None)` <br>
`client.query_withdrawn_unavailability_of_generation_units(country_code, start, end)` <br>
`client.query_import(country_code, start, end)` <br>
`client.query_generation_import(country_code, start, end)` <br>
`client.query_procured_balancing_capacity(country_code, start, end, process_type, type_marketagreement_type=None)`

## Load data from client

In [None]:
df = pd.DataFrame()
df["load forecast"] = client.query_load_forecast(country_code, start=start,end=end)
df["load"] = client.query_load(country_code, start=start,end=end)
df["load forecast error"] = df["load forecast"] - df["load"]
df["generation forecast"] = client.query_generation_forecast(country_code, start=start,end=end)

In [None]:
df_gen = client.query_generation(country_code, start=start,end=end, psr_type=None)
df_gen.head()

In [None]:
df_gen.xs(key="Actual Aggregated", level=1, axis=1)

In [None]:
fig, ax = plt.subplots(figsize=(14,7))
ax.plot(df_gen.xs(key="Actual Aggregated", level=1, axis=1))
ax.legend()

In [None]:
df["generation"] = df_gen.sum(axis=1)

In [None]:
df["generation forecast error"] = df["generation forecast"] - df["generation"]

In [None]:
px.line(df)

## Save to csv file

In [None]:
df.to_csv('entsoe.csv')