# 0. Import packages

In [1]:
# Import necessary packages
import pandas as pd
from entsoe import EntsoeRawClient, EntsoePandasClient

# 1. Import ENTSOE data

In [2]:
# API token for the ENTSO-E client.
# The token is taken from the ENTSOE_API_TOKEN environment variable when it is
# set, so the credential can be supplied without editing the notebook. The
# previously hard-coded value is kept only as a backward-compatible fallback.
# NOTE(review): a token committed to source should be treated as leaked --
# rotate it and remove the fallback once every user exports the variable.
import os

API_token = os.environ.get('ENTSOE_API_TOKEN', '133f95af-3dc6-419e-b083-3b10b7275ae7')

In [3]:
# Shared configuration for all download cells
country_code_to = 'BE'  # destination code used by the scheduled-exchange queries
country_code = 'BE'  # Belgium
countries_from = ['DE_LU', 'FR', 'GB', 'NL']  # countries to extract import data from
years = list(range(2017, 2023))  # 2017 through 2022 inclusive: the time window to extract
type_marketagreement_type = contract_marketagreement_type = 'A01'
client = EntsoePandasClient(api_key=API_token)

## Generation per generation unit

In [4]:
# Download the generation per generation unit, one calendar year per file
for year in years:

    # Query window: 1 January of this year to 1 January of the next (Brussels time)
    start, end = (
        pd.Timestamp(f'{y}0101', tz='Europe/Brussels') for y in (year, year + 1)
    )

    # Fetch the per-plant generation for the window
    generation = client.query_generation_per_plant(
        country_code,
        start=start,
        end=end,
        psr_type=None,
    )

    # Store the year as a semicolon-separated file
    generation.to_csv(f'./Data/Raw/generation_{year}.csv', sep=';')

## Generation per production type

In [5]:
# Download the generation per production type, one calendar year per file
for year in years:

    # Query window boundaries (Brussels time)
    start = pd.Timestamp(f'{year}0101', tz='Europe/Brussels')
    end = pd.Timestamp(f'{year + 1}0101', tz='Europe/Brussels')

    # Fetch the generation for the window
    production_type = client.query_generation(
        country_code, start=start, end=end, psr_type=None
    )

    # Store the year as a semicolon-separated file
    production_type.to_csv(f'./Data/Raw/production_type_{year}.csv', sep=';')

## Load 

In [6]:
# Download the load data, one calendar year per file
for year in years:

    # Query window: start of this year and start of the following year (Brussels time)
    start, end = (
        pd.Timestamp(f'{y}0101', tz='Europe/Brussels') for y in (year, year + 1)
    )

    # Fetch the load for the window
    load = client.query_load(country_code, start=start, end=end)

    # Store the year as a semicolon-separated file
    load.to_csv(f'./Data/Raw/load_{year}.csv', sep=';')

## Day ahead prices

In [7]:
# Get day ahead prices data for every year
for year in years:

    # Query window: 1 January of this year to 1 January of the next (Brussels time)
    start = pd.Timestamp(f'{year}0101', tz='Europe/Brussels')
    end = pd.Timestamp(f'{year + 1}0101', tz='Europe/Brussels')

    # Get day ahead prices as a Series
    day_ahead = client.query_day_ahead_prices(country_code, start=start, end=end)

    # Convert Series to DataFrame
    day_ahead_df = day_ahead.to_frame(name='day_ahead_price')

    # Keep only the rows that fall strictly before `end`.
    # The original dropped the last row unconditionally -- presumably because the
    # response contains the boundary row stamped exactly at `end`. Filtering on the
    # timestamp removes that row when it is present, but no longer discards a
    # genuine observation when the response stops earlier.
    day_ahead_df = day_ahead_df[day_ahead_df.index < end]

    # Write to csv
    day_ahead_df.to_csv(f'./Data/Raw/day_ahead_prices_{year}.csv', sep=';')

## Scheduled commercial exchanges (to Belgium)

In [8]:
# Get import data from every country to Belgium.
# The download is best-effort: a country/year combination whose query fails is
# reported and skipped, and the remaining combinations are still processed.
for country in countries_from:

    for year in years:

        # Set time variables
        start = pd.Timestamp(f'{year}0101', tz='Europe/Brussels')
        end = pd.Timestamp(f'{year + 1}0101', tz='Europe/Brussels')

        try:
            # Get exchanges in pandas Series
            scheduled_exchanges = client.query_scheduled_exchanges(
                country, country_code_to, start=start, end=end, dayahead=False
            )
        except Exception as exc:
            # `Exception` instead of a bare except lets KeyboardInterrupt and
            # SystemExit propagate; the message records what was skipped and why.
            print(f'Skipping scheduled exchanges {country} --> {country_code_to} for {year}: {exc!r}')
        else:
            # Convert Series to Dataframe
            scheduled_exchanges_df = scheduled_exchanges.to_frame(name='scheduled exchange ' + country + ' --> BE')

            # Write to csv (outside the try, so write errors are no longer hidden)
            scheduled_exchanges_df.to_csv('./Data/Raw/scheduled_exchanges_' + country + '_BE_' + str(year) + '.csv', sep=';')

## Unavailability of generation units

In [9]:
# Download the unavailability of generation units, one calendar year per file
for year in years:

    # Query window boundaries (Brussels time)
    start = pd.Timestamp(f'{year}0101', tz='Europe/Brussels')
    end = pd.Timestamp(f'{year + 1}0101', tz='Europe/Brussels')

    # Fetch the unavailability records for the window
    unavailability_generation_units = client.query_unavailability_of_generation_units(
        country_code,
        start=start,
        end=end,
        docstatus=None,
        periodstartupdate=None,
        periodendupdate=None,
    )

    # Store the year as a semicolon-separated file
    unavailability_generation_units.to_csv(
        f'./Data/Raw/unavailability_generation_units_{year}.csv', sep=';'
    )

# 2. Concatenate dataframes together

## Day ahead prices

In [14]:
# Load the yearly day-ahead price files, newest year first
(
    day_ahead_2022_df,
    day_ahead_2021_df,
    day_ahead_2020_df,
    day_ahead_2019_df,
    day_ahead_2018_df,
    day_ahead_2017_df,
) = (
    pd.read_csv(f'./Data/Raw/day_ahead_prices_{yr}.csv', sep=';')
    for yr in (2022, 2021, 2020, 2019, 2018, 2017)
)

In [15]:
# Stack the yearly day-ahead frames into one frame (2022 first, 2017 last)
day_ahead_df = pd.concat(
    [
        day_ahead_2022_df,
        day_ahead_2021_df,
        day_ahead_2020_df,
        day_ahead_2019_df,
        day_ahead_2018_df,
        day_ahead_2017_df,
    ]
)

In [16]:
# Print the shape of each yearly frame, then of the concatenated frame
print(
    *(
        frame.shape
        for frame in (
            day_ahead_2022_df,
            day_ahead_2021_df,
            day_ahead_2020_df,
            day_ahead_2019_df,
            day_ahead_2018_df,
            day_ahead_2017_df,
            day_ahead_df,
        )
    ),
    sep='\n',
)

(8760, 2)
(8760, 2)
(8784, 2)
(8760, 2)
(8760, 2)
(8760, 2)
(52584, 2)


## Generation per unit

In [17]:
# Load the yearly generation-per-unit files, newest year first.
# NOTE(review): 2022 skips only file row 2 while the other years skip rows 1 and 2,
# so the 2022 frame keeps the plant-type line as its first data row (it shows up
# in the 2022 preview and as the one extra row in its shape) -- confirm this is intended.
generation_2022_df = pd.read_csv(
    './Data/Raw/generation_2022.csv', sep=';', header=0, skiprows=[2]
)
(
    generation_2021_df,
    generation_2020_df,
    generation_2019_df,
    generation_2018_df,
    generation_2017_df,
) = (
    pd.read_csv(f'./Data/Raw/generation_{yr}.csv', sep=';', header=0, skiprows=[1, 2])
    for yr in (2021, 2020, 2019, 2018, 2017)
)

In [18]:
# Preview the first five rows of the 2022 generation-per-unit frame
generation_2022_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Amercoeur 1 R GT,Amercoeur 1 R ST,Belwind Phase 1,COO 1 T,COO 2 T,COO 3 T,COO 4 T,COO 5 T,COO 6 T,...,TIHANGE 1N,TIHANGE 1S,TIHANGE 2,TIHANGE 3,Thorntonbank - C-Power - Area NE,Thorntonbank - C-Power - Area SW,VILVOORDE GT,VILVOORDE ST,Zandvliet Power,Zelzate 2 Knippegroen
0,,Fossil Gas,Fossil Gas,Wind Offshore,Hydro Pumped Storage,Hydro Pumped Storage,Hydro Pumped Storage,Hydro Pumped Storage,Hydro Pumped Storage,Hydro Pumped Storage,...,Nuclear,Nuclear,Nuclear,Nuclear,Wind Offshore,Wind Offshore,Fossil Gas,Fossil Gas,Fossil Gas,Fossil Gas
1,2022-01-01 00:00:00+01:00,100.0,94.0,114.0,0.0,0.0,0.0,0.0,0.0,0.0,...,486.0,491.0,1001.0,1033.0,104.0,128.0,0.0,0.0,0.0,302.0
2,2022-01-01 01:00:00+01:00,150.0,110.0,82.0,0.0,0.0,0.0,0.0,0.0,0.0,...,486.0,491.0,1002.0,1033.0,58.0,85.0,0.0,0.0,0.0,302.0
3,2022-01-01 02:00:00+01:00,123.0,101.0,83.0,0.0,0.0,0.0,0.0,0.0,0.0,...,486.0,491.0,1002.0,1034.0,56.0,101.0,0.0,0.0,0.0,299.0
4,2022-01-01 03:00:00+01:00,95.0,93.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,...,486.0,491.0,1002.0,1033.0,67.0,126.0,0.0,0.0,0.0,301.0


In [19]:
# Preview the first five rows of the 2021 generation-per-unit frame
generation_2021_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Amercoeur 1 R GT,Amercoeur 1 R ST,Belwind Phase 1,COO 1 T,COO 2 T,COO 3 T,COO 4 T,COO 5 T,COO 6 T,...,TIHANGE 1N,TIHANGE 1S,TIHANGE 2,TIHANGE 3,Thorntonbank - C-Power - Area NE,Thorntonbank - C-Power - Area SW,VILVOORDE GT,Zandvliet Power,Zelzate 2 Knippegroen,VILVOORDE ST
0,2021-01-01 00:00:00+01:00,21.0,16.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,...,486.0,493.0,0.0,1040.0,2.0,15.0,0.0,129.0,156.0,
1,2021-01-01 01:00:00+01:00,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,...,484.0,494.0,0.0,1040.0,8.0,22.0,0.0,0.0,156.0,
2,2021-01-01 02:00:00+01:00,0.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,...,484.0,494.0,0.0,1040.0,8.0,20.0,0.0,0.0,149.0,
3,2021-01-01 03:00:00+01:00,0.0,0.0,22.0,0.0,0.0,0.0,0.0,0.0,0.0,...,486.0,491.0,0.0,1040.0,9.0,20.0,0.0,0.0,139.0,
4,2021-01-01 04:00:00+01:00,0.0,0.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,...,486.0,491.0,0.0,1039.0,9.0,16.0,0.0,0.0,155.0,


In [20]:
# Preview the first five rows of the 2020 generation-per-unit frame
generation_2020_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Amercoeur 1 R GT,Amercoeur 1 R ST,Belwind Phase 1,COO 1 T,COO 2 T,COO 3 T,COO 4 T,COO 5 T,COO 6 T,...,TIHANGE 1S,TIHANGE 2,TIHANGE 3,Thorntonbank - C-Power - Area NE,Thorntonbank - C-Power - Area SW,VILVOORDE GT,Zandvliet Power,Zelzate 2 Knippegroen,Mermaid Offshore WP,Seastar Offshore WP
0,2020-01-01 00:00:00+01:00,143.0,107.0,31.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1019.0,1040.0,26.0,36.0,0.0,370.0,295.0,,
1,2020-01-01 01:00:00+01:00,155.0,112.0,33.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1018.0,1040.0,31.0,38.0,0.0,373.0,294.0,,
2,2020-01-01 02:00:00+01:00,119.0,101.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1018.0,1039.0,25.0,28.0,0.0,373.0,301.0,,
3,2020-01-01 03:00:00+01:00,159.0,113.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1018.0,1039.0,22.0,21.0,0.0,372.0,295.0,,
4,2020-01-01 04:00:00+01:00,181.0,120.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1018.0,1039.0,16.0,14.0,0.0,376.0,293.0,,


In [21]:
# Preview the first five rows of the 2019 generation-per-unit frame
generation_2019_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Amercoeur 1 R GT,Amercoeur 1 R ST,Belwind Phase 1,COO 1 T,COO 2 T,COO 3 T,COO 4 T,COO 5 T,COO 6 T,...,TIHANGE 1S,TIHANGE 2,TIHANGE 3,Thorntonbank - C-Power - Area NE,Thorntonbank - C-Power - Area SW,VILVOORDE GT,Zandvliet Power,Zelzate 2 Knippegroen,Norther Offshore WP,Northwester 2
0,2019-01-01 00:00:00+01:00,0.0,0.0,69.0,0.0,0.0,0.0,0.0,71.0,0.0,...,492.0,0.0,0.0,44.0,68.0,0.0,94.0,259.0,,
1,2019-01-01 01:00:00+01:00,0.0,0.0,77.0,0.0,0.0,0.0,168.0,179.0,0.0,...,493.0,0.0,0.0,47.0,71.0,0.0,0.0,253.0,,
2,2019-01-01 02:00:00+01:00,0.0,0.0,85.0,0.0,0.0,0.0,154.0,151.0,0.0,...,492.0,0.0,9.0,48.0,82.0,0.0,0.0,280.0,,
3,2019-01-01 03:00:00+01:00,0.0,0.0,78.0,0.0,0.0,0.0,1.0,4.0,0.0,...,492.0,0.0,39.0,44.0,85.0,0.0,0.0,270.0,,
4,2019-01-01 04:00:00+01:00,0.0,0.0,96.0,0.0,0.0,0.0,0.0,0.0,0.0,...,492.0,0.0,41.0,50.0,82.0,0.0,0.0,269.0,,


In [22]:
# Preview the first five rows of the 2018 generation-per-unit frame
generation_2018_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Amercoeur 1 R GT,Amercoeur 1 R ST,Belwind Phase 1,COO 1 T,COO 2 T,COO 3 T,COO 4 T,COO 5 T,COO 6 T,...,TIHANGE 3,Thorntonbank - C-Power - Area NE,Thorntonbank - C-Power - Area SW,Zandvliet Power,Zelzate 2 Knippegroen,Rentel Offshore WP,EDF Luminus Seraing GT1,EDF Luminus Seraing GT2,SERAING TV,VILVOORDE GT
0,2018-01-01 00:00:00+01:00,0.0,0.0,149.0,0.0,0.0,0.0,0.0,58.0,109.0,...,1041.0,145.0,163.0,0.0,250.0,,,,,
1,2018-01-01 01:00:00+01:00,0.0,0.0,154.0,0.0,0.0,0.0,0.0,0.0,2.0,...,1042.0,145.0,160.0,0.0,263.0,,,,,
2,2018-01-01 02:00:00+01:00,0.0,0.0,153.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1042.0,141.0,157.0,0.0,268.0,,,,,
3,2018-01-01 03:00:00+01:00,0.0,0.0,154.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1042.0,138.0,157.0,0.0,257.0,,,,,
4,2018-01-01 04:00:00+01:00,0.0,0.0,154.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1042.0,136.0,156.0,0.0,257.0,,,,,


In [23]:
# Preview the first five rows of the 2017 generation-per-unit frame
generation_2017_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Amercoeur 1 R GT,Amercoeur 1 R ST,Belwind Phase 1,COO 1 T,COO 2 T,COO 3 T,COO 4 T,COO 5 T,COO 6 T,...,Scheldelaan Exxonmobil,T-power Beringen,TIHANGE 1N,TIHANGE 1S,TIHANGE 2,TIHANGE 3,Thorntonbank - C-Power - Area NE,Thorntonbank - C-Power - Area SW,Zandvliet Power,Zelzate 2 Knippegroen
0,2017-01-01 00:00:00+01:00,193.0,127.0,115.0,0.0,0.0,0.0,0.0,0.0,0.0,...,124.0,303.0,0.0,0.0,1025.0,1044.0,41.0,78.0,112.0,275.0
1,2017-01-01 01:00:00+01:00,230.0,139.0,111.0,0.0,0.0,89.0,138.0,0.0,0.0,...,120.0,397.0,0.0,0.0,1025.0,1045.0,44.0,82.0,0.0,252.0
2,2017-01-01 02:00:00+01:00,209.0,133.0,114.0,0.0,0.0,0.0,33.0,0.0,0.0,...,112.0,408.0,0.0,0.0,1025.0,1045.0,48.0,82.0,0.0,277.0
3,2017-01-01 03:00:00+01:00,193.0,125.0,101.0,0.0,0.0,0.0,0.0,0.0,0.0,...,112.0,408.0,0.0,0.0,1024.0,1045.0,53.0,75.0,0.0,299.0
4,2017-01-01 04:00:00+01:00,212.0,133.0,131.0,0.0,0.0,0.0,0.0,0.0,0.0,...,118.0,396.0,0.0,0.0,1025.0,1045.0,66.0,105.0,0.0,302.0


In [24]:
# Stack the yearly generation-per-unit frames into one frame (2022 first, 2017 last)
generation_df = pd.concat(
    [
        generation_2022_df,
        generation_2021_df,
        generation_2020_df,
        generation_2019_df,
        generation_2018_df,
        generation_2017_df,
    ]
)

In [25]:
# Print the shape of each yearly frame, then of the concatenated frame
print(
    '\n'.join(
        str(frame.shape)
        for frame in (
            generation_2022_df,
            generation_2021_df,
            generation_2020_df,
            generation_2019_df,
            generation_2018_df,
            generation_2017_df,
            generation_df,
        )
    )
)

(8761, 48)
(8760, 48)
(8784, 47)
(8760, 45)
(8760, 43)
(8760, 40)
(52585, 50)


## Generation per production type

In [26]:
# Load the yearly generation-per-production-type files, newest year first.
# NOTE(review): 2022 is read without skiprows while the other years skip file row 1,
# which matches the one extra row in the 2022 shape -- confirm this is intended.
production_type_2022_df = pd.read_csv(
    './Data/Raw/production_type_2022.csv', sep=';', header=0
)
(
    production_type_2021_df,
    production_type_2020_df,
    production_type_2019_df,
    production_type_2018_df,
    production_type_2017_df,
) = (
    pd.read_csv(f'./Data/Raw/production_type_{yr}.csv', sep=';', header=0, skiprows=[1])
    for yr in (2021, 2020, 2019, 2018, 2017)
)

In [27]:
# Stack the yearly production-type frames into one frame (2022 first, 2017 last)
production_df = pd.concat(
    [
        production_type_2022_df,
        production_type_2021_df,
        production_type_2020_df,
        production_type_2019_df,
        production_type_2018_df,
        production_type_2017_df,
    ]
)

In [28]:
# Print the shape of each yearly frame, then of the concatenated frame
print(
    *(
        frame.shape
        for frame in (
            production_type_2022_df,
            production_type_2021_df,
            production_type_2020_df,
            production_type_2019_df,
            production_type_2018_df,
            production_type_2017_df,
            production_df,
        )
    ),
    sep='\n',
)

(8761, 14)
(8760, 14)
(8784, 14)
(8760, 15)
(8760, 15)
(8760, 15)
(52585, 15)


## Load

In [29]:
# Load the yearly load files, newest year first
(
    load_2022_df,
    load_2021_df,
    load_2020_df,
    load_2019_df,
    load_2018_df,
    load_2017_df,
) = (
    pd.read_csv(f'./Data/Raw/load_{yr}.csv', sep=';', header=0)
    for yr in (2022, 2021, 2020, 2019, 2018, 2017)
)

In [30]:
# Stack the yearly load frames into one frame (2022 first, 2017 last)
load_df = pd.concat(
    [
        load_2022_df,
        load_2021_df,
        load_2020_df,
        load_2019_df,
        load_2018_df,
        load_2017_df,
    ]
)

In [31]:
# Print the shape of each yearly frame, then of the concatenated frame
print(
    '\n'.join(
        str(frame.shape)
        for frame in (
            load_2022_df,
            load_2021_df,
            load_2020_df,
            load_2019_df,
            load_2018_df,
            load_2017_df,
            load_df,
        )
    )
)

(35040, 2)
(35040, 2)
(35136, 2)
(35040, 2)
(35040, 2)
(35040, 2)
(210336, 2)


## Scheduled commercial exchanges

In [32]:
# Scheduled exchanges from Germany and Luxembourg: only 2020-2022 are read here
(
    exchanges_DE_LU_2022_df,
    exchanges_DE_LU_2021_df,
    exchanges_DE_LU_2020_df,
) = (
    pd.read_csv(f'./Data/Raw/scheduled_exchanges_DE_LU_BE_{yr}.csv', sep=';')
    for yr in (2022, 2021, 2020)
)

# Stack the years into one frame, newest first
exchanges_DE_LU_df = pd.concat(
    [
        exchanges_DE_LU_2022_df,
        exchanges_DE_LU_2021_df,
        exchanges_DE_LU_2020_df,
    ]
)

In [33]:
# Print the shape of each yearly frame, then of the concatenated frame
print(
    *(
        frame.shape
        for frame in (
            exchanges_DE_LU_2022_df,
            exchanges_DE_LU_2021_df,
            exchanges_DE_LU_2020_df,
            exchanges_DE_LU_df,
        )
    ),
    sep='\n',
)

(35040, 2)
(35040, 2)
(4224, 2)
(74304, 2)


In [34]:
# Scheduled exchanges from France: read every year, newest first
(
    exchanges_FR_2022_df,
    exchanges_FR_2021_df,
    exchanges_FR_2020_df,
    exchanges_FR_2019_df,
    exchanges_FR_2018_df,
    exchanges_FR_2017_df,
) = (
    pd.read_csv(f'./Data/Raw/scheduled_exchanges_FR_BE_{yr}.csv', sep=';')
    for yr in (2022, 2021, 2020, 2019, 2018, 2017)
)

# Stack the years into one frame
exchanges_FR_df = pd.concat(
    [
        exchanges_FR_2022_df,
        exchanges_FR_2021_df,
        exchanges_FR_2020_df,
        exchanges_FR_2019_df,
        exchanges_FR_2018_df,
        exchanges_FR_2017_df,
    ]
)

In [35]:
# Print the shape of each yearly frame, then of the concatenated frame
print(
    '\n'.join(
        str(frame.shape)
        for frame in (
            exchanges_FR_2022_df,
            exchanges_FR_2021_df,
            exchanges_FR_2020_df,
            exchanges_FR_2019_df,
            exchanges_FR_2018_df,
            exchanges_FR_2017_df,
            exchanges_FR_df,
        )
    )
)

(8760, 2)
(8760, 2)
(8784, 2)
(8760, 2)
(8760, 2)
(8760, 2)
(52584, 2)


In [36]:
# Scheduled exchanges from Great Britain: only 2019-2022 are read here
(
    exchanges_GB_2022_df,
    exchanges_GB_2021_df,
    exchanges_GB_2020_df,
    exchanges_GB_2019_df,
) = (
    pd.read_csv(f'./Data/Raw/scheduled_exchanges_GB_BE_{yr}.csv', sep=';')
    for yr in (2022, 2021, 2020, 2019)
)

# Stack the years into one frame, newest first
exchanges_GB_df = pd.concat(
    [
        exchanges_GB_2022_df,
        exchanges_GB_2021_df,
        exchanges_GB_2020_df,
        exchanges_GB_2019_df,
    ]
)

In [37]:
# Print the shape of each yearly frame, then of the concatenated frame
print(
    *(
        frame.shape
        for frame in (
            exchanges_GB_2022_df,
            exchanges_GB_2021_df,
            exchanges_GB_2020_df,
            exchanges_GB_2019_df,
            exchanges_GB_df,
        )
    ),
    sep='\n',
)

(8760, 2)
(8760, 2)
(8784, 2)
(1080, 2)
(27384, 2)


In [38]:
# Scheduled exchanges from the Netherlands: read every year, newest first
(
    exchanges_NL_2022_df,
    exchanges_NL_2021_df,
    exchanges_NL_2020_df,
    exchanges_NL_2019_df,
    exchanges_NL_2018_df,
    exchanges_NL_2017_df,
) = (
    pd.read_csv(f'./Data/Raw/scheduled_exchanges_NL_BE_{yr}.csv', sep=';')
    for yr in (2022, 2021, 2020, 2019, 2018, 2017)
)

# Stack the years into one frame
exchanges_NL_df = pd.concat(
    [
        exchanges_NL_2022_df,
        exchanges_NL_2021_df,
        exchanges_NL_2020_df,
        exchanges_NL_2019_df,
        exchanges_NL_2018_df,
        exchanges_NL_2017_df,
    ]
)

In [39]:
# Print the shape of each yearly frame, then of the concatenated frame
print(
    '\n'.join(
        str(frame.shape)
        for frame in (
            exchanges_NL_2022_df,
            exchanges_NL_2021_df,
            exchanges_NL_2020_df,
            exchanges_NL_2019_df,
            exchanges_NL_2018_df,
            exchanges_NL_2017_df,
            exchanges_NL_df,
        )
    )
)

(35040, 2)
(35040, 2)
(13245, 2)
(8760, 2)
(8760, 2)
(8760, 2)
(109605, 2)


## Unavailability of generation units

In [40]:
# Load the yearly unavailability files, newest year first
(
    outages_2022_df,
    outages_2021_df,
    outages_2020_df,
    outages_2019_df,
    outages_2018_df,
    outages_2017_df,
) = (
    pd.read_csv(f'./Data/Raw/unavailability_generation_units_{yr}.csv', sep=';')
    for yr in (2022, 2021, 2020, 2019, 2018, 2017)
)

In [41]:
# Print the shape of each yearly unavailability frame
print(
    *(
        frame.shape
        for frame in (
            outages_2022_df,
            outages_2021_df,
            outages_2020_df,
            outages_2019_df,
            outages_2018_df,
            outages_2017_df,
        )
    ),
    sep='\n',
)

(781, 18)
(382, 18)
(442, 18)
(315, 18)
(319, 18)
(224, 18)


In [42]:
# Preview the first five rows of the 2019 unavailability frame
outages_2019_df.head(n=5)

Unnamed: 0,created_doc_time,avail_qty,biddingzone_domain,businesstype,curvetype,docstatus,end,mrid,nominal_power,plant_type,production_resource_id,production_resource_location,production_resource_name,pstn,qty_uom,resolution,revision,start
0,2016-06-02 23:43:10+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2019-06-17 17:00:00+02:00,9MgHJEbgcY_l1PLx7swpfA,230.0,Hydro Pumped Storage,22WCOOXII000070C,Belgium,COO II T,1,MAW,PT1M,3,2019-05-04 08:00:00+02:00
1,2018-02-27 09:16:38+01:00,0.0,BE,Planned maintenance,A03,Cancelled,2019-04-01 23:45:00+02:00,d5_M1cYYKKK1MlP0BbgFFw,422.0,Fossil Gas,22WT-POWE000244W,Belgium,T-power Beringen,1,MAW,PT15M,2,2019-03-29 00:00:00+01:00
2,2018-06-11 10:02:29+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2019-06-17 17:00:00+02:00,FKBElzHuxDvjqUP0YtsULA,230.0,Hydro Pumped Storage,22WCOOXII000070C,Belgium,COO II T,1,MAW,PT60M,2,2019-04-20 08:00:00+02:00
3,2018-06-11 10:02:49+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2019-06-17 17:00:00+02:00,VMwmkkrAIWZqkVHDFSJw5g,230.0,Hydro Pumped Storage,22WCOOXII000070C,Belgium,COO II T,1,MAW,PT60M,3,2019-05-04 08:00:00+02:00
4,2018-06-11 10:02:58+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2019-06-17 17:00:00+02:00,I5DonI_WJcnEnDgkn3PGEw,158.0,Hydro Pumped Storage,22WCOOXIX000067T,Belgium,COO I T,1,MAW,PT60M,3,2019-05-04 08:00:00+02:00


In [43]:
# Preview the first five rows of the 2020 unavailability frame
outages_2020_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,avail_qty,biddingzone_domain,businesstype,curvetype,docstatus,end,mrid,nominal_power,plant_type,production_resource_id,production_resource_location,production_resource_name,pstn,qty_uom,resolution,revision,start
0,2018-05-03 10:47:27+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-09-01 00:00:00+02:00,Rh6XAfuhU5VsHL3xRrCBOQ,167.0,Fossil Gas,22WHERDER0001288,Belgium,HERDERSBRUG STEG,1,MAW,PT60M,2,2020-05-01 00:00:00+02:00
1,2018-09-17 09:22:32+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-06-01 17:00:00+02:00,YGGcCIDsKZA6JXWLj2nGbA,158.0,Hydro Pumped Storage,22WCOOXIX000067T,Belgium,COO I T,1,MAW,PT60M,2,2020-05-16 08:00:00+02:00
2,2018-09-17 09:50:03+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-08-01 00:00:00+02:00,CqDOKPPZuQ9aCD6XH-DCXQ,158.0,Hydro Pumped Storage,22WCOOXIX000067T,Belgium,COO I T,1,MAW,PT60M,2,2020-05-01 00:00:00+02:00
3,2018-09-17 09:50:13+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-08-01 00:00:00+02:00,3MUlcUp-bz_qHpllsp3Lbg,230.0,Hydro Pumped Storage,22WCOOXII000070C,Belgium,COO II T,1,MAW,PT60M,2,2020-05-01 00:00:00+02:00
4,2018-09-17 09:50:14+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-08-01 00:00:00+02:00,5kgFVyVbuZ0YiRXCjzUNdA,230.0,Hydro Pumped Storage,22WCOOXII000070C,Belgium,COO II T,1,MAW,PT60M,2,2020-05-01 00:00:00+02:00


In [44]:
# The 2020, 2021 and 2022 frames are given the 'created_doc_time' column name in
# place of 'Unnamed: 0', so their timestamp column matches the name shown in the
# 2019 preview
for outage_frame in (outages_2020_df, outages_2021_df, outages_2022_df):
    outage_frame.rename(columns={'Unnamed: 0': 'created_doc_time'}, inplace=True)

In [45]:
# Preview the 2020 unavailability frame again to confirm the renamed column
outages_2020_df.head(n=5)

Unnamed: 0,created_doc_time,avail_qty,biddingzone_domain,businesstype,curvetype,docstatus,end,mrid,nominal_power,plant_type,production_resource_id,production_resource_location,production_resource_name,pstn,qty_uom,resolution,revision,start
0,2018-05-03 10:47:27+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-09-01 00:00:00+02:00,Rh6XAfuhU5VsHL3xRrCBOQ,167.0,Fossil Gas,22WHERDER0001288,Belgium,HERDERSBRUG STEG,1,MAW,PT60M,2,2020-05-01 00:00:00+02:00
1,2018-09-17 09:22:32+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-06-01 17:00:00+02:00,YGGcCIDsKZA6JXWLj2nGbA,158.0,Hydro Pumped Storage,22WCOOXIX000067T,Belgium,COO I T,1,MAW,PT60M,2,2020-05-16 08:00:00+02:00
2,2018-09-17 09:50:03+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-08-01 00:00:00+02:00,CqDOKPPZuQ9aCD6XH-DCXQ,158.0,Hydro Pumped Storage,22WCOOXIX000067T,Belgium,COO I T,1,MAW,PT60M,2,2020-05-01 00:00:00+02:00
3,2018-09-17 09:50:13+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-08-01 00:00:00+02:00,3MUlcUp-bz_qHpllsp3Lbg,230.0,Hydro Pumped Storage,22WCOOXII000070C,Belgium,COO II T,1,MAW,PT60M,2,2020-05-01 00:00:00+02:00
4,2018-09-17 09:50:14+02:00,0.0,BE,Planned maintenance,A03,Cancelled,2020-08-01 00:00:00+02:00,5kgFVyVbuZ0YiRXCjzUNdA,230.0,Hydro Pumped Storage,22WCOOXII000070C,Belgium,COO II T,1,MAW,PT60M,2,2020-05-01 00:00:00+02:00


In [46]:
# Stack the yearly unavailability frames into one frame (2022 first, 2017 last)
outages_df = pd.concat(
    [
        outages_2022_df,
        outages_2021_df,
        outages_2020_df,
        outages_2019_df,
        outages_2018_df,
        outages_2017_df,
    ]
)

In [47]:
# Print the shape of the concatenated unavailability frame
print(f'{outages_df.shape}')

(2463, 18)


# 3. Write to csv

In [48]:
# Write every concatenated dataframe to ./Data/Initial as '<stem>_full.csv'
full_frames = {
    'day_ahead_prices': day_ahead_df,
    'generation': generation_df,
    'production_type': production_df,
    'load': load_df,
    'scheduled_exchanges_DE_LU_BE': exchanges_DE_LU_df,
    'scheduled_exchanges_FR_BE': exchanges_FR_df,
    'scheduled_exchanges_GB_BE': exchanges_GB_df,
    'scheduled_exchanges_NL_BE': exchanges_NL_df,
    'outages': outages_df,
}
for file_stem, full_frame in full_frames.items():
    full_frame.to_csv(f'./Data/Initial/{file_stem}_full.csv', sep=';')