## Data aggregation of energy market prices in bidding zone NO1 (Norway Østlandet) and neighbouring zones for the year of 2022

### Dependencies

In [230]:
# importing dependencies to aggregate dataset
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from functools import reduce # used to merge the dataset

### Aggregation of pricing data
Pricing data is of spot (day-ahead) market prices in the currency Euro per MWh

Importing price data from each zone

In [231]:
# Day-ahead price exports, one CSV file per bidding zone.
price_csv_paths = (
    "../datasets/prices/NO1 Day-ahead Prices_202201010000-202301010000.csv",
    "../datasets/prices/NO2 Day-ahead Prices_202201010000-202301010000.csv",
    "../datasets/prices/NO3 Day-ahead Prices_202201010000-202301010000.csv",
    "../datasets/prices/NO5 Day-ahead Prices_202201010000-202301010000.csv",
    "../datasets/prices/SE3 Day-ahead Prices_202201010000-202301010000.csv",
)

# Read every export into its own dataframe; the unpacking order follows the tuple above.
prices_no1, prices_no2, prices_no3, prices_no5, prices_se3 = map(pd.read_csv, price_csv_paths)

# Collect the raw frames so they can be inspected together.
prices_dataframes = [prices_no1, prices_no2, prices_no3, prices_no5, prices_se3]

# Print the shape of each frame: every zone should have one row per hour of the year,
# i.e. 24 * 365 = 8760 rows.
for zone_prices in prices_dataframes:
    print(zone_prices.shape)

(8760, 4)
(8760, 4)
(8760, 4)
(8760, 4)
(8760, 4)


We see that all dataframes contain the correct number of rows.

Printing out first rows of each dataset to see the structure and content of the dataframes.

In [232]:
# preview the first 5 rows of the raw NO1 price data
prices_no1.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO1
0,01.01.2022 00:00 - 01.01.2022 01:00,129.3,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,132.08,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,111.44,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,112.35,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,113.9,EUR,


In [233]:
# preview the first 5 rows of the raw NO2 price data
prices_no2.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO2
0,01.01.2022 00:00 - 01.01.2022 01:00,129.3,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,132.08,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,111.44,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,112.35,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,113.9,EUR,


In [234]:
# preview the first 5 rows of the raw NO3 price data
prices_no3.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO3
0,01.01.2022 00:00 - 01.01.2022 01:00,41.33,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,42.18,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,44.37,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,37.67,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,39.7,EUR,


In [235]:
# preview the first 5 rows of the raw NO5 price data
prices_no5.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO5
0,01.01.2022 00:00 - 01.01.2022 01:00,129.3,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,132.08,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,111.44,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,112.35,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,113.9,EUR,


In [236]:
# preview the first 5 rows of the raw SE3 price data
prices_se3.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,41.33,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,42.18,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,44.37,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,37.67,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,39.7,EUR,


#### preparation for merging of the price data from the different zones:
The common column we will merge the data on is the date time column "MTU (UTC)". The currency column is redundant, since the name of the price column already states the currency, so it can be dropped. Each dataset also has a column named after the zone the data comes from, but that column holds no values. The zone can instead be added to the name of the price column, and the zone column can be dropped.

In [237]:
# Append the bidding zone to each price column name so the columns stay
# distinguishable once the zones are merged into one frame.
generic_price_column = 'Day-ahead Price [EUR/MWh]'
prices_no1 = prices_no1.rename({generic_price_column: 'Day-ahead Price [EUR/MWh] BZN|NO1'}, axis='columns')
prices_no2 = prices_no2.rename({generic_price_column: 'Day-ahead Price [EUR/MWh] BZN|NO2'}, axis='columns')
prices_no3 = prices_no3.rename({generic_price_column: 'Day-ahead Price [EUR/MWh] BZN|NO3'}, axis='columns')
prices_no5 = prices_no5.rename({generic_price_column: 'Day-ahead Price [EUR/MWh] BZN|NO5'}, axis='columns')
prices_se3 = prices_se3.rename({generic_price_column: 'Day-ahead Price [EUR/MWh] BZN|SE3'}, axis='columns')

In [238]:
# Remove the currency column and the empty per-zone column from every price frame.
# Note: running this cell a second time raises a KeyError because the columns are already gone.
prices_no1 = prices_no1.drop(columns=['Currency', 'BZN|NO1'])
prices_no2 = prices_no2.drop(columns=['Currency', 'BZN|NO2'])
prices_no3 = prices_no3.drop(columns=['Currency', 'BZN|NO3'])
prices_no5 = prices_no5.drop(columns=['Currency', 'BZN|NO5'])
prices_se3 = prices_se3.drop(columns=['Currency', 'BZN|SE3'])

In [239]:
# Merge the per-zone price frames into one dataframe, joining on the
# time interval column 'MTU (UTC)' (inner join, the pandas default).
updated_price_dataframes = [prices_no1, prices_no2, prices_no3, prices_no5, prices_se3]

# validate='one_to_one' makes pandas raise a MergeError if an interval occurs
# more than once in either frame, instead of silently multiplying rows.
prices = reduce(
    lambda left, right: pd.merge(left, right, on=['MTU (UTC)'], how='inner', validate='one_to_one'),
    updated_price_dataframes,
)

# show the first 5 rows of the merged dataframe
prices.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,129.3,129.3,41.33,129.3,41.33
1,01.01.2022 01:00 - 01.01.2022 02:00,132.08,132.08,42.18,132.08,42.18
2,01.01.2022 02:00 - 01.01.2022 03:00,111.44,111.44,44.37,111.44,44.37
3,01.01.2022 03:00 - 01.01.2022 04:00,112.35,112.35,37.67,112.35,37.67
4,01.01.2022 04:00 - 01.01.2022 05:00,113.9,113.9,39.7,113.9,39.7


In [240]:
# show the last 5 rows of the merged price dataframe to check where the time range ends
prices.tail()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3
8755,31.12.2022 19:00 - 31.12.2022 20:00,123.61,123.61,23.82,123.61,11.57
8756,31.12.2022 20:00 - 31.12.2022 21:00,121.09,121.09,23.93,121.09,14.89
8757,31.12.2022 21:00 - 31.12.2022 22:00,120.0,120.0,23.75,120.0,9.94
8758,31.12.2022 22:00 - 31.12.2022 23:00,119.98,119.98,23.56,119.98,4.84
8759,31.12.2022 23:00 - 01.01.2023 00:00,119.32,119.32,23.35,119.32,2.01


We can see that the datetimes of the first and last rows in the dataset are correct. We also want to check that the dataframe still has the correct number of rows, to see if the merge was successful.

In [241]:
# shape of the merged price dataframe as (rows, columns); 24 * 365 = 8760 rows are expected
prices.shape

(8760, 6)

The prices dataframe has the correct number of rows (8760), so the aggregation of the price datasets has been successful.

We also want to check if the data has any missing values.

In [242]:
# count the missing values in each column of the merged price dataframe
prices.isna().sum()

MTU (UTC)                            0
Day-ahead Price [EUR/MWh] BZN|NO1    0
Day-ahead Price [EUR/MWh] BZN|NO2    0
Day-ahead Price [EUR/MWh] BZN|NO3    0
Day-ahead Price [EUR/MWh] BZN|NO5    0
Day-ahead Price [EUR/MWh] BZN|SE3    0
dtype: int64

The data has no missing values.

### Aggregation of load data
The load data contains data about the power consumption in each zone

Importing load data from each zone

In [243]:
# Total load exports (day-ahead forecast and actual load), one CSV file per bidding zone.
load_csv_paths = (
    "../datasets/load/NO1 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv",
    "../datasets/load/NO2 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv",
    "../datasets/load/NO3 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv",
    "../datasets/load/NO5 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv",
    "../datasets/load/SE3 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv",
)

# Read every export into its own dataframe; the unpacking order follows the tuple above.
load_no1, load_no2, load_no3, load_no5, load_se3 = map(pd.read_csv, load_csv_paths)

# Collect the raw frames so they can be inspected together.
load_dataframes = [load_no1, load_no2, load_no3, load_no5, load_se3]

# Print the shape of each frame: every zone should have one row per hour of the year,
# i.e. 24 * 365 = 8760 rows.
for zone_load in load_dataframes:
    print(zone_load.shape)

(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)


We see that all dataframes contain the correct number of rows.

Printing out first rows of each dataset to see the structure and content of the dataframes.

In [244]:
# preview the first 5 rows of the raw NO1 load data
load_no1.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO1
0,01.01.2022 00:00 - 01.01.2022 01:00,4492,4701
1,01.01.2022 01:00 - 01.01.2022 02:00,4371,4681
2,01.01.2022 02:00 - 01.01.2022 03:00,4234,4620
3,01.01.2022 03:00 - 01.01.2022 04:00,4225,4645
4,01.01.2022 04:00 - 01.01.2022 05:00,4227,4670


In [245]:
# preview the first 5 rows of the raw NO2 load data
load_no2.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO2
0,01.01.2022 00:00 - 01.01.2022 01:00,4170,4440
1,01.01.2022 01:00 - 01.01.2022 02:00,3796,4480
2,01.01.2022 02:00 - 01.01.2022 03:00,3423,4424
3,01.01.2022 03:00 - 01.01.2022 04:00,3439,4386
4,01.01.2022 04:00 - 01.01.2022 05:00,3469,4375


In [246]:
# preview the first 5 rows of the raw NO3 load data
load_no3.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO3
0,01.01.2022 00:00 - 01.01.2022 01:00,3235,3463
1,01.01.2022 01:00 - 01.01.2022 02:00,3158,3388
2,01.01.2022 02:00 - 01.01.2022 03:00,3047,3387
3,01.01.2022 03:00 - 01.01.2022 04:00,3052,3376
4,01.01.2022 04:00 - 01.01.2022 05:00,3038,3427


In [247]:
# preview the first 5 rows of the raw NO5 load data
load_no5.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO5,Actual Total Load [MW] - BZN|NO5
0,01.01.2022 00:00 - 01.01.2022 01:00,1427,1991
1,01.01.2022 01:00 - 01.01.2022 02:00,1460,1969
2,01.01.2022 02:00 - 01.01.2022 03:00,1493,1911
3,01.01.2022 03:00 - 01.01.2022 04:00,1472,1996
4,01.01.2022 04:00 - 01.01.2022 05:00,1475,1953


In [248]:
# preview the first 5 rows of the raw SE3 load data
load_se3.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|SE3,Actual Total Load [MW] - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,9483,9612
1,01.01.2022 01:00 - 01.01.2022 02:00,9435,9601
2,01.01.2022 02:00 - 01.01.2022 03:00,9402,9552
3,01.01.2022 03:00 - 01.01.2022 04:00,9453,9449
4,01.01.2022 04:00 - 01.01.2022 05:00,9617,9614


#### preparation for merging of the load data from the different zones:
The common values we will merge the data based on is the date time column "Time (UTC)", however this column is named "MTU (UTC)" in the prices' dataset. The time column will be renamed to "MTU (UTC)" so that the datasets can be merged. The column "Day-ahead Total Load Forecast" is redundant since we have the actual total load, so the forecast column will be dropped.

In [249]:
# Remove the day-ahead forecast column from every load frame; only the actual load is kept.
# Note: running this cell a second time raises a KeyError because the columns are already gone.
load_no1 = load_no1.drop(columns=['Day-ahead Total Load Forecast [MW] - BZN|NO1'])
load_no2 = load_no2.drop(columns=['Day-ahead Total Load Forecast [MW] - BZN|NO2'])
load_no3 = load_no3.drop(columns=['Day-ahead Total Load Forecast [MW] - BZN|NO3'])
load_no5 = load_no5.drop(columns=['Day-ahead Total Load Forecast [MW] - BZN|NO5'])
load_se3 = load_se3.drop(columns=['Day-ahead Total Load Forecast [MW] - BZN|SE3'])

In [250]:
# Give the time column the same name as in the price data ('MTU (UTC)')
# so that the load frames can be merged on it.
time_column_rename = {'Time (UTC)': 'MTU (UTC)'}
load_no1 = load_no1.rename(time_column_rename, axis='columns')
load_no2 = load_no2.rename(time_column_rename, axis='columns')
load_no3 = load_no3.rename(time_column_rename, axis='columns')
load_no5 = load_no5.rename(time_column_rename, axis='columns')
load_se3 = load_se3.rename(time_column_rename, axis='columns')

In [251]:
# Merge the per-zone load frames into one dataframe, joining on the
# time interval column 'MTU (UTC)' (inner join, the pandas default).
updated_load_dataframes = [load_no1, load_no2, load_no3, load_no5, load_se3]

# validate='one_to_one' makes pandas raise a MergeError if an interval occurs
# more than once in either frame, instead of silently multiplying rows.
load = reduce(
    lambda left, right: pd.merge(left, right, on=['MTU (UTC)'], how='inner', validate='one_to_one'),
    updated_load_dataframes,
)

# show the first 5 rows of the merged dataframe
load.head()

Unnamed: 0,MTU (UTC),Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,4701,4440,3463,1991,9612
1,01.01.2022 01:00 - 01.01.2022 02:00,4681,4480,3388,1969,9601
2,01.01.2022 02:00 - 01.01.2022 03:00,4620,4424,3387,1911,9552
3,01.01.2022 03:00 - 01.01.2022 04:00,4645,4386,3376,1996,9449
4,01.01.2022 04:00 - 01.01.2022 05:00,4670,4375,3427,1953,9614


In [252]:
# show the last 5 rows of the merged load dataframe to check where the time range ends
load.tail()

Unnamed: 0,MTU (UTC),Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3
8755,31.12.2022 19:00 - 31.12.2022 20:00,4650,4333,3612,2128,9602
8756,31.12.2022 20:00 - 31.12.2022 21:00,4573,4270,3605,2089,9251
8757,31.12.2022 21:00 - 31.12.2022 22:00,4453,4294,3479,2037,9092
8758,31.12.2022 22:00 - 31.12.2022 23:00,4389,4282,3415,1991,8944
8759,31.12.2022 23:00 - 01.01.2023 00:00,4444,4182,3346,2009,8955


We can see that the datetimes of the first and last rows in the dataset are correct. We also want to check that the dataframe still has the correct number of rows, to see if the merge was successful.

In [253]:
# shape of the merged load dataframe as (rows, columns); 24 * 365 = 8760 rows are expected
load.shape

(8760, 6)

The load dataframe has the correct number of rows (8760), so the aggregation of the load datasets has been successful.

We also want to check if the dataset has any missing values:

In [254]:
# count the missing values in each column of the merged load dataframe
load.isna().sum()

MTU (UTC)                           0
Actual Total Load [MW] - BZN|NO1    0
Actual Total Load [MW] - BZN|NO2    0
Actual Total Load [MW] - BZN|NO3    0
Actual Total Load [MW] - BZN|NO5    0
Actual Total Load [MW] - BZN|SE3    0
dtype: int64

There are no missing values

### Aggregation of energy generation data

In [255]:
# Actual generation per production type, one CSV file per bidding zone.
# The NO2 file name has a space before the zone code, unlike the other files.
generation_csv_paths = (
    "../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000NO1.csv",
    "../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000 NO2.csv",
    "../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000NO3.csv",
    "../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000NO5.csv",
    "../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000SE3.csv",
)

# Read every export into its own dataframe; the unpacking order follows the tuple above.
(
    actual_generation_no1,
    actual_generation_no2,
    actual_generation_no3,
    actual_generation_no5,
    actual_generation_se3,
) = map(pd.read_csv, generation_csv_paths)

In [256]:
# preview the first 5 rows of the raw NO1 generation data
actual_generation_no1.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO1,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,878,321,n/e,n/e,n/e,n/e,n/e,8,n/e,172
1,BZN|NO1,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,897,330,n/e,n/e,n/e,n/e,n/e,8,n/e,151
2,BZN|NO1,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,885,275,n/e,n/e,n/e,n/e,n/e,8,n/e,160
3,BZN|NO1,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,890,269,n/e,n/e,n/e,n/e,n/e,8,n/e,148
4,BZN|NO1,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,892,300,n/e,n/e,n/e,n/e,n/e,8,n/e,147


In [257]:
# preview the first 5 rows of the raw NO2 generation data
actual_generation_no2.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO2,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,629,1874,n/e,n/e,n/e,n/e,n/e,5,n/e,244
1,BZN|NO2,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,627,2012,n/e,n/e,n/e,n/e,n/e,4,n/e,115
2,BZN|NO2,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,624,1643,n/e,n/e,n/e,n/e,n/e,4,n/e,55
3,BZN|NO2,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,624,1631,n/e,n/e,n/e,n/e,n/e,4,n/e,61
4,BZN|NO2,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,605,1642,n/e,n/e,n/e,n/e,n/e,4,n/e,88


In [258]:
# preview the first 5 rows of the raw NO3 generation data
actual_generation_no3.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO3,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,389,2166,n/e,n/e,0,12,n/e,3,n/e,219
1,BZN|NO3,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,390,2187,n/e,n/e,0,12,n/e,2,n/e,170
2,BZN|NO3,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,405,2183,n/e,n/e,0,12,n/e,1,n/e,126
3,BZN|NO3,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,393,2076,n/e,n/e,0,12,n/e,0,n/e,81
4,BZN|NO3,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,398,2098,n/e,n/e,0,11,n/e,0,n/e,45


In [259]:
# preview the first 5 rows of the raw NO5 generation data
actual_generation_no5.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO5,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,227,1759,n/e,n/e,n/e,n/e,n/e,19,n/e,n/e
1,BZN|NO5,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,216,1890,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
2,BZN|NO5,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,215,1460,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
3,BZN|NO5,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,209,1394,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
4,BZN|NO5,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,210,1488,n/e,n/e,n/e,n/e,n/e,19,n/e,n/e


In [260]:
# preview the first 5 rows of the raw SE3 generation data
actual_generation_se3.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|SE3,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,818.0,0,5842.0,851.0,n/e,0.0,n/e,n/e,999.0
1,BZN|SE3,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,818.0,0,5839.0,858.0,n/e,0.0,n/e,n/e,958.0
2,BZN|SE3,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,838.0,0,5839.0,859.0,n/e,0.0,n/e,n/e,888.0
3,BZN|SE3,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,842.0,0,5840.0,856.0,n/e,0.0,n/e,n/e,788.0
4,BZN|SE3,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,841.0,0,5839.0,880.0,n/e,0.0,n/e,n/e,734.0


In [261]:
# Renaming every column to have zones instead of using a column for 'Area'
# The new names are assigned by position, so each list must name the columns of its frame
# in their existing order. The time column is renamed from 'MTU' to 'MTU (UTC)' here as well.
actual_generation_no1.columns = ['Area','MTU (UTC)','Biomass - BZN|NO1', 'Fossil Brown coal/Lignite - BZN|NO1', 'Fossil Coal-derived gas - BZN|NO1', 'Fossil Gas - BZN|NO1', 'Fossil Hard coal - BZN|NO1', 'Fossil Oil - BZN|NO1', 'Fossil Oil shale - BZN|NO1', 'Fossil Peat - BZN|NO1', 'Geothermal - BZN|NO1', 'Hydro Pumped Storage Aggregated- BZN|NO1', 'Hydro Pumped Storage Consumption - BZN|NO1', 'Hydro Run-of-river and poundage - BZN|NO1', 'Hydro Water Reservoir - BZN|NO1', 'Marine - BZN|NO1', 'Nuclear - BZN|NO1', 'Other - BZN|NO1', 'Other renewable - BZN|NO1', 'Solar - BZN|NO1', 'Waste - BZN|NO1', 'Wind Offshore - BZN|NO1', 'Wind Onshore - BZN|NO1']
# NOTE(review): in the NO2 list below the run-of-river column is labelled 'BZN|NO1', not 'BZN|NO2'
# (looks like a copy-paste slip). The name therefore collides with the NO1 frame when the zones are
# merged, and the later column selection refers to the resulting
# 'Hydro Run-of-river and poundage - BZN|NO1_x' / '..._y' names. Correcting the label here requires
# updating that selection (and any other use of the suffixed names) at the same time.
actual_generation_no2.columns = ['Area','MTU (UTC)','Biomass - BZN|NO2', 'Fossil Brown coal/Lignite - BZN|NO2', 'Fossil Coal-derived gas - BZN|NO2', 'Fossil Gas - BZN|NO2', 'Fossil Hard coal - BZN|NO2', 'Fossil Oil - BZN|NO2', 'Fossil Oil shale - BZN|NO2', 'Fossil Peat - BZN|NO2', 'Geothermal - BZN|NO2', 'Hydro Pumped Storage Aggregated- BZN|NO2', 'Hydro Pumped Storage Consumption - BZN|NO2', 'Hydro Run-of-river and poundage - BZN|NO1', 'Hydro Water Reservoir - BZN|NO2', 'Marine - BZN|NO2', 'Nuclear - BZN|NO2', 'Other - BZN|NO2', 'Other renewable - BZN|NO2', 'Solar - BZN|NO2', 'Waste - BZN|NO2', 'Wind Offshore - BZN|NO2', 'Wind Onshore - BZN|NO2']
actual_generation_no3.columns = ['Area','MTU (UTC)','Biomass - BZN|NO3', 'Fossil Brown coal/Lignite - BZN|NO3', 'Fossil Coal-derived gas - BZN|NO3', 'Fossil Gas - BZN|NO3', 'Fossil Hard coal - BZN|NO3', 'Fossil Oil - BZN|NO3', 'Fossil Oil shale - BZN|NO3', 'Fossil Peat - BZN|NO3', 'Geothermal - BZN|NO3', 'Hydro Pumped Storage Aggregated- BZN|NO3', 'Hydro Pumped Storage Consumption - BZN|NO3', 'Hydro Run-of-river and poundage - BZN|NO3', 'Hydro Water Reservoir - BZN|NO3', 'Marine - BZN|NO3', 'Nuclear - BZN|NO3', 'Other - BZN|NO3', 'Other renewable - BZN|NO3', 'Solar - BZN|NO3', 'Waste - BZN|NO3', 'Wind Offshore - BZN|NO3', 'Wind Onshore - BZN|NO3']
actual_generation_no5.columns = ['Area','MTU (UTC)','Biomass - BZN|NO5', 'Fossil Brown coal/Lignite - BZN|NO5', 'Fossil Coal-derived gas - BZN|NO5', 'Fossil Gas - BZN|NO5', 'Fossil Hard coal - BZN|NO5', 'Fossil Oil - BZN|NO5', 'Fossil Oil shale - BZN|NO5', 'Fossil Peat - BZN|NO5', 'Geothermal - BZN|NO5', 'Hydro Pumped Storage Aggregated- BZN|NO5', 'Hydro Pumped Storage Consumption - BZN|NO5', 'Hydro Run-of-river and poundage - BZN|NO5', 'Hydro Water Reservoir - BZN|NO5', 'Marine - BZN|NO5', 'Nuclear - BZN|NO5', 'Other - BZN|NO5', 'Other renewable - BZN|NO5', 'Solar - BZN|NO5', 'Waste - BZN|NO5', 'Wind Offshore - BZN|NO5', 'Wind Onshore - BZN|NO5']
actual_generation_se3.columns = ['Area','MTU (UTC)','Biomass - BZN|SE3', 'Fossil Brown coal/Lignite - BZN|SE3', 'Fossil Coal-derived gas - BZN|SE3', 'Fossil Gas - BZN|SE3', 'Fossil Hard coal - BZN|SE3', 'Fossil Oil - BZN|SE3', 'Fossil Oil shale - BZN|SE3', 'Fossil Peat - BZN|SE3', 'Geothermal - BZN|SE3', 'Hydro Pumped Storage Aggregated- BZN|SE3', 'Hydro Pumped Storage Consumption - BZN|SE3', 'Hydro Run-of-river and poundage - BZN|SE3', 'Hydro Water Reservoir - BZN|SE3', 'Marine - BZN|SE3', 'Nuclear - BZN|SE3', 'Other - BZN|SE3', 'Other renewable - BZN|SE3', 'Solar - BZN|SE3', 'Waste - BZN|SE3', 'Wind Offshore - BZN|SE3', 'Wind Onshore - BZN|SE3']
# show the first rows of the NO1 frame to check the new column names
actual_generation_no1.head()

Unnamed: 0,Area,MTU (UTC),Biomass - BZN|NO1,Fossil Brown coal/Lignite - BZN|NO1,Fossil Coal-derived gas - BZN|NO1,Fossil Gas - BZN|NO1,Fossil Hard coal - BZN|NO1,Fossil Oil - BZN|NO1,Fossil Oil shale - BZN|NO1,Fossil Peat - BZN|NO1,...,Hydro Run-of-river and poundage - BZN|NO1,Hydro Water Reservoir - BZN|NO1,Marine - BZN|NO1,Nuclear - BZN|NO1,Other - BZN|NO1,Other renewable - BZN|NO1,Solar - BZN|NO1,Waste - BZN|NO1,Wind Offshore - BZN|NO1,Wind Onshore - BZN|NO1
0,BZN|NO1,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,878,321,n/e,n/e,n/e,n/e,n/e,8,n/e,172
1,BZN|NO1,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,897,330,n/e,n/e,n/e,n/e,n/e,8,n/e,151
2,BZN|NO1,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,885,275,n/e,n/e,n/e,n/e,n/e,8,n/e,160
3,BZN|NO1,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,890,269,n/e,n/e,n/e,n/e,n/e,8,n/e,148
4,BZN|NO1,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,892,300,n/e,n/e,n/e,n/e,n/e,8,n/e,147


In [262]:
# The zone is part of every column name, so the 'Area' column carries no extra information.
# Note: running this cell a second time raises a KeyError because 'Area' is already gone.
actual_generation_no1 = actual_generation_no1.drop(columns='Area')
actual_generation_no2 = actual_generation_no2.drop(columns='Area')
actual_generation_no3 = actual_generation_no3.drop(columns='Area')
actual_generation_no5 = actual_generation_no5.drop(columns='Area')
actual_generation_se3 = actual_generation_se3.drop(columns='Area')

# show the first rows of the NO1 frame without the 'Area' column
actual_generation_no1.head()

Unnamed: 0,MTU (UTC),Biomass - BZN|NO1,Fossil Brown coal/Lignite - BZN|NO1,Fossil Coal-derived gas - BZN|NO1,Fossil Gas - BZN|NO1,Fossil Hard coal - BZN|NO1,Fossil Oil - BZN|NO1,Fossil Oil shale - BZN|NO1,Fossil Peat - BZN|NO1,Geothermal - BZN|NO1,...,Hydro Run-of-river and poundage - BZN|NO1,Hydro Water Reservoir - BZN|NO1,Marine - BZN|NO1,Nuclear - BZN|NO1,Other - BZN|NO1,Other renewable - BZN|NO1,Solar - BZN|NO1,Waste - BZN|NO1,Wind Offshore - BZN|NO1,Wind Onshore - BZN|NO1
0,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,878,321,n/e,n/e,n/e,n/e,n/e,8,n/e,172
1,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,897,330,n/e,n/e,n/e,n/e,n/e,8,n/e,151
2,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,885,275,n/e,n/e,n/e,n/e,n/e,8,n/e,160
3,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,890,269,n/e,n/e,n/e,n/e,n/e,8,n/e,148
4,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,892,300,n/e,n/e,n/e,n/e,n/e,8,n/e,147


In [263]:
# Merge the five per-zone generation frames into one dataframe, joining on the
# time interval column 'MTU (UTC)' (inner join, the pandas default).
actual_generation_dataframes = [actual_generation_no1, actual_generation_no2, actual_generation_no3, actual_generation_no5, actual_generation_se3]

# validate='one_to_one' makes pandas raise a MergeError if an interval occurs
# more than once in either frame, instead of silently multiplying rows.
# Column names shared by two frames (other than the key) get pandas' default
# '_x' / '_y' suffixes in the merged result.
actual_generation = reduce(
    lambda left, right: pd.merge(left, right, on=['MTU (UTC)'], how='inner', validate='one_to_one'),
    actual_generation_dataframes,
)

# show the first 5 rows of the merged dataframe
actual_generation.head()

Unnamed: 0,MTU (UTC),Biomass - BZN|NO1,Fossil Brown coal/Lignite - BZN|NO1,Fossil Coal-derived gas - BZN|NO1,Fossil Gas - BZN|NO1,Fossil Hard coal - BZN|NO1,Fossil Oil - BZN|NO1,Fossil Oil shale - BZN|NO1,Fossil Peat - BZN|NO1,Geothermal - BZN|NO1,...,Hydro Run-of-river and poundage - BZN|SE3,Hydro Water Reservoir - BZN|SE3,Marine - BZN|SE3,Nuclear - BZN|SE3,Other - BZN|SE3,Other renewable - BZN|SE3,Solar - BZN|SE3,Waste - BZN|SE3,Wind Offshore - BZN|SE3,Wind Onshore - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,n/e,818.0,0,5842.0,851.0,n/e,0.0,n/e,n/e,999.0
1,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,n/e,818.0,0,5839.0,858.0,n/e,0.0,n/e,n/e,958.0
2,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,n/e,838.0,0,5839.0,859.0,n/e,0.0,n/e,n/e,888.0
3,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,n/e,842.0,0,5840.0,856.0,n/e,0.0,n/e,n/e,788.0
4,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,n/e,841.0,0,5839.0,880.0,n/e,0.0,n/e,n/e,734.0


We have a lot of columns for production types that are used in other countries and zones but are redundant to us, because there is no power generation from these sources in the zones we are looking at. Therefore, we select only the columns that hold information about actual power generation in each zone.

In [264]:
# Columns to keep: the time interval plus the production types selected for each zone.
# The '..._x' and '..._y' names are the run-of-river columns of NO1 and NO2: both carried
# the 'BZN|NO1' label before the merge, so pandas suffixed them to tell them apart.
selected_generation_columns = [
    'MTU (UTC)',
    # NO1
    'Biomass - BZN|NO1',
    'Fossil Gas - BZN|NO1',
    'Hydro Run-of-river and poundage - BZN|NO1_x',
    'Hydro Water Reservoir - BZN|NO1',
    'Waste - BZN|NO1',
    'Wind Onshore - BZN|NO1',
    # NO2
    'Fossil Gas - BZN|NO2',
    'Hydro Pumped Storage Aggregated- BZN|NO2',
    'Hydro Run-of-river and poundage - BZN|NO1_y',
    'Hydro Water Reservoir - BZN|NO2',
    'Waste - BZN|NO2',
    'Wind Onshore - BZN|NO2',
    # NO3
    'Hydro Pumped Storage Aggregated- BZN|NO3',
    'Hydro Run-of-river and poundage - BZN|NO3',
    'Hydro Water Reservoir - BZN|NO3',
    'Other - BZN|NO3',
    'Other renewable - BZN|NO3',
    'Waste - BZN|NO3',
    'Wind Onshore - BZN|NO3',
    # NO5
    'Fossil Gas - BZN|NO5',
    'Hydro Pumped Storage Aggregated- BZN|NO5',
    'Hydro Run-of-river and poundage - BZN|NO5',
    'Hydro Water Reservoir - BZN|NO5',
    'Waste - BZN|NO5',
    # SE3
    'Fossil Gas - BZN|SE3',
    'Hydro Water Reservoir - BZN|SE3',
    'Nuclear - BZN|SE3',
    'Other - BZN|SE3',
    'Solar - BZN|SE3',
    'Wind Onshore - BZN|SE3',
]

# Take an independent copy so later edits do not touch the merged frame.
actual_generation_selected = actual_generation[selected_generation_columns].copy()

# show the data type of every selected column
actual_generation_selected.dtypes

MTU (UTC)                                       object
Biomass - BZN|NO1                                int64
Fossil Gas - BZN|NO1                             int64
Hydro Run-of-river and poundage - BZN|NO1_x      int64
Hydro Water Reservoir - BZN|NO1                  int64
Waste - BZN|NO1                                  int64
Wind Onshore - BZN|NO1                           int64
Fossil Gas - BZN|NO2                             int64
Hydro Pumped Storage Aggregated- BZN|NO2         int64
Hydro Run-of-river and poundage - BZN|NO1_y      int64
Hydro Water Reservoir - BZN|NO2                  int64
Waste - BZN|NO2                                  int64
Wind Onshore - BZN|NO2                           int64
Hydro Pumped Storage Aggregated- BZN|NO3         int64
Hydro Run-of-river and poundage - BZN|NO3        int64
Hydro Water Reservoir - BZN|NO3                  int64
Other - BZN|NO3                                  int64
Other renewable - BZN|NO3                        int64
Waste - BZ

We can see that the datetime interval values in the generation data end with the suffix "(UTC)". This extra text needs to be removed so that the values match the other datasets and the data can be merged on the time column.

In [265]:
# Remove the trailing "(UTC)" marker and surrounding whitespace from the interval strings
# in column 'MTU (UTC)', so they have the same format as in the price and load data.
# An anchored pattern is used instead of str.rstrip(')(UTC'): rstrip treats its argument as a
# set of characters to strip rather than as a suffix. The vectorised .str methods also pass
# missing values through instead of raising.
actual_generation_selected['MTU (UTC)'] = (
    actual_generation_selected['MTU (UTC)']
    .str.replace(r'\s*\(UTC\)\s*$', '', regex=True)
    .str.strip()
)

# show the first 5 rows of the cleaned dataframe
actual_generation_selected.head()

Unnamed: 0,MTU (UTC),Biomass - BZN|NO1,Fossil Gas - BZN|NO1,Hydro Run-of-river and poundage - BZN|NO1_x,Hydro Water Reservoir - BZN|NO1,Waste - BZN|NO1,Wind Onshore - BZN|NO1,Fossil Gas - BZN|NO2,Hydro Pumped Storage Aggregated- BZN|NO2,Hydro Run-of-river and poundage - BZN|NO1_y,...,Hydro Pumped Storage Aggregated- BZN|NO5,Hydro Run-of-river and poundage - BZN|NO5,Hydro Water Reservoir - BZN|NO5,Waste - BZN|NO5,Fossil Gas - BZN|SE3,Hydro Water Reservoir - BZN|SE3,Nuclear - BZN|SE3,Other - BZN|SE3,Solar - BZN|SE3,Wind Onshore - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,0,0,878,321,8,172,0,1,629,...,66,227,1759,19,0.0,818.0,5842.0,851.0,0.0,999.0
1,01.01.2022 01:00 - 01.01.2022 02:00,0,0,897,330,8,151,0,1,627,...,66,216,1890,20,0.0,818.0,5839.0,858.0,0.0,958.0
2,01.01.2022 02:00 - 01.01.2022 03:00,0,0,885,275,8,160,0,1,624,...,65,215,1460,20,0.0,838.0,5839.0,859.0,0.0,888.0
3,01.01.2022 03:00 - 01.01.2022 04:00,0,0,890,269,8,148,0,1,624,...,65,209,1394,20,0.0,842.0,5840.0,856.0,0.0,788.0
4,01.01.2022 04:00 - 01.01.2022 05:00,0,0,892,300,8,147,0,1,605,...,65,210,1488,19,0.0,841.0,5839.0,880.0,0.0,734.0


In [266]:
# counting the missing (NaN) values in each column
actual_generation_selected.isna().sum()

MTU (UTC)                                      0
Biomass - BZN|NO1                              0
Fossil Gas - BZN|NO1                           0
Hydro Run-of-river and poundage - BZN|NO1_x    0
Hydro Water Reservoir - BZN|NO1                0
Waste - BZN|NO1                                0
Wind Onshore - BZN|NO1                         0
Fossil Gas - BZN|NO2                           0
Hydro Pumped Storage Aggregated- BZN|NO2       0
Hydro Run-of-river and poundage - BZN|NO1_y    0
Hydro Water Reservoir - BZN|NO2                0
Waste - BZN|NO2                                0
Wind Onshore - BZN|NO2                         0
Hydro Pumped Storage Aggregated- BZN|NO3       0
Hydro Run-of-river and poundage - BZN|NO3      0
Hydro Water Reservoir - BZN|NO3                0
Other - BZN|NO3                                0
Other renewable - BZN|NO3                      0
Waste - BZN|NO3                                0
Wind Onshore - BZN|NO3                         0
Fossil Gas - BZN|NO5

In [267]:
# checking the (rows, columns) dimensions of the dataframe
actual_generation_selected.shape

(8760, 31)

### Data Aggregation for import and export


We want to concatenate data for cross border physical flow between NO1 and neighbouring zones(NO2,NO3,NO5,SE3).

In [268]:
# Reading the cross-border physical flow CSV of NO1 and each neighbouring zone
# into its own dataframe
import_export_no1_no2 = pd.read_csv(
    "../datasets/import_and_export/NO1-NO2 Import export Cross-Border Physical Flow_202201010000-202301010000.csv"
)
import_export_no1_no3 = pd.read_csv(
    "../datasets/import_and_export/NO1-NO3 Import export Cross-Border Physical Flow_202201010000-202301010000.csv"
)
import_export_no1_no5 = pd.read_csv(
    "../datasets/import_and_export/NO1-NO5 Import export Cross-Border Physical Flow_202201010000-202301010000.csv"
)
import_export_no1_se3 = pd.read_csv(
    "../datasets/import_and_export/NO1-SE3 Import export Cross-Border Physical Flow_202201010000-202301010000.csv"
)

In the other data files the time column is called MTU (UTC); this is not the case for the import and export data, where it is called Time (UTC). To be consistent, Time (UTC) is renamed to MTU (UTC). To make it clear that a column such as 'BZN|NO2 > BZN|NO1 [MW]' shows the cross-border physical flow, the abbreviation CBF is added as a prefix.

Printing out first rows of each dataset to see the structure

In [269]:
# first rows of the NO1-NO2 import/export dataframe
import_export_no1_no2.head()

Unnamed: 0,Time (UTC),BZN|NO2 > BZN|NO1 [MW],BZN|NO1 > BZN|NO2 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,1242,0
1,01.01.2022 01:00 - 01.01.2022 02:00,1222,0
2,01.01.2022 02:00 - 01.01.2022 03:00,1679,0
3,01.01.2022 03:00 - 01.01.2022 04:00,1668,0
4,01.01.2022 04:00 - 01.01.2022 05:00,1697,0


In [270]:
# first rows of the NO1-NO3 import/export dataframe
import_export_no1_no3.head()

Unnamed: 0,Time (UTC),BZN|NO3 > BZN|NO1 [MW],BZN|NO1 > BZN|NO3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,371,0
1,01.01.2022 01:00 - 01.01.2022 02:00,383,0
2,01.01.2022 02:00 - 01.01.2022 03:00,362,0
3,01.01.2022 03:00 - 01.01.2022 04:00,324,0
4,01.01.2022 04:00 - 01.01.2022 05:00,339,0


In [271]:
# first rows of the NO1-NO5 import/export dataframe
import_export_no1_no5.head()

Unnamed: 0,Time (UTC),BZN|NO5 > BZN|NO1 [MW],BZN|NO1 > BZN|NO5 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,594,0
1,01.01.2022 01:00 - 01.01.2022 02:00,676,0
2,01.01.2022 02:00 - 01.01.2022 03:00,436,0
3,01.01.2022 03:00 - 01.01.2022 04:00,345,0
4,01.01.2022 04:00 - 01.01.2022 05:00,412,0


In [272]:
# first rows of the NO1-SE3 import/export dataframe
import_export_no1_se3.head()

Unnamed: 0,Time (UTC),BZN|SE3 > BZN|NO1 [MW],BZN|NO1 > BZN|SE3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,1065,0
1,01.01.2022 01:00 - 01.01.2022 02:00,927,0
2,01.01.2022 02:00 - 01.01.2022 03:00,748,0
3,01.01.2022 03:00 - 01.01.2022 04:00,918,0
4,01.01.2022 04:00 - 01.01.2022 05:00,826,0


We see that the time column in the import export datasets is named "Time (UTC)" while in the other datasets, the time column is named "MTU (UTC)". The time column will be renamed to "MTU (UTC)" so that the datasets can be merged.

In [273]:
# Renaming the time column to 'MTU (UTC)' and prefixing both flow columns of
# every dataframe with 'CBF' (cross-border physical flow)
import_export_no1_no2 = import_export_no1_no2.rename(columns={
    'Time (UTC)': 'MTU (UTC)',
    'BZN|NO2 > BZN|NO1 [MW]': 'CBF BZN|NO2 > BZN|NO1 [MW]',
    'BZN|NO1 > BZN|NO2 [MW]': 'CBF BZN|NO1 > BZN|NO2 [MW]',
})
import_export_no1_no3 = import_export_no1_no3.rename(columns={
    'Time (UTC)': 'MTU (UTC)',
    'BZN|NO3 > BZN|NO1 [MW]': 'CBF BZN|NO3 > BZN|NO1 [MW]',
    'BZN|NO1 > BZN|NO3 [MW]': 'CBF BZN|NO1 > BZN|NO3 [MW]',
})
import_export_no1_no5 = import_export_no1_no5.rename(columns={
    'Time (UTC)': 'MTU (UTC)',
    'BZN|NO5 > BZN|NO1 [MW]': 'CBF BZN|NO5 > BZN|NO1 [MW]',
    'BZN|NO1 > BZN|NO5 [MW]': 'CBF BZN|NO1 > BZN|NO5 [MW]',
})
import_export_no1_se3 = import_export_no1_se3.rename(columns={
    'Time (UTC)': 'MTU (UTC)',
    'BZN|SE3 > BZN|NO1 [MW]': 'CBF BZN|SE3 > BZN|NO1 [MW]',
    'BZN|NO1 > BZN|SE3 [MW]': 'CBF BZN|NO1 > BZN|SE3 [MW]',
})

Merging all dataframes for import and export


In [274]:
# Collecting the four import/export dataframes and merging them one after the
# other on the shared time column 'MTU (UTC)'
import_export_no1_neighbours_dataframes = [
    import_export_no1_no2,
    import_export_no1_no3,
    import_export_no1_no5,
    import_export_no1_se3,
]
import_export_no1_neighbours = import_export_no1_neighbours_dataframes[0]
for flow_frame in import_export_no1_neighbours_dataframes[1:]:
    import_export_no1_neighbours = pd.merge(
        import_export_no1_neighbours, flow_frame, on=['MTU (UTC)'])

# printing out the (rows, columns) dimensions of the resulting dataframe
import_export_no1_neighbours.shape

(8760, 9)

### Aggregation of Water Reservoirs and Hydro Storage Plants

The water reservoir dataset is measured with a weekly frequency while the other datasets are measured hourly. In order to make the data match the measurement frequency of the other datasets, we will need to upsample the data through interpolation. We will first map each weekly measurement to the first hour of its week, and then interpolate the intermediate measurements. In order to interpolate the measurements of the last week, we will add the measurement for the first week of the succeeding year (2023) to the dataset. This measurement will be dropped from the final dataset. Additionally, the first week of 2022 starts on 3 January 2022, so in order to get the data for the first two days we add the measurement for the last week of 2021. We will use spline interpolation to add smoothness to the interpolated curve, which will be more representative of water level fluctuations than a linear interpolation.

Reading in water levels as CSV with pandas library

In [275]:
# Loading the main (2022) weekly water level dataset of every zone
water_level_NO1 = pd.read_csv(
    "../datasets/water_level/NO1_2022_Water_Reservoirs_and_Hydro_Storage_Plants_202201030000-202301020000.csv")
water_level_NO2 = pd.read_csv(
    "../datasets/water_level/NO2_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")
water_level_NO3 = pd.read_csv(
    "../datasets/water_level/NO3_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")
water_level_NO5 = pd.read_csv(
    "../datasets/water_level/NO5_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")
water_level_SE3 = pd.read_csv(
    "../datasets/water_level/SE3_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")

# Loading the 2021 datasets while skipping file rows 1-51 (row 0 is the header),
# which leaves only the last week if the file holds 52 weekly rows
rows_before_last_week = range(1, 52)
water_level_NO1_2021 = pd.read_csv(
    "../datasets/water_level/NO1_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv",
    skiprows=rows_before_last_week)
water_level_NO2_2021 = pd.read_csv(
    "../datasets/water_level/NO2_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv",
    skiprows=rows_before_last_week)
water_level_NO3_2021 = pd.read_csv(
    "../datasets/water_level/NO3_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv",
    skiprows=rows_before_last_week)
water_level_NO5_2021 = pd.read_csv(
    "../datasets/water_level/NO5_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv",
    skiprows=rows_before_last_week)
water_level_SE3_2021 = pd.read_csv(
    "../datasets/water_level/SE3_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv",
    skiprows=rows_before_last_week)

# Loading only the first data row (the first week) of the 2023 datasets
water_level_NO1_2023 = pd.read_csv(
    "../datasets/water_level/NO1_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv",
    nrows=1)
water_level_NO2_2023 = pd.read_csv(
    "../datasets/water_level/NO2_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv",
    nrows=1)
water_level_NO3_2023 = pd.read_csv(
    "../datasets/water_level/NO3_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv",
    nrows=1)
water_level_NO5_2023 = pd.read_csv(
    "../datasets/water_level/NO5_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv",
    nrows=1)
water_level_SE3_2023 = pd.read_csv(
    "../datasets/water_level/SE3_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv",
    nrows=1)

# Relabelling the single 2023 row of every zone as 'Week 53'; its original
# week label would otherwise be confused with the first week of 2022
for next_year_frame in (water_level_NO1_2023, water_level_NO2_2023,
                        water_level_NO3_2023, water_level_NO5_2023,
                        water_level_SE3_2023):
    next_year_frame.at[0, 'Week'] = 'Week 53'

# Relabelling the single 2021 row of every zone as 'Week 0', so it sorts
# before the first week of 2022
for previous_year_frame in (water_level_NO1_2021, water_level_NO2_2021,
                            water_level_NO3_2021, water_level_NO5_2021,
                            water_level_SE3_2021):
    previous_year_frame.at[0, 'Week'] = 'Week 0'


Checking to see if we have correctly selected the last week of 2021 and first week of 2023

In [276]:
# Checking that the dataframe contains only the first week of 2023 (relabelled 'Week 53')
water_level_NO1_2023.head()

Unnamed: 0,Week,Stored Energy Value [MWh] - BZN|NO1
0,Week 53,3741135


In [277]:
# Checking that the dataframe contains only the last week of 2021 (relabelled 'Week 0')
water_level_NO1_2021.head()

Unnamed: 0,Week,Stored Energy Value [MWh] - BZN|NO1
0,Week 0,3204274


Concatenating the datasets and ignoring the existing index to create a new one. The concatenation happens in the order in which the dataframes are listed in pd.concat([df1, df2, df3]).

In [278]:
def _with_boundary_weeks(previous_year, current_year, next_year):
    """Return the three frames stacked in the given order with a fresh index.

    ignore_index=True discards the original row labels, so the appended rows
    are renumbered consecutively. Rows whose 'Week' label already occurs
    earlier are dropped (the first occurrence is kept), so re-running this
    cell does not add the 'Week 0' and 'Week 53' rows a second time.
    """
    stacked = pd.concat([previous_year, current_year, next_year],
                        ignore_index=True, axis=0)
    return stacked.drop_duplicates(subset='Week', ignore_index=True)


# Adding the last week of 2021 in front of, and the first week of 2023 behind,
# the 2022 dataset of every zone
water_level_NO1 = _with_boundary_weeks(water_level_NO1_2021, water_level_NO1, water_level_NO1_2023)
water_level_NO2 = _with_boundary_weeks(water_level_NO2_2021, water_level_NO2, water_level_NO2_2023)
water_level_NO3 = _with_boundary_weeks(water_level_NO3_2021, water_level_NO3, water_level_NO3_2023)
water_level_NO5 = _with_boundary_weeks(water_level_NO5_2021, water_level_NO5, water_level_NO5_2023)
water_level_SE3 = _with_boundary_weeks(water_level_SE3_2021, water_level_SE3, water_level_SE3_2023)

Testing to see if the concatenation is successful by looking at the first and last rows

In [279]:
# printing out the first 5 rows of the resulting dataset to check that the
# last week of 2021 ('Week 0') was added as the first row
water_level_NO1.head()

Unnamed: 0,Week,Stored Energy Value [MWh] - BZN|NO1
0,Week 0,3204274
1,Week 1,2955197
2,Week 2,2754532
3,Week 3,2556285
4,Week 4,2388301


In [280]:
# printing out the last 5 rows of the resulting dataset to check that the
# first week of 2023 ('Week 53') was added as the last row
water_level_NO1.tail()

Unnamed: 0,Week,Stored Energy Value [MWh] - BZN|NO1
49,Week 49,4547299
50,Week 50,4240847
51,Week 51,4054917
52,Week 52,3915448
53,Week 53,3741135


We can see that the extra weeks have been added correctly and can merge the datasets from the different zones into a larger dataset

In [281]:
# Merging the per-zone water level dataframes one after the other on the
# shared 'Week' column
zone_water_levels = [water_level_NO1, water_level_NO2, water_level_NO3, water_level_NO5, water_level_SE3]
water_level_dataframes = zone_water_levels[0]
for zone_frame in zone_water_levels[1:]:
    water_level_dataframes = pd.merge(water_level_dataframes, zone_frame, on=['Week'])

# printing out the first 5 rows of the merged dataframe
water_level_dataframes.head()

Unnamed: 0,Week,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,Week 0,3204274,16810140,5688093,8631916,1656000
1,Week 1,2955197,16214535,5314677,8130917,1566000
2,Week 2,2754532,15749289,5423690,7834726,1475000
3,Week 3,2556285,15047132,5363892,7445563,1415000
4,Week 4,2388301,14733590,5186818,7172051,1361000


We will now convert the week numbers into the datetime corresponding with the first day of each week, starting on 27th of december 2021 and ending on the 2nd of January 2023. This is done with pd.date_range which returns a fixed frequency DatetimeIndex

In [282]:
# Building one timestamp per weekly measurement: every 7th day from
# 2021-12-27 up to and including 2023-01-02
start_date = '2021-12-27'
end_date = '2023-01-02'
weekly_start_dates = pd.date_range(start=start_date, end=end_date, freq='7D')
week_date_timeframe = pd.DataFrame({'Week_start_date': weekly_start_dates})
week_date_timeframe.head()

Unnamed: 0,Week_start_date
0,2021-12-27
1,2022-01-03
2,2022-01-10
3,2022-01-17
4,2022-01-24


Now that we have a dataset containing corresponding datetimes to the weekly water levels, we can now join the two dataframes, and we can drop the 'Week' column as it is no longer of any use.

In [283]:
# Joining week_date_timeframe and water_level_dataframes side by side.
# pd.concat(axis=1) aligns the two frames on their row labels, so each week
# start date only lines up with the right measurement when both frames have
# the same number of rows; a mismatch would otherwise be padded with NaN
# without any error.
assert len(week_date_timeframe) == len(water_level_dataframes), (
    "week_date_timeframe and water_level_dataframes must have the same number of rows"
)
joined_water_week = pd.concat([week_date_timeframe, water_level_dataframes], axis=1)
joined_water_week.head()

Unnamed: 0,Week_start_date,Week,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,2021-12-27,Week 0,3204274,16810140,5688093,8631916,1656000
1,2022-01-03,Week 1,2955197,16214535,5314677,8130917,1566000
2,2022-01-10,Week 2,2754532,15749289,5423690,7834726,1475000
3,2022-01-17,Week 3,2556285,15047132,5363892,7445563,1415000
4,2022-01-24,Week 4,2388301,14733590,5186818,7172051,1361000


In [284]:
# Dropping the 'Week' column. The result is assigned back instead of mutating
# in place, and errors='ignore' makes the cell safe to run again after the
# column has already been removed (it would otherwise raise a KeyError).
joined_water_week = joined_water_week.drop(columns='Week', errors='ignore')

# printing out the first 5 rows of the resulting dataframe
joined_water_week.head()

Unnamed: 0,Week_start_date,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,2021-12-27,3204274,16810140,5688093,8631916,1656000
1,2022-01-03,2955197,16214535,5314677,8130917,1566000
2,2022-01-10,2754532,15749289,5423690,7834726,1475000
3,2022-01-17,2556285,15047132,5363892,7445563,1415000
4,2022-01-24,2388301,14733590,5186818,7172051,1361000


Setting 'Week_start_date' as the index so that the whole dataframe can be treated as a datetime object

In [285]:
# Moving the 'Week_start_date' column into the index of the dataframe
joined_water_week = joined_water_week.set_index('Week_start_date')

In [286]:
# printing out the resulting index, which now holds the week start dates
joined_water_week.index

DatetimeIndex(['2021-12-27', '2022-01-03', '2022-01-10', '2022-01-17',
               '2022-01-24', '2022-01-31', '2022-02-07', '2022-02-14',
               '2022-02-21', '2022-02-28', '2022-03-07', '2022-03-14',
               '2022-03-21', '2022-03-28', '2022-04-04', '2022-04-11',
               '2022-04-18', '2022-04-25', '2022-05-02', '2022-05-09',
               '2022-05-16', '2022-05-23', '2022-05-30', '2022-06-06',
               '2022-06-13', '2022-06-20', '2022-06-27', '2022-07-04',
               '2022-07-11', '2022-07-18', '2022-07-25', '2022-08-01',
               '2022-08-08', '2022-08-15', '2022-08-22', '2022-08-29',
               '2022-09-05', '2022-09-12', '2022-09-19', '2022-09-26',
               '2022-10-03', '2022-10-10', '2022-10-17', '2022-10-24',
               '2022-10-31', '2022-11-07', '2022-11-14', '2022-11-21',
               '2022-11-28', '2022-12-05', '2022-12-12', '2022-12-19',
               '2022-12-26', '2023-01-02'],
              dtype='datetime64[n

In [287]:
# printing out the first 5 rows of the dataframe, now indexed by 'Week_start_date'
joined_water_week.head()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-27,3204274,16810140,5688093,8631916,1656000
2022-01-03,2955197,16214535,5314677,8130917,1566000
2022-01-10,2754532,15749289,5423690,7834726,1475000
2022-01-17,2556285,15047132,5363892,7445563,1415000
2022-01-24,2388301,14733590,5186818,7172051,1361000


Upsampling the data by changing the frequency from a 7-day frequency to an hour based one.

In [288]:
# Upsampling joined_water_week from its weekly index to an hourly one; asfreq
# is called without a fill method, so the newly created hours hold NaN
joined_water_week = joined_water_week.asfreq(freq='H')

In [289]:
# printing out the first 5 rows of the upsampled dataframe
joined_water_week.head()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-27 00:00:00,3204274.0,16810140.0,5688093.0,8631916.0,1656000.0
2021-12-27 01:00:00,,,,,
2021-12-27 02:00:00,,,,,
2021-12-27 03:00:00,,,,,
2021-12-27 04:00:00,,,,,


In [290]:
# printing out the last 5 rows of the upsampled dataframe
joined_water_week.tail()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-01 20:00:00,,,,,
2023-01-01 21:00:00,,,,,
2023-01-01 22:00:00,,,,,
2023-01-01 23:00:00,,,,,
2023-01-02 00:00:00,3741135.0,21066642.0,4657760.0,11620688.0,1584000.0


We can see that the data has been upsampled, and we can now interpolate the data. We will use spline interpolation to generate polynomial datapoints while requiring a lower polynomial degree. The classical approach is to use polynomials of exactly degree 3 (cubic splines). Source: Erwin Kreyszig (2005). Advanced Engineering Mathematics (9 ed.). Wiley. p. 816. ISBN 9780471488859.

In [291]:
# Filling the missing hourly values with spline interpolation of order 3
# (cubic, i.e. 3rd degree polynomial pieces)
spline_order = 3
interpolated_df = joined_water_week.interpolate(method='spline', order=spline_order)

In [292]:
# printing out the first 5 rows of the interpolated dataframe
interpolated_df.head()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-27 00:00:00,3204274.0,16810140.0,5688093.0,8631916.0,1656000.0
2021-12-27 01:00:00,3202495.0,16804770.0,5682780.0,8627389.0,1655553.0
2021-12-27 02:00:00,3200721.0,16799430.0,5677514.0,8622887.0,1655104.0
2021-12-27 03:00:00,3198951.0,16794120.0,5672294.0,8618408.0,1654654.0
2021-12-27 04:00:00,3197185.0,16788850.0,5667121.0,8613954.0,1654202.0


In [293]:
# printing out the last 5 rows of the interpolated dataframe
interpolated_df.tail()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-01 20:00:00,3746225.0,21088070.0,4672723.0,11635910.0,1585083.0
2023-01-01 21:00:00,3744958.0,21082800.0,4669017.0,11632140.0,1584819.0
2023-01-01 22:00:00,3743687.0,21077470.0,4665288.0,11628350.0,1584551.0
2023-01-01 23:00:00,3742413.0,21072080.0,4661536.0,11624530.0,1584278.0
2023-01-02 00:00:00,3741135.0,21066640.0,4657760.0,11620690.0,1584000.0


In [294]:
# counting the missing values that remain in each column after interpolation
interpolated_df.isna().sum()

Stored Energy Value [MWh] - BZN|NO1    0
Stored Energy Value [MWh] - BZN|NO2    0
Stored Energy Value [MWh] - BZN|NO3    0
Stored Energy Value [MWh] - BZN|NO5    0
Stored Energy Value [MWh] - BZN|SE3    0
dtype: int64

The data above includes the timestamps for the last week of 2021 and the first week of 2023. To make the water level dataframe match the other dataframes, we will filter out the data from the years 2021 and 2023.

In [295]:
# Keeping only the rows whose timestamp lies within 2022 (both bounds
# inclusive), which removes the hours belonging to 2021 and 2023
first_hour_2022 = '2022-01-01 00:00:00'
last_hour_2022 = '2022-12-31 23:00:00'
within_2022 = (interpolated_df.index >= first_hour_2022) & (interpolated_df.index <= last_hour_2022)
filtering_interpolation_df = interpolated_df.loc[within_2022]

The other datasets have a numerical index and the time as a feature; we therefore reset the index to match the other datasets.

In [296]:
# Turning the datetime index back into a regular column and naming that
# column 'MTU (UTC)', like the time column of the other datasets
water_reservoir = (
    filtering_interpolation_df
    .reset_index()
    .rename(columns={'Week_start_date': 'MTU (UTC)'})
)
water_reservoir.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 0 to 8759
Data columns (total 6 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   MTU (UTC)                            8760 non-null   object 
 1   Stored Energy Value [MWh] - BZN|NO1  8760 non-null   float64
 2   Stored Energy Value [MWh] - BZN|NO2  8760 non-null   float64
 3   Stored Energy Value [MWh] - BZN|NO3  8760 non-null   float64
 4   Stored Energy Value [MWh] - BZN|NO5  8760 non-null   float64
 5   Stored Energy Value [MWh] - BZN|SE3  8760 non-null   float64
dtypes: float64(5), object(1)
memory usage: 410.8+ KB


We now have the finished water_reservoir dataframe which we can merge with the other datasets

In [297]:
# printing out the first 5 rows of the finished water_reservoir dataframe
water_reservoir.head()

Unnamed: 0,MTU (UTC),Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,2022-01-01 00:00:00,3018018.0,16339420.0,5335153.0,8232289.0,1593721.0
1,2022-01-01 01:00:00,3016657.0,16336600.0,5334192.0,8229931.0,1593152.0
2,2022-01-01 02:00:00,3015298.0,16333780.0,5333256.0,8227586.0,1592581.0
3,2022-01-01 03:00:00,3013942.0,16330980.0,5332346.0,8225252.0,1592011.0
4,2022-01-01 04:00:00,3012588.0,16328190.0,5331461.0,8222931.0,1591439.0


### Aggregating all sub-datasets
Now that we have aggregated each of the sub-datasets, we can merge them together into a larger dataset that can be used to train time series electricity price prediction models. We will wait with adding the water_reservoir dataframe until the time-range column 'MTU (UTC)' of the other datasets has been converted to a datetime column.

In [298]:
# displaying the merge of prices and load on 'MTU (UTC)' (the result is not stored)
pd.merge(prices, load, on=['MTU (UTC)'])

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,129.30,129.30,41.33,129.30,41.33,4701,4440,3463,1991,9612
1,01.01.2022 01:00 - 01.01.2022 02:00,132.08,132.08,42.18,132.08,42.18,4681,4480,3388,1969,9601
2,01.01.2022 02:00 - 01.01.2022 03:00,111.44,111.44,44.37,111.44,44.37,4620,4424,3387,1911,9552
3,01.01.2022 03:00 - 01.01.2022 04:00,112.35,112.35,37.67,112.35,37.67,4645,4386,3376,1996,9449
4,01.01.2022 04:00 - 01.01.2022 05:00,113.90,113.90,39.70,113.90,39.70,4670,4375,3427,1953,9614
...,...,...,...,...,...,...,...,...,...,...,...
8755,31.12.2022 19:00 - 31.12.2022 20:00,123.61,123.61,23.82,123.61,11.57,4650,4333,3612,2128,9602
8756,31.12.2022 20:00 - 31.12.2022 21:00,121.09,121.09,23.93,121.09,14.89,4573,4270,3605,2089,9251
8757,31.12.2022 21:00 - 31.12.2022 22:00,120.00,120.00,23.75,120.00,9.94,4453,4294,3479,2037,9092
8758,31.12.2022 22:00 - 31.12.2022 23:00,119.98,119.98,23.56,119.98,4.84,4389,4282,3415,1991,8944


In [299]:
# displaying the merge of generation and import/export data on 'MTU (UTC)' (the result is not stored)
pd.merge(actual_generation_selected, import_export_no1_neighbours, on=['MTU (UTC)'])

Unnamed: 0,MTU (UTC),Biomass - BZN|NO1,Fossil Gas - BZN|NO1,Hydro Run-of-river and poundage - BZN|NO1_x,Hydro Water Reservoir - BZN|NO1,Waste - BZN|NO1,Wind Onshore - BZN|NO1,Fossil Gas - BZN|NO2,Hydro Pumped Storage Aggregated- BZN|NO2,Hydro Run-of-river and poundage - BZN|NO1_y,...,Solar - BZN|SE3,Wind Onshore - BZN|SE3,CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,0,0,878,321,8,172,0,1,629,...,0.0,999.0,1242,0,371,0,594,0,1065,0
1,01.01.2022 01:00 - 01.01.2022 02:00,0,0,897,330,8,151,0,1,627,...,0.0,958.0,1222,0,383,0,676,0,927,0
2,01.01.2022 02:00 - 01.01.2022 03:00,0,0,885,275,8,160,0,1,624,...,0.0,888.0,1679,0,362,0,436,0,748,0
3,01.01.2022 03:00 - 01.01.2022 04:00,0,0,890,269,8,148,0,1,624,...,0.0,788.0,1668,0,324,0,345,0,918,0
4,01.01.2022 04:00 - 01.01.2022 05:00,0,0,892,300,8,147,0,1,605,...,0.0,734.0,1697,0,339,0,412,0,826,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,31.12.2022 19:00 - 31.12.2022 20:00,0,0,992,621,8,74,0,1,823,...,0.0,1499.0,621,0,146,0,43,0,2083,0
8756,31.12.2022 20:00 - 31.12.2022 21:00,0,0,980,610,8,55,0,1,828,...,0.0,1282.0,595,0,124,0,24,0,2097,0
8757,31.12.2022 21:00 - 31.12.2022 22:00,0,0,980,588,8,67,0,1,814,...,0.0,1168.0,608,0,91,0,41,0,2007,0
8758,31.12.2022 22:00 - 31.12.2022 23:00,0,0,989,585,7,69,0,0,814,...,0.0,1033.0,504,0,90,0,0,29,2119,0


In [300]:
# Combining prices, load, actual_generation_selected and
# import_export_no1_neighbours into one dataframe by merging them
# one after the other on the shared time column 'MTU (UTC)'

# the dataframes to combine, in merge order
no1_dataframes = [prices, load, actual_generation_selected, import_export_no1_neighbours]

# starting from the first dataframe and merging each following one onto it
no1 = no1_dataframes[0]
for sub_dataset in no1_dataframes[1:]:
    no1 = pd.merge(no1, sub_dataset, on=['MTU (UTC)'])

# printing out the resulting dataframe
no1

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,...,Solar - BZN|SE3,Wind Onshore - BZN|SE3,CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,129.30,129.30,41.33,129.30,41.33,4701,4440,3463,1991,...,0.0,999.0,1242,0,371,0,594,0,1065,0
1,01.01.2022 01:00 - 01.01.2022 02:00,132.08,132.08,42.18,132.08,42.18,4681,4480,3388,1969,...,0.0,958.0,1222,0,383,0,676,0,927,0
2,01.01.2022 02:00 - 01.01.2022 03:00,111.44,111.44,44.37,111.44,44.37,4620,4424,3387,1911,...,0.0,888.0,1679,0,362,0,436,0,748,0
3,01.01.2022 03:00 - 01.01.2022 04:00,112.35,112.35,37.67,112.35,37.67,4645,4386,3376,1996,...,0.0,788.0,1668,0,324,0,345,0,918,0
4,01.01.2022 04:00 - 01.01.2022 05:00,113.90,113.90,39.70,113.90,39.70,4670,4375,3427,1953,...,0.0,734.0,1697,0,339,0,412,0,826,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,31.12.2022 19:00 - 31.12.2022 20:00,123.61,123.61,23.82,123.61,11.57,4650,4333,3612,2128,...,0.0,1499.0,621,0,146,0,43,0,2083,0
8756,31.12.2022 20:00 - 31.12.2022 21:00,121.09,121.09,23.93,121.09,14.89,4573,4270,3605,2089,...,0.0,1282.0,595,0,124,0,24,0,2097,0
8757,31.12.2022 21:00 - 31.12.2022 22:00,120.00,120.00,23.75,120.00,9.94,4453,4294,3479,2037,...,0.0,1168.0,608,0,91,0,41,0,2007,0
8758,31.12.2022 22:00 - 31.12.2022 23:00,119.98,119.98,23.56,119.98,4.84,4389,4282,3415,1991,...,0.0,1033.0,504,0,90,0,0,29,2119,0


In [301]:
# displaying the same dataframes placed side by side (axis=1) instead of merged
# on 'MTU (UTC)'; the result is not stored
pd.concat(no1_dataframes, axis=1)

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,MTU (UTC).1,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,...,Wind Onshore - BZN|SE3,MTU (UTC).2,CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,129.30,129.30,41.33,129.30,41.33,01.01.2022 00:00 - 01.01.2022 01:00,4701,4440,3463,...,999.0,01.01.2022 00:00 - 01.01.2022 01:00,1242,0,371,0,594,0,1065,0
1,01.01.2022 01:00 - 01.01.2022 02:00,132.08,132.08,42.18,132.08,42.18,01.01.2022 01:00 - 01.01.2022 02:00,4681,4480,3388,...,958.0,01.01.2022 01:00 - 01.01.2022 02:00,1222,0,383,0,676,0,927,0
2,01.01.2022 02:00 - 01.01.2022 03:00,111.44,111.44,44.37,111.44,44.37,01.01.2022 02:00 - 01.01.2022 03:00,4620,4424,3387,...,888.0,01.01.2022 02:00 - 01.01.2022 03:00,1679,0,362,0,436,0,748,0
3,01.01.2022 03:00 - 01.01.2022 04:00,112.35,112.35,37.67,112.35,37.67,01.01.2022 03:00 - 01.01.2022 04:00,4645,4386,3376,...,788.0,01.01.2022 03:00 - 01.01.2022 04:00,1668,0,324,0,345,0,918,0
4,01.01.2022 04:00 - 01.01.2022 05:00,113.90,113.90,39.70,113.90,39.70,01.01.2022 04:00 - 01.01.2022 05:00,4670,4375,3427,...,734.0,01.01.2022 04:00 - 01.01.2022 05:00,1697,0,339,0,412,0,826,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,31.12.2022 19:00 - 31.12.2022 20:00,123.61,123.61,23.82,123.61,11.57,31.12.2022 19:00 - 31.12.2022 20:00,4650,4333,3612,...,1499.0,31.12.2022 19:00 - 31.12.2022 20:00,621,0,146,0,43,0,2083,0
8756,31.12.2022 20:00 - 31.12.2022 21:00,121.09,121.09,23.93,121.09,14.89,31.12.2022 20:00 - 31.12.2022 21:00,4573,4270,3605,...,1282.0,31.12.2022 20:00 - 31.12.2022 21:00,595,0,124,0,24,0,2097,0
8757,31.12.2022 21:00 - 31.12.2022 22:00,120.00,120.00,23.75,120.00,9.94,31.12.2022 21:00 - 31.12.2022 22:00,4453,4294,3479,...,1168.0,31.12.2022 21:00 - 31.12.2022 22:00,608,0,91,0,41,0,2007,0
8758,31.12.2022 22:00 - 31.12.2022 23:00,119.98,119.98,23.56,119.98,4.84,31.12.2022 22:00 - 31.12.2022 23:00,4389,4282,3415,...,1033.0,31.12.2022 22:00 - 31.12.2022 23:00,504,0,90,0,0,29,2119,0


In [302]:
# printing the number of rows of every sub-dataset, one per line
for sub_dataset in (prices, load, actual_generation_selected, import_export_no1_neighbours):
    print(len(sub_dataset))

8760
8760
8760
8760


In [303]:
# displaying the merge of prices and load on 'MTU (UTC)' (the result is not stored)
pd.merge(prices, load, on=['MTU (UTC)'])

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,129.30,129.30,41.33,129.30,41.33,4701,4440,3463,1991,9612
1,01.01.2022 01:00 - 01.01.2022 02:00,132.08,132.08,42.18,132.08,42.18,4681,4480,3388,1969,9601
2,01.01.2022 02:00 - 01.01.2022 03:00,111.44,111.44,44.37,111.44,44.37,4620,4424,3387,1911,9552
3,01.01.2022 03:00 - 01.01.2022 04:00,112.35,112.35,37.67,112.35,37.67,4645,4386,3376,1996,9449
4,01.01.2022 04:00 - 01.01.2022 05:00,113.90,113.90,39.70,113.90,39.70,4670,4375,3427,1953,9614
...,...,...,...,...,...,...,...,...,...,...,...
8755,31.12.2022 19:00 - 31.12.2022 20:00,123.61,123.61,23.82,123.61,11.57,4650,4333,3612,2128,9602
8756,31.12.2022 20:00 - 31.12.2022 21:00,121.09,121.09,23.93,121.09,14.89,4573,4270,3605,2089,9251
8757,31.12.2022 21:00 - 31.12.2022 22:00,120.00,120.00,23.75,120.00,9.94,4453,4294,3479,2037,9092
8758,31.12.2022 22:00 - 31.12.2022 23:00,119.98,119.98,23.56,119.98,4.84,4389,4282,3415,1991,8944


In [304]:
# displaying the merge of generation and import/export data on 'MTU (UTC)' (the result is not stored)
pd.merge(actual_generation_selected, import_export_no1_neighbours, on=['MTU (UTC)'])



Unnamed: 0,MTU (UTC),Biomass - BZN|NO1,Fossil Gas - BZN|NO1,Hydro Run-of-river and poundage - BZN|NO1_x,Hydro Water Reservoir - BZN|NO1,Waste - BZN|NO1,Wind Onshore - BZN|NO1,Fossil Gas - BZN|NO2,Hydro Pumped Storage Aggregated- BZN|NO2,Hydro Run-of-river and poundage - BZN|NO1_y,...,Solar - BZN|SE3,Wind Onshore - BZN|SE3,CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,0,0,878,321,8,172,0,1,629,...,0.0,999.0,1242,0,371,0,594,0,1065,0
1,01.01.2022 01:00 - 01.01.2022 02:00,0,0,897,330,8,151,0,1,627,...,0.0,958.0,1222,0,383,0,676,0,927,0
2,01.01.2022 02:00 - 01.01.2022 03:00,0,0,885,275,8,160,0,1,624,...,0.0,888.0,1679,0,362,0,436,0,748,0
3,01.01.2022 03:00 - 01.01.2022 04:00,0,0,890,269,8,148,0,1,624,...,0.0,788.0,1668,0,324,0,345,0,918,0
4,01.01.2022 04:00 - 01.01.2022 05:00,0,0,892,300,8,147,0,1,605,...,0.0,734.0,1697,0,339,0,412,0,826,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,31.12.2022 19:00 - 31.12.2022 20:00,0,0,992,621,8,74,0,1,823,...,0.0,1499.0,621,0,146,0,43,0,2083,0
8756,31.12.2022 20:00 - 31.12.2022 21:00,0,0,980,610,8,55,0,1,828,...,0.0,1282.0,595,0,124,0,24,0,2097,0
8757,31.12.2022 21:00 - 31.12.2022 22:00,0,0,980,588,8,67,0,1,814,...,0.0,1168.0,608,0,91,0,41,0,2007,0
8758,31.12.2022 22:00 - 31.12.2022 23:00,0,0,989,585,7,69,0,0,814,...,0.0,1033.0,504,0,90,0,0,29,2119,0
