## Data aggregation of energy market prices in bidding zone NO1 (Norway Østlandet) and neighbouring zones for 2020 - 2022

### Dependencies

In [1]:
import numpy as np
# importing dependencies to aggregate dataset
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from functools import reduce # used to merge the dataset

### Aggregation of day-ahead price data
Pricing data is of spot (day-ahead) market prices in the currency Euro per MWh

Importing price data from each zone

In [2]:
# reading price csv to dataframes

# 2020 day-ahead price data
prices_no1_2020 = pd.read_csv("../datasets/prices/NO1 Day-ahead Prices_202001010000-202101010000.csv")
prices_no2_2020 = pd.read_csv("../datasets/prices/NO2 Day-ahead Prices_202001010000-202101010000.csv")
prices_no3_2020 = pd.read_csv("../datasets/prices/NO3 Day-ahead Prices_202001010000-202101010000.csv")
prices_no5_2020 = pd.read_csv("../datasets/prices/NO5 Day-ahead Prices_202001010000-202101010000.csv")
prices_se3_2020 = pd.read_csv("../datasets/prices/SE3 Day-ahead Prices_202001010000-202101010000.csv")

# 2021 day-ahead price data
prices_no1_2021 = pd.read_csv("../datasets/prices/NO1 Day-ahead Prices_202101010000-202201010000.csv")
prices_no2_2021 = pd.read_csv("../datasets/prices/NO2 Day-ahead Prices_202101010000-202201010000.csv")
prices_no3_2021 = pd.read_csv("../datasets/prices/NO3 Day-ahead Prices_202101010000-202201010000.csv")
prices_no5_2021 = pd.read_csv("../datasets/prices/NO5 Day-ahead Prices_202101010000-202201010000.csv")
prices_se3_2021 = pd.read_csv("../datasets/prices/SE3 Day-ahead Prices_202101010000-202201010000.csv")

# 2022 day-ahead price data
prices_no1_2022 = pd.read_csv("../datasets/prices/NO1 Day-ahead Prices_202201010000-202301010000.csv")
prices_no2_2022 = pd.read_csv("../datasets/prices/NO2 Day-ahead Prices_202201010000-202301010000.csv")
prices_no3_2022 = pd.read_csv("../datasets/prices/NO3 Day-ahead Prices_202201010000-202301010000.csv")
prices_no5_2022 = pd.read_csv("../datasets/prices/NO5 Day-ahead Prices_202201010000-202301010000.csv")
prices_se3_2022 = pd.read_csv("../datasets/prices/SE3 Day-ahead Prices_202201010000-202301010000.csv")

# creating list of price dataframes
prices_dataframes = [
    prices_no1_2020,
    prices_no2_2020,
    prices_no3_2020,
    prices_no5_2020,
    prices_se3_2020,
    prices_no1_2021,
    prices_no2_2021,
    prices_no3_2021,
    prices_no5_2021,
    prices_se3_2021,
    prices_no1_2022,
    prices_no2_2022,
    prices_no3_2022,
    prices_no5_2022,
    prices_se3_2022]

# checking shape of each dataframe to see if all dataframes have the same number of rows. 
for i in prices_dataframes:
    print(i.shape)

(8784, 4)
(8784, 4)
(8784, 4)
(8784, 4)
(8784, 4)
(8760, 4)
(8760, 4)
(8760, 4)
(8760, 4)
(8760, 4)
(8760, 4)
(8760, 4)
(8760, 4)
(8760, 4)
(8760, 4)


We see that all dataframes contain the correct number of rows. row length should be 24 * 365 = 8760 width the exception of 2020 which is a leap year and should have 24 aditional rows 8760 + 24 = 8784.

We can now concatenate coresponding datasets for different years:

In [3]:
# Concatinating the dataset to add the first week of 2023 to the 2022 datasets. ignore index ignores the index number of the row in 2023 and gives it a new after being "appended"
prices_no1 = pd.concat([prices_no1_2020, prices_no1_2021, prices_no1_2022], ignore_index=True)
prices_no2 = pd.concat([prices_no2_2020, prices_no2_2021, prices_no2_2022])
prices_no3 = pd.concat([prices_no3_2020, prices_no3_2021, prices_no3_2022])
prices_no5 = pd.concat([prices_no5_2020, prices_no5_2021, prices_no5_2022])
prices_se3 = pd.concat([prices_se3_2020, prices_se3_2021, prices_se3_2022])

# creating a list of the concatinated dataframes:
concat_prices_dataframes = [prices_no1, prices_no2, prices_no3, prices_no5, prices_se3]

# checking the shape of the resulting dataframes:
for i in concat_prices_dataframes:
    print(i.shape)

(26304, 4)
(26304, 4)
(26304, 4)
(26304, 4)
(26304, 4)


We can see that the dataset has the correct number of rows (8784 + 8760 + 8760 = 26304).

In [4]:
# checking the 5 first rows of the resulting dataframe:
prices_no1.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO1
0,01.01.2020 00:00 - 01.01.2020 01:00,31.77,EUR,
1,01.01.2020 01:00 - 01.01.2020 02:00,31.57,EUR,
2,01.01.2020 02:00 - 01.01.2020 03:00,31.28,EUR,
3,01.01.2020 03:00 - 01.01.2020 04:00,30.72,EUR,
4,01.01.2020 04:00 - 01.01.2020 05:00,30.27,EUR,


In [5]:
# checking 5 last rows of the resulting dataframe:
prices_no1.tail()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO1
26299,31.12.2022 19:00 - 31.12.2022 20:00,123.61,EUR,
26300,31.12.2022 20:00 - 31.12.2022 21:00,121.09,EUR,
26301,31.12.2022 21:00 - 31.12.2022 22:00,120.0,EUR,
26302,31.12.2022 22:00 - 31.12.2022 23:00,119.98,EUR,
26303,31.12.2022 23:00 - 01.01.2023 00:00,119.32,EUR,


we can see that the resulting datafram has the correct time for the first and last row.

There might be some differences between the the dataframes of the different zones so we print out first rows of each dataset to see the structure and content of the dataframes.

In [6]:
prices_no1.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO1
0,01.01.2020 00:00 - 01.01.2020 01:00,31.77,EUR,
1,01.01.2020 01:00 - 01.01.2020 02:00,31.57,EUR,
2,01.01.2020 02:00 - 01.01.2020 03:00,31.28,EUR,
3,01.01.2020 03:00 - 01.01.2020 04:00,30.72,EUR,
4,01.01.2020 04:00 - 01.01.2020 05:00,30.27,EUR,


In [7]:
prices_no2.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO2
0,01.01.2020 00:00 - 01.01.2020 01:00,31.77,EUR,
1,01.01.2020 01:00 - 01.01.2020 02:00,31.57,EUR,
2,01.01.2020 02:00 - 01.01.2020 03:00,31.28,EUR,
3,01.01.2020 03:00 - 01.01.2020 04:00,30.72,EUR,
4,01.01.2020 04:00 - 01.01.2020 05:00,30.27,EUR,


In [8]:
prices_no3.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO3
0,01.01.2020 00:00 - 01.01.2020 01:00,28.45,EUR,
1,01.01.2020 01:00 - 01.01.2020 02:00,27.9,EUR,
2,01.01.2020 02:00 - 01.01.2020 03:00,27.52,EUR,
3,01.01.2020 03:00 - 01.01.2020 04:00,27.54,EUR,
4,01.01.2020 04:00 - 01.01.2020 05:00,26.55,EUR,


In [9]:
prices_no5.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|NO5
0,01.01.2020 00:00 - 01.01.2020 01:00,31.77,EUR,
1,01.01.2020 01:00 - 01.01.2020 02:00,31.57,EUR,
2,01.01.2020 02:00 - 01.01.2020 03:00,31.28,EUR,
3,01.01.2020 03:00 - 01.01.2020 04:00,30.72,EUR,
4,01.01.2020 04:00 - 01.01.2020 05:00,30.27,EUR,


In [10]:
prices_se3.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh],Currency,BZN|SE3
0,01.01.2020 00:00 - 01.01.2020 01:00,28.45,EUR,
1,01.01.2020 01:00 - 01.01.2020 02:00,27.9,EUR,
2,01.01.2020 02:00 - 01.01.2020 03:00,27.52,EUR,
3,01.01.2020 03:00 - 01.01.2020 04:00,27.54,EUR,
4,01.01.2020 04:00 - 01.01.2020 05:00,26.55,EUR,


#### Preparation for merging of the price data from the different zones:
The common column we will merge the data based on is the date time column "MTU (UTC)". The column currency is redundant since the column that holds price already includes it in the column name. The currency column can therefore be dropped. The datasets have a colum name for which zone the data is from, but does not include any data for each entry. This information can be added to the column name for price and the column for zone can be dropped.

In [11]:
# renaming price columns
prices_no1 = prices_no1.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|NO1'})
prices_no2 = prices_no2.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|NO2'})
prices_no3 = prices_no3.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|NO3'})
prices_no5 = prices_no5.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|NO5'})
prices_se3 = prices_se3.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|SE3'})

In [12]:
# dropping redundant columns
prices_no1= prices_no1.drop(['Currency', 'BZN|NO1'], axis=1)
prices_no2= prices_no2.drop(['Currency', 'BZN|NO2'], axis=1)
prices_no3= prices_no3.drop(['Currency', 'BZN|NO3'], axis=1)
prices_no5= prices_no5.drop(['Currency', 'BZN|NO5'], axis=1)
prices_se3= prices_se3.drop(['Currency', 'BZN|SE3'], axis=1)

In [13]:
# merging the price data into one dataframe on the date time column 'MTU (UTC)'
updated_price_dataframes = [prices_no1, prices_no2, prices_no3, prices_no5, prices_se3]
prices = reduce(lambda left, right: pd.merge(left,right, on=['MTU (UTC)']), updated_price_dataframes)

# printing out the 5 first rows resulting dataframe
prices.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3
0,01.01.2020 00:00 - 01.01.2020 01:00,31.77,31.77,28.45,31.77,28.45
1,01.01.2020 01:00 - 01.01.2020 02:00,31.57,31.57,27.9,31.57,27.9
2,01.01.2020 02:00 - 01.01.2020 03:00,31.28,31.28,27.52,31.28,27.52
3,01.01.2020 03:00 - 01.01.2020 04:00,30.72,30.72,27.54,30.72,27.54
4,01.01.2020 04:00 - 01.01.2020 05:00,30.27,30.27,26.55,30.27,26.55


In [14]:
# printing out the last 5 rows of the resulting dataframe
prices.tail()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3
26299,31.12.2022 19:00 - 31.12.2022 20:00,123.61,123.61,23.82,123.61,11.57
26300,31.12.2022 20:00 - 31.12.2022 21:00,121.09,121.09,23.93,121.09,14.89
26301,31.12.2022 21:00 - 31.12.2022 22:00,120.0,120.0,23.75,120.0,9.94
26302,31.12.2022 22:00 - 31.12.2022 23:00,119.98,119.98,23.56,119.98,4.84
26303,31.12.2022 23:00 - 01.01.2023 00:00,119.32,119.32,23.35,119.32,2.01


We can se that the datetime of the first and last rows in the dataset are correct. we also want to check if the dataframe still have the correct number of rows to se if the merge was successful

In [15]:
# printing out shape of dataframe
prices.shape

(26304, 6)

The prices dataframe has the correct number of rows (26304) so the aggregation of the price datasets have been successful.

We also want to check if the data has any missing values.

In [16]:
# checking data for missing values, if any
prices.isna().sum()

MTU (UTC)                            0
Day-ahead Price [EUR/MWh] BZN|NO1    0
Day-ahead Price [EUR/MWh] BZN|NO2    0
Day-ahead Price [EUR/MWh] BZN|NO3    0
Day-ahead Price [EUR/MWh] BZN|NO5    0
Day-ahead Price [EUR/MWh] BZN|SE3    0
dtype: int64

The data has no missing values.

### Aggregation of load data
The load data contains data about the power consumption in each zone

Importing load data from each zone

In [17]:
# reading load csv to dataframe

# 2020
load_no1_2020 = pd.read_csv("../datasets/load/NO1 Total Load - Day Ahead _ Actual_202001010000-202101010000.csv")
load_no2_2020 = pd.read_csv("../datasets/load/NO2 Total Load - Day Ahead _ Actual_202001010000-202101010000.csv")
load_no3_2020 = pd.read_csv("../datasets/load/NO3 Total Load - Day Ahead _ Actual_202001010000-202101010000.csv")
load_no5_2020 = pd.read_csv("../datasets/load/NO5 Total Load - Day Ahead _ Actual_202001010000-202101010000.csv")
load_se3_2020 = pd.read_csv("../datasets/load/SE3 Total Load - Day Ahead _ Actual_202001010000-202101010000.csv")

# 2021
load_no1_2021 = pd.read_csv("../datasets/load/NO1_Total Load - Day Ahead _ Actual_202101010000-202201010000.csv")
load_no2_2021 = pd.read_csv("../datasets/load/NO2_Total Load - Day Ahead _ Actual_202101010000-202201010000.csv")
load_no3_2021 = pd.read_csv("../datasets/load/NO3_Total Load - Day Ahead _ Actual_202101010000-202201010000.csv")
load_no5_2021 = pd.read_csv("../datasets/load/NO5_Total Load - Day Ahead _ Actual_202101010000-202201010000.csv")
load_se3_2021 = pd.read_csv("../datasets/load/SE3 Total Load - Day Ahead _ Actual_202101010000-202201010000.csv")

# 2022
load_no1_2022 = pd.read_csv("../datasets/load/NO1 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")
load_no2_2022 = pd.read_csv("../datasets/load/NO2 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")
load_no3_2022 = pd.read_csv("../datasets/load/NO3 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")
load_no5_2022 = pd.read_csv("../datasets/load/NO5 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")
load_se3_2022 = pd.read_csv("../datasets/load/SE3 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")

# creating list of load dataframes
load_dataframes = prices_dataframes = [
    load_no1_2020,
    load_no2_2020,
    load_no3_2020,
    load_no5_2020,
    load_se3_2020,
    load_no1_2021,
    load_no2_2021,
    load_no3_2021,
    load_no5_2021,
    load_se3_2021,
    load_no1_2022,
    load_no2_2022,
    load_no3_2022,
    load_no5_2022,
    load_se3_2022]

# checking shape of each dataframe to see if all dataframes have the same number of rows.
for i in load_dataframes:
    print(i.shape)

(8784, 3)
(8784, 3)
(8784, 3)
(8784, 3)
(8784, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)


We see that all dataframes contain the correct number of rows. row length should be 24 * 365 = 8760 width the exception of 2020 which is a leap year and should have 24 aditional rows 8760 + 24 = 8784.

We can now concatenate coresponding datasets for different years:

In [18]:
# Concatinating the dataset to add the first week of 2023 to the 2022 datasets. ignore index ignores the index number of the row in 2023 and gives it a new after being "appended"
load_no1 = pd.concat([load_no1_2020, load_no1_2021, load_no1_2022], ignore_index=True)
load_no2 = pd.concat([load_no2_2020, load_no2_2021, load_no2_2022])
load_no3 = pd.concat([load_no3_2020, load_no3_2021, load_no3_2022])
load_no5 = pd.concat([load_no5_2020, load_no5_2021, load_no5_2022])
load_se3 = pd.concat([load_se3_2020, load_se3_2021, load_se3_2022])

# creating a list of the concatinated dataframes:
concat_load_dataframes = [load_no1, load_no2, load_no3, load_no5, load_se3]

# checking the shape of the resulting dataframes:
for i in concat_load_dataframes:
    print(i.shape)

(26304, 3)
(26304, 3)
(26304, 3)
(26304, 3)
(26304, 3)


We can see that the dataset has the correct number of rows (8784 + 8760 + 8760 = 26304)

In [19]:
# checking the 5 first rows of the resulting dataframe:
load_no1.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO1
0,01.01.2020 00:00 - 01.01.2020 01:00,4316.0,4333.0
1,01.01.2020 01:00 - 01.01.2020 02:00,4270.0,4250.0
2,01.01.2020 02:00 - 01.01.2020 03:00,4209.0,4167.0
3,01.01.2020 03:00 - 01.01.2020 04:00,4214.0,4145.0
4,01.01.2020 04:00 - 01.01.2020 05:00,4250.0,4222.0


In [20]:
# checking the 5 last rows of the resulting dataframe:
load_no1.tail()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO1
26299,31.12.2022 19:00 - 31.12.2022 20:00,4677.0,4650.0
26300,31.12.2022 20:00 - 31.12.2022 21:00,4582.0,4573.0
26301,31.12.2022 21:00 - 31.12.2022 22:00,4554.0,4453.0
26302,31.12.2022 22:00 - 31.12.2022 23:00,4434.0,4389.0
26303,31.12.2022 23:00 - 01.01.2023 00:00,4384.0,4444.0


we can see that the resulting datafram has the correct time for the first and last row.

There might be some differences between the the dataframes of the different zones so we print out first rows of each dataset to see the structure and content of the dataframes.

In [21]:
load_no1.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO1
0,01.01.2020 00:00 - 01.01.2020 01:00,4316.0,4333.0
1,01.01.2020 01:00 - 01.01.2020 02:00,4270.0,4250.0
2,01.01.2020 02:00 - 01.01.2020 03:00,4209.0,4167.0
3,01.01.2020 03:00 - 01.01.2020 04:00,4214.0,4145.0
4,01.01.2020 04:00 - 01.01.2020 05:00,4250.0,4222.0


In [22]:
load_no2.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO2
0,01.01.2020 00:00 - 01.01.2020 01:00,3770.0,4139.0
1,01.01.2020 01:00 - 01.01.2020 02:00,3717.0,4114.0
2,01.01.2020 02:00 - 01.01.2020 03:00,3680.0,4030.0
3,01.01.2020 03:00 - 01.01.2020 04:00,3668.0,4032.0
4,01.01.2020 04:00 - 01.01.2020 05:00,3754.0,4032.0


In [23]:
load_no3.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO3
0,01.01.2020 00:00 - 01.01.2020 01:00,3283.0,3016.0
1,01.01.2020 01:00 - 01.01.2020 02:00,3234.0,2945.0
2,01.01.2020 02:00 - 01.01.2020 03:00,3189.0,3032.0
3,01.01.2020 03:00 - 01.01.2020 04:00,3198.0,2988.0
4,01.01.2020 04:00 - 01.01.2020 05:00,3215.0,2848.0


In [24]:
load_no5.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|NO5,Actual Total Load [MW] - BZN|NO5
0,01.01.2020 00:00 - 01.01.2020 01:00,1843.0,1950.0
1,01.01.2020 01:00 - 01.01.2020 02:00,1806.0,1926.0
2,01.01.2020 02:00 - 01.01.2020 03:00,1773.0,1913.0
3,01.01.2020 03:00 - 01.01.2020 04:00,1770.0,1894.0
4,01.01.2020 04:00 - 01.01.2020 05:00,1752.0,1901.0


In [25]:
load_se3.head()

Unnamed: 0,Time (UTC),Day-ahead Total Load Forecast [MW] - BZN|SE3,Actual Total Load [MW] - BZN|SE3
0,01.01.2020 00:00 - 01.01.2020 01:00,9214,9350
1,01.01.2020 01:00 - 01.01.2020 02:00,9079,9158
2,01.01.2020 02:00 - 01.01.2020 03:00,8966,8980
3,01.01.2020 03:00 - 01.01.2020 04:00,8919,8914
4,01.01.2020 04:00 - 01.01.2020 05:00,8980,8964


#### preparation for merging of the load data from the different zones:
The common values we will merge the data based on is the date time column "Time (UTC)", however this column is named "MTU (UTC)" in the prices' dataset. The time column will be renamed to "MTU (UTC)" so that the datasets can be merged. The column "Day-ahead Total Load Forecast" is redundant since we have the actual total load, so the forecast column will be dropped.

In [26]:
# dropping redundant columns
load_no1 = load_no1.drop(['Day-ahead Total Load Forecast [MW] - BZN|NO1'], axis=1)
load_no2 = load_no2.drop(['Day-ahead Total Load Forecast [MW] - BZN|NO2'], axis=1)
load_no3 = load_no3.drop(['Day-ahead Total Load Forecast [MW] - BZN|NO3'], axis=1)
load_no5 = load_no5.drop(['Day-ahead Total Load Forecast [MW] - BZN|NO5'], axis=1)
load_se3 = load_se3.drop(['Day-ahead Total Load Forecast [MW] - BZN|SE3'], axis=1)

In [27]:
# renaming time columns
load_no1 = load_no1.rename(columns={'Time (UTC)': 'MTU (UTC)'})
load_no2 = load_no2.rename(columns={'Time (UTC)': 'MTU (UTC)'})
load_no3 = load_no3.rename(columns={'Time (UTC)': 'MTU (UTC)'})
load_no5 = load_no5.rename(columns={'Time (UTC)': 'MTU (UTC)'})
load_se3 = load_se3.rename(columns={'Time (UTC)': 'MTU (UTC)'})

In [28]:
# merging the load data into one dataframe on the date time column 'MTU (UTC)'
updated_load_dataframes = [load_no1, load_no2, load_no3, load_no5, load_se3]
load = reduce(lambda left, right: pd.merge(left,right, on=['MTU (UTC)']), updated_load_dataframes)

# printing out the resulting dataframe
load.head()

Unnamed: 0,MTU (UTC),Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3
0,01.01.2020 00:00 - 01.01.2020 01:00,4333.0,4139.0,3016.0,1950.0,9350
1,01.01.2020 01:00 - 01.01.2020 02:00,4250.0,4114.0,2945.0,1926.0,9158
2,01.01.2020 02:00 - 01.01.2020 03:00,4167.0,4030.0,3032.0,1913.0,8980
3,01.01.2020 03:00 - 01.01.2020 04:00,4145.0,4032.0,2988.0,1894.0,8914
4,01.01.2020 04:00 - 01.01.2020 05:00,4222.0,4032.0,2848.0,1901.0,8964


In [29]:
# printing out the last 5 rows of the resulting dataframe
load.tail()

Unnamed: 0,MTU (UTC),Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3
26299,31.12.2022 19:00 - 31.12.2022 20:00,4650.0,4333.0,3612.0,2128.0,9602
26300,31.12.2022 20:00 - 31.12.2022 21:00,4573.0,4270.0,3605.0,2089.0,9251
26301,31.12.2022 21:00 - 31.12.2022 22:00,4453.0,4294.0,3479.0,2037.0,9092
26302,31.12.2022 22:00 - 31.12.2022 23:00,4389.0,4282.0,3415.0,1991.0,8944
26303,31.12.2022 23:00 - 01.01.2023 00:00,4444.0,4182.0,3346.0,2009.0,8955


We can se that the datetime of the first and last rows in the dataset are correct. we also want to check if the dataframe still have the correct number of rows to se if the merge was successful.

In [30]:
# printing out shape of dataframe
load.shape

(26304, 6)

The load dataframe has the correct number of rows (26304) so the aggregation of the price datasets have been successful.

We also want to check if the dataset has any missing any values:

In [31]:
# checking data for missing values, if any
load.isna().sum()

MTU (UTC)                           0
Actual Total Load [MW] - BZN|NO1    1
Actual Total Load [MW] - BZN|NO2    1
Actual Total Load [MW] - BZN|NO3    1
Actual Total Load [MW] - BZN|NO5    1
Actual Total Load [MW] - BZN|SE3    0
dtype: int64

In [32]:
# checking wich row is missing values
load[load['Actual Total Load [MW] - BZN|NO1'].isnull()]

Unnamed: 0,MTU (UTC),Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3
16057,31.10.2021 01:00 - 31.10.2021 02:00,,,,,7786


It seems like none of the Norwegian bidding zones have recorded data for the time inteval 31.10.2021 01:00 - 31.10.2021 02:00.

Handling of datasets can be preformed thorugh many means, but since this is time series data we don't want to drop any rows, we are therefore left with imputation of the missing values (i.e generating values in place of the missing values wich aims to reduce the noise in the dataset). Which method of imputation should be used will depend on the dataset. We will postpone this process untill the Exploratory Data Analysis, where we can investigate the dataset and compare the different methods, and choose the best method

### Aggregation of energy generation data

In [33]:
actual_generation_no1 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000NO1.csv")
actual_generation_no2 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000 NO2.csv")
actual_generation_no3 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000NO3.csv")
actual_generation_no5 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000NO5.csv")
actual_generation_se3 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/Actual Generation per Production Type_202201010000-202301010000SE3.csv")
actual_generation_no1_2020 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2020/NO1Actual Generation per Production Type_202001010000-202101010000.csv")
actual_generation_no2_2020 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2020/NO2Actual Generation per Production Type_202001010000-202101010000.csv")
actual_generation_no3_2020 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2020/NO3Actual Generation per Production Type_202001010000-202101010000.csv")
actual_generation_no5_2020 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2020/NO5Actual Generation per Production Type_202001010000-202101010000.csv")
actual_generation_se3_2020 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2020/SE3Actual Generation per Production Type_202001010000-202101010000.csv")
actual_generation_no1_2021 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2021/NO1Actual Generation per Production Type_202101010000-202201010000.csv")
actual_generation_no2_2021 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2021/NO2Actual Generation per Production Type_202101010000-202201010000.csv")
actual_generation_no3_2021 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2021/NO3Actual Generation per Production Type_202101010000-202201010000.csv")
actual_generation_no5_2021 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2021/NO5Actual Generation per Production Type_202101010000-202201010000.csv")
actual_generation_se3_2021 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/UTC/2021/SE3Actual Generation per Production Type_202101010000-202201010000.csv")

In [34]:
actual_generation_no1.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO1,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,878,321,n/e,n/e,n/e,n/e,n/e,8,n/e,172
1,BZN|NO1,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,897,330,n/e,n/e,n/e,n/e,n/e,8,n/e,151
2,BZN|NO1,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,885,275,n/e,n/e,n/e,n/e,n/e,8,n/e,160
3,BZN|NO1,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,890,269,n/e,n/e,n/e,n/e,n/e,8,n/e,148
4,BZN|NO1,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,892,300,n/e,n/e,n/e,n/e,n/e,8,n/e,147


In [35]:
actual_generation_no2.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO2,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,629,1874,n/e,n/e,n/e,n/e,n/e,5,n/e,244
1,BZN|NO2,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,627,2012,n/e,n/e,n/e,n/e,n/e,4,n/e,115
2,BZN|NO2,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,624,1643,n/e,n/e,n/e,n/e,n/e,4,n/e,55
3,BZN|NO2,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,624,1631,n/e,n/e,n/e,n/e,n/e,4,n/e,61
4,BZN|NO2,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,605,1642,n/e,n/e,n/e,n/e,n/e,4,n/e,88


In [36]:
actual_generation_no3.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO3,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,389,2166,n/e,n/e,0,12,n/e,3,n/e,219
1,BZN|NO3,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,390,2187,n/e,n/e,0,12,n/e,2,n/e,170
2,BZN|NO3,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,405,2183,n/e,n/e,0,12,n/e,1,n/e,126
3,BZN|NO3,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,393,2076,n/e,n/e,0,12,n/e,0,n/e,81
4,BZN|NO3,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,398,2098,n/e,n/e,0,11,n/e,0,n/e,45


In [37]:
actual_generation_no5.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO5,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,227,1759,n/e,n/e,n/e,n/e,n/e,19,n/e,n/e
1,BZN|NO5,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,216,1890,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
2,BZN|NO5,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,215,1460,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
3,BZN|NO5,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,209,1394,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
4,BZN|NO5,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,210,1488,n/e,n/e,n/e,n/e,n/e,19,n/e,n/e


In [38]:
actual_generation_se3.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|SE3,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,818.0,0,5842.0,851.0,n/e,0.0,n/e,n/e,999.0
1,BZN|SE3,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,818.0,0,5839.0,858.0,n/e,0.0,n/e,n/e,958.0
2,BZN|SE3,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,838.0,0,5839.0,859.0,n/e,0.0,n/e,n/e,888.0
3,BZN|SE3,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,842.0,0,5840.0,856.0,n/e,0.0,n/e,n/e,788.0
4,BZN|SE3,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,841.0,0,5839.0,880.0,n/e,0.0,n/e,n/e,734.0


In [39]:
generation_data = [actual_generation_no1_2020, actual_generation_no1_2021, actual_generation_no1, actual_generation_no2_2020, actual_generation_no2_2021, actual_generation_no2, actual_generation_no3_2020, actual_generation_no3_2021, actual_generation_no3, actual_generation_no5_2020, actual_generation_no5_2021, actual_generation_no5, actual_generation_se3_2020, actual_generation_se3_2021, actual_generation_se3]
for df in generation_data:
    data = df[df.isna().any(axis=1)]
    missing_values = pd.DataFrame(data=data)
    display(missing_values)

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
4959,BZN|NO1,25.07.2020 15:00 - 25.07.2020 16:00 (UTC),n/e,n/e,n/e,,n/e,n/e,n/e,n/e,...,,,n/e,n/e,n/e,n/e,n/e,,n/e,
4960,BZN|NO1,25.07.2020 16:00 - 25.07.2020 17:00 (UTC),n/e,n/e,n/e,,n/e,n/e,n/e,n/e,...,,,n/e,n/e,n/e,n/e,n/e,,n/e,
4961,BZN|NO1,25.07.2020 17:00 - 25.07.2020 18:00 (UTC),n/e,n/e,n/e,,n/e,n/e,n/e,n/e,...,,,n/e,n/e,n/e,n/e,n/e,,n/e,
4962,BZN|NO1,25.07.2020 18:00 - 25.07.2020 19:00 (UTC),n/e,n/e,n/e,,n/e,n/e,n/e,n/e,...,,,n/e,n/e,n/e,n/e,n/e,,n/e,
4963,BZN|NO1,25.07.2020 19:00 - 25.07.2020 20:00 (UTC),n/e,n/e,n/e,,n/e,n/e,n/e,n/e,...,,,n/e,n/e,n/e,n/e,n/e,,n/e,


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
7174,BZN|NO1,26.10.2021 22:00 - 26.10.2021 23:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,554,1430,n/e,n/e,n/e,n/e,n/e,3,n/e,143
7175,BZN|NO1,26.10.2021 23:00 - 27.10.2021 00:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,552,1348,n/e,n/e,n/e,n/e,n/e,3,n/e,126
7176,BZN|NO1,27.10.2021 00:00 - 27.10.2021 01:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,553,1344,n/e,n/e,n/e,n/e,n/e,3,n/e,148
7177,BZN|NO1,27.10.2021 01:00 - 27.10.2021 02:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,551,1408,n/e,n/e,n/e,n/e,n/e,3,n/e,180
7178,BZN|NO1,27.10.2021 02:00 - 27.10.2021 03:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,552,1312,n/e,n/e,n/e,n/e,n/e,3,n/e,198
7179,BZN|NO1,27.10.2021 03:00 - 27.10.2021 04:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,552,1332,n/e,n/e,n/e,n/e,n/e,3,n/e,203
7180,BZN|NO1,27.10.2021 04:00 - 27.10.2021 05:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,552,1480,n/e,n/e,n/e,n/e,n/e,3,n/e,206
7181,BZN|NO1,27.10.2021 05:00 - 27.10.2021 06:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,552,1746,n/e,n/e,n/e,n/e,n/e,3,n/e,203
7182,BZN|NO1,27.10.2021 06:00 - 27.10.2021 07:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,553,1743,n/e,n/e,n/e,n/e,n/e,3,n/e,202
7183,BZN|NO1,27.10.2021 07:00 - 27.10.2021 08:00 (UTC),,n/e,n/e,0,n/e,n/e,n/e,n/e,...,552,1694,n/e,n/e,n/e,n/e,n/e,3,n/e,181


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
4390,BZN|NO2,01.07.2020 22:00 - 01.07.2020 23:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,206.0,2043.0,n/e,n/e,n/e,n/e,n/e,13,n/e,471.0
4391,BZN|NO2,01.07.2020 23:00 - 02.07.2020 00:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,206.0,1871.0,n/e,n/e,n/e,n/e,n/e,13,n/e,480.0
4392,BZN|NO2,02.07.2020 00:00 - 02.07.2020 01:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,207.0,1845.0,n/e,n/e,n/e,n/e,n/e,13,n/e,465.0
4393,BZN|NO2,02.07.2020 01:00 - 02.07.2020 02:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,206.0,1855.0,n/e,n/e,n/e,n/e,n/e,13,n/e,445.0
4394,BZN|NO2,02.07.2020 02:00 - 02.07.2020 03:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,205.0,1864.0,n/e,n/e,n/e,n/e,n/e,13,n/e,477.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8779,BZN|NO2,31.12.2020 19:00 - 31.12.2020 20:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,387.0,6908.0,n/e,n/e,n/e,n/e,n/e,5,n/e,16.0
8780,BZN|NO2,31.12.2020 20:00 - 31.12.2020 21:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,387.0,6813.0,n/e,n/e,n/e,n/e,n/e,5,n/e,18.0
8781,BZN|NO2,31.12.2020 21:00 - 31.12.2020 22:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,392.0,6506.0,n/e,n/e,n/e,n/e,n/e,6,n/e,17.0
8782,BZN|NO2,31.12.2020 22:00 - 31.12.2020 23:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,391.0,6449.0,n/e,n/e,n/e,n/e,n/e,6,n/e,17.0


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO2,01.01.2021 00:00 - 01.01.2021 01:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,379,5927,n/e,n/e,n/e,n/e,n/e,6,n/e,27
1,BZN|NO2,01.01.2021 01:00 - 01.01.2021 02:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,376,5291,n/e,n/e,n/e,n/e,n/e,6,n/e,30
2,BZN|NO2,01.01.2021 02:00 - 01.01.2021 03:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,376,5137,n/e,n/e,n/e,n/e,n/e,6,n/e,34
3,BZN|NO2,01.01.2021 03:00 - 01.01.2021 04:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,379,5102,n/e,n/e,n/e,n/e,n/e,6,n/e,34
4,BZN|NO2,01.01.2021 04:00 - 01.01.2021 05:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,381,5363,n/e,n/e,n/e,n/e,n/e,6,n/e,33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,BZN|NO2,31.12.2021 19:00 - 31.12.2021 20:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,651,3003,n/e,n/e,n/e,n/e,n/e,9,n/e,472
8756,BZN|NO2,31.12.2021 20:00 - 31.12.2021 21:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,650,2896,n/e,n/e,n/e,n/e,n/e,9,n/e,540
8757,BZN|NO2,31.12.2021 21:00 - 31.12.2021 22:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,650,2636,n/e,n/e,n/e,n/e,n/e,9,n/e,468
8758,BZN|NO2,31.12.2021 22:00 - 31.12.2021 23:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,644,2436,n/e,n/e,n/e,n/e,n/e,7,n/e,449


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO2,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,629,1874,n/e,n/e,n/e,n/e,n/e,5,n/e,244
1,BZN|NO2,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,627,2012,n/e,n/e,n/e,n/e,n/e,4,n/e,115
2,BZN|NO2,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,624,1643,n/e,n/e,n/e,n/e,n/e,4,n/e,55
3,BZN|NO2,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,624,1631,n/e,n/e,n/e,n/e,n/e,4,n/e,61
4,BZN|NO2,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,605,1642,n/e,n/e,n/e,n/e,n/e,4,n/e,88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,BZN|NO2,31.12.2022 19:00 - 31.12.2022 20:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,823,2399,n/e,n/e,n/e,n/e,n/e,13,n/e,155
8756,BZN|NO2,31.12.2022 20:00 - 31.12.2022 21:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,828,2265,n/e,n/e,n/e,n/e,n/e,13,n/e,137
8757,BZN|NO2,31.12.2022 21:00 - 31.12.2022 22:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,814,2265,n/e,n/e,n/e,n/e,n/e,13,n/e,94
8758,BZN|NO2,31.12.2022 22:00 - 31.12.2022 23:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,814,2098,n/e,n/e,n/e,n/e,n/e,13,n/e,96


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
4390,BZN|NO3,01.07.2020 22:00 - 01.07.2020 23:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,264.0,2322.0,n/e,n/e,19.0,13,n/e,3,n/e,38.0
4391,BZN|NO3,01.07.2020 23:00 - 02.07.2020 00:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,262.0,2211.0,n/e,n/e,19.0,12,n/e,4,n/e,39.0
4392,BZN|NO3,02.07.2020 00:00 - 02.07.2020 01:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,261.0,2094.0,n/e,n/e,19.0,12,n/e,2,n/e,40.0
4393,BZN|NO3,02.07.2020 01:00 - 02.07.2020 02:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,259.0,2092.0,n/e,n/e,19.0,12,n/e,2,n/e,25.0
4394,BZN|NO3,02.07.2020 02:00 - 02.07.2020 03:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,257.0,2134.0,n/e,n/e,19.0,12,n/e,0,n/e,29.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8779,BZN|NO3,31.12.2020 19:00 - 31.12.2020 20:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,79.0,2676.0,n/e,n/e,0.0,11,n/e,4,n/e,349.0
8780,BZN|NO3,31.12.2020 20:00 - 31.12.2020 21:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,78.0,2700.0,n/e,n/e,0.0,11,n/e,1,n/e,328.0
8781,BZN|NO3,31.12.2020 21:00 - 31.12.2020 22:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,78.0,2684.0,n/e,n/e,0.0,10,n/e,4,n/e,295.0
8782,BZN|NO3,31.12.2020 22:00 - 31.12.2020 23:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,76.0,2649.0,n/e,n/e,0.0,11,n/e,1,n/e,246.0


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO3,01.01.2021 00:00 - 01.01.2021 01:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,67,2529,n/e,n/e,0,11.0,n/e,0,n/e,229
1,BZN|NO3,01.01.2021 01:00 - 01.01.2021 02:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,67,2527,n/e,n/e,0,10.0,n/e,1,n/e,269
2,BZN|NO3,01.01.2021 02:00 - 01.01.2021 03:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,67,2519,n/e,n/e,0,10.0,n/e,1,n/e,268
3,BZN|NO3,01.01.2021 03:00 - 01.01.2021 04:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,66,2503,n/e,n/e,0,11.0,n/e,2,n/e,245
4,BZN|NO3,01.01.2021 04:00 - 01.01.2021 05:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,66,2526,n/e,n/e,0,11.0,n/e,4,n/e,253
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,BZN|NO3,31.12.2021 19:00 - 31.12.2021 20:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,401,2231,n/e,n/e,0,11.0,n/e,0,n/e,437
8756,BZN|NO3,31.12.2021 20:00 - 31.12.2021 21:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,371,2166,n/e,n/e,0,11.0,n/e,2,n/e,693
8757,BZN|NO3,31.12.2021 21:00 - 31.12.2021 22:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,361,2197,n/e,n/e,0,12.0,n/e,1,n/e,782
8758,BZN|NO3,31.12.2021 22:00 - 31.12.2021 23:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,354,2116,n/e,n/e,0,12.0,n/e,2,n/e,663


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO3,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,389,2166,n/e,n/e,0,12,n/e,3,n/e,219
1,BZN|NO3,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,390,2187,n/e,n/e,0,12,n/e,2,n/e,170
2,BZN|NO3,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,405,2183,n/e,n/e,0,12,n/e,1,n/e,126
3,BZN|NO3,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,393,2076,n/e,n/e,0,12,n/e,0,n/e,81
4,BZN|NO3,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,0,n/e,n/e,n/e,n/e,...,398,2098,n/e,n/e,0,11,n/e,0,n/e,45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,BZN|NO3,31.12.2022 19:00 - 31.12.2022 20:00 (UTC),n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,...,187,987,n/e,n/e,0,23,n/e,0,n/e,1576
8756,BZN|NO3,31.12.2022 20:00 - 31.12.2022 21:00 (UTC),n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,...,187,997,n/e,n/e,0,23,n/e,0,n/e,1548
8757,BZN|NO3,31.12.2022 21:00 - 31.12.2022 22:00 (UTC),n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,...,182,868,n/e,n/e,0,23,n/e,0,n/e,1525
8758,BZN|NO3,31.12.2022 22:00 - 31.12.2022 23:00 (UTC),n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,...,183,814,n/e,n/e,0,23,n/e,0,n/e,1498


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
4390,BZN|NO5,01.07.2020 22:00 - 01.07.2020 23:00 (UTC),n/e,n/e,n/e,80.0,n/e,n/e,n/e,n/e,...,90.0,3041.0,n/e,n/e,n/e,n/e,n/e,18,n/e,n/e
4391,BZN|NO5,01.07.2020 23:00 - 02.07.2020 00:00 (UTC),n/e,n/e,n/e,79.0,n/e,n/e,n/e,n/e,...,92.0,2926.0,n/e,n/e,n/e,n/e,n/e,19,n/e,n/e
4392,BZN|NO5,02.07.2020 00:00 - 02.07.2020 01:00 (UTC),n/e,n/e,n/e,79.0,n/e,n/e,n/e,n/e,...,93.0,2746.0,n/e,n/e,n/e,n/e,n/e,18,n/e,n/e
4393,BZN|NO5,02.07.2020 01:00 - 02.07.2020 02:00 (UTC),n/e,n/e,n/e,79.0,n/e,n/e,n/e,n/e,...,93.0,2715.0,n/e,n/e,n/e,n/e,n/e,18,n/e,n/e
4394,BZN|NO5,02.07.2020 02:00 - 02.07.2020 03:00 (UTC),n/e,n/e,n/e,77.0,n/e,n/e,n/e,n/e,...,94.0,2751.0,n/e,n/e,n/e,n/e,n/e,18,n/e,n/e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8779,BZN|NO5,31.12.2020 19:00 - 31.12.2020 20:00 (UTC),n/e,n/e,n/e,78.0,n/e,n/e,n/e,n/e,...,26.0,3956.0,n/e,n/e,n/e,n/e,n/e,10,n/e,n/e
8780,BZN|NO5,31.12.2020 20:00 - 31.12.2020 21:00 (UTC),n/e,n/e,n/e,78.0,n/e,n/e,n/e,n/e,...,27.0,3884.0,n/e,n/e,n/e,n/e,n/e,10,n/e,n/e
8781,BZN|NO5,31.12.2020 21:00 - 31.12.2020 22:00 (UTC),n/e,n/e,n/e,78.0,n/e,n/e,n/e,n/e,...,27.0,3758.0,n/e,n/e,n/e,n/e,n/e,10,n/e,n/e
8782,BZN|NO5,31.12.2020 22:00 - 31.12.2020 23:00 (UTC),n/e,n/e,n/e,78.0,n/e,n/e,n/e,n/e,...,26.0,3633.0,n/e,n/e,n/e,n/e,n/e,10,n/e,n/e


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO5,01.01.2021 00:00 - 01.01.2021 01:00 (UTC),n/e,n/e,n/e,78,n/e,n/e,n/e,n/e,...,27,3723,n/e,n/e,n/e,n/e,n/e,10,n/e,n/e
1,BZN|NO5,01.01.2021 01:00 - 01.01.2021 02:00 (UTC),n/e,n/e,n/e,78,n/e,n/e,n/e,n/e,...,26,3581,n/e,n/e,n/e,n/e,n/e,10,n/e,n/e
2,BZN|NO5,01.01.2021 02:00 - 01.01.2021 03:00 (UTC),n/e,n/e,n/e,78,n/e,n/e,n/e,n/e,...,26,3431,n/e,n/e,n/e,n/e,n/e,14,n/e,n/e
3,BZN|NO5,01.01.2021 03:00 - 01.01.2021 04:00 (UTC),n/e,n/e,n/e,78,n/e,n/e,n/e,n/e,...,26,3561,n/e,n/e,n/e,n/e,n/e,11,n/e,n/e
4,BZN|NO5,01.01.2021 04:00 - 01.01.2021 05:00 (UTC),n/e,n/e,n/e,78,n/e,n/e,n/e,n/e,...,26,3599,n/e,n/e,n/e,n/e,n/e,11,n/e,n/e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,BZN|NO5,31.12.2021 19:00 - 31.12.2021 20:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,281,2244,n/e,n/e,n/e,n/e,n/e,14,n/e,n/e
8756,BZN|NO5,31.12.2021 20:00 - 31.12.2021 21:00 (UTC),n/e,n/e,n/e,83,n/e,n/e,n/e,n/e,...,270,2093,n/e,n/e,n/e,n/e,n/e,19,n/e,n/e
8757,BZN|NO5,31.12.2021 21:00 - 31.12.2021 22:00 (UTC),n/e,n/e,n/e,85,n/e,n/e,n/e,n/e,...,226,2087,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
8758,BZN|NO5,31.12.2021 22:00 - 31.12.2021 23:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,225,2045,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO5,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,227,1759,n/e,n/e,n/e,n/e,n/e,19,n/e,n/e
1,BZN|NO5,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,216,1890,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
2,BZN|NO5,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,215,1460,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
3,BZN|NO5,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,209,1394,n/e,n/e,n/e,n/e,n/e,20,n/e,n/e
4,BZN|NO5,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),n/e,n/e,n/e,82,n/e,n/e,n/e,n/e,...,210,1488,n/e,n/e,n/e,n/e,n/e,19,n/e,n/e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,BZN|NO5,31.12.2022 19:00 - 31.12.2022 20:00 (UTC),n/e,n/e,n/e,7,n/e,n/e,n/e,n/e,...,148,1593,n/e,n/e,n/e,n/e,n/e,14,n/e,n/e
8756,BZN|NO5,31.12.2022 20:00 - 31.12.2022 21:00 (UTC),n/e,n/e,n/e,7,n/e,n/e,n/e,n/e,...,135,1568,n/e,n/e,n/e,n/e,n/e,14,n/e,n/e
8757,BZN|NO5,31.12.2022 21:00 - 31.12.2022 22:00 (UTC),n/e,n/e,n/e,7,n/e,n/e,n/e,n/e,...,127,1634,n/e,n/e,n/e,n/e,n/e,14,n/e,n/e
8758,BZN|NO5,31.12.2022 22:00 - 31.12.2022 23:00 (UTC),n/e,n/e,n/e,7,n/e,n/e,n/e,n/e,...,137,1521,n/e,n/e,n/e,n/e,n/e,14,n/e,n/e


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]


Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
983,BZN|SE3,10.02.2022 23:00 - 11.02.2022 00:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,1004.0,,6979.0,713.0,n/e,0.0,n/e,n/e,1502.0
984,BZN|SE3,11.02.2022 00:00 - 11.02.2022 01:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,914.0,,6981.0,717.0,n/e,0.0,n/e,n/e,1365.0
985,BZN|SE3,11.02.2022 01:00 - 11.02.2022 02:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,954.0,,6982.0,716.0,n/e,0.0,n/e,n/e,1315.0
986,BZN|SE3,11.02.2022 02:00 - 11.02.2022 03:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,968.0,,6980.0,717.0,n/e,0.0,n/e,n/e,1231.0
987,BZN|SE3,11.02.2022 03:00 - 11.02.2022 04:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,994.0,,6982.0,763.0,n/e,0.0,n/e,n/e,1176.0
988,BZN|SE3,11.02.2022 04:00 - 11.02.2022 05:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,1162.0,,6983.0,818.0,n/e,0.0,n/e,n/e,1188.0
989,BZN|SE3,11.02.2022 05:00 - 11.02.2022 06:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,1331.0,,6982.0,922.0,n/e,0.0,n/e,n/e,1173.0
990,BZN|SE3,11.02.2022 06:00 - 11.02.2022 07:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,1384.0,,6984.0,958.0,n/e,0.0,n/e,n/e,1173.0
991,BZN|SE3,11.02.2022 07:00 - 11.02.2022 08:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,1423.0,,6966.0,1016.0,n/e,5.0,n/e,n/e,1145.0
992,BZN|SE3,11.02.2022 08:00 - 11.02.2022 09:00 (UTC),n/e,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,n/e,1426.0,,6964.0,1051.0,n/e,30.0,n/e,n/e,1073.0


In [40]:
# Renaming every column to have zones instead of using a column for 'Area'
actual_generation_no1.columns = ['Area',
                                 'MTU (UTC)',
                                 'Biomass - BZN|NO1',
                                 'Fossil Brown coal/Lignite - BZN|NO1',
                                 'Fossil Coal-derived gas - BZN|NO1',
                                 'Fossil Gas - BZN|NO1',
                                 'Fossil Hard coal - BZN|NO1',
                                 'Fossil Oil - BZN|NO1',
                                 'Fossil Oil shale - BZN|NO1',
                                 'Fossil Peat - BZN|NO1',
                                 'Geothermal - BZN|NO1',
                                 'Hydro Pumped Storage Aggregated- BZN|NO1',
                                 'Hydro Pumped Storage Consumption - BZN|NO1',
                                 'Hydro Run-of-river and poundage - BZN|NO1',
                                 'Hydro Water Reservoir - BZN|NO1',
                                 'Marine - BZN|NO1',
                                 'Nuclear - BZN|NO1',
                                 'Other - BZN|NO1',
                                 'Other renewable - BZN|NO1',
                                 'Solar - BZN|NO1',
                                 'Waste - BZN|NO1',
                                 'Wind Offshore - BZN|NO1',
                                 'Wind Onshore - BZN|NO1'
                                 ]
actual_generation_no1_2020.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|NO1',
                              'Fossil Brown coal/Lignite - BZN|NO1',
                              'Fossil Coal-derived gas - BZN|NO1',
                              'Fossil Gas - BZN|NO1',
                              'Fossil Hard coal - BZN|NO1',
                              'Fossil Oil - BZN|NO1',
                              'Fossil Oil shale - BZN|NO1',
                              'Fossil Peat - BZN|NO1',
                              'Geothermal - BZN|NO1',
                              'Hydro Pumped Storage Aggregated- BZN|NO1',
                              'Hydro Pumped Storage Consumption - BZN|NO1',
                              'Hydro Run-of-river and poundage - BZN|NO1',
                              'Hydro Water Reservoir - BZN|NO1',
                              'Marine - BZN|NO1',
                              'Nuclear - BZN|NO1',
                              'Other - BZN|NO1',
                              'Other renewable - BZN|NO1',
                              'Solar - BZN|NO1',
                              'Waste - BZN|NO1',
                              'Wind Offshore - BZN|NO1',
                              'Wind Onshore - BZN|NO1'
                              ]
actual_generation_no1_2021.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|NO1',
                              'Fossil Brown coal/Lignite - BZN|NO1',
                              'Fossil Coal-derived gas - BZN|NO1',
                              'Fossil Gas - BZN|NO1',
                              'Fossil Hard coal - BZN|NO1',
                              'Fossil Oil - BZN|NO1',
                              'Fossil Oil shale - BZN|NO1',
                              'Fossil Peat - BZN|NO1',
                              'Geothermal - BZN|NO1',
                              'Hydro Pumped Storage Aggregated- BZN|NO1',
                              'Hydro Pumped Storage Consumption - BZN|NO1',
                              'Hydro Run-of-river and poundage - BZN|NO1',
                              'Hydro Water Reservoir - BZN|NO1',
                              'Marine - BZN|NO1',
                              'Nuclear - BZN|NO1',
                              'Other - BZN|NO1',
                              'Other renewable - BZN|NO1',
                              'Solar - BZN|NO1',
                              'Waste - BZN|NO1',
                              'Wind Offshore - BZN|NO1',
                              'Wind Onshore - BZN|NO1'
                              ]
actual_generation_no2.columns = ['Area',
                                 'MTU (UTC)',
                                 'Biomass - BZN|NO2',
                                 'Fossil Brown coal/Lignite - BZN|NO2',
                                 'Fossil Coal-derived gas - BZN|NO2',
                                 'Fossil Gas - BZN|NO2',
                                 'Fossil Hard coal - BZN|NO2',
                                 'Fossil Oil - BZN|NO2',
                                 'Fossil Oil shale - BZN|NO2',
                                 'Fossil Peat - BZN|NO2',
                                 'Geothermal - BZN|NO2',
                                 'Hydro Pumped Storage Aggregated- BZN|NO2',
                                 'Hydro Pumped Storage Consumption - BZN|NO2',
                                 'Hydro Run-of-river and poundage - BZN|NO2',
                                 'Hydro Water Reservoir - BZN|NO2',
                                 'Marine - BZN|NO2',
                                 'Nuclear - BZN|NO2',
                                 'Other - BZN|NO2',
                                 'Other renewable - BZN|NO2',
                                 'Solar - BZN|NO2',
                                 'Waste - BZN|NO2',
                                 'Wind Offshore - BZN|NO2',
                                 'Wind Onshore - BZN|NO2'
                                 ]
actual_generation_no2_2020.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|NO2',
                              'Fossil Brown coal/Lignite - BZN|NO2',
                              'Fossil Coal-derived gas - BZN|NO2',
                              'Fossil Gas - BZN|NO2',
                              'Fossil Hard coal - BZN|NO2',
                              'Fossil Oil - BZN|NO2',
                              'Fossil Oil shale - BZN|NO2',
                              'Fossil Peat - BZN|NO2',
                              'Geothermal - BZN|NO2',
                              'Hydro Pumped Storage Aggregated- BZN|NO2',
                              'Hydro Pumped Storage Consumption - BZN|NO2',
                              'Hydro Run-of-river and poundage - BZN|NO2',
                              'Hydro Water Reservoir - BZN|NO2',
                              'Marine - BZN|NO2',
                              'Nuclear - BZN|NO2',
                              'Other - BZN|NO2',
                              'Other renewable - BZN|NO2',
                              'Solar - BZN|NO2',
                              'Waste - BZN|NO2',
                              'Wind Offshore - BZN|NO2',
                              'Wind Onshore - BZN|NO2'
                              ]
actual_generation_no2_2021.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|NO2',
                              'Fossil Brown coal/Lignite - BZN|NO2',
                              'Fossil Coal-derived gas - BZN|NO2',
                              'Fossil Gas - BZN|NO2',
                              'Fossil Hard coal - BZN|NO2',
                              'Fossil Oil - BZN|NO2',
                              'Fossil Oil shale - BZN|NO2',
                              'Fossil Peat - BZN|NO2',
                              'Geothermal - BZN|NO2',
                              'Hydro Pumped Storage Aggregated- BZN|NO2',
                              'Hydro Pumped Storage Consumption - BZN|NO2',
                              'Hydro Run-of-river and poundage - BZN|NO2',
                              'Hydro Water Reservoir - BZN|NO2',
                              'Marine - BZN|NO2',
                              'Nuclear - BZN|NO2',
                              'Other - BZN|NO2',
                              'Other renewable - BZN|NO2',
                              'Solar - BZN|NO2',
                              'Waste - BZN|NO2',
                              'Wind Offshore - BZN|NO2',
                              'Wind Onshore - BZN|NO2'
                              ]
actual_generation_no3.columns = ['Area',
                                 'MTU (UTC)',
                                 'Biomass - BZN|NO3',
                                 'Fossil Brown coal/Lignite - BZN|NO3',
                                 'Fossil Coal-derived gas - BZN|NO3',
                                 'Fossil Gas - BZN|NO3',
                                 'Fossil Hard coal - BZN|NO3',
                                 'Fossil Oil - BZN|NO3',
                                 'Fossil Oil shale - BZN|NO3',
                                 'Fossil Peat - BZN|NO3',
                                 'Geothermal - BZN|NO3',
                                 'Hydro Pumped Storage Aggregated- BZN|NO3',
                                 'Hydro Pumped Storage Consumption - BZN|NO3',
                                 'Hydro Run-of-river and poundage - BZN|NO3',
                                 'Hydro Water Reservoir - BZN|NO3',
                                 'Marine - BZN|NO3',
                                 'Nuclear - BZN|NO3',
                                 'Other - BZN|NO3',
                                 'Other renewable - BZN|NO3',
                                 'Solar - BZN|NO3',
                                 'Waste - BZN|NO3',
                                 'Wind Offshore - BZN|NO3',
                                 'Wind Onshore - BZN|NO3'
                                 ]
actual_generation_no3_2020.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|NO3',
                              'Fossil Brown coal/Lignite - BZN|NO3',
                              'Fossil Coal-derived gas - BZN|NO3',
                              'Fossil Gas - BZN|NO3',
                              'Fossil Hard coal - BZN|NO3',
                              'Fossil Oil - BZN|NO3',
                              'Fossil Oil shale - BZN|NO3',
                              'Fossil Peat - BZN|NO3',
                              'Geothermal - BZN|NO3',
                              'Hydro Pumped Storage Aggregated- BZN|NO3',
                              'Hydro Pumped Storage Consumption - BZN|NO3',
                              'Hydro Run-of-river and poundage - BZN|NO3',
                              'Hydro Water Reservoir - BZN|NO3',
                              'Marine - BZN|NO3',
                              'Nuclear - BZN|NO3',
                              'Other - BZN|NO3',
                              'Other renewable - BZN|NO3',
                              'Solar - BZN|NO3',
                              'Waste - BZN|NO3',
                              'Wind Offshore - BZN|NO3',
                              'Wind Onshore - BZN|NO3'
                              ]
actual_generation_no3_2021.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|NO3',
                              'Fossil Brown coal/Lignite - BZN|NO3',
                              'Fossil Coal-derived gas - BZN|NO3',
                              'Fossil Gas - BZN|NO3',
                              'Fossil Hard coal - BZN|NO3',
                              'Fossil Oil - BZN|NO3',
                              'Fossil Oil shale - BZN|NO3',
                              'Fossil Peat - BZN|NO3',
                              'Geothermal - BZN|NO3',
                              'Hydro Pumped Storage Aggregated- BZN|NO3',
                              'Hydro Pumped Storage Consumption - BZN|NO3',
                              'Hydro Run-of-river and poundage - BZN|NO3',
                              'Hydro Water Reservoir - BZN|NO3',
                              'Marine - BZN|NO3',
                              'Nuclear - BZN|NO3',
                              'Other - BZN|NO3',
                              'Other renewable - BZN|NO3',
                              'Solar - BZN|NO3',
                              'Waste - BZN|NO3',
                              'Wind Offshore - BZN|NO3',
                              'Wind Onshore - BZN|NO3'
                              ]
actual_generation_no5.columns = ['Area',
                                 'MTU (UTC)',
                                 'Biomass - BZN|NO5',
                                 'Fossil Brown coal/Lignite - BZN|NO5',
                                 'Fossil Coal-derived gas - BZN|NO5',
                                 'Fossil Gas - BZN|NO5',
                                 'Fossil Hard coal - BZN|NO5',
                                 'Fossil Oil - BZN|NO5',
                                 'Fossil Oil shale - BZN|NO5',
                                 'Fossil Peat - BZN|NO5',
                                 'Geothermal - BZN|NO5',
                                 'Hydro Pumped Storage Aggregated- BZN|NO5',
                                 'Hydro Pumped Storage Consumption - BZN|NO5',
                                 'Hydro Run-of-river and poundage - BZN|NO5',
                                 'Hydro Water Reservoir - BZN|NO5',
                                 'Marine - BZN|NO5',
                                 'Nuclear - BZN|NO5',
                                 'Other - BZN|NO5',
                                 'Other renewable - BZN|NO5',
                                 'Solar - BZN|NO5',
                                 'Waste - BZN|NO5',
                                 'Wind Offshore - BZN|NO5',
                                 'Wind Onshore - BZN|NO5'
                                 ]
actual_generation_no5_2020.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|NO5',
                              'Fossil Brown coal/Lignite - BZN|NO5',
                              'Fossil Coal-derived gas - BZN|NO5',
                              'Fossil Gas - BZN|NO5',
                              'Fossil Hard coal - BZN|NO5',
                              'Fossil Oil - BZN|NO5',
                              'Fossil Oil shale - BZN|NO5',
                              'Fossil Peat - BZN|NO5',
                              'Geothermal - BZN|NO5',
                              'Hydro Pumped Storage Aggregated- BZN|NO5',
                              'Hydro Pumped Storage Consumption - BZN|NO5',
                              'Hydro Run-of-river and poundage - BZN|NO5',
                              'Hydro Water Reservoir - BZN|NO5',
                              'Marine - BZN|NO5',
                              'Nuclear - BZN|NO5',
                              'Other - BZN|NO5',
                              'Other renewable - BZN|NO5',
                              'Solar - BZN|NO5',
                              'Waste - BZN|NO5',
                              'Wind Offshore - BZN|NO5',
                              'Wind Onshore - BZN|NO5'
                              ]
actual_generation_no5_2021.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|NO5',
                              'Fossil Brown coal/Lignite - BZN|NO5',
                              'Fossil Coal-derived gas - BZN|NO5',
                              'Fossil Gas - BZN|NO5',
                              'Fossil Hard coal - BZN|NO5',
                              'Fossil Oil - BZN|NO5',
                              'Fossil Oil shale - BZN|NO5',
                              'Fossil Peat - BZN|NO5',
                              'Geothermal - BZN|NO5',
                              'Hydro Pumped Storage Aggregated- BZN|NO5',
                              'Hydro Pumped Storage Consumption - BZN|NO5',
                              'Hydro Run-of-river and poundage - BZN|NO5',
                              'Hydro Water Reservoir - BZN|NO5',
                              'Marine - BZN|NO5',
                              'Nuclear - BZN|NO5',
                              'Other - BZN|NO5',
                              'Other renewable - BZN|NO5',
                              'Solar - BZN|NO5',
                              'Waste - BZN|NO5',
                              'Wind Offshore - BZN|NO5',
                              'Wind Onshore - BZN|NO5'
                              ]
actual_generation_se3.columns = ['Area',
                                 'MTU (UTC)',
                                 'Biomass - BZN|SE3',
                                 'Fossil Brown coal/Lignite - BZN|SE3',
                                 'Fossil Coal-derived gas - BZN|SE3',
                                 'Fossil Gas - BZN|SE3',
                                 'Fossil Hard coal - BZN|SE3',
                                 'Fossil Oil - BZN|SE3',
                                 'Fossil Oil shale - BZN|SE3',
                                 'Fossil Peat - BZN|SE3',
                                 'Geothermal - BZN|SE3',
                                 'Hydro Pumped Storage Aggregated- BZN|SE3',
                                 'Hydro Pumped Storage Consumption - BZN|SE3',
                                 'Hydro Run-of-river and poundage - BZN|SE3',
                                 'Hydro Water Reservoir - BZN|SE3',
                                 'Marine - BZN|SE3',
                                 'Nuclear - BZN|SE3',
                                 'Other - BZN|SE3',
                                 'Other renewable - BZN|SE3',
                                 'Solar - BZN|SE3',
                                 'Waste - BZN|SE3',
                                 'Wind Offshore - BZN|SE3',
                                 'Wind Onshore - BZN|SE3'
                                 ]
actual_generation_se3_2020.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|SE3',
                              'Fossil Brown coal/Lignite - BZN|SE3',
                              'Fossil Coal-derived gas - BZN|SE3',
                              'Fossil Gas - BZN|SE3',
                              'Fossil Hard coal - BZN|SE3',
                              'Fossil Oil - BZN|SE3',
                              'Fossil Oil shale - BZN|SE3',
                              'Fossil Peat - BZN|SE3',
                              'Geothermal - BZN|SE3',
                              'Hydro Pumped Storage Aggregated- BZN|SE3',
                              'Hydro Pumped Storage Consumption - BZN|SE3',
                              'Hydro Run-of-river and poundage - BZN|SE3',
                              'Hydro Water Reservoir - BZN|SE3',
                              'Marine - BZN|SE3',
                              'Nuclear - BZN|SE3',
                              'Other - BZN|SE3',
                              'Other renewable - BZN|SE3',
                              'Solar - BZN|SE3',
                              'Waste - BZN|SE3',
                              'Wind Offshore - BZN|SE3',
                              'Wind Onshore - BZN|SE3'
                              ]
actual_generation_se3_2021.columns = ['Area',
                              'MTU (UTC)',
                              'Biomass - BZN|SE3',
                              'Fossil Brown coal/Lignite - BZN|SE3',
                              'Fossil Coal-derived gas - BZN|SE3',
                              'Fossil Gas - BZN|SE3',
                              'Fossil Hard coal - BZN|SE3',
                              'Fossil Oil - BZN|SE3',
                              'Fossil Oil shale - BZN|SE3',
                              'Fossil Peat - BZN|SE3',
                              'Geothermal - BZN|SE3',
                              'Hydro Pumped Storage Aggregated- BZN|SE3',
                              'Hydro Pumped Storage Consumption - BZN|SE3',
                              'Hydro Run-of-river and poundage - BZN|SE3',
                              'Hydro Water Reservoir - BZN|SE3',
                              'Marine - BZN|SE3',
                              'Nuclear - BZN|SE3',
                              'Other - BZN|SE3',
                              'Other renewable - BZN|SE3',
                              'Solar - BZN|SE3',
                              'Waste - BZN|SE3',
                              'Wind Offshore - BZN|SE3',
                              'Wind Onshore - BZN|SE3'
                              ]
actual_generation_no1.head()

Unnamed: 0,Area,MTU (UTC),Biomass - BZN|NO1,Fossil Brown coal/Lignite - BZN|NO1,Fossil Coal-derived gas - BZN|NO1,Fossil Gas - BZN|NO1,Fossil Hard coal - BZN|NO1,Fossil Oil - BZN|NO1,Fossil Oil shale - BZN|NO1,Fossil Peat - BZN|NO1,...,Hydro Run-of-river and poundage - BZN|NO1,Hydro Water Reservoir - BZN|NO1,Marine - BZN|NO1,Nuclear - BZN|NO1,Other - BZN|NO1,Other renewable - BZN|NO1,Solar - BZN|NO1,Waste - BZN|NO1,Wind Offshore - BZN|NO1,Wind Onshore - BZN|NO1
0,BZN|NO1,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,878,321,n/e,n/e,n/e,n/e,n/e,8,n/e,172
1,BZN|NO1,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,897,330,n/e,n/e,n/e,n/e,n/e,8,n/e,151
2,BZN|NO1,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,885,275,n/e,n/e,n/e,n/e,n/e,8,n/e,160
3,BZN|NO1,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,890,269,n/e,n/e,n/e,n/e,n/e,8,n/e,148
4,BZN|NO1,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,...,892,300,n/e,n/e,n/e,n/e,n/e,8,n/e,147


In [41]:
# Dropping area because we keep the zone name in the column name
actual_generation_no1 = actual_generation_no1.drop(['Area'], axis=1)
actual_generation_no1_2020 = actual_generation_no1_2020.drop(['Area'], axis=1)
actual_generation_no1_2021 = actual_generation_no1_2021.drop(['Area'], axis=1)
actual_generation_no2 = actual_generation_no2.drop(['Area'], axis=1)
actual_generation_no2_2020 = actual_generation_no2_2020.drop(['Area'], axis=1)
actual_generation_no2_2021 = actual_generation_no2_2021.drop(['Area'], axis=1)
actual_generation_no3 = actual_generation_no3.drop(['Area'], axis=1)
actual_generation_no3_2020 = actual_generation_no3_2020.drop(['Area'], axis=1)
actual_generation_no3_2021 = actual_generation_no3_2021.drop(['Area'], axis=1)
actual_generation_no5 = actual_generation_no5.drop(['Area'], axis=1)
actual_generation_no5_2020 = actual_generation_no5_2020.drop(['Area'], axis=1)
actual_generation_no5_2021 = actual_generation_no5_2021.drop(['Area'], axis=1)
actual_generation_se3 = actual_generation_se3.drop(['Area'], axis=1)
actual_generation_se3_2020 = actual_generation_se3_2020.drop(['Area'], axis=1)
actual_generation_se3_2021 = actual_generation_se3_2021.drop(['Area'], axis=1)
actual_generation_no1.head()

Unnamed: 0,MTU (UTC),Biomass - BZN|NO1,Fossil Brown coal/Lignite - BZN|NO1,Fossil Coal-derived gas - BZN|NO1,Fossil Gas - BZN|NO1,Fossil Hard coal - BZN|NO1,Fossil Oil - BZN|NO1,Fossil Oil shale - BZN|NO1,Fossil Peat - BZN|NO1,Geothermal - BZN|NO1,...,Hydro Run-of-river and poundage - BZN|NO1,Hydro Water Reservoir - BZN|NO1,Marine - BZN|NO1,Nuclear - BZN|NO1,Other - BZN|NO1,Other renewable - BZN|NO1,Solar - BZN|NO1,Waste - BZN|NO1,Wind Offshore - BZN|NO1,Wind Onshore - BZN|NO1
0,01.01.2022 00:00 - 01.01.2022 01:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,878,321,n/e,n/e,n/e,n/e,n/e,8,n/e,172
1,01.01.2022 01:00 - 01.01.2022 02:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,897,330,n/e,n/e,n/e,n/e,n/e,8,n/e,151
2,01.01.2022 02:00 - 01.01.2022 03:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,885,275,n/e,n/e,n/e,n/e,n/e,8,n/e,160
3,01.01.2022 03:00 - 01.01.2022 04:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,890,269,n/e,n/e,n/e,n/e,n/e,8,n/e,148
4,01.01.2022 04:00 - 01.01.2022 05:00 (UTC),0,n/e,n/e,0,n/e,n/e,n/e,n/e,n/e,...,892,300,n/e,n/e,n/e,n/e,n/e,8,n/e,147


In [42]:
generation_no1 = pd.concat([actual_generation_no1, actual_generation_no1_2020, actual_generation_no1_2021]).sort_values('MTU (UTC)').reset_index(drop=True)
generation_no2 = pd.concat([actual_generation_no2, actual_generation_no2_2020, actual_generation_no2_2021]).sort_values('MTU (UTC)').reset_index(drop=True)
generation_no3 = pd.concat([actual_generation_no3, actual_generation_no3_2020, actual_generation_no3_2021]).sort_values('MTU (UTC)').reset_index(drop=True)
generation_no5 = pd.concat([actual_generation_no5, actual_generation_no5_2020, actual_generation_no5_2021]).sort_values('MTU (UTC)').reset_index(drop=True)
generation_se3 = pd.concat([actual_generation_se3, actual_generation_se3_2020, actual_generation_se3_2021]).sort_values('MTU (UTC)').reset_index(drop=True)
generation_no1.shape

(26304, 22)

In [43]:
# Creating one dataframe from the 5 dataframes for each zone
actual_generation_dataframes = [generation_no1, generation_no2, generation_no3, generation_no5, generation_se3]
actual_generation = reduce(lambda left, right: pd.merge(left,right, on=['MTU (UTC)']), actual_generation_dataframes)

# printing out the resulting dataframe
actual_generation.head()

Unnamed: 0,MTU (UTC),Biomass - BZN|NO1,Fossil Brown coal/Lignite - BZN|NO1,Fossil Coal-derived gas - BZN|NO1,Fossil Gas - BZN|NO1,Fossil Hard coal - BZN|NO1,Fossil Oil - BZN|NO1,Fossil Oil shale - BZN|NO1,Fossil Peat - BZN|NO1,Geothermal - BZN|NO1,...,Hydro Run-of-river and poundage - BZN|SE3,Hydro Water Reservoir - BZN|SE3,Marine - BZN|SE3,Nuclear - BZN|SE3,Other - BZN|SE3,Other renewable - BZN|SE3,Solar - BZN|SE3,Waste - BZN|SE3,Wind Offshore - BZN|SE3,Wind Onshore - BZN|SE3
0,01.01.2020 00:00 - 01.01.2020 01:00 (UTC),n/e,n/e,n/e,25.0,n/e,n/e,n/e,n/e,n/e,...,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,1594.0
1,01.01.2020 01:00 - 01.01.2020 02:00 (UTC),n/e,n/e,n/e,25.0,n/e,n/e,n/e,n/e,n/e,...,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,1690.0
2,01.01.2020 02:00 - 01.01.2020 03:00 (UTC),n/e,n/e,n/e,25.0,n/e,n/e,n/e,n/e,n/e,...,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,1783.0
3,01.01.2020 03:00 - 01.01.2020 04:00 (UTC),n/e,n/e,n/e,25.0,n/e,n/e,n/e,n/e,n/e,...,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,1900.0
4,01.01.2020 04:00 - 01.01.2020 05:00 (UTC),n/e,n/e,n/e,25.0,n/e,n/e,n/e,n/e,n/e,...,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,n/e,1995.0


We have alot of columns which are used in other nations and zones that are redundant to us because there is no power generation from these sources in the zones we are looking at. Therefore, we select only the columns which keep information about power generation in each zone.

In [44]:
# Selecting only relevant columns
actual_generation_selected = actual_generation[['MTU (UTC)',
                                                'Biomass - BZN|NO1',
                                                'Fossil Gas - BZN|NO1',
                                                'Hydro Run-of-river and poundage - BZN|NO1',
                                                'Hydro Water Reservoir - BZN|NO1',
                                                'Waste - BZN|NO1',
                                                'Wind Onshore - BZN|NO1',
                                                'Fossil Gas - BZN|NO2',
                                                'Hydro Pumped Storage Aggregated- BZN|NO2',
                                                'Hydro Run-of-river and poundage - BZN|NO2',
                                                'Hydro Water Reservoir - BZN|NO2',
                                                'Waste - BZN|NO2',
                                                'Wind Onshore - BZN|NO2',
                                                'Fossil Gas - BZN|NO3',
                                                'Hydro Pumped Storage Aggregated- BZN|NO3',
                                                'Hydro Run-of-river and poundage - BZN|NO3',
                                                'Hydro Water Reservoir - BZN|NO3',
                                                'Other - BZN|NO3',
                                                'Other renewable - BZN|NO3',
                                                'Waste - BZN|NO3',
                                                'Wind Onshore - BZN|NO3',
                                                'Fossil Gas - BZN|NO5',
                                                'Hydro Pumped Storage Aggregated- BZN|NO5',
                                                'Hydro Run-of-river and poundage - BZN|NO5',
                                                'Hydro Water Reservoir - BZN|NO5',
                                                'Waste - BZN|NO5',
                                                'Fossil Gas - BZN|SE3',
                                                'Hydro Water Reservoir - BZN|SE3',
                                                'Marine - BZN|SE3',
                                                'Nuclear - BZN|SE3',
                                                'Other - BZN|SE3',
                                                'Solar - BZN|SE3',
                                                'Wind Onshore - BZN|SE3']].copy()
# printing the head of the resulting dataframe
actual_generation_selected.dtypes

MTU (UTC)                                     object
Biomass - BZN|NO1                             object
Fossil Gas - BZN|NO1                         float64
Hydro Run-of-river and poundage - BZN|NO1    float64
Hydro Water Reservoir - BZN|NO1              float64
Waste - BZN|NO1                               object
Wind Onshore - BZN|NO1                       float64
Fossil Gas - BZN|NO2                         float64
Hydro Pumped Storage Aggregated- BZN|NO2      object
Hydro Run-of-river and poundage - BZN|NO2    float64
Hydro Water Reservoir - BZN|NO2              float64
Waste - BZN|NO2                               object
Wind Onshore - BZN|NO2                       float64
Fossil Gas - BZN|NO3                          object
Hydro Pumped Storage Aggregated- BZN|NO3      object
Hydro Run-of-river and poundage - BZN|NO3    float64
Hydro Water Reservoir - BZN|NO3              float64
Other - BZN|NO3                              float64
Other renewable - BZN|NO3                     

We can see that the formatting of the datetime interval includes (CET/CEST) in the actual values. This extra information will need to be removed so they mach the other datasets, and can be merged on the time column

In [45]:
# n/e is not expected data meaning there is no generation from that production type at that time, so we replace with 0
actual_generation_selected = actual_generation_selected.replace(['n/e'], 0)

In [46]:
actual_generation_selected['Biomass - BZN|NO1'] = actual_generation_selected['Biomass - BZN|NO1'].astype(float)
actual_generation_selected['Waste - BZN|NO1'] = actual_generation_selected['Waste - BZN|NO1'].astype('float')
actual_generation_selected['Hydro Pumped Storage Aggregated- BZN|NO2'] = actual_generation_selected['Hydro Pumped Storage Aggregated- BZN|NO2'].astype('float')
actual_generation_selected['Waste - BZN|NO2'] = actual_generation_selected['Waste - BZN|NO2'].astype('float')
actual_generation_selected['Fossil Gas - BZN|NO3'] = actual_generation_selected['Fossil Gas - BZN|NO3'].astype('float')
actual_generation_selected['Hydro Pumped Storage Aggregated- BZN|NO3'] = actual_generation_selected['Hydro Pumped Storage Aggregated- BZN|NO3'].astype('float')
actual_generation_selected['Other renewable - BZN|NO3'] = actual_generation_selected['Other renewable - BZN|NO3'].astype('float')
actual_generation_selected['Waste - BZN|NO3'] = actual_generation_selected['Waste - BZN|NO3'].astype('float')
actual_generation_selected['Hydro Pumped Storage Aggregated- BZN|NO5'] = actual_generation_selected['Hydro Pumped Storage Aggregated- BZN|NO5'].astype('float')
actual_generation_selected['Waste - BZN|NO5'] = actual_generation_selected['Waste - BZN|NO5'].astype('float')
actual_generation_selected['Fossil Gas - BZN|SE3'] = actual_generation_selected['Fossil Gas - BZN|SE3'].astype('float')
actual_generation_selected['Hydro Water Reservoir - BZN|SE3'] = actual_generation_selected['Hydro Water Reservoir - BZN|SE3'].astype('float')
actual_generation_selected['Marine - BZN|SE3'] = actual_generation_selected['Marine - BZN|SE3'].astype('float')
actual_generation_selected['Nuclear - BZN|SE3'] = actual_generation_selected['Nuclear - BZN|SE3'].astype('float')
actual_generation_selected['Other - BZN|SE3'] = actual_generation_selected['Other - BZN|SE3'].astype('float')
actual_generation_selected['Solar - BZN|SE3'] = actual_generation_selected['Solar - BZN|SE3'].astype('float')
actual_generation_selected.dtypes

MTU (UTC)                                     object
Biomass - BZN|NO1                            float64
Fossil Gas - BZN|NO1                         float64
Hydro Run-of-river and poundage - BZN|NO1    float64
Hydro Water Reservoir - BZN|NO1              float64
Waste - BZN|NO1                              float64
Wind Onshore - BZN|NO1                       float64
Fossil Gas - BZN|NO2                         float64
Hydro Pumped Storage Aggregated- BZN|NO2     float64
Hydro Run-of-river and poundage - BZN|NO2    float64
Hydro Water Reservoir - BZN|NO2              float64
Waste - BZN|NO2                              float64
Wind Onshore - BZN|NO2                       float64
Fossil Gas - BZN|NO3                         float64
Hydro Pumped Storage Aggregated- BZN|NO3     float64
Hydro Run-of-river and poundage - BZN|NO3    float64
Hydro Water Reservoir - BZN|NO3              float64
Other - BZN|NO3                              float64
Other renewable - BZN|NO3                    f

In [47]:
# removing (CET/CEST) and trailing whitespace from datetime values in column 'MTU (CET/CEST)'
actual_generation_selected['MTU (UTC)'] = actual_generation_selected['MTU (UTC)'].map(lambda x: x.rstrip(')(UTC').strip())

# printing out resulting dataset
actual_generation_selected.head()

Unnamed: 0,MTU (UTC),Biomass - BZN|NO1,Fossil Gas - BZN|NO1,Hydro Run-of-river and poundage - BZN|NO1,Hydro Water Reservoir - BZN|NO1,Waste - BZN|NO1,Wind Onshore - BZN|NO1,Fossil Gas - BZN|NO2,Hydro Pumped Storage Aggregated- BZN|NO2,Hydro Run-of-river and poundage - BZN|NO2,...,Hydro Run-of-river and poundage - BZN|NO5,Hydro Water Reservoir - BZN|NO5,Waste - BZN|NO5,Fossil Gas - BZN|SE3,Hydro Water Reservoir - BZN|SE3,Marine - BZN|SE3,Nuclear - BZN|SE3,Other - BZN|SE3,Solar - BZN|SE3,Wind Onshore - BZN|SE3
0,01.01.2020 00:00 - 01.01.2020 01:00,0.0,25.0,727.0,1130.0,0.0,149.0,4.0,0.0,317.0,...,53.0,3553.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1594.0
1,01.01.2020 01:00 - 01.01.2020 02:00,0.0,25.0,729.0,1115.0,0.0,153.0,4.0,0.0,316.0,...,52.0,3326.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1690.0
2,01.01.2020 02:00 - 01.01.2020 03:00,0.0,25.0,731.0,1087.0,0.0,153.0,4.0,0.0,316.0,...,52.0,2915.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1783.0
3,01.01.2020 03:00 - 01.01.2020 04:00,0.0,25.0,731.0,1058.0,0.0,153.0,4.0,0.0,314.0,...,53.0,2594.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1900.0
4,01.01.2020 04:00 - 01.01.2020 05:00,0.0,25.0,731.0,1030.0,0.0,155.0,4.0,0.0,313.0,...,52.0,2392.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1995.0


In [48]:
actual_generation_selected.isna().sum()

MTU (UTC)                                     0
Biomass - BZN|NO1                            13
Fossil Gas - BZN|NO1                          5
Hydro Run-of-river and poundage - BZN|NO1     5
Hydro Water Reservoir - BZN|NO1               5
Waste - BZN|NO1                               5
Wind Onshore - BZN|NO1                        5
Fossil Gas - BZN|NO2                          6
Hydro Pumped Storage Aggregated- BZN|NO2      5
Hydro Run-of-river and poundage - BZN|NO2     5
Hydro Water Reservoir - BZN|NO2               5
Waste - BZN|NO2                               5
Wind Onshore - BZN|NO2                        5
Fossil Gas - BZN|NO3                          5
Hydro Pumped Storage Aggregated- BZN|NO3      5
Hydro Run-of-river and poundage - BZN|NO3     5
Hydro Water Reservoir - BZN|NO3               5
Other - BZN|NO3                               5
Other renewable - BZN|NO3                    73
Waste - BZN|NO3                               5
Wind Onshore - BZN|NO3                  

In [49]:
actual_generation_selected[actual_generation_selected.isna().any(axis=1)]

Unnamed: 0,MTU (UTC),Biomass - BZN|NO1,Fossil Gas - BZN|NO1,Hydro Run-of-river and poundage - BZN|NO1,Hydro Water Reservoir - BZN|NO1,Waste - BZN|NO1,Wind Onshore - BZN|NO1,Fossil Gas - BZN|NO2,Hydro Pumped Storage Aggregated- BZN|NO2,Hydro Run-of-river and poundage - BZN|NO2,...,Hydro Run-of-river and poundage - BZN|NO5,Hydro Water Reservoir - BZN|NO5,Waste - BZN|NO5,Fossil Gas - BZN|SE3,Hydro Water Reservoir - BZN|SE3,Marine - BZN|SE3,Nuclear - BZN|SE3,Other - BZN|SE3,Solar - BZN|SE3,Wind Onshore - BZN|SE3
792,01.12.2020 00:00 - 01.12.2020 01:00,0.0,0.0,669.0,1437.0,3.0,65.0,0.0,502.0,442.0,...,60.0,3601.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,693.0
793,01.12.2020 01:00 - 01.12.2020 02:00,0.0,0.0,646.0,1426.0,3.0,59.0,0.0,423.0,439.0,...,59.0,3524.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,586.0
795,01.12.2020 03:00 - 01.12.2020 04:00,0.0,0.0,608.0,1461.0,3.0,44.0,0.0,423.0,436.0,...,58.0,3591.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,398.0
796,01.12.2020 04:00 - 01.12.2020 05:00,0.0,0.0,589.0,1623.0,3.0,69.0,0.0,553.0,433.0,...,58.0,3787.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,321.0
797,01.12.2020 05:00 - 01.12.2020 06:00,0.0,0.0,567.0,1681.0,3.0,89.0,0.0,564.0,425.0,...,57.0,4071.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,274.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25668,30.11.2020 12:00 - 30.11.2020 13:00,0.0,0.0,664.0,1895.0,6.0,94.0,0.0,590.0,424.0,...,63.0,4569.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,1047.0
25670,30.11.2020 14:00 - 30.11.2020 15:00,0.0,0.0,669.0,2035.0,4.0,99.0,0.0,593.0,430.0,...,62.0,4729.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,1106.0
25671,30.11.2020 15:00 - 30.11.2020 16:00,0.0,0.0,678.0,2063.0,4.0,105.0,0.0,592.0,440.0,...,62.0,4822.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,1133.0
25675,30.11.2020 19:00 - 30.11.2020 20:00,0.0,0.0,695.0,1803.0,4.0,49.0,0.0,597.0,457.0,...,66.0,4148.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,1128.0


In [50]:
actual_generation_selected.shape

(26304, 33)

### Data Aggregation for import and export NO1 and neighbouring zones

We want to concatenate data for cross border physical flow between NO1 and neighbouring zones(NO2,NO3,NO5,SE3).

In [51]:
# reading import export csv to dataframes

# 2022 import export for NO1, NO2, NO3, NO5, SE3

import_export_no1_no2_2022 = pd.read_csv("../datasets/import_and_export/NO1-NO2 Import export Cross-Border Physical Flow_202201010000-202301010000.csv")
import_export_no1_no3_2022 = pd.read_csv("../datasets/import_and_export/NO1-NO3 Import export Cross-Border Physical Flow_202201010000-202301010000.csv")
import_export_no1_no5_2022 = pd.read_csv("../datasets/import_and_export/NO1-NO5 Import export Cross-Border Physical Flow_202201010000-202301010000.csv")
import_export_no1_se3_2022 = pd.read_csv("../datasets/import_and_export/NO1-SE3 Import export Cross-Border Physical Flow_202201010000-202301010000.csv")

# 2021 import export for NO1, NO2, NO3, NO5, SE3

import_export_no1_no2_2021 = pd.read_csv("../datasets/import_and_export/NO1-NO2 Import export Cross-Border Physical Flow_202101010000-202201010000.csv")
import_export_no1_no3_2021 = pd.read_csv("../datasets/import_and_export/NO1-NO3 Import export Cross-Border Physical Flow_202101010000-202201010000.csv")
import_export_no1_no5_2021 = pd.read_csv("../datasets/import_and_export/NO1-NO5 Import export Cross-Border Physical Flow_202101010000-202201010000.csv")
import_export_no1_se3_2021 = pd.read_csv("../datasets/import_and_export/NO1-SE3 Import export Cross-Border Physical Flow_202101010000-202201010000.csv")

# 2020 import export for NO1, NO2, NO3, NO5, SE3
import_export_no1_no2_2020 = pd.read_csv("../datasets/import_and_export/NO1-NO2 Import export Cross-Border Physical Flow_202001010000-202101010000.csv")
import_export_no1_no3_2020 = pd.read_csv("../datasets/import_and_export/NO1-NO3 Import export Cross-Border Physical Flow_202001010000-202101010000.csv")
import_export_no1_no5_2020 = pd.read_csv("../datasets/import_and_export/NO1-NO5 Import export Cross-Border Physical Flow_202001010000-202101010000.csv")
import_export_no1_se3_2020 = pd.read_csv("../datasets/import_and_export/NO1-SE3 Import export Cross-Border Physical Flow_202001010000-202101010000.csv")

# creating list of price dataframes

prices_dataframes = [
    import_export_no1_no2_2022,
    import_export_no1_no3_2022,
    import_export_no1_no5_2022,
    import_export_no1_se3_2022,
    import_export_no1_no2_2021,
    import_export_no1_no3_2021,
    import_export_no1_no5_2021,
    import_export_no1_se3_2021,
    import_export_no1_no2_2020,
    import_export_no1_no3_2020,
    import_export_no1_no5_2020,
    import_export_no1_se3_2020]

# checking shape of each dataframe to see if all dataframes have the same number of rows.
for i in prices_dataframes:
    print(i.shape)

(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8760, 3)
(8784, 3)
(8784, 3)
(8784, 3)
(8784, 3)


In [52]:
# Concatenating the datasets
import_export_no1_no2 = pd.concat([import_export_no1_no2_2022, import_export_no1_no2_2021, import_export_no1_no2_2020])
import_export_no1_no3 = pd.concat([import_export_no1_no3_2022, import_export_no1_no3_2021, import_export_no1_no3_2020])
import_export_no1_no5 = pd.concat([import_export_no1_no5_2022, import_export_no1_no5_2021, import_export_no1_no5_2020])
import_export_no1_se3 = pd.concat([import_export_no1_se3_2022, import_export_no1_se3_2021, import_export_no1_se3_2020])


# concatenated dataframes as a list
concat_prices_dataframes = [import_export_no1_no2, import_export_no1_no3, import_export_no1_no5, import_export_no1_se3]

# checking the results
for i in concat_prices_dataframes:
    print(i.shape)

(26304, 3)
(26304, 3)
(26304, 3)
(26304, 3)


In [53]:
import_export_no1_no2.head()

Unnamed: 0,Time (UTC),BZN|NO2 > BZN|NO1 [MW],BZN|NO1 > BZN|NO2 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,1242.0,0.0
1,01.01.2022 01:00 - 01.01.2022 02:00,1222.0,0.0
2,01.01.2022 02:00 - 01.01.2022 03:00,1679.0,0.0
3,01.01.2022 03:00 - 01.01.2022 04:00,1668.0,0.0
4,01.01.2022 04:00 - 01.01.2022 05:00,1697.0,0.0


In [54]:
import_export_no1_se3.head()

Unnamed: 0,Time (UTC),BZN|SE3 > BZN|NO1 [MW],BZN|NO1 > BZN|SE3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,1065,0
1,01.01.2022 01:00 - 01.01.2022 02:00,927,0
2,01.01.2022 02:00 - 01.01.2022 03:00,748,0
3,01.01.2022 03:00 - 01.01.2022 04:00,918,0
4,01.01.2022 04:00 - 01.01.2022 05:00,826,0


We see that the time column in the import export datasets is named "Time (UTC)" while in the other datasets, the time column is named "MTU (UTC)". The time column will be renamed to "MTU (UTC)" so that the datasets can be merged. To be consistent Time (UTC) is changed to MTU (UTC). To make it clear that 'BZN|NO2 > BZN|NO1 [MW]' shows the cross border physical flow , CBF is used as an abbreviation.

In [55]:
import_export_no1_no2.rename(columns = {'Time (UTC)':'MTU (UTC)', 'BZN|NO2 > BZN|NO1 [MW]' : 'CBF BZN|NO2 > BZN|NO1 [MW]', 'BZN|NO1 > BZN|NO2 [MW]' : 'CBF BZN|NO1 > BZN|NO2 [MW]'}, inplace = True)
import_export_no1_no3.rename(columns = {'Time (UTC)':'MTU (UTC)', 'BZN|NO3 > BZN|NO1 [MW]' : 'CBF BZN|NO3 > BZN|NO1 [MW]', 'BZN|NO1 > BZN|NO3 [MW]' : 'CBF BZN|NO1 > BZN|NO3 [MW]'}, inplace = True)
import_export_no1_no5.rename(columns = {'Time (UTC)':'MTU (UTC)', 'BZN|NO5 > BZN|NO1 [MW]' : 'CBF BZN|NO5 > BZN|NO1 [MW]', 'BZN|NO1 > BZN|NO5 [MW]' : 'CBF BZN|NO1 > BZN|NO5 [MW]'}, inplace = True)
import_export_no1_se3.rename(columns = {'Time (UTC)':'MTU (UTC)', 'BZN|SE3 > BZN|NO1 [MW]' : 'CBF BZN|SE3 > BZN|NO1 [MW]', 'BZN|NO1 > BZN|SE3 [MW]' : 'CBF BZN|NO1 > BZN|SE3 [MW]'}, inplace = True)

Merging all dataframes for import and export


In [56]:
# Merging the import export datasets
import_export_no1_neighbours_dataframes = [import_export_no1_no2, import_export_no1_no3, import_export_no1_no5, import_export_no1_se3]
import_export_no1_neighbours_2020_2022 = reduce(lambda left, right: pd.merge(left,right, on=['MTU (UTC)']), import_export_no1_neighbours_dataframes)

# printing out the 5 first rows of the resulting dataframe
import_export_no1_neighbours_2020_2022.shape

(26304, 9)

In [57]:
import_export_no1_neighbours_2020_2022.head()

Unnamed: 0,MTU (UTC),CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,1242.0,0.0,371.0,0.0,594.0,0.0,1065,0
1,01.01.2022 01:00 - 01.01.2022 02:00,1222.0,0.0,383.0,0.0,676.0,0.0,927,0
2,01.01.2022 02:00 - 01.01.2022 03:00,1679.0,0.0,362.0,0.0,436.0,0.0,748,0
3,01.01.2022 03:00 - 01.01.2022 04:00,1668.0,0.0,324.0,0.0,345.0,0.0,918,0
4,01.01.2022 04:00 - 01.01.2022 05:00,1697.0,0.0,339.0,0.0,412.0,0.0,826,0


In [58]:
import_export_no1_neighbours_2020_2022.tail()

Unnamed: 0,MTU (UTC),CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
26299,31.12.2020 19:00 - 31.12.2020 20:00,81.0,0.0,328.0,0.0,2061.0,0.0,126,0
26300,31.12.2020 20:00 - 31.12.2020 21:00,64.0,0.0,324.0,0.0,2046.0,0.0,12,0
26301,31.12.2020 21:00 - 31.12.2020 22:00,0.0,196.0,318.0,0.0,1901.0,0.0,237,0
26302,31.12.2020 22:00 - 31.12.2020 23:00,0.0,202.0,307.0,0.0,1821.0,0.0,192,0
26303,31.12.2020 23:00 - 01.01.2021 00:00,0.0,112.0,318.0,0.0,1864.0,0.0,135,0


In [59]:
# Checking for missing values
import_export_no1_neighbours_2020_2022.isna().sum()

MTU (UTC)                      0
CBF BZN|NO2 > BZN|NO1 [MW]    31
CBF BZN|NO1 > BZN|NO2 [MW]    31
CBF BZN|NO3 > BZN|NO1 [MW]    31
CBF BZN|NO1 > BZN|NO3 [MW]    31
CBF BZN|NO5 > BZN|NO1 [MW]    31
CBF BZN|NO1 > BZN|NO5 [MW]    31
CBF BZN|SE3 > BZN|NO1 [MW]     0
CBF BZN|NO1 > BZN|SE3 [MW]     0
dtype: int64

In [60]:
import_export_no1_neighbours_2020_2022[import_export_no1_neighbours_2020_2022['CBF BZN|NO3 > BZN|NO1 [MW]'].isnull()]

Unnamed: 0,MTU (UTC),CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
16033,31.10.2021 01:00 - 31.10.2021 02:00,,,,,,,317,0
17807,12.01.2020 23:00 - 13.01.2020 00:00,,,,,,,1750,0
17808,13.01.2020 00:00 - 13.01.2020 01:00,,,,,,,1442,0
17809,13.01.2020 01:00 - 13.01.2020 02:00,,,,,,,1449,0
17810,13.01.2020 02:00 - 13.01.2020 03:00,,,,,,,1351,0
17811,13.01.2020 03:00 - 13.01.2020 04:00,,,,,,,1372,0
17812,13.01.2020 04:00 - 13.01.2020 05:00,,,,,,,1089,0
17813,13.01.2020 05:00 - 13.01.2020 06:00,,,,,,,14,0
17814,13.01.2020 06:00 - 13.01.2020 07:00,,,,,,,0,921
17815,13.01.2020 07:00 - 13.01.2020 08:00,,,,,,,0,1039


It seems like import and export data between norwegian bidding zones are all missing for varous dates mostly in the year 2020, and one in the year 2021. Imputation of these missing values will be performed during Exploratory Data Analysis in order to choose the best method for the dataset.

### Aggregation of Water Reservoirs and Hydro Storage Plants

The water reservoir dataset is measured with a weekly frequency while the other datasets are measured hourly. In order to make the data match frequency of measurements of the other datasets, we will need to up sample the data and interpolate the missing values. We will first map the weekly measurement to the first hour of each week, and then interpolate the intermediate measurements based on the weekly measurements. In order to interpolate the measurements of the last week, we will add the measurement for the first week of the succeeding year (2023) to the dataset. this measurement  will be dropped from the final dataset. Additionally, the first week of 2022 starts on january 3. 2022, so in order to get the data for the first two days we add the dataset for the last week of 2021. We will use spline interpolation to add smoothness to the interpolated curve, which will be more representative of water level fluctuations than a linear interpolation.

Reading in water levels as CSV with pandas library

In [61]:
# Loading in last week from datasets (2020)
water_level_NO1_2019 = pd.read_csv("../datasets/water_level/NO1_2019_Water Reservoirs and Hydro Storage Plants_201812310000-201912300000.csv", skiprows=range(1, 52))
water_level_NO2_2019 = pd.read_csv("../datasets/water_level/NO2_2019_Water Reservoirs and Hydro Storage Plants_201812310000-201912300000.csv", skiprows=range(1, 52))
water_level_NO3_2019 = pd.read_csv("../datasets/water_level/NO3_2019_Water Reservoirs and Hydro Storage Plants_201812310000-201912300000.csv", skiprows=range(1, 52))
water_level_NO5_2019 = pd.read_csv("../datasets/water_level/NO5_2019_Water Reservoirs and Hydro Storage Plants_201812310000-201912300000.csv", skiprows=range(1, 52))
water_level_SE3_2019 = pd.read_csv("../datasets/water_level/SE3_2019_Water Reservoirs and Hydro Storage Plants_201812310000-201912300000.csv", skiprows=range(1, 52))

# Loading in datasets (2020)
water_level_NO1_2020 = pd.read_csv("../datasets/water_level/NO1_2020_Water Reservoirs and Hydro Storage Plants_201912300000-202101040000.csv")
water_level_NO2_2020 = pd.read_csv("../datasets/water_level/NO2_2020_Water Reservoirs and Hydro Storage Plants_201912300000-202101040000.csv")
water_level_NO3_2020 = pd.read_csv("../datasets/water_level/NO3_2020_Water Reservoirs and Hydro Storage Plants_201912300000-202101040000.csv")
water_level_NO5_2020 = pd.read_csv("../datasets/water_level/NO5_2020_Water Reservoirs and Hydro Storage Plants_201912300000-202101040000.csv")
water_level_SE3_2020 = pd.read_csv("../datasets/water_level/SE3_2020_Water Reservoirs and Hydro Storage Plants_201912300000-202101040000.csv")

# Loading in datasets (2021)
water_level_NO1_2021 = pd.read_csv("../datasets/water_level/NO1_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv")
water_level_NO2_2021 = pd.read_csv("../datasets/water_level/NO2_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv")
water_level_NO3_2021 = pd.read_csv("../datasets/water_level/NO3_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv")
water_level_NO5_2021 = pd.read_csv("../datasets/water_level/NO5_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv")
water_level_SE3_2021 = pd.read_csv("../datasets/water_level/SE3_2021_Water Reservoirs and Hydro Storage Plants_202101040000-202201030000.csv")

# Loading in datasets (2022)
water_level_NO1_2022 = pd.read_csv("../datasets/water_level/NO1_2022_Water_Reservoirs_and_Hydro_Storage_Plants_202201030000-202301020000.csv")
water_level_NO2_2022 = pd.read_csv("../datasets/water_level/NO2_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")
water_level_NO3_2022 = pd.read_csv("../datasets/water_level/NO3_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")
water_level_NO5_2022 = pd.read_csv("../datasets/water_level/NO5_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")
water_level_SE3_2022 = pd.read_csv("../datasets/water_level/SE3_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")

# Loading in only the first week of 2023
water_level_NO1_2023 = pd.read_csv("../datasets/water_level/NO1_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)
water_level_NO2_2023 = pd.read_csv("../datasets/water_level/NO2_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)
water_level_NO3_2023 = pd.read_csv("../datasets/water_level/NO3_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)
water_level_NO5_2023 = pd.read_csv("../datasets/water_level/NO5_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)
water_level_SE3_2023 = pd.read_csv("../datasets/water_level/SE3_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)

In [62]:
water_level_NO1 = pd.concat([water_level_NO1_2019, water_level_NO1_2020, water_level_NO1_2021, water_level_NO1_2022, water_level_NO1_2023], ignore_index= True, axis= 0)
water_level_NO2 = pd.concat([water_level_NO2_2019, water_level_NO2_2020, water_level_NO2_2021, water_level_NO2_2022, water_level_NO2_2023], ignore_index= True, axis= 0)
water_level_NO3 = pd.concat([water_level_NO3_2019, water_level_NO3_2020, water_level_NO3_2021, water_level_NO3_2022, water_level_NO3_2023], ignore_index= True, axis= 0)
water_level_NO5 = pd.concat([water_level_NO5_2019, water_level_NO5_2020, water_level_NO5_2021, water_level_NO5_2022, water_level_NO5_2023], ignore_index= True, axis= 0)
water_level_SE3 = pd.concat([water_level_SE3_2019, water_level_SE3_2020, water_level_SE3_2021, water_level_SE3_2022, water_level_SE3_2023], ignore_index= True, axis= 0)

In [63]:
print(water_level_NO1.shape)
print(water_level_NO2.shape)
print(water_level_NO3.shape)
print(water_level_NO5.shape)
print(water_level_SE3.shape)

(159, 2)
(159, 2)
(159, 2)
(159, 2)
(159, 2)


In [64]:
water_level_SE3.info

<bound method DataFrame.info of         Week  Stored Energy Value [MWh] - BZN|SE3
0    Week 52                            2248000.0
1     Week 1                            2220000.0
2     Week 2                            2189000.0
3     Week 3                            2168000.0
4     Week 4                            2096000.0
..       ...                                  ...
154  Week 49                            1636000.0
155  Week 50                            1519000.0
156  Week 51                            1525000.0
157  Week 52                            1579000.0
158   Week 1                            1584000.0

[159 rows x 2 columns]>

In [65]:
range_numbers = pd.Series(range(1,160), dtype=np.int64, name='Week_Number')
weeknumber = range_numbers.to_frame()
weeknumber = pd.DataFrame(weeknumber)

In [66]:
weeknumber.info

<bound method DataFrame.info of      Week_Number
0              1
1              2
2              3
3              4
4              5
..           ...
154          155
155          156
156          157
157          158
158          159

[159 rows x 1 columns]>

In [67]:
joined_water_week_NO1 = pd.concat([weeknumber, water_level_NO1], axis=1)
joined_water_week_NO2 = pd.concat([weeknumber, water_level_NO2], axis=1)
joined_water_week_NO3 = pd.concat([weeknumber, water_level_NO3], axis=1)
joined_water_week_NO5 = pd.concat([weeknumber, water_level_NO5], axis=1)
joined_water_week_SE3 = pd.concat([weeknumber, water_level_SE3], axis=1)

joined_water_week_NO1.drop('Week', axis=1, inplace=True)
joined_water_week_NO2.drop('Week', axis=1, inplace=True)
joined_water_week_NO3.drop('Week', axis=1, inplace=True)
joined_water_week_NO5.drop('Week', axis=1, inplace=True)
joined_water_week_SE3.drop('Week', axis=1, inplace=True)

In [68]:
joined_water_week_NO1.info

<bound method DataFrame.info of      Week_Number  Stored Energy Value [MWh] - BZN|NO1
0              1                              4140000
1              2                              4012000
2              3                              3844000
3              4                              3718000
4              5                              3568000
..           ...                                  ...
154          155                              4547299
155          156                              4240847
156          157                              4054917
157          158                              3915448
158          159                              3741135

[159 rows x 2 columns]>

In [69]:
# joined_water_week_NO1[joined_water_week_NO1['Stored Energy Value [MWh] - BZN|NO1'].isnull()]
print(joined_water_week_NO1.isna().sum())
print(joined_water_week_NO2.isna().sum())
print(joined_water_week_NO3.isna().sum())
print(joined_water_week_NO5.isna().sum())
print(joined_water_week_SE3.isna().sum())

Week_Number                            0
Stored Energy Value [MWh] - BZN|NO1    0
dtype: int64
Week_Number                            0
Stored Energy Value [MWh] - BZN|NO2    0
dtype: int64
Week_Number                            0
Stored Energy Value [MWh] - BZN|NO3    0
dtype: int64
Week_Number                            0
Stored Energy Value [MWh] - BZN|NO5    0
dtype: int64
Week_Number                            0
Stored Energy Value [MWh] - BZN|SE3    1
dtype: int64


In [70]:
joined_water_week_SE3[joined_water_week_SE3['Stored Energy Value [MWh] - BZN|SE3'].isnull()]

Unnamed: 0,Week_Number,Stored Energy Value [MWh] - BZN|SE3
53,54,


In [71]:
joined_water_week_SE3['Stored Energy Value [MWh] - BZN|SE3'] = joined_water_week_SE3['Stored Energy Value [MWh] - BZN|SE3'].interpolate(option='spline')

In [72]:
joined_water_week_SE3.iloc[[53]]

Unnamed: 0,Week_Number,Stored Energy Value [MWh] - BZN|SE3
53,54,2041500.0


In [73]:
# Merging the dataframes
water_level_dataframes = [joined_water_week_NO1, joined_water_week_NO2, joined_water_week_NO3, joined_water_week_NO5, joined_water_week_SE3]
water_level_dataframes = reduce(lambda left, right: pd.merge(left,right, on=["Week_Number"]), water_level_dataframes)

# printing out the 5 first rows of the resulting dataframe
water_level_dataframes.info

<bound method DataFrame.info of      Week_Number  Stored Energy Value [MWh] - BZN|NO1  \
0              1                              4140000   
1              2                              4012000   
2              3                              3844000   
3              4                              3718000   
4              5                              3568000   
..           ...                                  ...   
154          155                              4547299   
155          156                              4240847   
156          157                              4054917   
157          158                              3915448   
158          159                              3741135   

     Stored Energy Value [MWh] - BZN|NO2  Stored Energy Value [MWh] - BZN|NO3  \
0                               23767000                              5418000   
1                               24273000                              5588000   
2                               24560000

In [74]:
start_date = '2019-12-23'
end_date = '2023-01-02'
week_date_timeframe = pd.DataFrame(pd.date_range(start=start_date, end=end_date, freq='7D'))
week_date_timeframe.rename(columns={0: 'Week_start_date'}, inplace=True)
week_date_timeframe.info

<bound method DataFrame.info of     Week_start_date
0        2019-12-23
1        2019-12-30
2        2020-01-06
3        2020-01-13
4        2020-01-20
..              ...
154      2022-12-05
155      2022-12-12
156      2022-12-19
157      2022-12-26
158      2023-01-02

[159 rows x 1 columns]>

In [75]:
joined_week_date_water_dataframe = pd.concat([week_date_timeframe, water_level_dataframes], axis=1)
joined_week_date_water_dataframe.info

<bound method DataFrame.info of     Week_start_date  Week_Number  Stored Energy Value [MWh] - BZN|NO1  \
0        2019-12-23            1                              4140000   
1        2019-12-30            2                              4012000   
2        2020-01-06            3                              3844000   
3        2020-01-13            4                              3718000   
4        2020-01-20            5                              3568000   
..              ...          ...                                  ...   
154      2022-12-05          155                              4547299   
155      2022-12-12          156                              4240847   
156      2022-12-19          157                              4054917   
157      2022-12-26          158                              3915448   
158      2023-01-02          159                              3741135   

     Stored Energy Value [MWh] - BZN|NO2  Stored Energy Value [MWh] - BZN|NO3  \
0         

In [76]:
joined_week_date_water_dataframe.drop('Week_Number', axis=1, inplace=True)

In [77]:
joined_week_date_water_dataframe.head()

Unnamed: 0,Week_start_date,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,2019-12-23,4140000,23767000,5418000,11156000,2248000.0
1,2019-12-30,4012000,24273000,5588000,11211000,2220000.0
2,2020-01-06,3844000,24560000,5541000,11074000,2189000.0
3,2020-01-13,3718000,24476000,5285000,10810000,2168000.0
4,2020-01-20,3568000,24254000,5402000,10644000,2096000.0


In [78]:
joined_week_date_water_dataframe.set_index('Week_start_date', inplace=True)

In [79]:
joined_week_date_water_dataframe.index

DatetimeIndex(['2019-12-23', '2019-12-30', '2020-01-06', '2020-01-13',
               '2020-01-20', '2020-01-27', '2020-02-03', '2020-02-10',
               '2020-02-17', '2020-02-24',
               ...
               '2022-10-31', '2022-11-07', '2022-11-14', '2022-11-21',
               '2022-11-28', '2022-12-05', '2022-12-12', '2022-12-19',
               '2022-12-26', '2023-01-02'],
              dtype='datetime64[ns]', name='Week_start_date', length=159, freq=None)

In [80]:
joined_week_date_water_dataframe.head()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-12-23,4140000,23767000,5418000,11156000,2248000.0
2019-12-30,4012000,24273000,5588000,11211000,2220000.0
2020-01-06,3844000,24560000,5541000,11074000,2189000.0
2020-01-13,3718000,24476000,5285000,10810000,2168000.0
2020-01-20,3568000,24254000,5402000,10644000,2096000.0


In [81]:
# uppsampling joined_water_week dataframe to an hourly frequency
joined_week_date_water_dataframe = joined_week_date_water_dataframe.asfreq('H')

In [82]:
# printing out 5 first rows of the resulting dataframe
joined_week_date_water_dataframe.head()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-12-23 00:00:00,4140000.0,23767000.0,5418000.0,11156000.0,2248000.0
2019-12-23 01:00:00,,,,,
2019-12-23 02:00:00,,,,,
2019-12-23 03:00:00,,,,,
2019-12-23 04:00:00,,,,,


In [83]:
# printing out 5 last rows of the resulting dataframe
joined_week_date_water_dataframe.tail()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-01 20:00:00,,,,,
2023-01-01 21:00:00,,,,,
2023-01-01 22:00:00,,,,,
2023-01-01 23:00:00,,,,,
2023-01-02 00:00:00,3741135.0,21066642.0,4657760.0,11620688.0,1584000.0


In [84]:
# performing cubic spline interpolation (3rd degree polynomial)
interpolated_df = joined_week_date_water_dataframe.interpolate(method='spline', order=3)

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.


In [85]:
# printing out the 5 first rows of the interpolated dataframe
interpolated_df.head()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-12-23 00:00:00,4140000.0,23767000.0,5418000.0,11156000.0,2248000.0
2019-12-23 01:00:00,4139608.0,23770120.0,5419142.0,11156850.0,2247935.0
2019-12-23 02:00:00,4139210.0,23773240.0,5420285.0,11157700.0,2247869.0
2019-12-23 03:00:00,4138805.0,23776360.0,5421430.0,11158540.0,2247800.0
2019-12-23 04:00:00,4138395.0,23779490.0,5422576.0,11159370.0,2247730.0


In [86]:
# checking data for missing values, if any
interpolated_df.isna().sum()

Stored Energy Value [MWh] - BZN|NO1    0
Stored Energy Value [MWh] - BZN|NO2    0
Stored Energy Value [MWh] - BZN|NO3    0
Stored Energy Value [MWh] - BZN|NO5    0
Stored Energy Value [MWh] - BZN|SE3    0
dtype: int64

In [87]:
# filtering out the data from year 2021 and 2023
filtering_interpolation_df = interpolated_df.loc[(interpolated_df.index >= '2020-01-01 00:00:00')
                                                 & (interpolated_df.index <= '2022-12-31 23:00:00')]

In [88]:
# resetting index
water_reservoir = filtering_interpolation_df
water_reservoir = water_reservoir.reset_index()
water_reservoir = water_reservoir.rename(columns={'Week_start_date': 'start MTU (UTC)'})

In [89]:
water_reservoir.head()

Unnamed: 0,start MTU (UTC),Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,2020-01-01 00:00:00,3962493.0,24389360.0,5608103.0,11192400.0,2209379.0
1,2020-01-01 01:00:00,3961448.0,24391550.0,5608290.0,11191840.0,2209164.0
2,2020-01-01 02:00:00,3960404.0,24393720.0,5608466.0,11191280.0,2208948.0
3,2020-01-01 03:00:00,3959359.0,24395890.0,5608632.0,11190710.0,2208733.0
4,2020-01-01 04:00:00,3958314.0,24398050.0,5608788.0,11190130.0,2208518.0


### Aggregating all sub-datasets
Now that we have aggregated each of the sub-datasets, we can merge them together into a larger dataset that can be used to train time series electricity price prediction models on. we will wait with adding the water_reservoir until the time-range column 'MTU (UTC)' of the other datasets have been converted to datetime columns.

In [90]:
# merging prices, loads, actual_generation_selected and
# import_export_no1_neighbours dataframes on the date time
# column 'MTU (UTC)'

# creating a list of dataframes (excluding water water_reservoir)
no1_dataframes_minus_water_reservoir = [
    prices,
    load,
    actual_generation_selected,
    import_export_no1_neighbours_2020_2022,
    ]

# merging dataframes
no1_minus_water_reservoir = reduce(lambda left, right: pd.merge(
    left,right, on=['MTU (UTC)']), no1_dataframes_minus_water_reservoir)

# printing out shape of the resulting dataframe
no1_minus_water_reservoir.shape

(26304, 51)

we can see that the resulting dataframe has the correct number of rows (8760) and 49 columns.

We print out the start and end of the dataset to see that it has the correct end values for MTU(UTC)

In [91]:
# printing the 5 first rows of resulting dataframe
no1_minus_water_reservoir.head()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,...,Solar - BZN|SE3,Wind Onshore - BZN|SE3,CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
0,01.01.2020 00:00 - 01.01.2020 01:00,31.77,31.77,28.45,31.77,28.45,4333.0,4139.0,3016.0,1950.0,...,0.0,1594.0,0.0,0.0,82.0,0.0,1449.0,0.0,391,0
1,01.01.2020 01:00 - 01.01.2020 02:00,31.57,31.57,27.9,31.57,27.9,4250.0,4114.0,2945.0,1926.0,...,0.0,1690.0,0.0,0.0,48.0,0.0,1283.0,0.0,348,0
2,01.01.2020 02:00 - 01.01.2020 03:00,31.28,31.28,27.52,31.28,27.52,4167.0,4030.0,3032.0,1913.0,...,0.0,1783.0,0.0,0.0,0.0,6.0,962.0,0.0,500,0
3,01.01.2020 03:00 - 01.01.2020 04:00,30.72,30.72,27.54,30.72,27.54,4145.0,4032.0,2988.0,1894.0,...,0.0,1900.0,0.0,0.0,0.0,6.0,767.0,0.0,610,0
4,01.01.2020 04:00 - 01.01.2020 05:00,30.27,30.27,26.55,30.27,26.55,4222.0,4032.0,2848.0,1901.0,...,0.0,1995.0,0.0,0.0,0.0,22.0,642.0,0.0,909,0


In [92]:
# printing the 5 last rows of resulting dataframe
no1_minus_water_reservoir.tail()

Unnamed: 0,MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,...,Solar - BZN|SE3,Wind Onshore - BZN|SE3,CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
26299,31.12.2022 19:00 - 31.12.2022 20:00,123.61,123.61,23.82,123.61,11.57,4650.0,4333.0,3612.0,2128.0,...,0.0,1499.0,621.0,0.0,146.0,0.0,43.0,0.0,2083,0
26300,31.12.2022 20:00 - 31.12.2022 21:00,121.09,121.09,23.93,121.09,14.89,4573.0,4270.0,3605.0,2089.0,...,0.0,1282.0,595.0,0.0,124.0,0.0,24.0,0.0,2097,0
26301,31.12.2022 21:00 - 31.12.2022 22:00,120.0,120.0,23.75,120.0,9.94,4453.0,4294.0,3479.0,2037.0,...,0.0,1168.0,608.0,0.0,91.0,0.0,41.0,0.0,2007,0
26302,31.12.2022 22:00 - 31.12.2022 23:00,119.98,119.98,23.56,119.98,4.84,4389.0,4282.0,3415.0,1991.0,...,0.0,1033.0,504.0,0.0,90.0,0.0,0.0,29.0,2119,0
26303,31.12.2022 23:00 - 01.01.2023 00:00,119.32,119.32,23.35,119.32,2.01,4444.0,4182.0,3346.0,2009.0,...,0.0,877.0,956.0,0.0,83.0,0.0,0.0,12.0,1612,0


We see that the dataframe starts in the beginning of 2022 and ends at the end of 2022 so the data has been merged successfuly.

We can now convert the datetime range into specific datetimes so the data can be used in timeseries data prediction, and so that the 'water_reservoir' dataset can be added to the final dataset.

In [93]:
# splitting the time column 'MTU (UTC)' data on the '-' symbol
no1_minus_water_reservoir[['start MTU (UTC)', 'end MTU (UTC)']] = no1_minus_water_reservoir['MTU (UTC)'].str.split('-', 1, expand=True)
no1_minus_water_reservoir.head()
# dropping the 'MTU (CET/CEST)' column
no1_minus_water_reservoir = no1_minus_water_reservoir.drop(['MTU (UTC)'], axis=1)

# converting the start and end columns to datetime
no1_minus_water_reservoir['start MTU (UTC)'] = pd.to_datetime(no1_minus_water_reservoir['start MTU (UTC)'])
no1_minus_water_reservoir['end MTU (UTC)'] = pd.to_datetime(no1_minus_water_reservoir['end MTU (UTC)'])

# printing out 5 first rows of the resulting dataframe
no1_minus_water_reservoir.head()

Unnamed: 0,Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3,...,CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW],start MTU (UTC),end MTU (UTC)
0,31.77,31.77,28.45,31.77,28.45,4333.0,4139.0,3016.0,1950.0,9350,...,0.0,0.0,82.0,0.0,1449.0,0.0,391,0,2020-01-01 00:00:00,2020-01-01 01:00:00
1,31.57,31.57,27.9,31.57,27.9,4250.0,4114.0,2945.0,1926.0,9158,...,0.0,0.0,48.0,0.0,1283.0,0.0,348,0,2020-01-01 01:00:00,2020-01-01 02:00:00
2,31.28,31.28,27.52,31.28,27.52,4167.0,4030.0,3032.0,1913.0,8980,...,0.0,0.0,0.0,6.0,962.0,0.0,500,0,2020-01-01 02:00:00,2020-01-01 03:00:00
3,30.72,30.72,27.54,30.72,27.54,4145.0,4032.0,2988.0,1894.0,8914,...,0.0,0.0,0.0,6.0,767.0,0.0,610,0,2020-01-01 03:00:00,2020-01-01 04:00:00
4,30.27,30.27,26.55,30.27,26.55,4222.0,4032.0,2848.0,1901.0,8964,...,0.0,0.0,0.0,22.0,642.0,0.0,909,0,2020-01-01 04:00:00,2020-01-01 05:00:00


Checking datatype of the time columns in the resulting dataframe.


In [94]:
# printing info about columns:
no1_minus_water_reservoir.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 26304 entries, 0 to 26303
Data columns (total 52 columns):
 #   Column                                     Non-Null Count  Dtype         
---  ------                                     --------------  -----         
 0   Day-ahead Price [EUR/MWh] BZN|NO1          26304 non-null  float64       
 1   Day-ahead Price [EUR/MWh] BZN|NO2          26304 non-null  float64       
 2   Day-ahead Price [EUR/MWh] BZN|NO3          26304 non-null  float64       
 3   Day-ahead Price [EUR/MWh] BZN|NO5          26304 non-null  float64       
 4   Day-ahead Price [EUR/MWh] BZN|SE3          26304 non-null  float64       
 5   Actual Total Load [MW] - BZN|NO1           26303 non-null  float64       
 6   Actual Total Load [MW] - BZN|NO2           26303 non-null  float64       
 7   Actual Total Load [MW] - BZN|NO3           26303 non-null  float64       
 8   Actual Total Load [MW] - BZN|NO5           26303 non-null  float64       
 9   Actual Total Load

We can now add the "water_reservoir" dataframe to the other data

In [95]:
# merging dataframes no1_minus_water_reservoir and water_reservoir
no1 = reduce(lambda left, right: pd.merge(
    left,right, on=['start MTU (UTC)']), [no1_minus_water_reservoir, water_reservoir])

# printing out shape of the resulting dataframe
no1.shape

(26304, 57)

In [96]:
no1.isna().sum()

Day-ahead Price [EUR/MWh] BZN|NO1             0
Day-ahead Price [EUR/MWh] BZN|NO2             0
Day-ahead Price [EUR/MWh] BZN|NO3             0
Day-ahead Price [EUR/MWh] BZN|NO5             0
Day-ahead Price [EUR/MWh] BZN|SE3             0
Actual Total Load [MW] - BZN|NO1              1
Actual Total Load [MW] - BZN|NO2              1
Actual Total Load [MW] - BZN|NO3              1
Actual Total Load [MW] - BZN|NO5              1
Actual Total Load [MW] - BZN|SE3              0
Biomass - BZN|NO1                            13
Fossil Gas - BZN|NO1                          5
Hydro Run-of-river and poundage - BZN|NO1     5
Hydro Water Reservoir - BZN|NO1               5
Waste - BZN|NO1                               5
Wind Onshore - BZN|NO1                        5
Fossil Gas - BZN|NO2                          6
Hydro Pumped Storage Aggregated- BZN|NO2      5
Hydro Run-of-river and poundage - BZN|NO2     5
Hydro Water Reservoir - BZN|NO2               5
Waste - BZN|NO2                         

we can see that the resulting dataframe has the correct number of rows (8760) and 55 columns.

We print out the start and end of the dataset to see that it has the correct end values for MTU(UTC)

In [97]:
# printing out 5 first rows of the resulting dataframe
no1.head()

Unnamed: 0,Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3,...,CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW],start MTU (UTC),end MTU (UTC),Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,31.77,31.77,28.45,31.77,28.45,4333.0,4139.0,3016.0,1950.0,9350,...,0.0,391,0,2020-01-01 00:00:00,2020-01-01 01:00:00,3962493.0,24389360.0,5608103.0,11192400.0,2209379.0
1,31.57,31.57,27.9,31.57,27.9,4250.0,4114.0,2945.0,1926.0,9158,...,0.0,348,0,2020-01-01 01:00:00,2020-01-01 02:00:00,3961448.0,24391550.0,5608290.0,11191840.0,2209164.0
2,31.28,31.28,27.52,31.28,27.52,4167.0,4030.0,3032.0,1913.0,8980,...,0.0,500,0,2020-01-01 02:00:00,2020-01-01 03:00:00,3960404.0,24393720.0,5608466.0,11191280.0,2208948.0
3,30.72,30.72,27.54,30.72,27.54,4145.0,4032.0,2988.0,1894.0,8914,...,0.0,610,0,2020-01-01 03:00:00,2020-01-01 04:00:00,3959359.0,24395890.0,5608632.0,11190710.0,2208733.0
4,30.27,30.27,26.55,30.27,26.55,4222.0,4032.0,2848.0,1901.0,8964,...,0.0,909,0,2020-01-01 04:00:00,2020-01-01 05:00:00,3958314.0,24398050.0,5608788.0,11190130.0,2208518.0


In [98]:
# printing the last 5 rows of the resulting dataframe
no1.tail()

Unnamed: 0,Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3,...,CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW],start MTU (UTC),end MTU (UTC),Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
26299,123.61,123.61,23.82,123.61,11.57,4650.0,4333.0,3612.0,2128.0,9602,...,0.0,2083,0,2022-12-31 19:00:00,2022-12-31 20:00:00,3776753.0,21202240.0,4758064.0,11723170.0,1590230.0
26300,121.09,121.09,23.93,121.09,14.89,4573.0,4270.0,3605.0,2089.0,9251,...,0.0,2097,0,2022-12-31 20:00:00,2022-12-31 21:00:00,3775572.0,21198300.0,4754910.0,11719940.0,1590075.0
26301,120.0,120.0,23.75,120.0,9.94,4453.0,4294.0,3479.0,2037.0,9092,...,0.0,2007,0,2022-12-31 21:00:00,2022-12-31 22:00:00,3774389.0,21194300.0,4751735.0,11716680.0,1589917.0
26302,119.98,119.98,23.56,119.98,4.84,4389.0,4282.0,3415.0,1991.0,8944,...,29.0,2119,0,2022-12-31 22:00:00,2022-12-31 23:00:00,3773202.0,21190260.0,4748539.0,11713400.0,1589755.0
26303,119.32,119.32,23.35,119.32,2.01,4444.0,4182.0,3346.0,2009.0,8955,...,12.0,1612,0,2022-12-31 23:00:00,2023-01-01 00:00:00,3772012.0,21186170.0,4745322.0,11710100.0,1589588.0


We can see that the columns for start datetime and end datetime of the dataset is correct. One thing of note is that the time columns are almost at the end of the dataset. Since we are dealing with time series data, it would be useful to have the datetime columns at the beginning of the dataframe. We will therefore move the start and end datetime columns to the start of the dataframe.

In [99]:
# creating a list of column names
no1_columns = no1.columns.tolist()
# removing names of datetime column names from columnslist
no1_columns.remove('start MTU (UTC)')
no1_columns.remove('end MTU (UTC)')

# inserting the datetime column names to start of list 
no1_columns.insert(0, 'end MTU (UTC)')
no1_columns.insert(0, 'start MTU (UTC)')

# printing out resulting list
print(no1_columns)

['start MTU (UTC)', 'end MTU (UTC)', 'Day-ahead Price [EUR/MWh] BZN|NO1', 'Day-ahead Price [EUR/MWh] BZN|NO2', 'Day-ahead Price [EUR/MWh] BZN|NO3', 'Day-ahead Price [EUR/MWh] BZN|NO5', 'Day-ahead Price [EUR/MWh] BZN|SE3', 'Actual Total Load [MW] - BZN|NO1', 'Actual Total Load [MW] - BZN|NO2', 'Actual Total Load [MW] - BZN|NO3', 'Actual Total Load [MW] - BZN|NO5', 'Actual Total Load [MW] - BZN|SE3', 'Biomass - BZN|NO1', 'Fossil Gas - BZN|NO1', 'Hydro Run-of-river and poundage - BZN|NO1', 'Hydro Water Reservoir - BZN|NO1', 'Waste - BZN|NO1', 'Wind Onshore - BZN|NO1', 'Fossil Gas - BZN|NO2', 'Hydro Pumped Storage Aggregated- BZN|NO2', 'Hydro Run-of-river and poundage - BZN|NO2', 'Hydro Water Reservoir - BZN|NO2', 'Waste - BZN|NO2', 'Wind Onshore - BZN|NO2', 'Fossil Gas - BZN|NO3', 'Hydro Pumped Storage Aggregated- BZN|NO3', 'Hydro Run-of-river and poundage - BZN|NO3', 'Hydro Water Reservoir - BZN|NO3', 'Other - BZN|NO3', 'Other renewable - BZN|NO3', 'Waste - BZN|NO3', 'Wind Onshore - BZN|

We now have a list with the time columns at the beginning, and can reorder the columns so the datetime columns are at the begining of the dataframe

In [100]:
# reordering the columns so start time and end time is first
no1 = no1.reindex(columns=no1_columns)

# printing out 5 first rows of dataframe to check if the order has been updated correctly
no1.head()

Unnamed: 0,start MTU (UTC),end MTU (UTC),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3,Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,...,CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW],Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,2020-01-01 00:00:00,2020-01-01 01:00:00,31.77,31.77,28.45,31.77,28.45,4333.0,4139.0,3016.0,...,0.0,1449.0,0.0,391,0,3962493.0,24389360.0,5608103.0,11192400.0,2209379.0
1,2020-01-01 01:00:00,2020-01-01 02:00:00,31.57,31.57,27.9,31.57,27.9,4250.0,4114.0,2945.0,...,0.0,1283.0,0.0,348,0,3961448.0,24391550.0,5608290.0,11191840.0,2209164.0
2,2020-01-01 02:00:00,2020-01-01 03:00:00,31.28,31.28,27.52,31.28,27.52,4167.0,4030.0,3032.0,...,6.0,962.0,0.0,500,0,3960404.0,24393720.0,5608466.0,11191280.0,2208948.0
3,2020-01-01 03:00:00,2020-01-01 04:00:00,30.72,30.72,27.54,30.72,27.54,4145.0,4032.0,2988.0,...,6.0,767.0,0.0,610,0,3959359.0,24395890.0,5608632.0,11190710.0,2208733.0
4,2020-01-01 04:00:00,2020-01-01 05:00:00,30.27,30.27,26.55,30.27,26.55,4222.0,4032.0,2848.0,...,22.0,642.0,0.0,909,0,3958314.0,24398050.0,5608788.0,11190130.0,2208518.0


We can see that the datetime columns are at the start of the dataframe and that we still have 55 columns, so the reordering was successful.

We will now print out the final order of the features for reference.

In [101]:
# printing out the final order of the features
no1.columns

Index(['start MTU (UTC)', 'end MTU (UTC)', 'Day-ahead Price [EUR/MWh] BZN|NO1',
       'Day-ahead Price [EUR/MWh] BZN|NO2',
       'Day-ahead Price [EUR/MWh] BZN|NO3',
       'Day-ahead Price [EUR/MWh] BZN|NO5',
       'Day-ahead Price [EUR/MWh] BZN|SE3', 'Actual Total Load [MW] - BZN|NO1',
       'Actual Total Load [MW] - BZN|NO2', 'Actual Total Load [MW] - BZN|NO3',
       'Actual Total Load [MW] - BZN|NO5', 'Actual Total Load [MW] - BZN|SE3',
       'Biomass - BZN|NO1', 'Fossil Gas - BZN|NO1',
       'Hydro Run-of-river and poundage - BZN|NO1',
       'Hydro Water Reservoir - BZN|NO1', 'Waste - BZN|NO1',
       'Wind Onshore - BZN|NO1', 'Fossil Gas - BZN|NO2',
       'Hydro Pumped Storage Aggregated- BZN|NO2',
       'Hydro Run-of-river and poundage - BZN|NO2',
       'Hydro Water Reservoir - BZN|NO2', 'Waste - BZN|NO2',
       'Wind Onshore - BZN|NO2', 'Fossil Gas - BZN|NO3',
       'Hydro Pumped Storage Aggregated- BZN|NO3',
       'Hydro Run-of-river and poundage - BZN|NO3',
    

#### summary:
The Final dataset for zone NO1 and the neighbouring zones has 8760 rows and 55 columns (two of which is datetime for start and end of time period).

#### Export to csv
Code to export dataframe to csv:

In [102]:
# uncomment to export no1 dataframe to csv
# no1.to_csv('../datasets/no1_aggregated/no1_2020_to_2022_aggregated.csv')