## Data aggregation of energy market prices in bidding zone NO1 (Norway Østlandet) and neighbouring zones for the year of 2022

### Dependencies

In [4]:
# importing dependencies to aggregate dataset
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from functools import reduce # used to merge the dataset

### Aggregation of pricing data
Pricing data is of spot (day-ahead) market prices in the currency Euro per MWh

Importing price data from each zone

In [5]:
prices_no1 = pd.read_csv("../datasets/prices/NO1 Day-ahead Prices_202201010000-202301010000.csv")
prices_no2 = pd.read_csv("../datasets/prices/NO2 Day-ahead Prices_202201010000-202301010000.csv")
prices_no3 = pd.read_csv("../datasets/prices/NO3 Day-ahead Prices_202201010000-202301010000.csv")
prices_no5 = pd.read_csv("../datasets/prices/NO5 Day-ahead Prices_202201010000-202301010000.csv")
prices_se3 = pd.read_csv("../datasets/prices/SE3 Day-ahead Prices_202201010000-202301010000.csv")

Printing out first rows of each dataset to see the structure

In [6]:
prices_no1.head()

Unnamed: 0,MTU (CET/CEST),Day-ahead Price [EUR/MWh],Currency,BZN|NO1
0,01.01.2022 00:00 - 01.01.2022 01:00,132.89,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,129.3,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,132.08,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,111.44,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,112.35,EUR,


In [7]:
prices_no2.head()

Unnamed: 0,MTU (CET/CEST),Day-ahead Price [EUR/MWh],Currency,BZN|NO2
0,01.01.2022 00:00 - 01.01.2022 01:00,132.89,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,129.3,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,132.08,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,111.44,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,112.35,EUR,


In [8]:
prices_no3.head()

Unnamed: 0,MTU (CET/CEST),Day-ahead Price [EUR/MWh],Currency,BZN|NO3
0,01.01.2022 00:00 - 01.01.2022 01:00,46.6,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,41.33,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,42.18,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,44.37,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,37.67,EUR,


In [9]:
prices_no5.head()

Unnamed: 0,MTU (CET/CEST),Day-ahead Price [EUR/MWh],Currency,BZN|NO5
0,01.01.2022 00:00 - 01.01.2022 01:00,132.89,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,129.3,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,132.08,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,111.44,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,112.35,EUR,


In [10]:
prices_se3.head()

Unnamed: 0,MTU (CET/CEST),Day-ahead Price [EUR/MWh],Currency,BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,46.6,EUR,
1,01.01.2022 01:00 - 01.01.2022 02:00,41.33,EUR,
2,01.01.2022 02:00 - 01.01.2022 03:00,42.18,EUR,
3,01.01.2022 03:00 - 01.01.2022 04:00,44.37,EUR,
4,01.01.2022 04:00 - 01.01.2022 05:00,37.67,EUR,


#### preperation for merging of the price data from the different zones:
The common column we will merge the data based on is the date time column "MTU (CET/CEST)". The column currency is redundant since the column that holds price already includes it in the column name. The currency column can therefore be dropped. The datasets have a colum name for which zone the data is from, but does not include any data for each entry. This information can be added to the column name for price and the column for zone can be dropped.

In [11]:
# renaming price columns
prices_no1 = prices_no1.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|NO1'})
prices_no2 = prices_no2.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|NO2'})
prices_no3 = prices_no3.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|NO3'})
prices_no5 = prices_no5.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|NO5'})
prices_se3 = prices_se3.rename(columns={'Day-ahead Price [EUR/MWh]': 'Day-ahead Price [EUR/MWh] BZN|SE3'})

In [12]:
# dropping redundant columns
prices_no1= prices_no1.drop(['Currency', 'BZN|NO1'], axis=1)
prices_no2= prices_no2.drop(['Currency', 'BZN|NO2'], axis=1)
prices_no3= prices_no3.drop(['Currency', 'BZN|NO3'], axis=1)
prices_no5= prices_no5.drop(['Currency', 'BZN|NO5'], axis=1)
prices_se3= prices_se3.drop(['Currency', 'BZN|SE3'], axis=1)

In [13]:
# merging the price data into one dataframe on the date time column 'MTU (CET/CEST)'
price_data_frames = [prices_no1, prices_no2, prices_no3, prices_no5, prices_se3]
prices = reduce(lambda left, right: pd.merge(left,right, on=['MTU (CET/CEST)']), price_data_frames)

# printing out the resulting dataframe
prices.head()

Unnamed: 0,MTU (CET/CEST),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,132.89,132.89,46.6,132.89,46.6
1,01.01.2022 01:00 - 01.01.2022 02:00,129.3,129.3,41.33,129.3,41.33
2,01.01.2022 02:00 - 01.01.2022 03:00,132.08,132.08,42.18,132.08,42.18
3,01.01.2022 03:00 - 01.01.2022 04:00,111.44,111.44,44.37,111.44,44.37
4,01.01.2022 04:00 - 01.01.2022 05:00,112.35,112.35,37.67,112.35,37.67


### Aggregation of load data
The load data contains data about the power consumption in each zone

Importing load data from each zone

In [14]:
load_no1 = pd.read_csv("../datasets/load/NO1 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")
load_no2 = pd.read_csv("../datasets/load/NO2 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")
load_no3 = pd.read_csv("../datasets/load/NO3 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")
load_no5 = pd.read_csv("../datasets/load/NO5 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")
load_se3 = pd.read_csv("../datasets/load/SE3 Total Load - Day Ahead _ Actual_202201010000-202301010000.csv")

Printing out first rows of each dataset to see the structure

In [15]:
load_no1.head()

Unnamed: 0,Time (CET/CEST),Day-ahead Total Load Forecast [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO1
0,01.01.2022 00:00 - 01.01.2022 01:00,4440.0,4779.0
1,01.01.2022 01:00 - 01.01.2022 02:00,4492.0,4701.0
2,01.01.2022 02:00 - 01.01.2022 03:00,4371.0,4681.0
3,01.01.2022 03:00 - 01.01.2022 04:00,4234.0,4620.0
4,01.01.2022 04:00 - 01.01.2022 05:00,4225.0,4645.0


In [16]:
load_no2.head()

Unnamed: 0,Time (CET/CEST),Day-ahead Total Load Forecast [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO2
0,01.01.2022 00:00 - 01.01.2022 01:00,3248.0,4357.0
1,01.01.2022 01:00 - 01.01.2022 02:00,4170.0,4440.0
2,01.01.2022 02:00 - 01.01.2022 03:00,3796.0,4480.0
3,01.01.2022 03:00 - 01.01.2022 04:00,3423.0,4424.0
4,01.01.2022 04:00 - 01.01.2022 05:00,3439.0,4386.0


In [17]:
load_no3.head()

Unnamed: 0,Time (CET/CEST),Day-ahead Total Load Forecast [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO3
0,01.01.2022 00:00 - 01.01.2022 01:00,3125.0,3487.0
1,01.01.2022 01:00 - 01.01.2022 02:00,3235.0,3463.0
2,01.01.2022 02:00 - 01.01.2022 03:00,3158.0,3388.0
3,01.01.2022 03:00 - 01.01.2022 04:00,3047.0,3387.0
4,01.01.2022 04:00 - 01.01.2022 05:00,3052.0,3376.0


In [18]:
load_no5.head()

Unnamed: 0,Time (CET/CEST),Day-ahead Total Load Forecast [MW] - BZN|NO5,Actual Total Load [MW] - BZN|NO5
0,01.01.2022 00:00 - 01.01.2022 01:00,1389.0,1950.0
1,01.01.2022 01:00 - 01.01.2022 02:00,1427.0,1991.0
2,01.01.2022 02:00 - 01.01.2022 03:00,1460.0,1969.0
3,01.01.2022 03:00 - 01.01.2022 04:00,1493.0,1911.0
4,01.01.2022 04:00 - 01.01.2022 05:00,1472.0,1996.0


In [19]:
load_se3.head()

Unnamed: 0,Time (CET/CEST),Day-ahead Total Load Forecast [MW] - BZN|SE3,Actual Total Load [MW] - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,9577.0,9685.0
1,01.01.2022 01:00 - 01.01.2022 02:00,9483.0,9612.0
2,01.01.2022 02:00 - 01.01.2022 03:00,9435.0,9601.0
3,01.01.2022 03:00 - 01.01.2022 04:00,9402.0,9552.0
4,01.01.2022 04:00 - 01.01.2022 05:00,9453.0,9449.0


#### preperation for merging of the load data from the different zones:
The common values we will merge the data based on is the date time column "Time (CET/CEST)", however this column is named "MTU (CET/CEST)" in the prices dataset. The time column will be renamed to "MTU (CET/CEST)" so that the datasets can be merged. The column "Day-ahead Total Load Forecast" is redundant since we have the actual total load, so the forecast column will be dropped.

In [20]:
# dropping redundant columns
load_no1 = load_no1.drop(['Day-ahead Total Load Forecast [MW] - BZN|NO1'], axis=1)
load_no2 = load_no2.drop(['Day-ahead Total Load Forecast [MW] - BZN|NO2'], axis=1)
load_no3 = load_no3.drop(['Day-ahead Total Load Forecast [MW] - BZN|NO3'], axis=1)
load_no5 = load_no5.drop(['Day-ahead Total Load Forecast [MW] - BZN|NO5'], axis=1)
load_se3 = load_se3.drop(['Day-ahead Total Load Forecast [MW] - BZN|SE3'], axis=1)

In [21]:
# renaming time columns
load_no1 = load_no1.rename(columns={'Time (CET/CEST)': 'MTU (CET/CEST)'})
load_no2 = load_no2.rename(columns={'Time (CET/CEST)': 'MTU (CET/CEST)'})
load_no3 = load_no3.rename(columns={'Time (CET/CEST)': 'MTU (CET/CEST)'})
load_no5 = load_no5.rename(columns={'Time (CET/CEST)': 'MTU (CET/CEST)'})
load_se3 = load_se3.rename(columns={'Time (CET/CEST)': 'MTU (CET/CEST)'})

In [22]:
# merging the load data into one dataframe on the date time column 'MTU (CET/CEST)'
load_data_frames = [load_no1, load_no2, load_no3, load_no5, load_se3]
loads = reduce(lambda left, right: pd.merge(left,right, on=['MTU (CET/CEST)']), load_data_frames)

# printing out the resulting dataframe
loads.head()

Unnamed: 0,MTU (CET/CEST),Actual Total Load [MW] - BZN|NO1,Actual Total Load [MW] - BZN|NO2,Actual Total Load [MW] - BZN|NO3,Actual Total Load [MW] - BZN|NO5,Actual Total Load [MW] - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00,4779.0,4357.0,3487.0,1950.0,9685.0
1,01.01.2022 01:00 - 01.01.2022 02:00,4701.0,4440.0,3463.0,1991.0,9612.0
2,01.01.2022 02:00 - 01.01.2022 03:00,4681.0,4480.0,3388.0,1969.0,9601.0
3,01.01.2022 03:00 - 01.01.2022 04:00,4620.0,4424.0,3387.0,1911.0,9552.0
4,01.01.2022 04:00 - 01.01.2022 05:00,4645.0,4386.0,3376.0,1996.0,9449.0


In [23]:
# TODO: handle missing values: NB! Bytt til det endelige datasettet, itte det for prices!!!!!!!!!

In [24]:
# checking data for missing values, if any
prices.isna().sum()

MTU (CET/CEST)                       0
Day-ahead Price [EUR/MWh] BZN|NO1    1
Day-ahead Price [EUR/MWh] BZN|NO2    1
Day-ahead Price [EUR/MWh] BZN|NO3    1
Day-ahead Price [EUR/MWh] BZN|NO5    1
Day-ahead Price [EUR/MWh] BZN|SE3    1
dtype: int64

In [25]:
# checking which row is missing values
null_data = prices[prices.isnull().any(axis=1)]

In [26]:
null_data

Unnamed: 0,MTU (CET/CEST),Day-ahead Price [EUR/MWh] BZN|NO1,Day-ahead Price [EUR/MWh] BZN|NO2,Day-ahead Price [EUR/MWh] BZN|NO3,Day-ahead Price [EUR/MWh] BZN|NO5,Day-ahead Price [EUR/MWh] BZN|SE3
2042,27.03.2022 02:00 - 27.03.2022 03:00,,,,,


This row is created as a result of the move from winter time to summer time

### Aggregation of energy generation data

In [27]:
actual_generation_no1 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/NO1 Actual Generation per Production Type_202201010000-202301010000.csv")
actual_generation_no2 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/NO2 Actual Generation per Production Type_202201010000-202301010000.csv")
actual_generation_no3 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/NO3 Actual Generation per Production Type_202201010000-202301010000.csv")
actual_generation_no5 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/NO5 Actual Generation per Production Type_202201010000-202301010000.csv")
actual_generation_se3 = pd.read_csv("../datasets/wind_solar_fossil_biomass_and_others/SE3 Actual Generation per Production Type_202201010000-202301010000.csv")

In [28]:
actual_generation_no1.head()

Unnamed: 0,Area,MTU,Biomass - Actual Aggregated [MW],Fossil Brown coal/Lignite - Actual Aggregated [MW],Fossil Coal-derived gas - Actual Aggregated [MW],Fossil Gas - Actual Aggregated [MW],Fossil Hard coal - Actual Aggregated [MW],Fossil Oil - Actual Aggregated [MW],Fossil Oil shale - Actual Aggregated [MW],Fossil Peat - Actual Aggregated [MW],...,Hydro Run-of-river and poundage - Actual Aggregated [MW],Hydro Water Reservoir - Actual Aggregated [MW],Marine - Actual Aggregated [MW],Nuclear - Actual Aggregated [MW],Other - Actual Aggregated [MW],Other renewable - Actual Aggregated [MW],Solar - Actual Aggregated [MW],Waste - Actual Aggregated [MW],Wind Offshore - Actual Aggregated [MW],Wind Onshore - Actual Aggregated [MW]
0,BZN|NO1,01.01.2022 00:00 - 01.01.2022 01:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,899.0,353.0,n/e,n/e,n/e,n/e,n/e,9.0,n/e,139.0
1,BZN|NO1,01.01.2022 01:00 - 01.01.2022 02:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,878.0,321.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,172.0
2,BZN|NO1,01.01.2022 02:00 - 01.01.2022 03:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,897.0,330.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,151.0
3,BZN|NO1,01.01.2022 03:00 - 01.01.2022 04:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,885.0,275.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,160.0
4,BZN|NO1,01.01.2022 04:00 - 01.01.2022 05:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,890.0,269.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,148.0


In [29]:
# Renaming every column to have zones instead of using a column for 'Area'
actual_generation_no1.columns = ['Area','MTU (CET/CEST)','Biomass - BZN|NO1', 'Fossil Brown coal/Lignite - BZN|NO1', 'Fossil Coal-derived gas - BZN|NO1', 'Fossil Gas - BZN|NO1', 'Fossil Hard coal - BZN|NO1', 'Fossil Oil - BZN|NO1', 'Fossil Oil shale - BZN|NO1', 'Fossil Peat - BZN|NO1', 'Geothermal - BZN|NO1', 'Hydro Pumped Storage Aggregated- BZN|NO1', 'Hydro Pumped Storage Consumption - BZN|NO1', 'Hydro Run-of-river and poundage - BZN|NO1', 'Hydro Water Reservoir - BZN|NO1', 'Marine - BZN|NO1', 'Nuclear - BZN|NO1', 'Other - BZN|NO1', 'Other renewable - BZN|NO1', 'Solar - BZN|NO1', 'Waste - BZN|NO1', 'Wind Offshore - BZN|NO1', 'Wind Onshore - BZN|NO1']
actual_generation_no2.columns = ['Area','MTU (CET/CEST)','Biomass - BZN|NO2', 'Fossil Brown coal/Lignite - BZN|NO2', 'Fossil Coal-derived gas - BZN|NO2', 'Fossil Gas - BZN|NO2', 'Fossil Hard coal - BZN|NO2', 'Fossil Oil - BZN|NO2', 'Fossil Oil shale - BZN|NO2', 'Fossil Peat - BZN|NO2', 'Geothermal - BZN|NO2', 'Hydro Pumped Storage Aggregated- BZN|NO2', 'Hydro Pumped Storage Consumption - BZN|NO2', 'Hydro Run-of-river and poundage - BZN|NO1', 'Hydro Water Reservoir - BZN|NO2', 'Marine - BZN|NO2', 'Nuclear - BZN|NO2', 'Other - BZN|NO2', 'Other renewable - BZN|NO2', 'Solar - BZN|NO2', 'Waste - BZN|NO2', 'Wind Offshore - BZN|NO2', 'Wind Onshore - BZN|NO2']
actual_generation_no3.columns = ['Area','MTU (CET/CEST)','Biomass - BZN|NO3', 'Fossil Brown coal/Lignite - BZN|NO3', 'Fossil Coal-derived gas - BZN|NO3', 'Fossil Gas - BZN|NO3', 'Fossil Hard coal - BZN|NO3', 'Fossil Oil - BZN|NO3', 'Fossil Oil shale - BZN|NO3', 'Fossil Peat - BZN|NO3', 'Geothermal - BZN|NO3', 'Hydro Pumped Storage Aggregated- BZN|NO3', 'Hydro Pumped Storage Consumption - BZN|NO3', 'Hydro Run-of-river and poundage - BZN|NO3', 'Hydro Water Reservoir - BZN|NO3', 'Marine - BZN|NO3', 'Nuclear - BZN|NO3', 'Other - BZN|NO3', 'Other renewable - BZN|NO3', 'Solar - BZN|NO3', 'Waste - BZN|NO3', 'Wind Offshore - BZN|NO3', 'Wind Onshore - BZN|NO3']
actual_generation_no5.columns = ['Area','MTU (CET/CEST)','Biomass - BZN|NO5', 'Fossil Brown coal/Lignite - BZN|NO5', 'Fossil Coal-derived gas - BZN|NO5', 'Fossil Gas - BZN|NO5', 'Fossil Hard coal - BZN|NO5', 'Fossil Oil - BZN|NO5', 'Fossil Oil shale - BZN|NO5', 'Fossil Peat - BZN|NO5', 'Geothermal - BZN|NO5', 'Hydro Pumped Storage Aggregated- BZN|NO5', 'Hydro Pumped Storage Consumption - BZN|NO5', 'Hydro Run-of-river and poundage - BZN|NO5', 'Hydro Water Reservoir - BZN|NO5', 'Marine - BZN|NO5', 'Nuclear - BZN|NO5', 'Other - BZN|NO5', 'Other renewable - BZN|NO5', 'Solar - BZN|NO5', 'Waste - BZN|NO5', 'Wind Offshore - BZN|NO5', 'Wind Onshore - BZN|NO5']
actual_generation_se3.columns = ['Area','MTU (CET/CEST)','Biomass - BZN|SE3', 'Fossil Brown coal/Lignite - BZN|SE3', 'Fossil Coal-derived gas - BZN|SE3', 'Fossil Gas - BZN|SE3', 'Fossil Hard coal - BZN|SE3', 'Fossil Oil - BZN|SE3', 'Fossil Oil shale - BZN|SE3', 'Fossil Peat - BZN|SE3', 'Geothermal - BZN|SE3', 'Hydro Pumped Storage Aggregated- BZN|SE3', 'Hydro Pumped Storage Consumption - BZN|SE3', 'Hydro Run-of-river and poundage - BZN|SE3', 'Hydro Water Reservoir - BZN|SE3', 'Marine - BZN|SE3', 'Nuclear - BZN|SE3', 'Other - BZN|SE3', 'Other renewable - BZN|SE3', 'Solar - BZN|SE3', 'Waste - BZN|SE3', 'Wind Offshore - BZN|SE3', 'Wind Onshore - BZN|SE3']
actual_generation_no1.head()

Unnamed: 0,Area,MTU (CET/CEST),Biomass - BZN|NO1,Fossil Brown coal/Lignite - BZN|NO1,Fossil Coal-derived gas - BZN|NO1,Fossil Gas - BZN|NO1,Fossil Hard coal - BZN|NO1,Fossil Oil - BZN|NO1,Fossil Oil shale - BZN|NO1,Fossil Peat - BZN|NO1,...,Hydro Run-of-river and poundage - BZN|NO1,Hydro Water Reservoir - BZN|NO1,Marine - BZN|NO1,Nuclear - BZN|NO1,Other - BZN|NO1,Other renewable - BZN|NO1,Solar - BZN|NO1,Waste - BZN|NO1,Wind Offshore - BZN|NO1,Wind Onshore - BZN|NO1
0,BZN|NO1,01.01.2022 00:00 - 01.01.2022 01:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,899.0,353.0,n/e,n/e,n/e,n/e,n/e,9.0,n/e,139.0
1,BZN|NO1,01.01.2022 01:00 - 01.01.2022 02:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,878.0,321.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,172.0
2,BZN|NO1,01.01.2022 02:00 - 01.01.2022 03:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,897.0,330.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,151.0
3,BZN|NO1,01.01.2022 03:00 - 01.01.2022 04:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,885.0,275.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,160.0
4,BZN|NO1,01.01.2022 04:00 - 01.01.2022 05:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,...,890.0,269.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,148.0


In [30]:
# Dropping area because we keep the zone name in the column name
actual_generation_no1 = actual_generation_no1.drop(['Area'], axis=1)
actual_generation_no2 = actual_generation_no2.drop(['Area'], axis=1)
actual_generation_no3 = actual_generation_no3.drop(['Area'], axis=1)
actual_generation_no5 = actual_generation_no5.drop(['Area'], axis=1)
actual_generation_se3 = actual_generation_se3.drop(['Area'], axis=1)
actual_generation_no1.head()

Unnamed: 0,MTU (CET/CEST),Biomass - BZN|NO1,Fossil Brown coal/Lignite - BZN|NO1,Fossil Coal-derived gas - BZN|NO1,Fossil Gas - BZN|NO1,Fossil Hard coal - BZN|NO1,Fossil Oil - BZN|NO1,Fossil Oil shale - BZN|NO1,Fossil Peat - BZN|NO1,Geothermal - BZN|NO1,...,Hydro Run-of-river and poundage - BZN|NO1,Hydro Water Reservoir - BZN|NO1,Marine - BZN|NO1,Nuclear - BZN|NO1,Other - BZN|NO1,Other renewable - BZN|NO1,Solar - BZN|NO1,Waste - BZN|NO1,Wind Offshore - BZN|NO1,Wind Onshore - BZN|NO1
0,01.01.2022 00:00 - 01.01.2022 01:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,899.0,353.0,n/e,n/e,n/e,n/e,n/e,9.0,n/e,139.0
1,01.01.2022 01:00 - 01.01.2022 02:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,878.0,321.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,172.0
2,01.01.2022 02:00 - 01.01.2022 03:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,897.0,330.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,151.0
3,01.01.2022 03:00 - 01.01.2022 04:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,885.0,275.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,160.0
4,01.01.2022 04:00 - 01.01.2022 05:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,890.0,269.0,n/e,n/e,n/e,n/e,n/e,8.0,n/e,148.0


In [31]:
# Creating one dataframe from the 5 dataframes for each zone
actual_generation_dataframes = [actual_generation_no1, actual_generation_no2, actual_generation_no3, actual_generation_no5, actual_generation_se3]
actual_generation = reduce(lambda left, right: pd.merge(left,right, on=['MTU (CET/CEST)']), actual_generation_dataframes)

# printing out the resulting dataframe
actual_generation.head()

Unnamed: 0,MTU (CET/CEST),Biomass - BZN|NO1,Fossil Brown coal/Lignite - BZN|NO1,Fossil Coal-derived gas - BZN|NO1,Fossil Gas - BZN|NO1,Fossil Hard coal - BZN|NO1,Fossil Oil - BZN|NO1,Fossil Oil shale - BZN|NO1,Fossil Peat - BZN|NO1,Geothermal - BZN|NO1,...,Hydro Run-of-river and poundage - BZN|SE3,Hydro Water Reservoir - BZN|SE3,Marine - BZN|SE3,Nuclear - BZN|SE3,Other - BZN|SE3,Other renewable - BZN|SE3,Solar - BZN|SE3,Waste - BZN|SE3,Wind Offshore - BZN|SE3,Wind Onshore - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,n/e,824.0,0,5841.0,892.0,n/e,0.0,n/e,n/e,1062.0
1,01.01.2022 01:00 - 01.01.2022 02:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,n/e,818.0,0,5842.0,851.0,n/e,0.0,n/e,n/e,999.0
2,01.01.2022 02:00 - 01.01.2022 03:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,n/e,818.0,0,5839.0,858.0,n/e,0.0,n/e,n/e,958.0
3,01.01.2022 03:00 - 01.01.2022 04:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,n/e,838.0,0,5839.0,859.0,n/e,0.0,n/e,n/e,888.0
4,01.01.2022 04:00 - 01.01.2022 05:00 (CET/CEST),0.0,n/e,n/e,0.0,n/e,n/e,n/e,n/e,n/e,...,n/e,842.0,0,5840.0,856.0,n/e,0.0,n/e,n/e,788.0


We have alot of columns which are used in other nations and zones that are redundant to us because there is no power generation from these sources in the zones we are looking at. Therefore, we select only the columns which keep information about power generation in each zone.

In [32]:
# Selecting only relevant columns
actual_generation_selected = actual_generation[['MTU (CET/CEST)', 'Biomass - BZN|NO1', 'Fossil Gas - BZN|NO1', 'Hydro Run-of-river and poundage - BZN|NO1_x', 'Hydro Water Reservoir - BZN|NO1', 'Waste - BZN|NO1', 'Wind Onshore - BZN|NO1', 'Fossil Gas - BZN|NO2', 'Hydro Pumped Storage Aggregated- BZN|NO2', 'Hydro Pumped Storage Consumption - BZN|NO2', 'Hydro Run-of-river and poundage - BZN|NO1_y', 'Hydro Water Reservoir - BZN|NO2', 'Waste - BZN|NO2', 'Wind Onshore - BZN|NO2', 'Fossil Gas - BZN|NO3', 'Hydro Pumped Storage Aggregated- BZN|NO3', 'Hydro Pumped Storage Consumption - BZN|NO3', 'Hydro Run-of-river and poundage - BZN|NO3', 'Hydro Water Reservoir - BZN|NO3', 'Other - BZN|NO3', 'Other renewable - BZN|NO3', 'Waste - BZN|NO3', 'Wind Onshore - BZN|NO3', 'Fossil Gas - BZN|NO5', 'Hydro Pumped Storage Aggregated- BZN|NO5', 'Hydro Pumped Storage Consumption - BZN|NO5', 'Hydro Run-of-river and poundage - BZN|NO5', 'Hydro Water Reservoir - BZN|NO5', 'Waste - BZN|NO5', 'Fossil Gas - BZN|SE3', 'Hydro Water Reservoir - BZN|SE3', 'Marine - BZN|SE3', 'Nuclear - BZN|SE3', 'Other - BZN|SE3', 'Solar - BZN|SE3', 'Wind Onshore - BZN|SE3']].copy()
# printing the head of the resulting dataframe
actual_generation_selected.head()

Unnamed: 0,MTU (CET/CEST),Biomass - BZN|NO1,Fossil Gas - BZN|NO1,Hydro Run-of-river and poundage - BZN|NO1_x,Hydro Water Reservoir - BZN|NO1,Waste - BZN|NO1,Wind Onshore - BZN|NO1,Fossil Gas - BZN|NO2,Hydro Pumped Storage Aggregated- BZN|NO2,Hydro Pumped Storage Consumption - BZN|NO2,...,Hydro Run-of-river and poundage - BZN|NO5,Hydro Water Reservoir - BZN|NO5,Waste - BZN|NO5,Fossil Gas - BZN|SE3,Hydro Water Reservoir - BZN|SE3,Marine - BZN|SE3,Nuclear - BZN|SE3,Other - BZN|SE3,Solar - BZN|SE3,Wind Onshore - BZN|SE3
0,01.01.2022 00:00 - 01.01.2022 01:00 (CET/CEST),0.0,0.0,899.0,353.0,9.0,139.0,0.0,1.0,,...,234.0,1618.0,19.0,0.0,824.0,0,5841.0,892.0,0.0,1062.0
1,01.01.2022 01:00 - 01.01.2022 02:00 (CET/CEST),0.0,0.0,878.0,321.0,8.0,172.0,0.0,1.0,,...,227.0,1759.0,19.0,0.0,818.0,0,5842.0,851.0,0.0,999.0
2,01.01.2022 02:00 - 01.01.2022 03:00 (CET/CEST),0.0,0.0,897.0,330.0,8.0,151.0,0.0,1.0,,...,216.0,1890.0,20.0,0.0,818.0,0,5839.0,858.0,0.0,958.0
3,01.01.2022 03:00 - 01.01.2022 04:00 (CET/CEST),0.0,0.0,885.0,275.0,8.0,160.0,0.0,1.0,,...,215.0,1460.0,20.0,0.0,838.0,0,5839.0,859.0,0.0,888.0
4,01.01.2022 04:00 - 01.01.2022 05:00 (CET/CEST),0.0,0.0,890.0,269.0,8.0,148.0,0.0,1.0,,...,209.0,1394.0,20.0,0.0,842.0,0,5840.0,856.0,0.0,788.0


### Data Aggregation for import and export


We want to concatenate data for cross border physical flow between NO1 and neighbouring zones(NO2,NO3,NO5,SE3).

In [33]:
# Importing import export datasets
import_export_no1_no2 = pd.read_csv("../datasets/import_and_export/NO1-NO2 Import export Cross-Border Physical Flow_202201010000-202301010000.csv")
import_export_no1_no3 = pd.read_csv("../datasets/import_and_export/NO1-NO3 Import export Cross-Border Physical Flow_202201010000-202301010000.csv")
import_export_no1_no5 = pd.read_csv("../datasets/import_and_export/NO1-NO5 Import export Cross-Border Physical Flow_202201010000-202301010000.csv")
import_export_no1_se3 = pd.read_csv("../datasets/import_and_export/NO1-SE3 Import export Cross-Border Physical Flow_202201010000-202301010000.csv")

NO1_NO2_import_export.head()
In other data files the time unite is MTU (CET/CEST), this is not teh case for import and export data. To be consistent Time (CET/CEST) is changed to MTU (CET/CEST). To make it clear that 'BZN|NO2 > BZN|NO1 [MW]' shows the cross border physical flow , CBF is used as an abbreviation.

Printing out first rows of each dataset to see the structure

In [34]:
import_export_no1_no2.head()

Unnamed: 0,Time (CET/CEST),BZN|NO2 > BZN|NO1 [MW],BZN|NO1 > BZN|NO2 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,1300.0,0.0
1,01.01.2022 01:00 - 01.01.2022 02:00,1242.0,0.0
2,01.01.2022 02:00 - 01.01.2022 03:00,1222.0,0.0
3,01.01.2022 03:00 - 01.01.2022 04:00,1679.0,0.0
4,01.01.2022 04:00 - 01.01.2022 05:00,1668.0,0.0


In [35]:
import_export_no1_no3.head()

Unnamed: 0,Time (CET/CEST),BZN|NO3 > BZN|NO1 [MW],BZN|NO1 > BZN|NO3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,384.0,0.0
1,01.01.2022 01:00 - 01.01.2022 02:00,371.0,0.0
2,01.01.2022 02:00 - 01.01.2022 03:00,383.0,0.0
3,01.01.2022 03:00 - 01.01.2022 04:00,362.0,0.0
4,01.01.2022 04:00 - 01.01.2022 05:00,324.0,0.0


In [36]:
import_export_no1_no5.head()

Unnamed: 0,Time (CET/CEST),BZN|NO5 > BZN|NO1 [MW],BZN|NO1 > BZN|NO5 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,568.0,0.0
1,01.01.2022 01:00 - 01.01.2022 02:00,594.0,0.0
2,01.01.2022 02:00 - 01.01.2022 03:00,676.0,0.0
3,01.01.2022 03:00 - 01.01.2022 04:00,436.0,0.0
4,01.01.2022 04:00 - 01.01.2022 05:00,345.0,0.0


In [37]:
import_export_no1_se3.head()

Unnamed: 0,Time (CET/CEST),BZN|SE3 > BZN|NO1 [MW],BZN|NO1 > BZN|SE3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,1038.0,0.0
1,01.01.2022 01:00 - 01.01.2022 02:00,1065.0,0.0
2,01.01.2022 02:00 - 01.01.2022 03:00,927.0,0.0
3,01.01.2022 03:00 - 01.01.2022 04:00,748.0,0.0
4,01.01.2022 04:00 - 01.01.2022 05:00,918.0,0.0


We see that the time column in the import export datasets is named "Time(CET/CEST)" while in the other datasets, the time column is named "MTU (CET/CET)". The time column will be renamed to "MTU (CET/CEST)" so that the datasets can be merged.

In [38]:
import_export_no1_no2.rename(columns = {'Time (CET/CEST)':'MTU (CET/CEST)', 'BZN|NO2 > BZN|NO1 [MW]' : 'CBF BZN|NO2 > BZN|NO1 [MW]', 'BZN|NO1 > BZN|NO2 [MW]' : 'CBF BZN|NO1 > BZN|NO2 [MW]'}, inplace = True)
import_export_no1_no3.rename(columns = {'Time (CET/CEST)':'MTU (CET/CEST)', 'BZN|NO3 > BZN|NO1 [MW]' : 'CBF BZN|NO3 > BZN|NO1 [MW]', 'BZN|NO1 > BZN|NO3 [MW]' : 'CBF BZN|NO1 > BZN|NO3 [MW]'}, inplace = True)
import_export_no1_no5.rename(columns = {'Time (CET/CEST)':'MTU (CET/CEST)', 'BZN|NO5 > BZN|NO1 [MW]' : 'CBF BZN|NO5 > BZN|NO1 [MW]', 'BZN|NO1 > BZN|NO5 [MW]' : 'CBF BZN|NO1 > BZN|NO5 [MW]'}, inplace = True)
import_export_no1_se3.rename(columns = {'Time (CET/CEST)':'MTU (CET/CEST)', 'BZN|SE3 > BZN|NO1 [MW]' : 'CBF BZN|SE3 > BZN|NO1 [MW]', 'BZN|NO1 > BZN|SE3 [MW]' : 'CBF BZN|NO1 > BZN|SE3 [MW]'}, inplace = True)

Merging all dataframes for import and export


In [39]:
# Merging the import export datasets
import_export_no1_neighbours_dataframes = [import_export_no1_no2, import_export_no1_no3, import_export_no1_no5, import_export_no1_se3]
import_export_no1_neighbours = reduce(lambda left, right: pd.merge(left,right, on=['MTU (CET/CEST)']), import_export_no1_neighbours_dataframes)

# printing out the 5 first rows of the resulting dataframe
import_export_no1_neighbours.head()

Unnamed: 0,MTU (CET/CEST),CBF BZN|NO2 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO2 [MW],CBF BZN|NO3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO3 [MW],CBF BZN|NO5 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|NO5 [MW],CBF BZN|SE3 > BZN|NO1 [MW],CBF BZN|NO1 > BZN|SE3 [MW]
0,01.01.2022 00:00 - 01.01.2022 01:00,1300.0,0.0,384.0,0.0,568.0,0.0,1038.0,0.0
1,01.01.2022 01:00 - 01.01.2022 02:00,1242.0,0.0,371.0,0.0,594.0,0.0,1065.0,0.0
2,01.01.2022 02:00 - 01.01.2022 03:00,1222.0,0.0,383.0,0.0,676.0,0.0,927.0,0.0
3,01.01.2022 03:00 - 01.01.2022 04:00,1679.0,0.0,362.0,0.0,436.0,0.0,748.0,0.0
4,01.01.2022 04:00 - 01.01.2022 05:00,1668.0,0.0,324.0,0.0,345.0,0.0,918.0,0.0


### Aggregation of Water Reservoirs and Hydro Storage Plants

The water reservoir dataset is measured with a weekly frequency while the other datasets are measured hourly. In order to make the data match frequency of measurements of the other datasets, we will need to up sample the data thorough interpolation.  We will first map the weekly measurement to the first hour of each week, and then interpolate the intermediate measurement. In order to interpolate the measurements of the last week, we will add the measurement for the first week of the succeeding year (2023) to the dataset. this measurement  will be dropped from the final dataset. We will use spline interpolation to add smoothness to the interpolated curve, which will be more representative of water level fluctuations than a linear interpolation.

Reading in water levels as CSV with pandas library

To get a more precise data and filled out data we need to also involve the first week measurement of 2022

In [40]:
# Loading in dataset for 2022
water_level_NO1 = pd.read_csv("../datasets/water_level/NO1_2022_Water_Reservoirs_and_Hydro_Storage_Plants_202201030000-202301020000.csv")
water_level_NO2 = pd.read_csv("../datasets/water_level/NO2_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")
water_level_NO3 = pd.read_csv("../datasets/water_level/NO3_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")
water_level_NO5 = pd.read_csv("../datasets/water_level/NO5_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")
water_level_SE3 = pd.read_csv("../datasets/water_level/SE3_2022_Water_Reservoirs_and_Hydro_Storage Plants_202201030000-202301020000.csv")

# Reading in only the first week of 2023
water_level_NO1_2023 = pd.read_csv("../datasets/water_level/NO1_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)
water_level_NO2_2023 = pd.read_csv("../datasets/water_level/NO2_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)
water_level_NO3_2023 = pd.read_csv("../datasets/water_level/NO3_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)
water_level_NO5_2023 = pd.read_csv("../datasets/water_level/NO5_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)
water_level_SE3_2023 = pd.read_csv("../datasets/water_level/SE3_2023_Water Reservoirs and Hydro Storage Plants_202301020000-202401010000.csv", nrows=1)

# Renaming the Week datapoint for all 2023 datasets because they are originally called "week 1" which would create confusion in the data
water_level_NO1_2023.at[0, 'Week'] = 'Week 53'
water_level_NO2_2023.at[0, 'Week'] = 'Week 53'
water_level_NO3_2023.at[0, 'Week'] = 'Week 53'
water_level_NO5_2023.at[0, 'Week'] = 'Week 53'
water_level_SE3_2023.at[0, 'Week'] = 'Week 53'

In [41]:
# Checking if only the row of the data for 2023 was read in.
water_level_NO1_2023.head()

Unnamed: 0,Week,Stored Energy Value [MWh] - BZN|NO1
0,Week 53,3741135


In [42]:
# Concatinating the dataset to add the first week of 2023 to the 2022 datasets. ignore index ignores the index number of the row in 2023 and gives it a new after being "appended"
water_level_NO1 = pd.concat([water_level_NO1, water_level_NO1_2023], ignore_index= True)
water_level_NO2 = pd.concat([water_level_NO2, water_level_NO2_2023], ignore_index= True)
water_level_NO3 = pd.concat([water_level_NO3, water_level_NO3_2023], ignore_index= True)
water_level_NO5 = pd.concat([water_level_NO5, water_level_NO5_2023], ignore_index= True)
water_level_SE3 = pd.concat([water_level_SE3, water_level_SE3_2023], ignore_index= True)

In [43]:
water_level_NO1.head()

Unnamed: 0,Week,Stored Energy Value [MWh] - BZN|NO1
0,Week 1,2955197
1,Week 2,2754532
2,Week 3,2556285
3,Week 4,2388301
4,Week 5,2165300


In [44]:
# Merging the import export datasets
water_level_dataframes = [water_level_NO1, water_level_NO2, water_level_NO3, water_level_NO5, water_level_SE3]
water_level_dataframes = reduce(lambda left, right: pd.merge(left,right, on=['Week']), water_level_dataframes)

# printing out the 5 first rows of the resulting dataframe
water_level_dataframes.head()

Unnamed: 0,Week,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,Week 1,2955197,16214535,5314677,8130917,1566000
1,Week 2,2754532,15749289,5423690,7834726,1475000
2,Week 3,2556285,15047132,5363892,7445563,1415000
3,Week 4,2388301,14733590,5186818,7172051,1361000
4,Week 5,2165300,14257951,4897580,6748563,1268000


In [45]:
start_date = '2022-01-01'
end_date = '2022-12-31'
week_date_timeframe = pd.DataFrame(pd.date_range(start=start_date, end=end_date, freq='7D'))
week_date_timeframe.rename(columns={0: 'Week_start_date'}, inplace=True)
week_date_timeframe.head()

Unnamed: 0,Week_start_date
0,2022-01-01
1,2022-01-08
2,2022-01-15
3,2022-01-22
4,2022-01-29


In [46]:
joined_water_week = pd.concat([week_date_timeframe, water_level_dataframes], axis=1)
joined_water_week.head()

Unnamed: 0,Week_start_date,Week,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,2022-01-01,Week 1,2955197,16214535,5314677,8130917,1566000
1,2022-01-08,Week 2,2754532,15749289,5423690,7834726,1475000
2,2022-01-15,Week 3,2556285,15047132,5363892,7445563,1415000
3,2022-01-22,Week 4,2388301,14733590,5186818,7172051,1361000
4,2022-01-29,Week 5,2165300,14257951,4897580,6748563,1268000


In [47]:
joined_water_week.drop('Week', axis=1, inplace=True)
joined_water_week.head()

Unnamed: 0,Week_start_date,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,2022-01-01,2955197,16214535,5314677,8130917,1566000
1,2022-01-08,2754532,15749289,5423690,7834726,1475000
2,2022-01-15,2556285,15047132,5363892,7445563,1415000
3,2022-01-22,2388301,14733590,5186818,7172051,1361000
4,2022-01-29,2165300,14257951,4897580,6748563,1268000


In [48]:
joined_water_week['Week_start_date'] = pd.to_datetime(joined_water_week['Week_start_date'], format='%Y-%m-%d')
joined_water_week.head()

Unnamed: 0,Week_start_date,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
0,2022-01-01,2955197,16214535,5314677,8130917,1566000
1,2022-01-08,2754532,15749289,5423690,7834726,1475000
2,2022-01-15,2556285,15047132,5363892,7445563,1415000
3,2022-01-22,2388301,14733590,5186818,7172051,1361000
4,2022-01-29,2165300,14257951,4897580,6748563,1268000


In [49]:
joined_water_week['Week_start_date'] = pd.to_datetime(joined_water_week['Week_start_date'])
joined_water_week.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53 entries, 0 to 52
Data columns (total 6 columns):
 #   Column                               Non-Null Count  Dtype         
---  ------                               --------------  -----         
 0   Week_start_date                      53 non-null     datetime64[ns]
 1   Stored Energy Value [MWh] - BZN|NO1  53 non-null     int64         
 2   Stored Energy Value [MWh] - BZN|NO2  53 non-null     int64         
 3   Stored Energy Value [MWh] - BZN|NO3  53 non-null     int64         
 4   Stored Energy Value [MWh] - BZN|NO5  53 non-null     int64         
 5   Stored Energy Value [MWh] - BZN|SE3  53 non-null     int64         
dtypes: datetime64[ns](1), int64(5)
memory usage: 2.6 KB


In [50]:
joined_water_week.set_index('Week_start_date', inplace=True)

In [51]:
joined_water_week.index

DatetimeIndex(['2022-01-01', '2022-01-08', '2022-01-15', '2022-01-22',
               '2022-01-29', '2022-02-05', '2022-02-12', '2022-02-19',
               '2022-02-26', '2022-03-05', '2022-03-12', '2022-03-19',
               '2022-03-26', '2022-04-02', '2022-04-09', '2022-04-16',
               '2022-04-23', '2022-04-30', '2022-05-07', '2022-05-14',
               '2022-05-21', '2022-05-28', '2022-06-04', '2022-06-11',
               '2022-06-18', '2022-06-25', '2022-07-02', '2022-07-09',
               '2022-07-16', '2022-07-23', '2022-07-30', '2022-08-06',
               '2022-08-13', '2022-08-20', '2022-08-27', '2022-09-03',
               '2022-09-10', '2022-09-17', '2022-09-24', '2022-10-01',
               '2022-10-08', '2022-10-15', '2022-10-22', '2022-10-29',
               '2022-11-05', '2022-11-12', '2022-11-19', '2022-11-26',
               '2022-12-03', '2022-12-10', '2022-12-17', '2022-12-24',
               '2022-12-31'],
              dtype='datetime64[ns]', name='Wee

In [52]:
# upsamepled_df = joined_water_week.resample('D', on=joined_water_week['Week_start_data']).sum()
joined_water_week = joined_water_week.asfreq('H')

In [53]:
interpolated_df = joined_water_week.interpolate(method='spline', order=3)

In [55]:
interpolated_df.head()

Unnamed: 0_level_0,Stored Energy Value [MWh] - BZN|NO1,Stored Energy Value [MWh] - BZN|NO2,Stored Energy Value [MWh] - BZN|NO3,Stored Energy Value [MWh] - BZN|NO5,Stored Energy Value [MWh] - BZN|SE3
Week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01 00:00:00,2955197.0,16214540.0,5314677.0,8130917.0,1566000.0
2022-01-01 01:00:00,2954139.0,16214420.0,5315972.0,8130161.0,1565345.0
2022-01-01 02:00:00,2953078.0,16214250.0,5317259.0,8129388.0,1564691.0
2022-01-01 03:00:00,2952014.0,16214050.0,5318537.0,8128599.0,1564039.0
2022-01-01 04:00:00,2950949.0,16213800.0,5319807.0,8127794.0,1563388.0
