In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
!pip3 install graycode
import graycode



# Day-ahead prices data

In [3]:
# Load one day-ahead price CSV per calendar year (2015 through 2020); the file
# names differ only in the start/end year stamps.
yearly_prices = {
    year: pd.read_csv(f'data/Day-ahead Prices_{year}01010000-{year + 1}01010000.csv')
    for year in range(2015, 2021)
}
# data of january 1 to 4 is missing, so skip the first 4 * 24 hourly rows of 2015
price_2015 = yearly_prices[2015].iloc[4 * 24:]
price_2016 = yearly_prices[2016]
price_2017 = yearly_prices[2017]
price_2018 = yearly_prices[2018]
price_2019 = yearly_prices[2019]
price_2020 = yearly_prices[2020]

In [4]:
# Stack the yearly frames in chronological order into a single day-ahead price
# table; ignore_index=True renumbers the rows 0..n-1.
yearly_frames = [price_2015, price_2016, price_2017, price_2018, price_2019, price_2020]
day_ahead = pd.concat(yearly_frames, ignore_index=True)

In [5]:
day_ahead

Unnamed: 0,MTU (CET),Day-ahead Price [EUR/MWh]
0,05.01.2015 00:00 - 05.01.2015 01:00,36.56
1,05.01.2015 01:00 - 05.01.2015 02:00,34.94
2,05.01.2015 02:00 - 05.01.2015 03:00,32.19
3,05.01.2015 03:00 - 05.01.2015 04:00,28.05
4,05.01.2015 04:00 - 05.01.2015 05:00,28.04
...,...,...
52513,31.12.2020 19:00 - 31.12.2020 20:00,61.51
52514,31.12.2020 20:00 - 31.12.2020 21:00,56.79
52515,31.12.2020 21:00 - 31.12.2020 22:00,52.44
52516,31.12.2020 22:00 - 31.12.2020 23:00,51.86


In [6]:
# list the rows in which any column holds a missing value
day_ahead[day_ahead.isna().any(axis='columns')]

Unnamed: 0,MTU (CET),Day-ahead Price [EUR/MWh]
1994,29.03.2015 02:00 - 29.03.2015 03:00,
10731,27.03.2016 02:00 - 27.03.2016 03:00,
19468,26.03.2017 02:00 - 26.03.2017 03:00,
28205,25.03.2018 02:00 - 25.03.2018 03:00,
37110,31.03.2019 02:00 - 31.03.2019 03:00,
45847,29.03.2020 02:00 - 29.03.2020 03:00,


In [7]:
# Fill the missing day-ahead prices; for now a polynomial interpolation of
# degree 2 is used.
price_column = 'Day-ahead Price [EUR/MWh]'
interpolated_price = day_ahead[price_column].interpolate(method='polynomial', order=2)
day_ahead[price_column] = interpolated_price

In [8]:
# The MTU column reads "<start> - <end>": keep the text before the first '-'
# (the interval start, including its trailing space) and parse it as a timestamp.
interval_start = day_ahead['MTU (CET)'].str.split('-', n=1).str[0]
day_ahead['datetime'] = pd.to_datetime(interval_start, format="%d.%m.%Y %H:%M ")

In [9]:
day_ahead

Unnamed: 0,MTU (CET),Day-ahead Price [EUR/MWh],datetime
0,05.01.2015 00:00 - 05.01.2015 01:00,36.56,2015-01-05 00:00:00
1,05.01.2015 01:00 - 05.01.2015 02:00,34.94,2015-01-05 01:00:00
2,05.01.2015 02:00 - 05.01.2015 03:00,32.19,2015-01-05 02:00:00
3,05.01.2015 03:00 - 05.01.2015 04:00,28.05,2015-01-05 03:00:00
4,05.01.2015 04:00 - 05.01.2015 05:00,28.04,2015-01-05 04:00:00
...,...,...,...
52513,31.12.2020 19:00 - 31.12.2020 20:00,61.51,2020-12-31 19:00:00
52514,31.12.2020 20:00 - 31.12.2020 21:00,56.79,2020-12-31 20:00:00
52515,31.12.2020 21:00 - 31.12.2020 22:00,52.44,2020-12-31 21:00:00
52516,31.12.2020 22:00 - 31.12.2020 23:00,51.86,2020-12-31 22:00:00


In [10]:
# check again for rows with a missing value in any column
day_ahead[day_ahead.isna().any(axis='columns')]

Unnamed: 0,MTU (CET),Day-ahead Price [EUR/MWh],datetime


# Weather data

In [11]:
# Load the pre-parsed weather records from JSON into a DataFrame.
weather = pd.read_json('data/parsed_weather.json')

In [12]:
# remove data outside of january 5 2015 to december 31 2020
weather['date'] = pd.to_datetime(weather['date'])
start_date = '2015-1-4'    # exclusive lower bound: the first day kept is january 5
end_date = '2020-12-31'    # inclusive upper bound
mask = (weather['date'] > start_date) & (weather['date'] <= end_date)
# .copy() detaches the filtered rows from the original frame, so the column
# assignments below do not raise pandas' SettingWithCopyWarning
weather = weather.loc[mask].copy()
# 'time' is an hhmm-style number (100 -> 01:00); the modulo maps a midnight that
# was already rewritten to 2400 back to hour 0, so re-running this cell does not
# shift midnight rows to the next day. Lowercase 'h' is the non-deprecated unit.
hours_into_day = pd.to_timedelta((weather['time'] % 2400) / 100, unit='h')
weather['datetime'] = weather['date'] + hours_into_day
weather = weather.replace({'time': {0: 2400}}) # convert 0 to 2400, for later convenience

In [13]:
# Each weatherDesc entry is a list; flatten it into one comma-separated string.
weather['weatherDesc'] = weather['weatherDesc'].map(
    lambda parts: ', '.join(str(part) for part in parts)
)

In [14]:
# converting categorical variables to numerical

# option 1: label encoding

# wind direction: replace each 16-point compass label by its integer class code
winddir_encoder = LabelEncoder()
weather['winddir16Point'] = winddir_encoder.fit_transform(weather['winddir16Point'])
# weather description (note: the encoder variable is named "winddesc")
winddesc_encoder = LabelEncoder()
weather['weatherDesc'] = winddesc_encoder.fit_transform(weather['weatherDesc'])


# option 2: one-hot encoding
# Disabled alternative: the code below is wrapped in a bare string literal, so
# none of it runs. Because the string is the last expression of the cell, it is
# echoed back as the cell's output.
'''
one_hot_winddir = pd.get_dummies(weather['winddir16Point'], prefix='winddir_')
weather = weather.join(one_hot_winddir)
one_hot_weatherDesc = pd.get_dummies(weather['weatherDesc'], prefix='weatherDesc_')
weather = weather.join(one_hot_weatherDesc)
weather = weather.drop(['winddir16Point', 'weatherDesc'], axis=1) 
'''

"\none_hot_winddir = pd.get_dummies(weather['winddir16Point'], prefix='winddir_')\nweather = weather.join(one_hot_winddir)\none_hot_weatherDesc = pd.get_dummies(weather['weatherDesc'], prefix='weatherDesc_')\nweather = weather.join(one_hot_weatherDesc)\nweather = weather.drop(['winddir16Point', 'weatherDesc'], axis=1) \n"

In [15]:
weather

Unnamed: 0,date,time,tempC,windspeedKmph,winddirDegree,winddir16Point,weatherDesc,precipMM,humidity,pressure,HeatIndexC,FeelsLikeC,datetime
96,2015-01-05,2400,0,12,203,11,31,0.0,96,1037,0,-4,2015-01-05 00:00:00
97,2015-01-05,100,0,13,206,11,31,0.0,96,1036,0,-4,2015-01-05 01:00:00
98,2015-01-05,200,0,14,209,11,31,0.0,96,1036,0,-4,2015-01-05 02:00:00
99,2015-01-05,300,0,15,212,11,31,0.0,96,1035,0,-5,2015-01-05 03:00:00
100,2015-01-05,400,0,14,207,11,31,0.0,96,1035,0,-5,2015-01-05 04:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
52603,2020-12-31,1900,4,10,217,12,14,0.3,94,1000,4,2,2020-12-31 19:00:00
52604,2020-12-31,2000,4,12,225,12,14,0.2,93,1000,4,1,2020-12-31 20:00:00
52605,2020-12-31,2100,4,14,234,12,14,0.3,93,1001,4,0,2020-12-31 21:00:00
52606,2020-12-31,2200,3,12,269,13,14,0.2,92,1003,3,0,2020-12-31 22:00:00


# Merge the two datasets

In [16]:
# Outer join of prices and weather on the shared timestamp, so rows that exist
# in only one of the two tables are kept as well.
total = day_ahead.merge(weather, how='outer', on='datetime')

In [17]:
# Count how often each timestamp occurs in the merged table. Some hours appear
# more than once because of the summer/winter time changes (see the 02:00
# entries on late-October dates in the output).
total['datetime'].value_counts()

2017-10-29 02:00:00    2
2018-10-28 02:00:00    2
2016-10-30 02:00:00    2
2020-10-25 02:00:00    2
2015-10-25 02:00:00    2
                      ..
2020-06-24 23:00:00    1
2017-03-09 12:00:00    1
2018-06-17 06:00:00    1
2020-09-28 06:00:00    1
2019-06-15 03:00:00    1
Name: datetime, Length: 52512, dtype: int64

In [18]:
total

Unnamed: 0,MTU (CET),Day-ahead Price [EUR/MWh],datetime,date,time,tempC,windspeedKmph,winddirDegree,winddir16Point,weatherDesc,precipMM,humidity,pressure,HeatIndexC,FeelsLikeC
0,05.01.2015 00:00 - 05.01.2015 01:00,36.56,2015-01-05 00:00:00,2015-01-05,2400,0,12,203,11,31,0.0,96,1037,0,-4
1,05.01.2015 01:00 - 05.01.2015 02:00,34.94,2015-01-05 01:00:00,2015-01-05,100,0,13,206,11,31,0.0,96,1036,0,-4
2,05.01.2015 02:00 - 05.01.2015 03:00,32.19,2015-01-05 02:00:00,2015-01-05,200,0,14,209,11,31,0.0,96,1036,0,-4
3,05.01.2015 03:00 - 05.01.2015 04:00,28.05,2015-01-05 03:00:00,2015-01-05,300,0,15,212,11,31,0.0,96,1035,0,-5
4,05.01.2015 04:00 - 05.01.2015 05:00,28.04,2015-01-05 04:00:00,2015-01-05,400,0,14,207,11,31,0.0,96,1035,0,-5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52513,31.12.2020 19:00 - 31.12.2020 20:00,61.51,2020-12-31 19:00:00,2020-12-31,1900,4,10,217,12,14,0.3,94,1000,4,2
52514,31.12.2020 20:00 - 31.12.2020 21:00,56.79,2020-12-31 20:00:00,2020-12-31,2000,4,12,225,12,14,0.2,93,1000,4,1
52515,31.12.2020 21:00 - 31.12.2020 22:00,52.44,2020-12-31 21:00:00,2020-12-31,2100,4,14,234,12,14,0.3,93,1001,4,0
52516,31.12.2020 22:00 - 31.12.2020 23:00,51.86,2020-12-31 22:00:00,2020-12-31,2200,3,12,269,13,14,0.2,92,1003,3,0


# Time variable

In [19]:
# Hour of every merged row as an integer array: the hhmm-style 'time' value
# divided by 100 and truncated (e.g. 2400 -> 24, 100 -> 1).
hhmm_values = total['time'].to_numpy()
time = (hhmm_values / 100).astype(int)

In [20]:
# incremental representation of time: the hour scaled down by a factor of ten
# (1 -> 0.1, ..., 24 -> 2.4)
time_increment = np.true_divide(time, 10)

In [21]:
# gray code binary: one row per sample, one column per bit of the hour's
# Gray code, most significant bit first, stored as floats
GRAY_CODE_BITS = 5  # 5 bits cover the values 0..31, enough for the hours 1..24
# fail loudly on values that cannot be written with GRAY_CODE_BITS bits instead
# of silently dropping high bits
if time.size and (time.min() < 0 or time.max() >= 2 ** GRAY_CODE_BITS):
    raise ValueError('hour values must fit in %d Gray-code bits' % GRAY_CODE_BITS)
# binary -> Gray code is n XOR (n >> 1); done on the whole array at once
# instead of formatting a bit string per row in a Python loop
gray_values = time ^ (time >> 1)
# shift amounts that bring each bit, from most to least significant, to position 0
bit_shifts = np.arange(GRAY_CODE_BITS - 1, -1, -1)
time_gray_code = ((gray_values[:, np.newaxis] >> bit_shifts) & 1).astype(float)

In [22]:
# mutually exclusive binary representation: 24 columns per sample with a single
# 1 marking the hour
row_ids = np.arange(len(time))
hot_columns = time - 1  # hour h is stored at column h - 1 before the reversal
time_exclusive = np.zeros([len(time), 24])
time_exclusive[row_ids, hot_columns] = 1
# reverse the column order to correspond to the binary representation
time_exclusive = time_exclusive[:, ::-1]

In [23]:
# Show the three time encodings one after another.
for encoding in (time_increment, time_gray_code, time_exclusive):
    print(encoding)

[2.4 0.1 0.2 ... 2.1 2.2 2.3]
[[1. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 1.]
 ...
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 0. 1.]
 [1. 1. 1. 0. 0.]]
[[1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 1. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]
