# Day-ahead Price Exploratory Data Analysis

## Concatenating years 2019-2022

In [9]:
import pandas as pd
import numpy as np 
import matplotlib as plt

# concat year files to one dataframe
dayahead = pd.DataFrame(pd.read_csv("day-ahead-prices/Day-ahead_Prices_201901010000-202001010000.csv", header=0))
dayahead = pd.concat([dayahead, pd.read_csv("day-ahead-prices/Day-ahead_Prices_202001010000-202101010000.csv", header=0)])
dayahead = pd.concat([dayahead, pd.read_csv("day-ahead-prices/Day-ahead_Prices_202101010000-202201010000.csv", header=0)])
dayahead = pd.concat([dayahead, pd.read_csv("day-ahead-prices/Day-ahead_Prices_202201010000-202301010000.csv", header=0)])

# create datetime value of every beginning hour
dayahead['datetime'] = dayahead['MTU (UTC)'].str[0:16]
dayahead['datetime'] = pd.to_datetime(dayahead['datetime'], format='%d.%m.%Y %H:%M')
dayahead.set_index('datetime', inplace=True)
np.shape(dayahead)


TypeError: Cannot interpret '                                               MTU (UTC)  \
datetime                                                   
2019-01-01 00:00:00  01.01.2019 00:00 - 01.01.2019 01:00   
2019-01-01 01:00:00  01.01.2019 01:00 - 01.01.2019 02:00   
2019-01-01 02:00:00  01.01.2019 02:00 - 01.01.2019 03:00   
2019-01-01 03:00:00  01.01.2019 03:00 - 01.01.2019 04:00   
2019-01-01 04:00:00  01.01.2019 04:00 - 01.01.2019 05:00   
...                                                  ...   
2022-12-31 19:00:00  31.12.2022 19:00 - 31.12.2022 20:00   
2022-12-31 20:00:00  31.12.2022 20:00 - 31.12.2022 21:00   
2022-12-31 21:00:00  31.12.2022 21:00 - 31.12.2022 22:00   
2022-12-31 22:00:00  31.12.2022 22:00 - 31.12.2022 23:00   
2022-12-31 23:00:00  31.12.2022 23:00 - 01.01.2023 00:00   

                    Day-ahead Price [EUR/MWh] Currency  BZN|FI  
datetime                                                        
2019-01-01 00:00:00                     10.07      EUR     NaN  
2019-01-01 01:00:00                     10.03      EUR     NaN  
2019-01-01 02:00:00                      4.56      EUR     NaN  
2019-01-01 03:00:00                      4.83      EUR     NaN  
2019-01-01 04:00:00                      8.09      EUR     NaN  
...                                       ...      ...     ...  
2022-12-31 19:00:00                         -      NaN     NaN  
2022-12-31 20:00:00                         -      NaN     NaN  
2022-12-31 21:00:00                         -      NaN     NaN  
2022-12-31 22:00:00                         -      NaN     NaN  
2022-12-31 23:00:00                         -      NaN     NaN  

[35064 rows x 4 columns]' as a data type

## Look for data tail

In [40]:
print(dayahead.head())

                                               MTU (UTC)  \
datetime                                                   
2019-01-01 00:00:00  01.01.2019 00:00 - 01.01.2019 01:00   
2019-01-01 01:00:00  01.01.2019 01:00 - 01.01.2019 02:00   
2019-01-01 02:00:00  01.01.2019 02:00 - 01.01.2019 03:00   
2019-01-01 03:00:00  01.01.2019 03:00 - 01.01.2019 04:00   
2019-01-01 04:00:00  01.01.2019 04:00 - 01.01.2019 05:00   

                    Day-ahead Price [EUR/MWh] Currency  BZN|FI  
datetime                                                        
2019-01-01 00:00:00                     10.07      EUR     NaN  
2019-01-01 01:00:00                     10.03      EUR     NaN  
2019-01-01 02:00:00                      4.56      EUR     NaN  
2019-01-01 03:00:00                      4.83      EUR     NaN  
2019-01-01 04:00:00                      8.09      EUR     NaN  


## Find last non-NaN value

In [4]:
print(dayahead.iloc[1500])
print(dayahead.iloc[2000])
print(dayahead.iloc[1970])

MTU (UTC)                    04.03.2019 12:00 - 04.03.2019 13:00
Day-ahead Price [EUR/MWh]                                  52.41
Currency                                                     EUR
BZN|FI                                                       NaN
Name: 2019-03-04 12:00:00, dtype: object
MTU (UTC)                    25.03.2019 08:00 - 25.03.2019 09:00
Day-ahead Price [EUR/MWh]                                   41.9
Currency                                                     EUR
BZN|FI                                                       NaN
Name: 2019-03-25 08:00:00, dtype: object
MTU (UTC)                    24.03.2019 02:00 - 24.03.2019 03:00
Day-ahead Price [EUR/MWh]                                   32.2
Currency                                                     EUR
BZN|FI                                                       NaN
Name: 2019-03-24 02:00:00, dtype: object


## Export joined day-ahead data as parquet file

Change price column name to be more simple and datatype to float

In [20]:
import pyarrow as pa

dayahead.rename(columns = {'Day-ahead Price [EUR/MWh]':'price'}, inplace = True)
dayahead['price'] = dayahead['price'].replace('-', np.nan).astype(float)
print(np.dtype(dayahead['price']))

table = pa.Table.from_pandas(dayahead)
pa.parquet.write_table(table, 'processed_data/dayahead.parquet')


float64
