In [70]:
# imports
import numpy as np
import pandas as pd
from datetime import datetime


In [68]:
# Timestamp layout of the 'Datum (MT+1)' column in the CSV export.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# Strings treated as missing values in addition to real NaNs.
NA_PLACEHOLDERS = ['na', 'NA', 'Missing']


def d_parser(x):
    """Parse a single timestamp string laid out as DATETIME_FORMAT.

    No longer handed to ``read_csv`` (its ``date_parser`` argument is
    deprecated since pandas 2.0); the name is kept so that any other cell
    calling ``d_parser`` keeps working.
    """
    return datetime.strptime(x, DATETIME_FORMAT)


# read csv file (raw frame, kept untouched apart from timestamp parsing)
df = pd.read_csv(
    './data/energy-charts_Stromproduktion_und_Börsenstrompreise_in_Deutschland_2020_Excel.csv',
    delimiter=';',
)

# parse the timestamp column with an explicit format; a value that does not
# match DATETIME_FORMAT raises instead of being silently mis-parsed
df['Datum (MT+1)'] = pd.to_datetime(df['Datum (MT+1)'], format=DATETIME_FORMAT)

energy_df = (
    df
    # rename the timestamp and the day-ahead price columns
    .rename(columns={'Datum (MT+1)': 'Datetime', 'Day Ahead Auktion (DE-LU)': 'Day Ahead Auktion/MWh'})
    # turn placeholder strings into real NaNs *before* dropping empty columns,
    # so a column made up only of placeholders is removed as well
    .replace(NA_PLACEHOLDERS, np.nan)
    # remove import saldo column, as all values are NaN
    .dropna(axis=1, how='all')
    # derive calendar date and weekday name from the timestamp
    # (.assign works on a copy, so no SettingWithCopyWarning is raised)
    .assign(
        Date=lambda frame: frame['Datetime'].dt.date,
        Weekday=lambda frame: frame['Datetime'].dt.day_name(),
    )
)

# Check if there is any na value left: counts rows per NA/non-NA pattern
print('NA', energy_df.isna().value_counts())

# drop duplicate rows, then use the Date column as index
# (the index is not unique: many rows share the same date)
energy_df = energy_df.drop_duplicates().set_index('Date')

NA Datetime  Kernenergie  Nicht Erneuerbar  Erneuerbar  Day Ahead Auktion/MWh  Date   Weekday
False     False        False             False       False                  False  False      35136
dtype: int64


In [69]:
# preview the first ten rows of the cleaned frame
energy_df.iloc[:10]

Unnamed: 0_level_0,Datetime,Kernenergie,Nicht Erneuerbar,Erneuerbar,Day Ahead Auktion/MWh,Weekday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01,2020-01-01 00:00:00,8089.585,29700.989,14524.769,38.6,Wednesday
2020-01-01,2020-01-01 00:15:00,8087.935,29597.398,14407.463,38.6,Wednesday
2020-01-01,2020-01-01 00:30:00,8088.635,29433.049,14537.378,38.6,Wednesday
2020-01-01,2020-01-01 00:45:00,8086.715,29319.558,14695.598,38.6,Wednesday
2020-01-01,2020-01-01 01:00:00,8093.394,29202.803,14902.589,36.55,Wednesday
2020-01-01,2020-01-01 01:15:00,8090.135,29144.575,14898.388,36.55,Wednesday
2020-01-01,2020-01-01 01:30:00,8076.207,29123.509,14975.325,36.55,Wednesday
2020-01-01,2020-01-01 01:45:00,8064.899,29243.764,15021.521,36.55,Wednesday
2020-01-01,2020-01-01 02:00:00,8080.356,29402.351,15004.099,32.32,Wednesday
2020-01-01,2020-01-01 02:15:00,8072.587,29418.973,14981.647,32.32,Wednesday


In [53]:
# Summary statistics of the numeric columns.
# Open question: the minimum of the day-ahead price is negative - what does
# a negative price mean?
summary_stats = energy_df.describe()
summary_stats

Unnamed: 0,Kernenergie,Nicht Erneuerbar,Erneuerbar,Day Ahead Auktion (DE-LU)
count,35136.0,35136.0,35136.0,35136.0
mean,6934.673246,33765.069651,27962.757734,30.471738
std,969.703507,11159.5453,12106.069379,17.501029
min,3739.551,11985.471,7809.697,-83.94
25%,6450.2045,25593.516,18140.90875,21.75
50%,6793.422,32771.112,26610.4735,30.99
75%,7834.915,39948.88725,36752.224,40.25
max,8185.779,68966.183,69026.97,200.04


In [71]:
# Pairwise correlation of the numeric columns only.
# energy_df also holds non-numeric columns (Datetime, Weekday); relying on the
# default of `numeric_only` emits a FutureWarning on pandas 1.5 and raises on
# pandas >= 2.0, so the restriction is made explicit.
energy_df.corr(numeric_only=True)

  energy_df.corr()


Unnamed: 0,Kernenergie,Nicht Erneuerbar,Erneuerbar,Day Ahead Auktion/MWh
Kernenergie,1.0,0.40768,-0.135737,0.372776
Nicht Erneuerbar,0.40768,1.0,-0.459684,0.796859
Erneuerbar,-0.135737,-0.459684,1.0,-0.5126
Day Ahead Auktion/MWh,0.372776,0.796859,-0.5126,1.0
