In [1]:
import math
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
import plotly.express as px
import os
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import datetime

In [None]:
# Working directory that holds the input data. The original hard-coded
# location stays the default; set the DATASCIENCE_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    'DATASCIENCE_DATA_DIR',
    'C:/Users/Sven Konermann/Documents/GitHub/DataScience/Data',
)
os.chdir(DATA_DIR)

# Load the index data and drop the first column of the file.
# NOTE(review): the first column is presumably a saved row index - confirm.
df = pd.read_csv('Indizes.csv')
df = df.drop(columns=df.columns[0])
df.head()

In [None]:
# Summarise the loaded frame (columns, dtypes, non-null counts)
df.info()

In [None]:
# Convert the 'Datum' column to pandas datetime values, then re-check the dtypes
df['Datum'] = pd.to_datetime(df['Datum'])
df.info()

## DATA EXPLORATION & PROCESSING

In [None]:
# Plot the DAX closing price over time
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df['Datum'], df['Schluss DAX'])
ax.set_title('Schluss Preis Historie DAX')
ax.set_xlabel('Datum', fontsize=18)
ax.set_ylabel('Schluss Preis', fontsize=18)
plt.show()

In [None]:
# Day-over-day change of the DAX close: time series on the left,
# its distribution (probability density histogram) on the right.
dax_diff = df['Schluss DAX'].diff()

fig = make_subplots(rows=1, cols=2, column_widths=[0.6, 0.4])
fig.add_trace(
    go.Scatter(x=df['Datum'], y=dax_diff, name='l1'),
    row=1, col=1,
)
fig.add_trace(
    go.Histogram(x=dax_diff, name='h1', histnorm='probability density'),
    row=1, col=2,
)
fig.update_layout(
    height=550,
    width=1130,
    title_text="Differenz zwischen dem Schlusskursen des DAX",
)

# Axis titles, one pair per subplot
fig.update_xaxes(title_text="Zeit", row=1, col=1)
fig.update_yaxes(title_text="Wert", row=1, col=1)
fig.update_xaxes(title_text="Wert", row=1, col=2)
fig.update_yaxes(title_text="Wahrscheinlichkeitsdichte", row=1, col=2)
fig.show()

## Data Normalization

In [None]:
# Keep only the DAX closing price and determine how many rows (80 %, rounded up)
# belong to the training portion.
data = df.filter(items=['Schluss DAX'])
dataset = data.to_numpy()
training_data_len = math.ceil(0.8 * len(dataset))
training_data_len

In [None]:
# Normalise the prices with a min-max scaler.
# The scaler is fitted on the training rows only, so the minimum/maximum of
# the held-out rows do not leak into training. The whole series is then
# transformed with those training statistics, which means held-out values
# may fall outside the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

# Preview the first rows instead of dumping the full array
scaled_data[:5]

In [None]:
# Build the training samples: every input is a window of 60 consecutive
# scaled values, the target is the value directly after that window.
train_data = scaled_data[:training_data_len, :]
window_ends = range(60, len(train_data))
x_train = [train_data[end - 60:end, 0] for end in window_ends]
y_train = [train_data[end, 0] for end in window_ends]

# Print the first input/target pair once as a sanity check
if x_train:
    print(x_train[:1])
    print(y_train[:1])
    print()

In [None]:
# Turn the lists of samples and targets into NumPy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)

In [None]:
# Append a trailing axis of length 1: (samples, timesteps) -> (samples, timesteps, 1)
train_shape = (x_train.shape[0], x_train.shape[1], 1)
x_train = x_train.reshape(train_shape)
x_train.shape

## Model

In [None]:
# Model configuration: one LSTM layer with 64 units that returns only its
# last output, followed by a Dense(32) layer and a single-unit output layer.
model = Sequential([
    LSTM(64, return_sequences=False, input_shape=(x_train.shape[1], 1)),
    Dense(32),
    Dense(1),
])

model.summary()

In [None]:
# Compile the model: Adam optimizer, mean squared error as the loss
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [None]:
# Train the model: a single epoch with a batch size of one
model.fit(
    x_train,
    y_train,
    epochs=1,
    batch_size=1,
)

In [None]:
# Build the test inputs. The slice starts 60 rows before the split so that
# the first test window ends right at the split point.
test_data = scaled_data[training_data_len - 60:, :]
# Targets are taken from the unscaled data
y_test = dataset[training_data_len:, :]
x_test = [test_data[end - 60:end, 0] for end in range(60, len(test_data))]

In [None]:
# Turn the list of test windows into a NumPy array
x_test = np.array(x_test)

In [None]:
# Append a trailing axis of length 1: (samples, timesteps) -> (samples, timesteps, 1)
test_shape = (x_test.shape[0], x_test.shape[1], 1)
x_test = x_test.reshape(test_shape)

In [None]:
# Predict on the test windows and undo the scaling so the predictions are
# expressed in the original price units.
scaled_predictions = model.predict(x_test)
predictions = scaler.inverse_transform(scaled_predictions)


In [None]:
# Metric: root mean squared error between predictions and actual prices
squared_errors = (predictions - y_test) ** 2
rmse = np.sqrt(squared_errors.mean())
rmse

In [None]:
# Plot the training series, the held-out series and the model's predictions.
train = data[:training_data_len]
# Explicit copy: adding a column to a plain slice of `data` triggers pandas'
# SettingWithCopyWarning and leaves it unclear which object is modified.
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions

plt.figure(figsize=(16,8))
plt.title('Model')
# NOTE: the series are plotted against the row index of `data`, not against
# the 'Datum' column.
plt.xlabel('Date')
plt.plot(train['Schluss DAX'])
plt.plot(valid[['Schluss DAX', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
# The original referenced `plt.show` without parentheses, which never called it.
plt.show()

In [None]:
# Compare the actual values with the predicted values
valid

In [None]:
# Metric "score": mean absolute error relative to the actual price, in percent
actual_close = valid['Schluss DAX']
relative_errors = abs(actual_close - valid['Predictions']) / actual_close
score = (sum(relative_errors) / len(actual_close)) * 100
print(score)

In [None]:
# Metric "accuracy": (1 - mean relative absolute error) expressed in percent
rel_err = abs(valid['Schluss DAX'] - valid['Predictions']) / valid['Schluss DAX']
mean_rel_err = sum(rel_err) / len(valid['Schluss DAX'])
acc_score = (1 - mean_rel_err) * 100
print(acc_score)

In [None]:
from sklearn.metrics import (
    explained_variance_score,
    max_error,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)

# Report regression metrics for the "DAX" index
y_true = valid['Schluss DAX']
y_pred = valid['Predictions']

EVS = explained_variance_score(y_true, y_pred)
ME = max_error(y_true, y_pred)
MAE = mean_absolute_error(y_true, y_pred)
# MSE is the plain mean squared error and RMSE is its square root.
# The original took an extra square root of both values (so "MSE" was really
# the RMSE and "RMSE" its square root) and relied on the `squared=` argument,
# which newer scikit-learn releases no longer accept.
MSE = mean_squared_error(y_true, y_pred)
RMSE = np.sqrt(MSE)
R2 = r2_score(y_true, y_pred)
print('Model: DAX | EVS: ', EVS)
print('Model: DAX | ME: ', ME)
print('Model: DAX | MAE: ', MAE)
print('Model: DAX | MSE ', MSE)
print('Model: DAX | RMSE ', RMSE)
print('Model: DAX | R2 ', R2)
# `acc_score` and `score` come from the two metric cells above this one
print('Model: DAX | Genauigkeit', acc_score)
print('Model: DAX | Score', score)
print('----------------')

In [None]:
# Value reported as "Standard deviation": square root of the mean squared
# residual (the residuals are not mean-centred before squaring).
residuals = valid['Predictions'] - valid['Schluss DAX']
std_nn = np.sqrt(sum(residuals ** 2) / len(valid['Schluss DAX']))
print('Standard deviation: ', std_nn)