In [1]:
import yfinance as yf
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error
import numpy as np
from datetime import date, datetime
from dateutil.relativedelta import relativedelta
from pmdarima.arima import auto_arima
import matplotlib.pyplot as plt
from tensorflow import keras
from keras.models import Sequential, load_model
from keras import layers
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math
import warnings
warnings.filterwarnings("ignore", category=pd.core.common.SettingWithCopyWarning)
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
def data_to_windowed_data(df, windows):
    """Build a supervised-learning table of lagged closing prices.

    Each output row holds the ``windows`` previous ``Close`` values in the
    columns ``Target_<windows>`` ... ``Target_1`` (largest lag first) followed
    by the row's own close in the last column, ``Target``. The index is moved
    into the first column; if it is named ``Date`` that column is renamed to
    ``Target Date``.

    ``df`` is not modified: all work happens on a copy, so the caller's frame
    keeps its index, columns and rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``Close`` column.
    windows : int
        Number of lagged close values to attach to every row.

    Returns
    -------
    pandas.DataFrame
        New frame with every row that contains a missing value removed (this
        includes the leading rows whose lag columns are NaN).

    Raises
    ------
    KeyError
        If ``df`` has no ``Close`` column.
    """
    windowed = df.copy()
    close_prices = windowed.pop("Close")
    # Largest lag first so the lag columns read chronologically left to right.
    for lag in range(windows, 0, -1):
        windowed[f"Target_{lag}"] = close_prices.shift(lag)
    # The current close goes last; windowed_df_to_d_x_y reads the final column as the label.
    windowed.insert(loc=len(windowed.columns), column="Target", value=close_prices)
    windowed = windowed.reset_index().rename(columns={"Date": "Target Date"})
    # Rows with an incomplete look-back window contain NaN lags and are removed here.
    return windowed.dropna()
def windowed_df_to_d_x_y(wdf):
    """Split a windowed frame into dates, model inputs and targets.

    The first column is returned unchanged, the last column becomes the target
    vector, and every column in between becomes the input tensor with a
    trailing axis of length one, i.e. shape ``(rows, columns_between, 1)``.
    Inputs and targets are cast to ``float32``.

    Returns
    -------
    tuple
        ``(dates, X, Y)`` as NumPy arrays.
    """
    values = wdf.to_numpy()
    first_column, middle_columns, last_column = values[:, 0], values[:, 1:-1], values[:, -1]
    # Add the trailing singleton axis: one value per position in the window.
    features = middle_columns[:, :, np.newaxis]
    return first_column, features.astype(np.float32), last_column.astype(np.float32)

In [12]:
def _build_lstm_model(window_size):
    """Create and compile the LSTM -> Dense(32, relu) -> Dense(1) regressor."""
    model = Sequential([
        layers.Input((window_size, 1)),
        layers.LSTM(64, return_sequences=False),
        layers.Dense(32, activation='relu'),
        layers.Dense(1),
    ])
    model.compile(loss='mse',
                  optimizer=Adam(learning_rate=0.001),
                  metrics=['mean_absolute_error'])
    return model


def _recursive_forecast(model, seed_window, steps):
    """Predict ``steps`` scaled values, feeding each prediction back into the window."""
    window = np.asarray(seed_window, dtype=np.float32).ravel()
    scaled_forecast = np.empty(steps, dtype=np.float32)
    for step in range(steps):
        # One sample, one value per position in the window.
        next_value = model.predict(window.reshape(1, -1, 1), verbose=0)[0, 0]
        scaled_forecast[step] = next_value
        # Slide the window: drop the oldest value, append the newest prediction.
        window = np.append(window[1:], next_value)
    return scaled_forecast


def _true_vs_pred_frame(scaler, index, y_scaled, pred_scaled):
    """Undo the scaling and put actual and predicted prices side by side."""
    actual = scaler.inverse_transform(y_scaled.reshape(-1, 1))
    predicted = scaler.inverse_transform(pred_scaled)
    return pd.DataFrame(np.hstack((actual, predicted)), index=index, columns=["True", "Pred"])


def LSTM_real(hist_data, window_size=50, forecast_horizon=10, epochs=80,
              batch_size=32, train_fraction=0.8, checkpoint_filepath='best_model.h5'):
    """Train an LSTM on closing prices and forecast the next values recursively.

    The ``Close`` column is turned into sliding windows of ``window_size``
    values, split chronologically into a training and a validation part, and
    min-max scaled with statistics taken from the training part only. The
    model is trained for ``epochs`` epochs; the epoch with the lowest
    validation loss is written to ``checkpoint_filepath`` and reloaded before
    any prediction is made.

    ``hist_data`` is not modified.

    Parameters
    ----------
    hist_data : pandas.DataFrame
        Frame with a ``Close`` column; its index supplies the dates.
    window_size : int, default 50
        Number of past closes fed to the model per sample.
    forecast_horizon : int, default 10
        Number of steps predicted beyond the last row of ``hist_data``.
    epochs : int, default 80
        Training epochs.
    batch_size : int, default 32
        Mini-batch size passed to ``model.fit``.
    train_fraction : float, default 0.8
        Share of the windowed samples (earliest first) used for training; the
        remainder is the validation set.
    checkpoint_filepath : str, default 'best_model.h5'
        Where the best model is saved during training (overwritten on each run).

    Returns
    -------
    forecast_df : pandas.DataFrame
        Column ``Predicted Price`` with ``forecast_horizon`` rows in original
        price units, indexed 0..n-1.
    df_train, df_valid : pandas.DataFrame
        Columns ``True`` and ``Pred`` (original price units) for the training
        and validation samples, indexed by their dates.

    Raises
    ------
    ValueError
        If ``window_size`` or ``forecast_horizon`` is smaller than 1, or if the
        data is too short to leave at least one training and one validation
        sample.
    """
    if window_size < 1 or forecast_horizon < 1:
        raise ValueError("window_size and forecast_horizon must both be at least 1")

    # Window a private copy of the raw prices so the caller's frame keeps its
    # original values and layout.
    windowed_df = data_to_windowed_data(hist_data[["Close"]].copy(), window_size)
    dates, X_raw, y_raw = windowed_df_to_d_x_y(windowed_df)

    train_split = int(len(dates) * train_fraction)
    if train_split == 0 or train_split == len(dates):
        raise ValueError(
            f"Need at least one training and one validation sample; got {len(dates)} "
            f"windowed rows with train_fraction={train_fraction}"
        )

    # Fit the scaler on prices that occur in training samples only, so the
    # validation range does not leak into the scaling of the training data.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(np.concatenate([X_raw[:train_split].reshape(-1, 1),
                               y_raw[:train_split].reshape(-1, 1)]))
    X = scaler.transform(X_raw.reshape(-1, 1)).reshape(X_raw.shape).astype(np.float32)
    y = scaler.transform(y_raw.reshape(-1, 1)).ravel().astype(np.float32)

    dates_train, X_train, y_train = dates[:train_split], X[:train_split], y[:train_split]
    dates_val, X_val, y_val = dates[train_split:], X[train_split:], y[train_split:]

    model = _build_lstm_model(window_size)
    model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        monitor='val_loss',
        mode='min',
        save_best_only=True)
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=1,
              callbacks=[model_checkpoint_callback], batch_size=batch_size)
    # Continue with the weights of the epoch that scored best on the validation set.
    model = load_model(checkpoint_filepath)

    train_predict = model.predict(X_train)
    val_predict = model.predict(X_val)

    # Input for the first out-of-sample step: the most recent window shifted by
    # one position, ending with the latest known (scaled) close.
    seed_window = np.append(X[-1].ravel()[1:], y[-1])
    scaled_forecast = _recursive_forecast(model, seed_window, forecast_horizon)
    forecast = scaler.inverse_transform(scaled_forecast.reshape(-1, 1)).ravel()
    forecast_df = pd.DataFrame(forecast, columns=["Predicted Price"])

    df_train = _true_vs_pred_frame(scaler, dates_train, y_train, train_predict)
    df_valid = _true_vs_pred_frame(scaler, dates_val, y_val, val_predict)

    return forecast_df, df_train, df_valid

In [13]:
# Rows held back at the end for the out-of-sample comparison, and leading rows
# that have no complete look-back window. Both must match what LSTM_real uses
# (10 forecast steps, 50-value window).
FORECAST_HORIZON = 10
WINDOW_SIZE = 50

# auto_adjust=False keeps the unadjusted "Close" regardless of the default of
# the installed yfinance version.
data = yf.download("MDO.DE", period = "max", auto_adjust = False)
if isinstance(data.columns, pd.MultiIndex):
    # NOTE(review): some yfinance versions return two-level columns even for a
    # single ticker; this assumes level 0 holds the field names - confirm
    # against the installed version.
    data.columns = data.columns.get_level_values(0)
# Keep only the closing price instead of dropping a fixed list of other
# columns, which fails as soon as one of them (e.g. "Adj Close") is absent.
data = data[["Close"]]

end_zeitpunkte = ["2023-07-07"]
end_zeitpunkte_ts = [datetime.strptime(end_zeitpunkte[0], "%Y-%m-%d").date()]

# Adjust the length of the historical period (in years) here:
time_horizont = 3
start_zeitpunkte_ts = [end_zeitpunkte_ts[0] - relativedelta(years=time_horizont)]
filtered_df = data.loc[start_zeitpunkte_ts[0]:end_zeitpunkte_ts[0]]
assert len(filtered_df) > WINDOW_SIZE + FORECAST_HORIZON, "not enough price history in the selected period"

# The last FORECAST_HORIZON rows are the comparison period; everything before
# them is the history the model is trained on.
hist_data = filtered_df.iloc[: len(filtered_df) - FORECAST_HORIZON]
hist_data_for_function = hist_data.copy()
prog_data = filtered_df.iloc[len(filtered_df) - FORECAST_HORIZON :]
index_list = prog_data.index.tolist()

forecast_df, df_train, df_valid = LSTM_real(hist_data_for_function)

# Give the forecast the dates of the held-back rows so both line up in `result`.
forecast_df = forecast_df.set_index(pd.Index(index_list))
result = pd.concat([prog_data, forecast_df], axis=1)
# Skip the leading rows without a complete look-back window; they have no
# counterpart in df_train / df_valid.
true_data = hist_data.iloc[WINDOW_SIZE:]

[*********************100%***********************]  1 of 1 completed
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80


Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


In [29]:
# Number of trailing training-set predictions shown to the left of the validation span.
TRAIN_TAIL = 100
# Plot the actual prices over exactly the span covered by the two prediction
# traces. This was a hard-coded 241, which looks like 100 + the validation
# length of one particular run (NOTE(review): confirm); deriving it keeps the
# curves aligned when the date range or split changes.
shown_rows = TRAIN_TAIL + len(df_valid)

fig = px.line(template = "simple_white")
fig.add_trace(go.Scatter(x = true_data.index[-shown_rows:], y = true_data["Close"].iloc[-shown_rows:], mode = "lines", name = "Tatsächliche Daten", line_color = "red"))
fig.add_trace(go.Scatter(x = df_train.index[-TRAIN_TAIL:], y = df_train["Pred"].iloc[-TRAIN_TAIL:], mode = "lines", name = "Prognostizierte Trainingsdaten", line_color = "blue"))
fig.add_trace(go.Scatter(x = df_valid.index, y = df_valid["Pred"], mode = "lines", name = "Prognostizierte Validierungsdaten", line_color = "green"))
fig.update_layout(xaxis_title = "Datum", yaxis_title = "MC Donalds Kurspreis in EUR", legend=dict(x=0.5, y=1))
# Export the figure as a PDF file next to the notebook.
fig.write_image("Train_LSTM.pdf")


In [26]:
# Actual vs. forecast price for the held-back days, drawn on a categorical
# x-axis so every plotted date gets its own evenly spaced tick.
fig2 = px.line(template = "simple_white")
day_labels = result.index.strftime("%Y-%m-%d")
for column, label, colour in (
    ("Close", "Tatsächlicher Kurs", "red"),
    ("Predicted Price", "Prognose", "blue"),
):
    fig2.add_trace(go.Scatter(x = day_labels, y = result[column], mode = "lines", name = label, line_color = colour))
# Last expression of the cell, so the notebook renders the figure.
fig2.update_layout(xaxis_title = "Datum", yaxis_title = "MC Donalds Kurspreis in EUR", xaxis= {"type": "category"}, legend=dict(x=0, y=1))
# Uncomment to export the figure as a PDF:
#fig2.write_image("Prog_LSTM.pdf")
