# Import libraries

In [1]:
from keras.models import load_model
from keras.models import Model
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error
import keras
import yfinance as yf
from matplotlib.backends.backend_pdf import PdfPages
import pickle
import json
import math

# read predicting sequence

In [2]:
def download_stock_data(stock_symbols, start_date, end_date, output_file):
    """Download daily price data with yfinance and write it to a CSV file.

    Parameters
    ----------
    stock_symbols : str or iterable of str
        One ticker symbol, or several. The rows of every symbol are appended
        to the same file in the order given.
    start_date, end_date
        Time period, passed unchanged to ``yf.download`` as ``start``/``end``.
    output_file : str
        Path of the CSV file to create; an existing file is overwritten.

    The file starts with the header ``Date,Open,High,Low,Close,Volume``
    followed by the downloaded rows (written without their own header).
    """
    # Use the argument instead of relying on a global named `symbol`.
    # A single ticker string is wrapped so both call styles share one loop.
    if isinstance(stock_symbols, str):
        symbols = [stock_symbols]
    else:
        symbols = list(stock_symbols)

    # pandas asks for newline='' when an open file object is passed to to_csv;
    # without it, platforms that translate newlines can emit blank rows.
    with open(output_file, 'w', newline='') as f:
        # write column headings
        f.write('Date,Open,High,Low,Close,Volume\n')

        for ticker in symbols:
            # load share data for the ticker and the given time period
            stock_data = yf.download(ticker, start=start_date, end=end_date)
            # keep only the desired columns, in the header's order
            stock_data = stock_data[['Open', 'High', 'Low', 'Close', 'Volume']]
            # append the rows to the file
            stock_data.to_csv(f, header=False)

# load time sequence

In [3]:
# Spreadsheet with one (start_date, end_date) pair per row
excel_file_path = 'settings/lastSequenceForPrediction.xlsx'

# Text file holding the index of the spreadsheet row to use
index_file_path = 'settings/lastSequenceForPrediction.txt'

# Read the stored row index; start at row 0 when the index file does not exist
try:
    with open(index_file_path, 'r') as index_file:
        current_row_index = int(index_file.read().strip())
except FileNotFoundError:
    current_row_index = 0

# Load the spreadsheet
df_lastSeq = pd.read_excel(excel_file_path)

if current_row_index < len(df_lastSeq):
    # Pull the date range out of the selected row
    start_date_lastSeq = df_lastSeq.loc[current_row_index, 'start_date']
    end_date_lastSeq = df_lastSeq.loc[current_row_index, 'end_date']

    # Show the selected date range
    print(f'Startdatum: {start_date_lastSeq}, Enddatum: {end_date_lastSeq}')
else:
    # Index points past the last row.
    # NOTE: start_date_lastSeq / end_date_lastSeq are not assigned in this branch.
    print("Es gibt keine weiteren Zeilen in der Excel-Tabelle.")

Startdatum: 2018-10-01, Enddatum: 2019-05-31


In [4]:
# Ticker symbols to process
stock_symbols = [
    'ALV.DE', 'DBK.DE', 'VOW3.DE', 'BMW.DE', 'ADS.DE', 'BEI.DE',
    'DTE.SG', 'SAP.DE', '1COV.DE', 'BAS.DE', 'EOAN.DE', 'RWE.DE',
]
start_date = start_date_lastSeq
end_date = end_date_lastSeq

# Download one CSV per share, then tidy the file up
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}.csv'
    download_stock_data(symbol, start_date, end_date, output_file)

    # Read the CSV back, restricted to the desired columns
    df = pd.read_csv(output_file, usecols=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'])

    # Load the raw text lines of the file
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # Keep only non-empty lines, stripped of surrounding whitespace
    lines = [stripped for stripped in map(str.strip, lines) if stripped]

    # Write the cleaned lines back to the same file
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [5]:
# One data frame per symbol, keyed by ticker
dfPre = {}

for symbol in stock_symbols:
    # Load the CSV, drop rows with missing values and use the date as index
    dfPre[symbol] = (
        pd.read_csv(f'stock_data_{symbol}.csv')
        .dropna()
        .set_index('Date')
    )

    # Number of rows that remain
    num_rows = len(dfPre[symbol].index)

    # Keep only the most recent 60 rows when there are more than 60
    if num_rows > 60:
        dfPre[symbol] = dfPre[symbol].iloc[-60:]


In [6]:
# Record, per symbol, whether any value is missing before the treatment
missing_values_beforePre = {
    symbol: dfPre[symbol].isna().to_numpy().any()
    for symbol in stock_symbols
}

# Replace each missing value with the mean of the values in the previous and next row
for symbol in stock_symbols:
    for column in dfPre[symbol].columns:
        column_values = dfPre[symbol][column]
        missing_valuesPre = column_values.isna()
        neighbour_mean = (column_values.shift(1) + column_values.shift(-1)) / 2
        dfPre[symbol].loc[missing_valuesPre, column] = neighbour_mean

# Record, per symbol, whether any value is still missing afterwards
missing_values_afterPre = {
    symbol: dfPre[symbol].isna().to_numpy().any()
    for symbol in stock_symbols
}

# Report the missing-value flags before and after the treatment
for symbol in stock_symbols:
    print(f"Fehlende Werte vor der Behandlung für {symbol} gefunden:", missing_values_beforePre[symbol])
    print(f"Fehlende Werte nach der Behandlung für {symbol} gefunden:", missing_values_afterPre[symbol])

# Report the number of rows of each symbol's data frame
for symbol in stock_symbols:
    print(f"Länge des Datensatzes für {symbol}:", len(dfPre[symbol]))


Fehlende Werte vor der Behandlung für ALV.DE gefunden: False
Fehlende Werte nach der Behandlung für ALV.DE gefunden: False
Fehlende Werte vor der Behandlung für DBK.DE gefunden: False
Fehlende Werte nach der Behandlung für DBK.DE gefunden: False
Fehlende Werte vor der Behandlung für VOW3.DE gefunden: False
Fehlende Werte nach der Behandlung für VOW3.DE gefunden: False
Fehlende Werte vor der Behandlung für BMW.DE gefunden: False
Fehlende Werte nach der Behandlung für BMW.DE gefunden: False
Fehlende Werte vor der Behandlung für ADS.DE gefunden: False
Fehlende Werte nach der Behandlung für ADS.DE gefunden: False
Fehlende Werte vor der Behandlung für BEI.DE gefunden: False
Fehlende Werte nach der Behandlung für BEI.DE gefunden: False
Fehlende Werte vor der Behandlung für DTE.SG gefunden: False
Fehlende Werte nach der Behandlung für DTE.SG gefunden: False
Fehlende Werte vor der Behandlung für SAP.DE gefunden: False
Fehlende Werte nach der Behandlung für SAP.DE gefunden: False
Fehlende Werte

# Normalize the prediction data

In [7]:
# Load the previously fitted scaler from disk.
# SECURITY NOTE: pickle.load executes arbitrary code contained in the file;
# only load 'scaler.pkl' if it comes from a trusted source.
with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)
    
# Per-feature minimum and maximum stored on the scaler.
# NOTE(review): these two names are not used again in the visible part of this notebook.
min_value = scaler.data_min_
max_value = scaler.data_max_
# Scale the data of every symbol with the loaded scaler.
# nfPre ends up holding one NumPy array per symbol;
# nfPre_normalized holds the raw output of scaler.transform per symbol.
nfPre = {}
nfPre_normalized = {}
for symbol in stock_symbols:
    # Work on a copy so dfPre stays untouched ("Date" is already the index, not a column)
    nfPre[symbol] = dfPre[symbol].copy()
    
    # Remove the index name
    nfPre[symbol].index.name = None

    # Scale all columns; the date lives in the index and is therefore not transformed.
    # NOTE(review): presumably the scaler was fitted on the same columns in the same order - confirm.
    nfPre_normalized[symbol] = scaler.transform(nfPre[symbol])

    # Rebuild a data frame from the scaled values with the original columns and index
    nfPre[symbol] = pd.DataFrame(nfPre_normalized[symbol], columns=nfPre[symbol].columns, index=nfPre[symbol].index)

    # Convert the data frame to a NumPy array
    nfPre[symbol] = nfPre[symbol].to_numpy()
    

# Data preparation

In [8]:
# Model input tensor per symbol
X_pre_dict = {}

for symbol in stock_symbols:
    # Scaled values of the current symbol and their row count
    nfPre_symbol = nfPre[symbol]
    nfPre_size_symbol = len(nfPre_symbol)

    # Prepend a new leading axis of length 1 in front of the row axis
    X_pre_symbol = nfPre_symbol[np.newaxis, :nfPre_size_symbol]

    # Convert to a float32 tensor and store it under the symbol
    X_pre_symbol_tensor = tf.convert_to_tensor(X_pre_symbol, dtype=tf.float32)
    X_pre_dict[symbol] = X_pre_symbol_tensor

# Import models

In [9]:
# Load the stored model of every symbol, keyed by ticker
loaded_models = {
    symbol: load_model(f'Predict20Days_{symbol}.h5.keras')
    for symbol in stock_symbols
}

In [10]:
# Show the symbol -> model mapping as a quick check that every model was loaded
print(loaded_models)

{'ALV.DE': <Sequential name=sequential_2, built=True>, 'DBK.DE': <Sequential name=sequential, built=True>, 'VOW3.DE': <Sequential name=sequential, built=True>, 'BMW.DE': <Sequential name=sequential, built=True>, 'ADS.DE': <Sequential name=sequential_1, built=True>, 'BEI.DE': <Sequential name=sequential, built=True>, 'DTE.SG': <Sequential name=sequential, built=True>, 'SAP.DE': <Sequential name=sequential, built=True>, '1COV.DE': <Sequential name=sequential, built=True>, 'BAS.DE': <Sequential name=sequential, built=True>, 'EOAN.DE': <Sequential name=sequential, built=True>, 'RWE.DE': <Sequential name=sequential, built=True>}


# Predict the unknown sequence and transform the result back into monetary values

In [11]:
def prediction(symbol):
    """Return the prediction of the model loaded for ``symbol``.

    Looks up the model in the global ``loaded_models`` and its input in the
    global ``X_pre_dict`` and returns whatever the model's ``predict`` returns.
    """
    model = loaded_models[symbol]
    return model.predict(X_pre_dict[symbol])

In [12]:
# predictions_dict: extracted value per symbol; predictions: raw model output per symbol
predictions_dict = {}
predictions = {}

for symbol in stock_symbols:

    # Run the model of the current symbol
    predictions[symbol] = prediction(symbol)

    # Undo the scaling so the values are back on the original scale
    predictions_original_scale_symbol = scaler.inverse_transform(predictions[symbol])

    # Take the second-to-last entry of the first row.
    # NOTE(review): with the Open, High, Low, Close, Volume column order used for
    # the CSV files this would be "Close" - confirm the model output uses that order.
    first_row = predictions_original_scale_symbol[0]
    prediction_20_days_ahead_symbol = first_row[-2]

    # Keep the extracted value for the current symbol
    predictions_dict[symbol] = dict(
        prediction_20_days_ahead=prediction_20_days_ahead_symbol
    )

    # Show the back-transformed values and the extracted one
    print(f"Zurücktransformierte Vorhersagen für {symbol}:")
    print(predictions_original_scale_symbol)
    print(prediction_20_days_ahead_symbol)
print(predictions_dict)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step
Zurücktransformierte Vorhersagen für ALV.DE:
[[1.8803723e+02 1.8639368e+02 1.8502802e+02 1.8077202e+02 6.3835994e+05]]
180.77202
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step
Zurücktransformierte Vorhersagen für DBK.DE:
[[ 7.5865121e+00 -3.9413073e+00 -1.3418933e+01  5.8028978e-01
   1.8261014e+06]]
0.5802898
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step
Zurücktransformierte Vorhersagen für VOW3.DE:
[[1.3076035e+02 1.3058652e+02 1.3011418e+02 1.3213867e+02 9.5613125e+05]]
132.13867
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step
Zurücktransformierte Vorhersagen für BMW.DE:
[[6.5791161e+01 6.6576927e+01 6.7476204e+01 6.6982307e+01 1.3963981e+06]]
66.98231
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195ms/step
Zurücktransformierte Vorhersagen für ADS.DE:
[[ 1.8111066e+02  1.8797861e+02  1.9331647e+02  1.9387920e+02
  -1.

# read test CSV from 2019

In [13]:
# Spreadsheet with one (start_date, end_date) pair per row for the evaluation month
excel_file_path = 'settings/actualMonth_startEnd.xlsx'

# Text file holding the index of the spreadsheet row to use
index_file_path = 'settings/actualMonthIndex.txt'

# Read the stored row index; start at row 0 when the index file does not exist
try:
    with open(index_file_path, 'r') as index_file:
        current_row_index = int(index_file.read().strip())
except FileNotFoundError:
    current_row_index = 0
print(current_row_index)
# Load the spreadsheet
df = pd.read_excel(excel_file_path)

if current_row_index < len(df):
    # Pull the date range out of the selected row
    start_date2019 = df.loc[current_row_index, 'start_date']
    end_date2019 = df.loc[current_row_index, 'end_date']

    # Show the selected date range
    print(f'Startdatum: {start_date2019}, Enddatum: {end_date2019}')
else:
    # Index points past the last row.
    # NOTE: start_date2019 / end_date2019 are not assigned in this branch.
    print("Es gibt keine weiteren Zeilen in der Excel-Tabelle.")

5
Startdatum: 2019-06-01, Enddatum: 2019-06-30


In [14]:
# Date range of the evaluation period
start_date = start_date2019
end_date = end_date2019
print(start_date)
print(end_date)

# Download one CSV per symbol, then strip empty lines from it
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}_2019.csv'
    download_stock_data(symbol, start_date, end_date, output_file)

    # Load the raw text lines of the file
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # Keep only non-empty lines, stripped of surrounding whitespace
    lines = [stripped for stripped in map(str.strip, lines) if stripped]

    # Write the cleaned lines back to the same file
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))


2019-06-01
2019-06-30


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [15]:
# Preview the downloaded evaluation data of every symbol
for symbol in stock_symbols:
    # CSV written for this symbol
    csv_file = f'stock_data_{symbol}_2019.csv'

    # Columns to keep, in display order
    selected_columns = ['Open', 'High', 'Low', 'Close', 'Volume']

    # Load the CSV, use the date as index and keep the selected columns
    df = pd.read_csv(csv_file).set_index('Date')[selected_columns]

    # Print the first rows
    print(f"Daten für Symbol {symbol}:")
    print(df.head())


Daten für Symbol ALV.DE:
                  Open        High         Low       Close   Volume
Date                                                               
2019-06-03  197.100006  199.380005  196.339996  199.160004  1057670
2019-06-04  198.199997  203.550003  197.919998  203.399994  1164056
2019-06-05  202.000000  203.250000  201.449997  202.300003   946091
2019-06-06  202.399994  204.350006  201.850006  202.149994   955153
2019-06-07  202.100006  204.699997  202.100006  204.300003  1012695
Daten für Symbol DBK.DE:
             Open   High    Low  Close    Volume
Date                                            
2019-06-03  6.000  6.010  5.801  5.975  22897840
2019-06-04  5.950  6.241  5.940  6.241  19267974
2019-06-05  6.250  6.283  6.082  6.138  12611897
2019-06-06  6.123  6.204  5.863  5.958  20493968
2019-06-07  5.982  6.028  5.927  5.966  10758861
Daten für Symbol VOW3.DE:
                  Open        High         Low       Close   Volume
Date                                 

# Compare predictions to actual share values

In [16]:
# Squared error of the single prediction of every symbol
mse_list = []

# Actual and predicted closing values across all symbols
all_actual_values = []
all_predicted_values = []

# Compare the predicted value with the last available "Close" of the evaluation period
for symbol in stock_symbols:
    # CSV written for this symbol
    csv_file = f'stock_data_{symbol}_2019.csv'

    # Load the CSV and use the date as index
    df = pd.read_csv(csv_file)
    df.set_index('Date', inplace=True)

    # The last row is the final trading day in the file
    last_row = df.iloc[-1]
    actual_close = last_row['Close']

    # Look the prediction up once and reuse it below
    predicted_close = predictions_dict[symbol]['prediction_20_days_ahead']

    # With one sample each, the mean squared error is just the squared difference
    mse = mean_squared_error([actual_close], [predicted_close])
    mse_list.append(mse)

    # One-row data frame with the actual and the predicted "Close" value
    comparison_df = pd.DataFrame({'Actual': [actual_close], 'Predicted': predicted_close}, index=[last_row.name])

    # Output of the comparison
    print(f"Vergleich für Symbol {symbol}:")
    print(comparison_df)
    print(f"MSE für Symbol {symbol}: {mse}")
    print("\n")

    # Collect the values for the overall error
    all_actual_values.append(actual_close)
    all_predicted_values.append(predicted_close)

# Mean squared error over all symbols; its square root is the root mean squared error
# (the variable names and printed labels call it "standard deviation")
total_mse = mean_squared_error(all_actual_values, all_predicted_values)
standard_deviation = math.sqrt(total_mse)
# Divide by the actual number of symbols instead of a hard-coded 12,
# so the figure stays consistent when the symbol list changes
standard_deviation_share = standard_deviation / len(stock_symbols)

# Output of the overall error figures
print(f"Gesamter Mean Squared Error für alle Vorhersagen: {total_mse}")
print(f"Gesamte Standardabweichung für alle Vorhersagen: {standard_deviation}")
print(f"Gesamte Standardabweichung pro Aktie: {standard_deviation_share}")

# Custom JSON encoder that converts NumPy values to built-in Python types
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that understands NumPy scalars and arrays.

    Handles every NumPy floating, integer and boolean scalar type (not only
    ``np.float32``) as well as ``np.ndarray``. Anything else is passed to the
    base class, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``."""
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            # Nested lists; their elements are already built-in Python types
            return obj.tolist()
        return super().default(obj)

# Turn NumPy arrays into lists where a value is an array; other values pass through unchanged
predictions_dict_serializable = {
    ticker: (entry.tolist() if isinstance(entry, np.ndarray) else entry)
    for ticker, entry in predictions_dict.items()
}
print(predictions_dict_serializable)

# Target file that hands the predictions over to the mean variance model
json_file = 'settings/predictions_dict.json'

# Write the dictionary to the JSON file
with open(json_file, 'w') as f:
    json.dump(predictions_dict_serializable, f, cls=NumpyEncoder)

Vergleich für Symbol ALV.DE:
            Actual   Predicted
2019-06-28   212.0  180.772018
MSE für Symbol ALV.DE: 975.1868327728007


Vergleich für Symbol DBK.DE:
            Actual  Predicted
2019-06-28    6.78    0.58029
MSE für Symbol DBK.DE: 38.43640939991445


Vergleich für Symbol VOW3.DE:
                Actual   Predicted
2019-06-28  148.220001  132.138672
MSE für Symbol VOW3.DE: 258.6091535249725


Vergleich für Symbol BMW.DE:
               Actual  Predicted
2019-06-28  65.089996  66.982307
MSE für Symbol BMW.DE: 3.5808412847691216


Vergleich für Symbol ADS.DE:
            Actual   Predicted
2019-06-28   271.5  193.879196
MSE für Symbol ADS.DE: 6024.98918768228


Vergleich für Symbol BEI.DE:
                Actual  Predicted
2019-06-28  105.550003  99.765457
MSE für Symbol BEI.DE: 33.46097125112994


Vergleich für Symbol DTE.SG:
            Actual  Predicted
2019-06-28   13.69  27.065741
MSE für Symbol DTE.SG: 178.91044743133625


Vergleich für Symbol SAP.DE:
                