# Import librarys

In [1]:
from keras.models import load_model
from keras.models import Model
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error
import keras
import yfinance as yf
from matplotlib.backends.backend_pdf import PdfPages
import pickle
import json
import math

# read predicting sequence

In [2]:
def download_stock_data(stock_symbols, start_date, end_date, output_file):
    with open(output_file, 'w') as f:
        # write column headings
        f.write('Date,Open,High,Low,Close,Volume\n')
        
        #for symbol in stock_symbols:
            # load share data for the symbol (Share name) and the given time period
        stock_data = yf.download(symbol, start=start_date, end=end_date)
        stock_data = stock_data[['Open', 'High', 'Low', 'Close', 'Volume']]  # Auswahl der gewünschten Spalten
        stock_data.to_csv(f, header=False)  # Schreiben der Daten in die Datei

# load time sequence

In [3]:
# Enter symbols
stock_symbols = ['ALV.DE', 'DBK.DE', 'VOW3.DE', 'BMW.DE', 'ADS.DE', 'BEI.DE', 'DTE.SG', 'SAP.DE', '1COV.DE', 'BAS.DE', 'EOAN.DE', 'RWE.DE']
start_date = '2018-10-01'
end_date = '2018-12-31'

# load and store the data for every share
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}.csv'
    download_stock_data(symbol, start_date, end_date, output_file)

    # read CSV and select select the desired columns
    df = pd.read_csv(output_file, usecols=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'])
    
    # Search CSV file for empty lines and remove them
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # filter the empty lines
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the file with the adjusted rows
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [4]:
# store the data frames for different symbols in the dictionary
dfPre = {}

for symbol in stock_symbols:
    # load CSV
    dfPre[symbol] = pd.read_csv(f'stock_data_{symbol}.csv')

    # Drop empty rows
    dfPre[symbol].dropna(inplace=True)

    # transform date to index
    dfPre[symbol].set_index('Date', inplace=True)

    # Check the number of rows
    num_rows = dfPre[symbol].shape[0]

    # If more than 60 rows, keep the last 60 rows
    if num_rows > 60:
        dfPre[symbol] = dfPre[symbol].tail(60)


In [5]:
# Check missing values
missing_values_beforePre = {symbol: dfPre[symbol].isnull().values.any() for symbol in stock_symbols}

# missing values are replaced with teh mean of the row before and after the actual row
for symbol in stock_symbols:
    for column in dfPre[symbol].columns:
        missing_valuesPre = dfPre[symbol][column].isnull()
        dfPre[symbol].loc[missing_valuesPre, column] = (dfPre[symbol][column].shift() + dfPre[symbol][column].shift(-1)) / 2

# Check if there are still any missing values 
missing_values_afterPre = {symbol: dfPre[symbol].isnull().values.any() for symbol in stock_symbols}

# Output of the missing values before and after the Treatment
for symbol in stock_symbols:
    print(f"Fehlende Werte vor der Behandlung für {symbol} gefunden:", missing_values_beforePre[symbol])
    print(f"Fehlende Werte nach der Behandlung für {symbol} gefunden:", missing_values_afterPre[symbol])

# Output of the length of the data frame for all symbols
for symbol in stock_symbols:
    print(f"Länge des Datensatzes für {symbol}:", len(dfPre[symbol]))


Fehlende Werte vor der Behandlung für ALV.DE gefunden: False
Fehlende Werte nach der Behandlung für ALV.DE gefunden: False
Fehlende Werte vor der Behandlung für DBK.DE gefunden: False
Fehlende Werte nach der Behandlung für DBK.DE gefunden: False
Fehlende Werte vor der Behandlung für VOW3.DE gefunden: False
Fehlende Werte nach der Behandlung für VOW3.DE gefunden: False
Fehlende Werte vor der Behandlung für BMW.DE gefunden: False
Fehlende Werte nach der Behandlung für BMW.DE gefunden: False
Fehlende Werte vor der Behandlung für ADS.DE gefunden: False
Fehlende Werte nach der Behandlung für ADS.DE gefunden: False
Fehlende Werte vor der Behandlung für BEI.DE gefunden: False
Fehlende Werte nach der Behandlung für BEI.DE gefunden: False
Fehlende Werte vor der Behandlung für DTE.SG gefunden: False
Fehlende Werte nach der Behandlung für DTE.SG gefunden: False
Fehlende Werte vor der Behandlung für SAP.DE gefunden: False
Fehlende Werte nach der Behandlung für SAP.DE gefunden: False
Fehlende Werte

# Data normalization prediction data

In [6]:
# load scaler
with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)
    
# min-max scale factors
min_value = scaler.data_min_
max_value = scaler.data_max_
# normalize the data for all symbols
nfPre = {}
nfPre_normalized = {}
for symbol in stock_symbols:
    # Copy the data fram and remove the column "Date" for every symbol
    nfPre[symbol] = dfPre[symbol].copy()
    
    # Remove the index name 
    nfPre[symbol].index.name = None

    # normalize the data for every symbol, except for the date (Index)
    nfPre_normalized[symbol] = scaler.transform(nfPre[symbol])

    # Create a new data frame with the normalized data and the original index for every symbol
    nfPre[symbol] = pd.DataFrame(nfPre_normalized[symbol], columns=nfPre[symbol].columns, index=nfPre[symbol].index)

    # Convert DataFrame to NumPy array for every symbol
    nfPre[symbol] = nfPre[symbol].to_numpy()
    

# Daten preparation

In [7]:
# Store X_pre for every symbol in the dictionary
X_pre_dict = {}

# Iterate over each symbol
for symbol in stock_symbols:
    
    nfPre_symbol = nfPre[symbol]
    
    # Define size for the current symbol
    nfPre_size_symbol = len(nfPre_symbol)
    
    # Add external dimension
    X_pre_symbol = np.expand_dims(nfPre_symbol[:nfPre_size_symbol], axis=0)
    
    # Transform to Tensor
    X_pre_symbol_tensor = tf.convert_to_tensor(X_pre_symbol, dtype=tf.float32)
    
    # Store in the dictionary
    X_pre_dict[symbol] = X_pre_symbol_tensor

# Model importieren

In [8]:
loaded_models = {}

for symbol in stock_symbols:
    # Load the stored model
    loaded_models[symbol] = load_model(f'Predict1Year_{symbol}.h5.keras')

In [9]:
print(loaded_models)

{'ALV.DE': <Sequential name=sequential, built=True>, 'DBK.DE': <Sequential name=sequential, built=True>, 'VOW3.DE': <Sequential name=sequential, built=True>, 'BMW.DE': <Sequential name=sequential, built=True>, 'ADS.DE': <Sequential name=sequential, built=True>, 'BEI.DE': <Sequential name=sequential, built=True>, 'DTE.SG': <Sequential name=sequential, built=True>, 'SAP.DE': <Sequential name=sequential, built=True>, '1COV.DE': <Sequential name=sequential, built=True>, 'BAS.DE': <Sequential name=sequential, built=True>, 'EOAN.DE': <Sequential name=sequential, built=True>, 'RWE.DE': <Sequential name=sequential, built=True>}


# Prediction for the unkonown sequence and transform back into monetary values

In [10]:
def prediction(symbol):
    # Predict the current share(symbol)
    predictions = loaded_models[symbol].predict(X_pre_dict[symbol])
    return predictions

In [11]:
# create dictionary to store the prediction for every symbol
predictions_dict = {}
predictions = {}

# Iterate over every symbol
for symbol in stock_symbols:

    # Prediction for the actual symbol
    predictions[symbol] = prediction(symbol)

    # Inverse transformation of the prediction
    predictions_original_scale_symbol = scaler.inverse_transform(predictions[symbol])

    # extract the prediction 20 days in the future
    prediction_20_days_ahead_symbol = predictions_original_scale_symbol[0][-2]

    # store the prediction for the current symbol
    predictions_dict[symbol] = {
        'prediction_20_days_ahead': prediction_20_days_ahead_symbol
    }

    # Output the values of the inverse transformation
    print(f"Zurücktransformierte Vorhersagen für {symbol}:")
    print(predictions_original_scale_symbol)
    print(prediction_20_days_ahead_symbol)
print(predictions_dict)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 208ms/step
Zurücktransformierte Vorhersagen für ALV.DE:
[[1.5086728e+02 1.4913466e+02 1.5042892e+02 1.4720120e+02 8.2779038e+05]]
147.2012
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step
Zurücktransformierte Vorhersagen für DBK.DE:
[[-3.2812428e+01 -4.7802361e+01 -1.1728985e+01 -1.5804074e+01
   2.0142701e+06]]
-15.804074
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 249ms/step
Zurücktransformierte Vorhersagen für VOW3.DE:
[[1.3322235e+02 1.3549811e+02 1.3520354e+02 1.3469041e+02 7.8726838e+05]]
134.69041
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
Zurücktransformierte Vorhersagen für BMW.DE:
[[8.4228035e+01 8.5057350e+01 8.3970306e+01 8.4534317e+01 1.2611940e+06]]
84.53432
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
Zurücktransformierte Vorhersagen für ADS.DE:
[[ 2.3350224e+02  2.3954001e+02  2.2566693e+02  2.1879506e+02
  -3.

# read test CSV from 2019

In [12]:
# time period
start_date = '2019-01-01'
end_date = '2019-12-31'
print(start_date)
print(end_date)

# load and store the data for every symbol
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}_2019.csv'
    download_stock_data(symbol, start_date, end_date, output_file)

    # read file
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # filter the empty rows
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the data with the cleaned rows
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))


[*********************100%%**********************]  1 of 1 completed

2018-01-01
2018-12-31



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [13]:
# load data for every symbol
for symbol in stock_symbols:
    # create name for the CSV
    csv_file = f'stock_data_{symbol}_2019.csv'
    
    # Load CSV
    df = pd.read_csv(csv_file)
    
    # Set date as index
    df.set_index('Date', inplace=True)
    
    # Select the desired column
    selected_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    df = df[selected_columns]
    
    # show data
    print(f"Daten für Symbol {symbol}:")
    print(df.head)


Daten für Symbol ALV.DE:
<bound method NDFrame.head of                   Open        High         Low       Close   Volume
Date                                                               
2018-01-02  193.100006  193.360001  190.500000  192.699997  1479063
2018-01-03  193.960007  194.000000  191.580002  193.000000  1378661
2018-01-04  194.279999  196.720001  194.179993  195.500000  1551761
2018-01-05  196.199997  198.559998  195.839996  198.559998  1319824
2018-01-08  199.660004  200.000000  198.520004  198.860001   975348
...                ...         ...         ...         ...      ...
2018-12-19  175.600006  177.199997  174.679993  175.940002  1326802
2018-12-20  173.199997  175.820007  172.860001  174.960007  1599316
2018-12-21  174.600006  175.220001  173.699997  175.020004  3334380
2018-12-27  174.679993  174.779999  170.460007  172.160004  1214929
2018-12-28  172.800003  175.919998  172.479996  175.139999   792046

[252 rows x 5 columns]>
Daten für Symbol DBK.DE:
<bound meth

# Compare predictions to actual share values

In [14]:
# list to store the mse (mean squared error) for every symbol
mse_list = []

# list to store the actual and predicted value for every symbol
all_actual_values = []
all_predicted_values = []

# Comparison of the actual and the predicted value for every symbol
for symbol in stock_symbols:
    # create name for the CSV
    csv_file = f'stock_data_{symbol}_2019.csv'
    
    # load CSV
    df = pd.read_csv(csv_file)
    
    # set date as index
    df.set_index('Date', inplace=True)
    
    # extract last row
    last_row = df.iloc[-1]
    
    # Extract the value from the "Close" column of the last row
    actual_close = last_row['Close']
    
    # Calculate the mean squared error (mse)
    mse = mean_squared_error([actual_close], [predictions_dict[symbol]['prediction_20_days_ahead']])
    
    # Add mse to the list
    mse_list.append(mse)
    
    # Create a data frame with just one row for the actual and the predicted "Close" value
    comparison_df = pd.DataFrame({'Actual': [actual_close], 'Predicted': predictions_dict[symbol]['prediction_20_days_ahead']}, index=[last_row.name])
    
    # Output of the comparison
    print(f"Vergleich für Symbol {symbol}:")
    print(comparison_df)
    print(f"MSE für Symbol {symbol}: {mse}")
    print("\n")
    
    # Add the actual and predicted value to the main list
    all_actual_values.append(actual_close)
    all_predicted_values.append(predictions_dict[symbol]['prediction_20_days_ahead'])
    
# calculate the mean squared error (mse) for all predictions
total_mse = mean_squared_error(all_actual_values, all_predicted_values)
standard_deviation = math.sqrt(total_mse)
standard_deviation_share = standard_deviation / 12

# Output of the mse for all values
print(f"Gesamter Mean Squared Error für alle Vorhersagen: {total_mse}")
print(f"Gesamte Standardabweichung für alle Vorhersagen: {standard_deviation}")
print(f"Gesamte Standardabweichung pro Aktie: {standard_deviation_share}")

# Define custom encoder class to handle float32 values
class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.float32):
            return float(obj)
        return super(NumpyEncoder, self).default(obj)

# Convert NumPy arrays in lists (if present)
predictions_dict_serializable = {symbol: predictions.tolist() if isinstance(predictions, np.ndarray) else predictions for symbol, predictions in predictions_dict.items()}
print(predictions_dict_serializable)
# extract predictions for the mean variance model
json_file = 'settings/predictions1Year_dict.json'

# Write array in JSON file
with open(json_file, 'w') as f:
    json.dump(predictions_dict_serializable, f, cls=NumpyEncoder)

Vergleich für Symbol ALV.DE:
                Actual   Predicted
2018-12-28  175.139999  147.201202
MSE für Symbol ALV.DE: 780.5763776435051


Vergleich für Symbol DBK.DE:
            Actual  Predicted
2018-12-28   6.967 -15.804074
MSE für Symbol DBK.DE: 518.5218245504111


Vergleich für Symbol VOW3.DE:
                Actual   Predicted
2018-12-28  138.919998  134.690414
MSE für Symbol VOW3.DE: 17.889378615655005


Vergleich für Symbol BMW.DE:
               Actual  Predicted
2018-12-28  70.699997  84.534317
MSE für Symbol BMW.DE: 191.38841175381094


Vergleich für Symbol ADS.DE:
                Actual   Predicted
2018-12-28  182.399994  218.795059
MSE für Symbol ADS.DE: 1324.6007787457181


Vergleich für Symbol BEI.DE:
               Actual  Predicted
2018-12-28  91.160004  97.625023
MSE für Symbol BEI.DE: 41.79647359350929


Vergleich für Symbol DTE.SG:
            Actual   Predicted
2018-12-28   13.69  189.848022
MSE für Symbol DTE.SG: 31031.649025185856


Vergleich für Symbol SAP.D