# Import

In [1]:
from keras.models import load_model
from keras.models import Model
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error
import keras
import yfinance as yf
from matplotlib.backends.backend_pdf import PdfPages
import pickle
import json
from scipy.optimize import minimize
from datetime import datetime, timedelta
import math

# read profit

In [2]:
json_file_path = 'settings/profit_statistical.json'

try:
    # read JSON file
    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)
        profit = data['profit']
except FileNotFoundError:
    # If the file can not be found, set the value of new_portfolio_value to 0
    profit = 0

print(f'Profit: {profit}')

Profit: 190.0187880492963


# Download stock data

In [3]:
def download_stock_data(stock_symbols, start_date, end_date, output_file):
    with open(output_file, 'w') as f:
        # write column headings
        f.write('Date,Open,High,Low,Close,Volume\n')
        
        #for symbol in stock_symbols:
            # load share data for the symbol (Share name) and the given time period
        stock_data = yf.download(symbol, start=start_date, end=end_date)
        stock_data = stock_data[['Open', 'High', 'Low', 'Close', 'Volume']]  # Auswahl der gewünschten Spalten
        stock_data.to_csv(f, header=False)  # Schreiben der Daten in die Datei

In [4]:
excel_file_path = 'settings/lastSequenceForPrediction.xlsx'

# store index of the excel file in this txt file
index_file_path = 'settings/lastSequenceForPrediction.txt'

# read index from the from the index file
try:
    with open(index_file_path, 'r') as index_file:
        current_row_index = int(index_file.read().strip())
except FileNotFoundError:
    current_row_index = 0

# read excel file
df_lastSeq = pd.read_excel(excel_file_path)

# Check, if index is out of bounds
if current_row_index >= len(df_lastSeq):
    print("Es gibt keine weiteren Zeilen in der Excel-Tabelle.")
else:
    # extract data from the actual row
    start_date_lastSeq = df_lastSeq.loc[current_row_index, 'start_date']
    end_date_lastSeq = df_lastSeq.loc[current_row_index, 'end_date']

    # Output of the actual data 
    print(f'Startdatum: {start_date_lastSeq}, Enddatum: {end_date_lastSeq}')

Startdatum: 2018-10-01, Enddatum: 2019-05-31


In [5]:
# Enter symbols
stock_symbols = ['ALV.DE', 'DBK.DE', 'VOW3.DE', 'BMW.DE', 'ADS.DE', 'BEI.DE', 'DTE.SG', 'SAP.DE', '1COV.DE', 'BAS.DE', 'EOAN.DE', 'RWE.DE']
start_date = '2018-01-01'
end_date = end_date_lastSeq

# load and store the data for every share
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}.csv'
    download_stock_data(symbol, start_date, end_date, output_file)

    # read CSV and select select the desired columns
    df = pd.read_csv(output_file, usecols=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'])
    
    # Search CSV file for empty lines and remove them
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # filter the empty lines
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the file with the adjusted rows
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [6]:
# store the data frames for different symbols in the dictionary
dfPre = {}

for symbol in stock_symbols:
    # load CSV
    dfPre[symbol] = pd.read_csv(f'stock_data_{symbol}.csv')

    # Drop empty rows
    dfPre[symbol].dropna(inplace=True)

    # transform date to index
    dfPre[symbol].set_index('Date', inplace=True)

    # Check the number of rows
    num_rows = dfPre[symbol].shape[0]

In [7]:
# Check missing values
missing_values_beforePre = {symbol: dfPre[symbol].isnull().values.any() for symbol in stock_symbols}

# missing values are replaced with teh mean of the row before and after the actual row
for symbol in stock_symbols:
    for column in dfPre[symbol].columns:
        missing_valuesPre = dfPre[symbol][column].isnull()
        dfPre[symbol].loc[missing_valuesPre, column] = (dfPre[symbol][column].shift() + dfPre[symbol][column].shift(-1)) / 2

# Check if there are still any missing values 
missing_values_afterPre = {symbol: dfPre[symbol].isnull().values.any() for symbol in stock_symbols}

# Output of the missing values before and after the Treatment
for symbol in stock_symbols:
    print(f"Fehlende Werte vor der Behandlung für {symbol} gefunden:", missing_values_beforePre[symbol])
    print(f"Fehlende Werte nach der Behandlung für {symbol} gefunden:", missing_values_afterPre[symbol])

# Output of the length of the data frame for all symbols
for symbol in stock_symbols:
    print(f"Länge des Datensatzes für {symbol}:", len(dfPre[symbol]))

Fehlende Werte vor der Behandlung für ALV.DE gefunden: False
Fehlende Werte nach der Behandlung für ALV.DE gefunden: False
Fehlende Werte vor der Behandlung für DBK.DE gefunden: False
Fehlende Werte nach der Behandlung für DBK.DE gefunden: False
Fehlende Werte vor der Behandlung für VOW3.DE gefunden: False
Fehlende Werte nach der Behandlung für VOW3.DE gefunden: False
Fehlende Werte vor der Behandlung für BMW.DE gefunden: False
Fehlende Werte nach der Behandlung für BMW.DE gefunden: False
Fehlende Werte vor der Behandlung für ADS.DE gefunden: False
Fehlende Werte nach der Behandlung für ADS.DE gefunden: False
Fehlende Werte vor der Behandlung für BEI.DE gefunden: False
Fehlende Werte nach der Behandlung für BEI.DE gefunden: False
Fehlende Werte vor der Behandlung für DTE.SG gefunden: False
Fehlende Werte nach der Behandlung für DTE.SG gefunden: False
Fehlende Werte vor der Behandlung für SAP.DE gefunden: False
Fehlende Werte nach der Behandlung für SAP.DE gefunden: False
Fehlende Werte

# Read close values at the begin of the time period

In [8]:
# Definition of the time period
# convert in to datetime objekt to substract days for the last day before this period
start_date_obj = datetime.strptime(end_date_lastSeq, '%Y-%m-%d')
end_date_obj = datetime.strptime(end_date_lastSeq, '%Y-%m-%d')
# Substract 5 days for the new start_date and 1 day for the new end_date
# In the next block the last date will be extracted 
new_start_date = start_date_obj - timedelta(days=5)
new_end_date = end_date_obj

print(new_start_date)
print(new_end_date)

# Load and store the data for every symbol (share)
for symbol in stock_symbols:
    output_file = f'stock_data_begin{symbol}.csv'
    download_stock_data(symbol, new_start_date, new_end_date, output_file)

    # Search and delete empty rows in the CSV file
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # Filter the empty rows
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the file with the cleaned data
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))


[*********************100%%**********************]  1 of 1 completed

2019-05-26 00:00:00
2019-05-31 00:00:00



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [9]:
# create data frame for the collected data 
combined_data_begin = pd.DataFrame(columns=['Symbol', 'Close_Value'])

# load and store the data for every symbol (share)
for symbol in stock_symbols:
    output_file = f'stock_data_begin{symbol}.csv'
    # read CSV file 
    df = pd.read_csv(output_file)

    # Select the last row only
    last_row = df.tail(1)

     # Write symbol and close value to the data frame
    data = {'Symbol': symbol, 'Close_Value': last_row['Close'].iloc[0]}
    combined_data_begin = pd.concat([combined_data_begin, pd.DataFrame(data, index=[0])], ignore_index=True)

print(combined_data_begin)

     Symbol  Close_Value
0    ALV.DE   201.350006
1    DBK.DE     6.193000
2   VOW3.DE   143.220001
3    BMW.DE    63.049999
4    ADS.DE   258.250000
5    BEI.DE   104.000000
6    DTE.SG    13.690000
7    SAP.DE   112.000000
8   1COV.DE    40.400002
9    BAS.DE    59.820000
10  EOAN.DE     9.379000
11   RWE.DE    22.340000


# load share value from 2019

In [10]:
excel_file_path = 'settings/actualMonth_startEnd.xlsx'

# Path to file, in which the index is stored
index_file_path = 'settings/actualMonthIndex.txt'

# Read the current index from the index file
try:
    with open(index_file_path, 'r') as index_file:
        current_row_index = int(index_file.read().strip())
except FileNotFoundError:
    current_row_index = 0
print(current_row_index)
# read excel file
df = pd.read_excel(excel_file_path)

# Check, if the index is out of bounds
if current_row_index >= len(df):
    print("Es gibt keine weiteren Zeilen in der Excel-Tabelle.")
else:
    # extract the data from the current row
    start_date = df.loc[current_row_index, 'start_date']
    end_date = df.loc[current_row_index, 'end_date']

    # Output of the current data
    print(f'Startdatum: {start_date}, Enddatum: {end_date}')

    # Updating the index in the index file for the next call
    with open(index_file_path, 'w') as index_file:
        index_file.write(str(current_row_index + 1))

5
Startdatum: 2019-06-01, Enddatum: 2019-06-30


In [11]:
start_date = start_date
end_date = end_date
print(f'Startdatum: {start_date}, Enddatum: {end_date}')
# Load and store the symbols for every symbol (share)
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}_2019.csv'
    download_stock_data(symbol, start_date, end_date, output_file)

    # search and delete for empty rows in the CSV 
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # Filter the empty rows
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the file with the cleaned rows
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))


[*********************100%%**********************]  1 of 1 completed

Startdatum: 2019-06-01, Enddatum: 2019-06-30



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [12]:
# list for the las close value of the shares
last_close_values = []

# load for each symbol
for symbol in stock_symbols:
    # file name of the csv data
    csv_file = f'stock_data_{symbol}_2019.csv'
    
    # load CSV data
    df = pd.read_csv(csv_file)
    
    # set date as index
    df.set_index('Date', inplace=True)
    
    # collect just the last close value and write it to the list
    last_close_value = df['Close'].iloc[-1]
    last_close_values.append((symbol, last_close_value))
print(last_close_values[0][1])
# print the list
print("Liste der letzten Close-Werte:")
for symbol, last_close_value in last_close_values:
    print(f"{symbol}: {last_close_value}")

212.0
Liste der letzten Close-Werte:
ALV.DE: 212.0
DBK.DE: 6.78000020980835
VOW3.DE: 148.22000122070312
BMW.DE: 65.08999633789062
ADS.DE: 271.5
BEI.DE: 105.5500030517578
DTE.SG: 13.6899995803833
SAP.DE: 120.76000213623048
1COV.DE: 44.709999084472656
BAS.DE: 63.91999816894531
EOAN.DE: 9.550999641418455
RWE.DE: 21.670000076293945


# Split into blocks of 20 (20 business days per month)

In [13]:
# Dictionary for the splitted data
df_split = {}

# Quantity of the blocks per symbol
block_size = 20

for symbol, df in dfPre.items():
    # Quantity of the rows in the data frame
    num_rows = len(df)

    # Split the data frame in to blocks
    split_dfs = np.array_split(df, num_rows // block_size)

    # Add the solited blocks to the dictionary
    df_split[symbol] = split_dfs

# Output result
for symbol, split_dfs in df_split.items():
    print(f"Symbol: {symbol}")
    for i, df_block in enumerate(split_dfs):
        print(f"Block {i+1}:")
        print(df_block)
        print()

Symbol: ALV.DE
Block 1:
                  Open        High         Low       Close   Volume
Date                                                               
2018-01-02  193.100006  193.360001  190.500000  192.699997  1479063
2018-01-03  193.960007  194.000000  191.580002  193.000000  1378661
2018-01-04  194.279999  196.720001  194.179993  195.500000  1551761
2018-01-05  196.199997  198.559998  195.839996  198.559998  1319824
2018-01-08  199.660004  200.000000  198.520004  198.860001   975348
2018-01-09  198.940002  200.449997  198.639999  200.449997  1150579
2018-01-10  200.000000  201.449997  198.839996  200.600006  1295873
2018-01-11  200.949997  201.350006  199.580002  200.800003  1150834
2018-01-12  201.500000  202.149994  200.250000  202.000000  1231006
2018-01-15  202.149994  202.949997  201.350006  202.100006   923834
2018-01-16  202.800003  203.899994  200.949997  202.050003  1181565
2018-01-17  201.600006  203.350006  201.399994  202.449997  1137284
2018-01-18  202.899994  

                  Open        High         Low       Close  Volume
Date                                                              
2019-04-01  218.250000  219.500000  216.600006  219.199997  800329
2019-04-02  219.850006  222.100006  219.550003  221.649994  558095
2019-04-03  223.899994  224.000000  221.949997  223.550003  575660
2019-04-04  223.500000  226.399994  221.949997  226.399994  832637
2019-04-05  227.649994  228.399994  225.149994  227.550003  787085
2019-04-08  227.600006  227.949997  224.199997  224.750000  554407
2019-04-09  224.699997  226.399994  223.800003  224.149994  613298
2019-04-10  224.449997  226.399994  224.250000  226.300003  486302
2019-04-11  226.600006  227.250000  224.850006  225.399994  458705
2019-04-12  225.350006  226.500000  224.850006  225.399994  499359
2019-04-15  225.050003  227.000000  224.250000  226.300003  348061
2019-04-16  227.449997  230.149994  227.250000  229.699997  647503
2019-04-17  229.699997  230.750000  227.949997  229.449997  50

                 Open       High        Low      Close   Volume
Date                                                           
2019-03-01  82.300003  83.540001  81.180000  83.360001  1083753
2019-03-04  83.400002  83.760002  82.519997  83.500000   608391
2019-03-05  83.500000  85.080002  83.199997  85.000000   695620
2019-03-06  84.900002  85.320000  84.400002  84.800003   572323
2019-03-07  84.680000  86.760002  84.500000  86.419998   673094
2019-03-08  85.900002  86.839996  85.639999  86.120003   412850
2019-03-11  86.199997  86.300003  84.639999  85.000000   351740
2019-03-12  85.339996  85.699997  84.720001  85.559998   316214
2019-03-13  85.500000  86.500000  85.360001  85.720001   377456
2019-03-14  85.940002  86.980003  85.760002  86.860001   444210
2019-03-15  87.000000  87.519997  86.500000  87.000000   761932
2019-03-18  87.000000  87.580002  86.800003  87.459999   338663
2019-03-19  87.339996  87.959999  87.160004  87.339996   378157
2019-03-20  89.099998  89.099998  88.320

                 Open       High        Low      Close   Volume
Date                                                           
2018-07-31  22.330000  22.600000  22.260000  22.440001  2423727
2018-08-01  22.430000  22.540001  21.730000  21.770000  4460237
2018-08-02  21.709999  21.930000  21.410000  21.469999  2721865
2018-08-03  21.510000  21.639999  20.850000  21.250000  4392703
2018-08-06  21.330000  21.360001  21.010000  21.260000  2268881
2018-08-07  21.200001  21.410000  21.200001  21.270000  2243146
2018-08-08  21.260000  21.280001  20.860001  21.049999  2926087
2018-08-09  21.049999  21.139999  20.700001  21.010000  2297340
2018-08-10  20.910000  20.959999  20.700001  20.809999  2008899
2018-08-13  20.700001  21.160000  20.629999  20.889999  2490939
2018-08-14  21.160000  21.770000  21.010000  21.650000  6048264
2018-08-15  21.740000  21.770000  21.080000  21.170000  3323842
2018-08-16  21.209999  21.270000  20.870001  21.209999  1997099
2018-08-17  21.260000  21.309999  20.830

# Calculate the return for every month

In [14]:
# Dictionary for the increase/decrease per symbol
change_per_symbol = {}

for symbol, split_dfs in df_split.items():
    changes = []

    # Iterate over every block of the current symbol
    for df_block in split_dfs:
        # Extract the first and the lat close value of the current block
        first_close = df_block.iloc[0]['Close']
        last_close = df_block.iloc[-1]['Close']

        # Calculate the change of the close value
        change = (last_close - first_close) / first_close * 100  # in Prozent
        changes.append(change)

    # Save the Change for the current symbol
    change_per_symbol[symbol] = changes

# Output result
for symbol, changes in change_per_symbol.items():
    print(f"Symbol: {symbol}")
    for i, change in enumerate(changes):
        print(f"Block {i+1}: {change:.2f}%")
    print()


Symbol: ALV.DE
Block 1: 5.73%
Block 2: -5.64%
Block 3: -2.86%
Block 4: 8.58%
Block 5: -10.61%
Block 6: -1.47%
Block 7: 7.86%
Block 8: -0.94%
Block 9: 4.27%
Block 10: -9.24%
Block 11: 4.60%
Block 12: -7.72%
Block 13: 6.00%
Block 14: 5.78%
Block 15: 0.48%
Block 16: 7.43%
Block 17: -6.37%

Symbol: DBK.DE
Block 1: -7.29%
Block 2: -10.77%
Block 3: -12.54%
Block 4: 2.49%
Block 5: -19.06%
Block 6: -2.00%
Block 7: 19.66%
Block 8: -11.86%
Block 9: 2.72%
Block 10: -16.57%
Block 11: -0.36%
Block 12: -19.53%
Block 13: 12.81%
Block 14: 5.36%
Block 15: -11.17%
Block 16: -1.82%
Block 17: -15.70%

Symbol: VOW3.DE
Block 1: 7.79%
Block 2: -8.98%
Block 3: 2.69%
Block 4: 9.52%
Block 5: -8.58%
Block 6: -12.29%
Block 7: 7.34%
Block 8: -5.81%
Block 9: 5.69%
Block 10: -11.04%
Block 11: 8.26%
Block 12: -6.14%
Block 13: 8.19%
Block 14: 1.53%
Block 15: -8.99%
Block 16: 11.27%
Block 17: -10.64%

Symbol: BMW.DE
Block 1: 7.22%
Block 2: -5.60%
Block 3: 3.34%
Block 4: 4.97%
Block 5: -7.36%
Block 6: -9.61%
Block 7: 6.

# Calculate the mean return over the period of 4 years

In [15]:
# Dictionary for the mean return per symbol
average_return_per_symbol = {}

for symbol, changes in change_per_symbol.items():
    # Calculate the mean return for the current symbol
    average_return = sum(changes) / len(changes)
    average_return_per_symbol[symbol] = average_return

# Output result
for symbol, avg_return in average_return_per_symbol.items():
    print(f"Durchschnittliche Rendite für {symbol}: {avg_return:.2f}%")
    
# Convert mean return to DataFrame
average_return_per_symbol_df = pd.DataFrame(list(average_return_per_symbol.items()), columns=['Symbol', 'Average_Return'])
print(average_return_per_symbol_df)


Durchschnittliche Rendite für ALV.DE: 0.35%
Durchschnittliche Rendite für DBK.DE: -5.04%
Durchschnittliche Rendite für VOW3.DE: -0.60%
Durchschnittliche Rendite für BMW.DE: -1.49%
Durchschnittliche Rendite für ADS.DE: 2.66%
Durchschnittliche Rendite für BEI.DE: 0.56%
Durchschnittliche Rendite für DTE.SG: -0.27%
Durchschnittliche Rendite für SAP.DE: 1.26%
Durchschnittliche Rendite für 1COV.DE: -3.60%
Durchschnittliche Rendite für BAS.DE: -1.98%
Durchschnittliche Rendite für EOAN.DE: 0.12%
Durchschnittliche Rendite für RWE.DE: 1.75%
     Symbol  Average_Return
0    ALV.DE        0.345628
1    DBK.DE       -5.037557
2   VOW3.DE       -0.599211
3    BMW.DE       -1.493103
4    ADS.DE        2.656835
5    BEI.DE        0.556415
6    DTE.SG       -0.265045
7    SAP.DE        1.263489
8   1COV.DE       -3.601562
9    BAS.DE       -1.981975
10  EOAN.DE        0.120209
11   RWE.DE        1.746373


# Accuracy of the mean return to the actual return

In [16]:
MonetarizedMeanReturn = {}
mse_per_symbol = {}
# list to store the actual and predicted value for every symbol
all_actual_values = []
all_predicted_values = []
for symbol in stock_symbols:
    # Get the close value for the current symbol
    value_begin = combined_data_begin.loc[combined_data_begin['Symbol'] == symbol, 'Close_Value'].values
    # Get the close value for the current symbol from last_close_values
    value_end = next(value for sym, value in last_close_values if sym == symbol)
    # Get the average return for the current symbol
    average_return = average_return_per_symbol_df.loc[average_return_per_symbol_df['Symbol'] == symbol, 'Average_Return'].values
    MonetarizedMeanReturn[symbol] = value_begin * (1 + (average_return/100))
    mse = mean_squared_error([value_end], [MonetarizedMeanReturn[symbol]])
    #Print(mse)
    
    #dd mse to the list
    mse_per_symbol[symbol] = mse
    
    # Add the actual and predicted value to the main list
    all_actual_values.append(value_end)
    all_predicted_values.append(MonetarizedMeanReturn[symbol])
    
total_mse = mean_squared_error(all_actual_values, all_predicted_values)
standard_deviation = math.sqrt(total_mse)
standard_deviation_share = standard_deviation / 12

# Output of the mse for all values
print(f"Gesamter Mean Squared Error für alle Vorhersagen: {total_mse}")
print(f"Gesamte Standardabweichung für alle Vorhersagen: {standard_deviation}")
print(f"Gesamte Standardabweichung pro Aktie: {standard_deviation_share}")

Gesamter Mean Squared Error für alle Vorhersagen: 25.09416215566351
Gesamte Standardabweichung für alle Vorhersagen: 5.009407365713384
Gesamte Standardabweichung pro Aktie: 0.4174506138094487


# Standard deviation

In [17]:
# Dictionary for the standard deviation per symbol
standard_deviation_per_symbol = {}

for symbol, changes in change_per_symbol.items():
    
    # Calculate the difference to the power of 2
    squared_diff = [(change - average_return_per_symbol[symbol]) ** 2 for change in changes]
    
    # Sum up the differences
    sum_squared_diff = sum(squared_diff)

    # Calculate the standard deviation
    standard_deviation = np.sqrt(sum_squared_diff / (len(changes) - 1))
    
    # Save the standard deviation in the dictionary
    standard_deviation_per_symbol[symbol] = standard_deviation

# Output result
for symbol, std_dev in standard_deviation_per_symbol.items():
    print(f"Standardabweichung für {symbol}: {std_dev:.2f}")
    
# Convert mean return to DataFrame
standard_deviation_per_symbol_df = pd.DataFrame(list(standard_deviation_per_symbol.items()), columns=['Symbol', 'Standard_Deviation'])
print(standard_deviation_per_symbol_df)

Standardabweichung für ALV.DE: 6.48
Standardabweichung für DBK.DE: 11.26
Standardabweichung für VOW3.DE: 8.64
Standardabweichung für BMW.DE: 7.12
Standardabweichung für ADS.DE: 6.46
Standardabweichung für BEI.DE: 5.03
Standardabweichung für DTE.SG: 4.08
Standardabweichung für SAP.DE: 6.05
Standardabweichung für 1COV.DE: 9.94
Standardabweichung für BAS.DE: 7.40
Standardabweichung für EOAN.DE: 4.92
Standardabweichung für RWE.DE: 9.91
     Symbol  Standard_Deviation
0    ALV.DE            6.479179
1    DBK.DE           11.255463
2   VOW3.DE            8.644622
3    BMW.DE            7.117380
4    ADS.DE            6.464889
5    BEI.DE            5.029820
6    DTE.SG            4.075733
7    SAP.DE            6.054960
8   1COV.DE            9.940515
9    BAS.DE            7.403578
10  EOAN.DE            4.920782
11   RWE.DE            9.908389


# Historic data from 2018 for the covarianz matrix

In [18]:
excel_file_path = 'settings/lastSequenceForPrediction.xlsx'

# path to the data, in which the index is stored
index_file_path = 'settings/lastSequenceForPrediction.txt'

# read the current index from the index file
try:
    with open(index_file_path, 'r') as index_file:
        current_row_index = int(index_file.read().strip())
except FileNotFoundError:
    current_row_index = 0

# read excel file
df_lastSeq = pd.read_excel(excel_file_path)

# Check, if index is out of bounds
if current_row_index >= len(df_lastSeq):
    print("Es gibt keine weiteren Zeilen in der Excel-Tabelle.")
else:
    # extract data from the current row
    start_date_lastSeq = df_lastSeq.loc[current_row_index, 'start_date']
    end_date_lastSeq = df_lastSeq.loc[current_row_index, 'end_date']
    
    # Output of the current data 
    print(f'Startdatum: {start_date_lastSeq}, Enddatum: {end_date_lastSeq}')

    # Update the index in the index file for the next call
    with open(index_file_path, 'w') as index_file:
        index_file.write(str(current_row_index + 1))

Startdatum: 2018-10-01, Enddatum: 2019-05-31


In [19]:
# Definition of the time period
start_date2018 = '2018-01-01'
end_date2018 = end_date_lastSeq
print(end_date2018)
# Load and store the data for every symbol (share)
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}_2018.csv'
    download_stock_data(symbol, start_date2018, end_date2018, output_file)

    # Search and delete empty rows in the CSV file
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # Filter the empty rows
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the file with the cleaned rows
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))


[*********************100%%**********************]  1 of 1 completed

2019-05-31



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


# Combine historical data to one file

In [20]:
# Data frame for the combined data
combined_historical2018_data = pd.DataFrame()

# load and combine the data for each share
for symbol in stock_symbols:
    # file name for the csv file
    csv_file = f'stock_data_{symbol}_2018.csv'
    
    # read CSV file and set date to index
    df = pd.read_csv(csv_file, index_col='Date', parse_dates=True)
    
    # change name of the close column
    df.rename(columns={'Close': symbol}, inplace=True)
    
    # write data to the combined file
    combined_historical2018_data = pd.concat([combined_historical2018_data, df[symbol]], axis=1)

# drop rows with NaN values
combined_historical2018_data.dropna(inplace=True)
    
# create a CSV file for the data
combined_historical2018_data.to_csv('combined_historical2018_stock_data.csv')

print(combined_historical2018_data)

                ALV.DE     DBK.DE     VOW3.DE     BMW.DE      ADS.DE  \
2018-01-02  192.699997  15.958000  165.699997  86.400002  167.149994   
2018-01-03  193.000000  15.910000  171.440002  86.860001  168.050003   
2018-01-04  195.500000  16.332001  174.440002  87.480003  170.250000   
2018-01-05  198.559998  15.490000  179.199997  88.500000  172.050003   
2018-01-08  198.860001  15.340000  179.839996  89.669998  172.750000   
...                ...        ...         ...        ...         ...   
2019-05-24  202.000000   6.367000  142.860001  63.470001  259.250000   
2019-05-27  202.800003   6.447000  144.059998  63.759998  261.850006   
2019-05-28  201.649994   6.305000  144.839996  63.840000  264.049988   
2019-05-29  199.080002   6.152000  143.279999  63.150002  258.600006   
2019-05-30  201.350006   6.193000  143.220001  63.049999  258.250000   

                BEI.DE  DTE.SG      SAP.DE    1COV.DE     BAS.DE  EOAN.DE  \
2018-01-02   96.199997  14.872   92.800003  85.639999  91.

# Mean-Variance Optimization

In [21]:
def mean_variance_optimization(expected_returns, standard_deviations, covariance_matrix):
    n = len(expected_returns)
    initial_weights = np.array([1/n] * n)  # Start value of the weighs
    bounds = [(0, 1)] * n  # Border of the weighs (0-100% for every symbol)

    # Minimize the negative sharpe ratio
    def negative_sharpe(weights, expected_returns, standard_deviations, covariance_matrix):
        portfolio_return = np.dot(weights, expected_returns)
        portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(covariance_matrix, weights)))
        return -portfolio_return / portfolio_volatility

    result = minimize(negative_sharpe, initial_weights, args=(expected_returns, standard_deviations, covariance_matrix), bounds=bounds, constraints={'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    return result.x

In [22]:
def allocate_portfolio(historical_data, initial_capital, expected_returns_df, standard_deviations_df):
    # Calculate expected returns based on the predictions
    expected_returns = expected_returns_df['Average_Return'].values
    
    # Calculate standard deviations based on the predictions
    standard_deviations = standard_deviations_df['Standard_Deviation'].values
    
    # Calculate the covariance matrix of the returns
    covariance_matrix = historical_data.cov()
           
    print(covariance_matrix)
    
    # Perform mean-variance optimization to obtain optimal weights
    optimal_weights = mean_variance_optimization(expected_returns, standard_deviations, covariance_matrix)

    # Calculate the allocation of assets based on the optimal weights and the available capital
    asset_allocation = initial_capital * optimal_weights

    return asset_allocation

In [23]:
initial_capital = 1000 + profit # 1000€ start capital
portfolio_allocation = allocate_portfolio(combined_historical2018_data, initial_capital, average_return_per_symbol_df, standard_deviation_per_symbol_df)
print(portfolio_allocation)

            ALV.DE     DBK.DE     VOW3.DE      BMW.DE      ADS.DE     BEI.DE  \
ALV.DE   91.146144   2.009172   45.481596    2.148158   83.415180   3.249461   
DBK.DE    2.009172   5.463435   21.107106   15.570270  -28.870681   0.949123   
VOW3.DE  45.481596  21.107106  148.222470   72.805664  -74.624370   3.161183   
BMW.DE    2.148158  15.570270   72.805664   58.005903  -78.985916   6.142641   
ADS.DE   83.415180 -28.870681  -74.624370  -78.985916  358.192749  16.066889   
BEI.DE    3.249461   0.949123    3.161183    6.142641   16.066889  19.747811   
DTE.SG    1.180297   0.260019    1.480631    0.723540   -0.428176   0.608965   
SAP.DE   26.793063  -7.465527  -28.460259  -19.361958   97.578746  21.341105   
1COV.DE  -7.727993  35.238155  124.704878  114.789646 -188.466414  18.536195   
BAS.DE    8.956294  21.352349   91.952394   73.378054 -103.292611  13.995387   
EOAN.DE   1.254359  -0.525715   -1.520625   -1.261739    3.678322   0.287564   
RWE.DE    5.440847  -2.985953  -10.74003

In [24]:
# Initialisation of the portfolio
portfolio_value = 0
# initialize the list for the quantity of the shares, which have to be purchased
shares_to_buy_list = []
# Purchase the shares based on the allocations
for i, allocation in enumerate(portfolio_allocation):
    # Calculate the quantity of the shares, which should be purchased with this allocation
    shares_to_buy = (allocation) / combined_data_begin['Close_Value'][i]
    shares_to_buy_list.append(shares_to_buy)
    print(shares_to_buy)

    # Calculate the value of the stock of the purchased shares
    value_of_stock = shares_to_buy * combined_data_begin['Close_Value'][i]
    print(value_of_stock)
    
    # Add the value of the purchased share to the portfolio
    portfolio_value += value_of_stock
    print(portfolio_value)
    print("-----------------------------")

# Output of the total value of the portfolio at the end of the period to be predicted
print("Gesamtwert des Portfolios am Ende des ersten Monats von 2019:", portfolio_value)
print(shares_to_buy_list)

0.0
0.0
0.0
-----------------------------
0.0
0.0
0.0
-----------------------------
0.4672887318993654
66.92509275304792
66.92509275304792
-----------------------------
1.275756834646368e-11
8.043646745112828e-10
66.92509275385228
-----------------------------
0.0
0.0
66.92509275385228
-----------------------------
9.570229357953807e-11
9.953038532271959e-09
66.92509276380532
-----------------------------
6.444051691739479e-11
8.821906495588177e-10
66.92509276468752
-----------------------------
1.0301728729370442e-10
1.1537936176894895e-08
66.92509277622545
-----------------------------
0.1369306982985966
5.532000420202968
72.45709319642842
-----------------------------
1.0163064400322468e-11
6.079545093257689e-10
72.45709319703637
-----------------------------
1.1981264837005212e-10
1.1237227943269962e-09
72.4570931981601
-----------------------------
50.02514267048557
1117.5616948918787
1190.0187880900387
-----------------------------
Gesamtwert des Portfolios am Ende des ersten Mon

In [25]:
# Define portfolio value
new_portfolio_value = 0
# Loop over the indices of the two lists
for i in range(len(shares_to_buy_list)):
    # convert to float
    last_close_value = float(last_close_values[i][1])
    print(last_close_value)
    print(shares_to_buy_list[i])
    #Calculate the new value of the share and add the value to the list
    new_portfolio_value += (last_close_value * shares_to_buy_list[i])
    print(new_portfolio_value)
    print("----------------------------")

print("Wert des Portfolios:")
print(new_portfolio_value)

212.0
0.0
0.0
----------------------------
6.78000020980835
0.0
0.0
----------------------------
148.22000122070312
0.4672887318993654
69.26153641254476
----------------------------
65.08999633789062
1.275756834646368e-11
69.26153641337515
----------------------------
271.5
0.0
69.26153641337515
----------------------------
105.5500030517578
9.570229357953807e-11
69.26153642347653
----------------------------
13.6899995803833
6.444051691739479e-11
69.26153642435872
----------------------------
120.76000213623048
1.0301728729370442e-10
69.26153643679909
----------------------------
44.709999084472656
0.1369306982985966
75.38370783236554
----------------------------
63.91999816894531
1.0163064400322468e-11
75.38370783301517
----------------------------
9.550999641418455
1.1981264837005212e-10
75.3837078341595
----------------------------
21.670000076293945
50.02514267048557
1159.4285533201971
----------------------------
Wert des Portfolios:
1159.4285533201971


In [26]:
# just the profit, to store it
profit = new_portfolio_value - 1000

# path to JSON file
json_file_path = 'settings/profit_statistical.json'

# Create Dictionary with the value of new_portfolio_value
data = {'profit': profit}

# Write JSON file
with open(json_file_path, 'w') as json_file:
    json.dump(data, json_file)

print(f'Wert von profit wurde erfolgreich in {json_file_path} gespeichert.')

Wert von profit wurde erfolgreich in settings/profit_statistical.json gespeichert.
