# Import

In [1]:
from keras.models import load_model
from keras.models import Model
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error
import keras
import yfinance as yf
from matplotlib.backends.backend_pdf import PdfPages
import pickle
import json
from scipy.optimize import minimize
from datetime import datetime, timedelta

# read profit

In [2]:
json_file_path = 'settings/profit_statistical.json'

try:
    # read JSON file
    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)
        profit = data['profit']
except FileNotFoundError:
    # If the file can not be found, set the value of new_portfolio_value to 0
    profit = 0

print(f'Profit: {profit}')

Profit: 74.0635302055548


# Download stock data

In [3]:
def download_stock_data(stock_symbols, start_date, end_date, output_file):
    with open(output_file, 'w') as f:
        # write column headings
        f.write('Date,Open,High,Low,Close,Volume\n')
        
        #for symbol in stock_symbols:
            # load share data for the symbol (Share name) and the given time period
        stock_data = yf.download(symbol, start=start_date, end=end_date)
        stock_data = stock_data[['Open', 'High', 'Low', 'Close', 'Volume']]  # Auswahl der gewünschten Spalten
        stock_data.to_csv(f, header=False)  # Schreiben der Daten in die Datei

In [4]:
excel_file_path = 'settings/lastSequenceForPrediction.xlsx'

# store index of the excel file in this txt file
index_file_path = 'settings/lastSequenceForPrediction.txt'

# read index from the from the index file
try:
    with open(index_file_path, 'r') as index_file:
        current_row_index = int(index_file.read().strip())
except FileNotFoundError:
    current_row_index = 0

# read excel file
df_lastSeq = pd.read_excel(excel_file_path)

# Check, if index is out of bounds
if current_row_index >= len(df_lastSeq):
    print("Es gibt keine weiteren Zeilen in der Excel-Tabelle.")
else:
    # extract data from the actual row
    start_date_lastSeq = df_lastSeq.loc[current_row_index, 'start_date']
    end_date_lastSeq = df_lastSeq.loc[current_row_index, 'end_date']

    # Output of the actual data 
    print(f'Startdatum: {start_date_lastSeq}, Enddatum: {end_date_lastSeq}')

Startdatum: 2023-10-01, Enddatum: 2024-03-31


In [5]:
# Enter symbols
stock_symbols = ['ALV.DE', 'DBK.DE', 'VOW3.DE', 'BMW.DE', 'ADS.DE', 'BEI.DE', 'DTE.SG', 'SAP.DE', '1COV.DE', 'BAS.DE', 'EOAN.DE', 'RWE.DE']
start_date = '2023-01-01'
end_date = end_date_lastSeq

# load and store the data for every share
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}.csv'
    download_stock_data(symbol, start_date, end_date, output_file)

    # read CSV and select select the desired columns
    df = pd.read_csv(output_file, usecols=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'])
    
    # Search CSV file for empty lines and remove them
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # filter the empty lines
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the file with the adjusted rows
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [6]:
# store the data frames for different symbols in the dictionary
dfPre = {}

for symbol in stock_symbols:
    # load CSV
    dfPre[symbol] = pd.read_csv(f'stock_data_{symbol}.csv')

    # Drop empty rows
    dfPre[symbol].dropna(inplace=True)

    # transform date to index
    dfPre[symbol].set_index('Date', inplace=True)

    # Check the number of rows
    num_rows = dfPre[symbol].shape[0]

In [7]:
# Check missing values
missing_values_beforePre = {symbol: dfPre[symbol].isnull().values.any() for symbol in stock_symbols}

# missing values are replaced with teh mean of the row before and after the actual row
for symbol in stock_symbols:
    for column in dfPre[symbol].columns:
        missing_valuesPre = dfPre[symbol][column].isnull()
        dfPre[symbol].loc[missing_valuesPre, column] = (dfPre[symbol][column].shift() + dfPre[symbol][column].shift(-1)) / 2

# Check if there are still any missing values 
missing_values_afterPre = {symbol: dfPre[symbol].isnull().values.any() for symbol in stock_symbols}

# Output of the missing values before and after the Treatment
for symbol in stock_symbols:
    print(f"Fehlende Werte vor der Behandlung für {symbol} gefunden:", missing_values_beforePre[symbol])
    print(f"Fehlende Werte nach der Behandlung für {symbol} gefunden:", missing_values_afterPre[symbol])

# Output of the length of the data frame for all symbols
for symbol in stock_symbols:
    print(f"Länge des Datensatzes für {symbol}:", len(dfPre[symbol]))

Fehlende Werte vor der Behandlung für ALV.DE gefunden: False
Fehlende Werte nach der Behandlung für ALV.DE gefunden: False
Fehlende Werte vor der Behandlung für DBK.DE gefunden: False
Fehlende Werte nach der Behandlung für DBK.DE gefunden: False
Fehlende Werte vor der Behandlung für VOW3.DE gefunden: False
Fehlende Werte nach der Behandlung für VOW3.DE gefunden: False
Fehlende Werte vor der Behandlung für BMW.DE gefunden: False
Fehlende Werte nach der Behandlung für BMW.DE gefunden: False
Fehlende Werte vor der Behandlung für ADS.DE gefunden: False
Fehlende Werte nach der Behandlung für ADS.DE gefunden: False
Fehlende Werte vor der Behandlung für BEI.DE gefunden: False
Fehlende Werte nach der Behandlung für BEI.DE gefunden: False
Fehlende Werte vor der Behandlung für DTE.SG gefunden: False
Fehlende Werte nach der Behandlung für DTE.SG gefunden: False
Fehlende Werte vor der Behandlung für SAP.DE gefunden: False
Fehlende Werte nach der Behandlung für SAP.DE gefunden: False
Fehlende Werte

# Read close values at the begin of the time period

In [8]:
# Definition of the time period
# convert in to datetime objekt to substract days for the last day before this period
start_date_obj = datetime.strptime(end_date_lastSeq, '%Y-%m-%d')
end_date_obj = datetime.strptime(end_date_lastSeq, '%Y-%m-%d')
# Substract 5 days for the new start_date and 1 day for the new end_date
# In the next block the last date will be extracted 
new_start_date = start_date_obj - timedelta(days=5)
new_end_date = end_date_obj

print(new_start_date)
print(new_end_date)

# Load and store the data for every symbol (share)
for symbol in stock_symbols:
    output_file = f'stock_data_begin{symbol}.csv'
    download_stock_data(symbol, new_start_date, new_end_date, output_file)

    # Search and delete empty rows in the CSV file
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # Filter the empty rows
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the file with the cleaned data
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))


[*********************100%%**********************]  1 of 1 completed

2024-03-26 00:00:00
2024-03-31 00:00:00



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [9]:
# create data frame for the collected data 
combined_data_begin = pd.DataFrame(columns=['Symbol', 'Close_Value'])

# load and store the data for every symbol (share)
for symbol in stock_symbols:
    output_file = f'stock_data_begin{symbol}.csv'
    # read CSV file 
    df = pd.read_csv(output_file)

    # Select the last row only
    last_row = df.tail(1)

     # Write symbol and close value to the data frame
    data = {'Symbol': symbol, 'Close_Value': last_row['Close'].iloc[0]}
    combined_data_begin = pd.concat([combined_data_begin, pd.DataFrame(data, index=[0])], ignore_index=True)

print(combined_data_begin)

     Symbol  Close_Value
0    ALV.DE   277.799988
1    DBK.DE    14.582000
2   VOW3.DE   122.839996
3    BMW.DE   106.959999
4    ADS.DE   207.000000
5    BEI.DE   134.949997
6    DTE.SG    22.434999
7    SAP.DE   180.460007
8   1COV.DE    50.680000
9    BAS.DE    52.930000
10  EOAN.DE    12.885000
11   RWE.DE    31.459999


# load share value from 2019

In [10]:
excel_file_path = 'settings/actualMonth_startEnd.xlsx'

# Path to file, in which the index is stored
index_file_path = 'settings/actualMonthIndex.txt'

# Read the current index from the index file
try:
    with open(index_file_path, 'r') as index_file:
        current_row_index = int(index_file.read().strip())
except FileNotFoundError:
    current_row_index = 0
print(current_row_index)
# read excel file
df = pd.read_excel(excel_file_path)

# Check, if the index is out of bounds
if current_row_index >= len(df):
    print("Es gibt keine weiteren Zeilen in der Excel-Tabelle.")
else:
    # extract the data from the current row
    start_date = df.loc[current_row_index, 'start_date']
    end_date = df.loc[current_row_index, 'end_date']

    # Output of the current data
    print(f'Startdatum: {start_date}, Enddatum: {end_date}')

    # Updating the index in the index file for the next call
    with open(index_file_path, 'w') as index_file:
        index_file.write(str(current_row_index + 1))

15
Startdatum: 2024-04-01, Enddatum: 2024-04-30


In [11]:
start_date = start_date
end_date = end_date
print(f'Startdatum: {start_date}, Enddatum: {end_date}')
# Load and store the symbols for every symbol (share)
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}_2019.csv'
    download_stock_data(symbol, start_date, end_date, output_file)

    # search and delete for empty rows in the CSV 
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # Filter the empty rows
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the file with the cleaned rows
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))


[*********************100%%**********************]  1 of 1 completed

Startdatum: 2024-04-01, Enddatum: 2024-04-30



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [12]:
# list for the las close value of the shares
last_close_values = []

# load for each symbol
for symbol in stock_symbols:
    # file name of the csv data
    csv_file = f'stock_data_{symbol}_2019.csv'
    
    # load CSV data
    df = pd.read_csv(csv_file)
    
    # set date as index
    df.set_index('Date', inplace=True)
    
    # collect just the last close value and write it to the list
    last_close_value = df['Close'].iloc[-1]
    last_close_values.append((symbol, last_close_value))
print(last_close_values[0][1])
# print the list
print("Liste der letzten Close-Werte:")
for symbol, last_close_value in last_close_values:
    print(f"{symbol}: {last_close_value}")

266.29998779296875
Liste der letzten Close-Werte:
ALV.DE: 266.29998779296875
DBK.DE: 15.104000091552734
VOW3.DE: 120.75
BMW.DE: 106.8000030517578
ADS.DE: 232.3000030517578
BEI.DE: 139.4499969482422
DTE.SG: 21.71999931335449
SAP.DE: 171.4199981689453
1COV.DE: 47.54999923706055
BAS.DE: 49.05500030517578
EOAN.DE: 12.46500015258789
RWE.DE: 33.0


# Split into blocks of 20 (20 business days per month)

In [13]:
# Dictionary for the splitted data
df_split = {}

# Quantity of the blocks per symbol
block_size = 20

for symbol, df in dfPre.items():
    # Quantity of the rows in the data frame
    num_rows = len(df)

    # Split the data frame in to blocks
    split_dfs = np.array_split(df, num_rows // block_size)

    # Add the solited blocks to the dictionary
    df_split[symbol] = split_dfs

# Output result
for symbol, split_dfs in df_split.items():
    print(f"Symbol: {symbol}")
    for i, df_block in enumerate(split_dfs):
        print(f"Block {i+1}:")
        print(df_block)
        print()

Symbol: ALV.DE
Block 1:
                  Open        High         Low       Close   Volume
Date                                                               
2023-01-02  201.500000  203.199997  200.699997  203.050003   513902
2023-01-03  202.350006  205.949997  202.300003  205.199997   747880
2023-01-04  205.600006  211.649994  205.550003  211.500000  1561576
2023-01-05  210.800003  211.050003  208.750000  209.550003   753061
2023-01-06  210.000000  211.800003  209.100006  211.800003   829185
2023-01-09  213.050003  215.149994  212.800003  213.649994  1123011
2023-01-10  213.699997  216.300003  213.600006  216.300003   837089
2023-01-11  217.100006  217.100006  214.199997  215.399994   922664
2023-01-12  216.300003  218.149994  215.199997  217.699997  1016058
2023-01-13  217.750000  219.300003  216.850006  217.750000  1076232
2023-01-16  218.449997  220.100006  218.149994  219.000000   937047
2023-01-17  219.500000  219.600006  217.949997  219.000000   806713
2023-01-18  219.050003  

                  Open        High         Low       Close  Volume
Date                                                              
2023-04-04  121.699997  122.250000  120.800003  120.900002  196739
2023-04-05  121.400002  124.099998  119.500000  122.800003  702000
2023-04-06  122.900002  122.900002  120.050003  121.300003  347155
2023-04-11  121.250000  121.750000  120.949997  121.400002  257463
2023-04-12  121.400002  121.599998  120.199997  120.900002  252316
2023-04-13  121.300003  122.699997  121.300003  121.849998  309206
2023-04-14  121.550003  121.599998  120.199997  120.500000  198769
2023-04-17  120.650002  122.000000  120.550003  121.449997  252386
2023-04-18  121.449997  122.250000  120.650002  122.250000  254201
2023-04-19  122.449997  124.050003  122.449997  123.300003  280607
2023-04-20  123.550003  124.650002  123.099998  124.099998  272213
2023-04-21  123.900002  126.199997  123.900002  125.800003  285413
2023-04-24  125.650002  125.800003  124.750000  125.300003  18

                 Open       High        Low      Close   Volume
Date                                                           
2024-02-29  30.840000  31.379999  30.580000  31.000000  4775663
2024-03-01  30.920000  31.280001  30.549999  30.660000  3647263
2024-03-04  30.770000  30.920000  30.490000  30.600000  2814596
2024-03-05  30.760000  31.709999  30.600000  31.510000  3487667
2024-03-06  31.500000  32.060001  31.410000  31.740000  4117148
2024-03-07  31.809999  32.580002  31.600000  32.230000  3256248
2024-03-08  32.380001  32.680000  31.799999  31.840000  3198288
2024-03-11  31.950001  32.200001  31.420000  31.520000  2210842
2024-03-12  31.570000  31.910000  31.240000  31.480000  3064367
2024-03-13  31.799999  32.259998  31.400000  31.740000  3951445
2024-03-14  32.360001  32.910000  30.620001  30.620001  7639018
2024-03-15  30.850000  31.469999  30.670000  31.010000  5439149
2024-03-18  30.719999  31.490000  30.719999  30.940001  2328260
2024-03-19  31.010000  31.049999  30.430

# Calculate the return for every month

In [14]:
# Dictionary for the increase/decrease per symbol
change_per_symbol = {}

for symbol, split_dfs in df_split.items():
    changes = []

    # Iterate over every block of the current symbol
    for df_block in split_dfs:
        # Extract the first and the lat close value of the current block
        first_close = df_block.iloc[0]['Close']
        last_close = df_block.iloc[-1]['Close']

        # Calculate the change of the close value
        change = (last_close - first_close) / first_close * 100  # in Prozent
        changes.append(change)

    # Save the Change for the current symbol
    change_per_symbol[symbol] = changes

# Output result
for symbol, changes in change_per_symbol.items():
    print(f"Symbol: {symbol}")
    for i, change in enumerate(changes):
        print(f"Block {i+1}: {change:.2f}%")
    print()


Symbol: ALV.DE
Block 1: 7.95%
Block 2: 0.00%
Block 3: -3.79%
Block 4: 1.29%
Block 5: -3.25%
Block 6: 1.28%
Block 7: 2.02%
Block 8: 6.08%
Block 9: 0.65%
Block 10: -0.74%
Block 11: 4.37%
Block 12: 4.74%
Block 13: 1.08%
Block 14: 0.30%
Block 15: 9.43%

Symbol: DBK.DE
Block 1: 11.57%
Block 2: -6.56%
Block 3: -18.88%
Block 4: 3.61%
Block 5: 0.23%
Block 6: -1.56%
Block 7: 3.01%
Block 8: 0.68%
Block 9: 3.63%
Block 10: 2.89%
Block 11: 7.09%
Block 12: 9.69%
Block 13: -3.06%
Block 14: 3.01%
Block 15: 17.96%

Symbol: VOW3.DE
Block 1: 5.75%
Block 2: 0.16%
Block 3: -11.49%
Block 4: 1.40%
Block 5: -3.60%
Block 6: -0.42%
Block 7: -3.69%
Block 8: -4.88%
Block 9: 0.57%
Block 10: -9.05%
Block 11: 5.26%
Block 12: 4.60%
Block 13: 3.33%
Block 14: 5.04%
Block 15: -1.99%

Symbol: BMW.DE
Block 1: 8.66%
Block 2: 4.28%
Block 3: -0.73%
Block 4: 6.12%
Block 5: -2.28%
Block 6: 4.80%
Block 7: -6.35%
Block 8: -5.64%
Block 9: 2.31%
Block 10: -8.80%
Block 11: 6.31%
Block 12: 4.90%
Block 13: -6.66%
Block 14: 12.79%
Blo

# Calculate the mean return over the period of 4 years

In [15]:
# Dictionary for the mean return per symbol
average_return_per_symbol = {}

for symbol, changes in change_per_symbol.items():
    # Calculate the mean return for the current symbol
    average_return = sum(changes) / len(changes)
    average_return_per_symbol[symbol] = average_return

# Output result
for symbol, avg_return in average_return_per_symbol.items():
    print(f"Durchschnittliche Rendite für {symbol}: {avg_return:.2f}%")
    
# Convert mean return to DataFrame
average_return_per_symbol_df = pd.DataFrame(list(average_return_per_symbol.items()), columns=['Symbol', 'Average_Return'])
print(average_return_per_symbol_df)


Durchschnittliche Rendite für ALV.DE: 2.09%
Durchschnittliche Rendite für DBK.DE: 2.22%
Durchschnittliche Rendite für VOW3.DE: -0.60%
Durchschnittliche Rendite für BMW.DE: 1.18%
Durchschnittliche Rendite für ADS.DE: 3.60%
Durchschnittliche Rendite für BEI.DE: 1.63%
Durchschnittliche Rendite für DTE.SG: 1.30%
Durchschnittliche Rendite für SAP.DE: 4.20%
Durchschnittliche Rendite für 1COV.DE: 1.75%
Durchschnittliche Rendite für BAS.DE: 0.70%
Durchschnittliche Rendite für EOAN.DE: 2.15%
Durchschnittliche Rendite für RWE.DE: -1.28%
     Symbol  Average_Return
0    ALV.DE        2.094361
1    DBK.DE        2.219564
2   VOW3.DE       -0.600645
3    BMW.DE        1.176689
4    ADS.DE        3.599600
5    BEI.DE        1.631362
6    DTE.SG        1.304731
7    SAP.DE        4.196955
8   1COV.DE        1.746802
9    BAS.DE        0.701437
10  EOAN.DE        2.147257
11   RWE.DE       -1.281022


In [16]:
# Dictionary for the standard deviation per symbol
standard_deviation_per_symbol = {}

for symbol, changes in change_per_symbol.items():
    
    # Calculate the difference to the power of 2
    squared_diff = [(change - average_return_per_symbol[symbol]) ** 2 for change in changes]
    
    # Sum up the differences
    sum_squared_diff = sum(squared_diff)

    # Calculate the standard deviation
    standard_deviation = np.sqrt(sum_squared_diff / (len(changes) - 1))
    
    # Save the standard deviation in the dictionary
    standard_deviation_per_symbol[symbol] = standard_deviation

# Output result
for symbol, std_dev in standard_deviation_per_symbol.items():
    print(f"Standardabweichung für {symbol}: {std_dev:.2f}")
    
# Convert mean return to DataFrame
standard_deviation_per_symbol_df = pd.DataFrame(list(standard_deviation_per_symbol.items()), columns=['Symbol', 'Standard_Deviation'])
print(standard_deviation_per_symbol_df)

Standardabweichung für ALV.DE: 3.78
Standardabweichung für DBK.DE: 8.41
Standardabweichung für VOW3.DE: 5.24
Standardabweichung für BMW.DE: 6.38
Standardabweichung für ADS.DE: 7.70
Standardabweichung für BEI.DE: 3.47
Standardabweichung für DTE.SG: 4.68
Standardabweichung für SAP.DE: 6.32
Standardabweichung für 1COV.DE: 7.87
Standardabweichung für BAS.DE: 7.46
Standardabweichung für EOAN.DE: 5.03
Standardabweichung für RWE.DE: 6.86
     Symbol  Standard_Deviation
0    ALV.DE            3.775261
1    DBK.DE            8.413941
2   VOW3.DE            5.235343
3    BMW.DE            6.379693
4    ADS.DE            7.702299
5    BEI.DE            3.474850
6    DTE.SG            4.682255
7    SAP.DE            6.319967
8   1COV.DE            7.871066
9    BAS.DE            7.457333
10  EOAN.DE            5.029626
11   RWE.DE            6.860756


# Historic data from 2018 for the covarianz matrix

In [17]:
excel_file_path = 'settings/lastSequenceForPrediction.xlsx'

# path to the data, in which the index is stored
index_file_path = 'settings/lastSequenceForPrediction.txt'

# read the current index from the index file
try:
    with open(index_file_path, 'r') as index_file:
        current_row_index = int(index_file.read().strip())
except FileNotFoundError:
    current_row_index = 0

# read excel file
df_lastSeq = pd.read_excel(excel_file_path)

# Check, if index is out of bounds
if current_row_index >= len(df_lastSeq):
    print("Es gibt keine weiteren Zeilen in der Excel-Tabelle.")
else:
    # extract data from the current row
    start_date_lastSeq = df_lastSeq.loc[current_row_index, 'start_date']
    end_date_lastSeq = df_lastSeq.loc[current_row_index, 'end_date']
    
    # Output of the current data 
    print(f'Startdatum: {start_date_lastSeq}, Enddatum: {end_date_lastSeq}')

    # Update the index in the index file for the next call
    with open(index_file_path, 'w') as index_file:
        index_file.write(str(current_row_index + 1))

Startdatum: 2023-10-01, Enddatum: 2024-03-31


In [18]:
# Definition of the time period
start_date2018 = '2023-01-01'
end_date2018 = end_date_lastSeq
print(end_date2018)
# Load and store the data for every symbol (share)
for symbol in stock_symbols:
    output_file = f'stock_data_{symbol}_2018.csv'
    download_stock_data(symbol, start_date2018, end_date2018, output_file)

    # Search and delete empty rows in the CSV file
    with open(output_file, 'r') as file:
        lines = file.readlines()

    # Filter the empty rows
    lines = [line.strip() for line in lines if line.strip()]

    # Overwrite the file with the cleaned rows
    with open(output_file, 'w') as file:
        file.write('\n'.join(lines))


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

2024-03-31



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


# Combine historical data to one file

In [19]:
# Data frame for the combined data
combined_historical2018_data = pd.DataFrame()

# load and combine the data for each share
for symbol in stock_symbols:
    # file name for the csv file
    csv_file = f'stock_data_{symbol}_2018.csv'
    
    # read CSV file and set date to index
    df = pd.read_csv(csv_file, index_col='Date', parse_dates=True)
    
    # change name of the close column
    df.rename(columns={'Close': symbol}, inplace=True)
    
    # write data to the combined file
    combined_historical2018_data = pd.concat([combined_historical2018_data, df[symbol]], axis=1)

# drop rows with NaN values
combined_historical2018_data.dropna(inplace=True)
    
# create a CSV file for the data
combined_historical2018_data.to_csv('combined_historical2018_stock_data.csv')

print(combined_historical2018_data)

                ALV.DE  DBK.DE     VOW3.DE      BMW.DE      ADS.DE  \
2023-01-02  203.050003  10.942  120.040001   85.800003  127.699997   
2023-01-03  205.199997  11.112  122.059998   85.830002  131.880005   
2023-01-04  211.500000  11.698  125.879997   87.879997  138.380005   
2023-01-05  209.550003  11.494  127.120003   88.949997  138.539993   
2023-01-06  211.800003  11.596  128.160004   89.529999  140.679993   
...                ...     ...         ...         ...         ...   
2024-03-22  271.850006  14.178  118.080002  104.120003  200.250000   
2024-03-25  275.200012  14.146  119.379997  106.239998  203.949997   
2024-03-26  277.000000  14.230  121.260002  106.559998  204.300003   
2024-03-27  277.450012  14.612  121.760002  106.160004  204.699997   
2024-03-28  277.799988  14.582  122.839996  106.959999  207.000000   

                BEI.DE     DTE.SG      SAP.DE    1COV.DE     BAS.DE  EOAN.DE  \
2023-01-02  107.150002  18.910000   97.419998  38.259998  47.985001    9.468   

# Mean-Variance Optimization

In [20]:
def mean_variance_optimization(expected_returns, standard_deviations, covariance_matrix):
    n = len(expected_returns)
    initial_weights = np.array([1/n] * n)  # Start value of the weighs
    bounds = [(0, 1)] * n  # Border of the weighs (0-100% for every symbol)

    # Minimize the negative sharpe ratio
    def negative_sharpe(weights, expected_returns, standard_deviations, covariance_matrix):
        portfolio_return = np.dot(weights, expected_returns)
        portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(covariance_matrix, weights)))
        return -portfolio_return / portfolio_volatility

    result = minimize(negative_sharpe, initial_weights, args=(expected_returns, standard_deviations, covariance_matrix), bounds=bounds, constraints={'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    return result.x

In [21]:
def allocate_portfolio(historical_data, initial_capital, expected_returns_df, standard_deviations_df):
    # Calculate expected returns based on the predictions
    expected_returns = expected_returns_df['Average_Return'].values
    
    # Calculate standard deviations based on the predictions
    standard_deviations = standard_deviations_df['Standard_Deviation'].values
    
    # Calculate the covariance matrix of the returns
    covariance_matrix = historical_data.cov()
           
    print(covariance_matrix)
    
    # Perform mean-variance optimization to obtain optimal weights
    optimal_weights = mean_variance_optimization(expected_returns, standard_deviations, covariance_matrix)

    # Calculate the allocation of assets based on the optimal weights and the available capital
    asset_allocation = initial_capital * optimal_weights

    return asset_allocation

In [22]:
initial_capital = 1000 + profit # 1000€ start capital
portfolio_allocation = allocate_portfolio(combined_historical2018_data, initial_capital, average_return_per_symbol_df, standard_deviation_per_symbol_df)
print(portfolio_allocation)

             ALV.DE     DBK.DE    VOW3.DE     BMW.DE      ADS.DE      BEI.DE  \
ALV.DE   263.233687  16.722550 -36.220076   5.952651  172.491887  107.009092   
DBK.DE    16.722550   1.551082   0.318550  -0.659780    6.540290    4.771363   
VOW3.DE  -36.220076   0.318550  69.134268  19.760646  -63.554093  -30.880760   
BMW.DE     5.952651  -0.659780  19.760646  34.572252   35.942541   10.627189   
ADS.DE   172.491887   6.540290 -63.554093  35.942541  272.479734   99.324278   
BEI.DE   107.009092   4.771363 -30.880760  10.627189   99.324278   72.536878   
DTE.SG    10.880760   0.681397  -0.801476  -0.534726    3.412920    6.542058   
SAP.DE   279.440764  14.954034 -53.869391  32.660143  250.691795  147.171173   
1COV.DE   46.889052   2.295622 -25.165169   0.619697   59.397072   22.094798   
BAS.DE     4.412254   1.647993  17.191130   1.362113  -12.713713   -7.972791   
EOAN.DE    7.639689   0.216266  -2.472728   1.698286    9.464359    5.717161   
RWE.DE   -32.281946  -1.548413   9.24509

In [23]:
# Initialisation of the portfolio
portfolio_value = 0
# initialize the list for the quantity of the shares, which have to be purchased
shares_to_buy_list = []
# Purchase the shares based on the allocations
for i, allocation in enumerate(portfolio_allocation):
    # Calculate the quantity of the shares, which should be purchased with this allocation
    shares_to_buy = (allocation) / combined_data_begin['Close_Value'][i]
    shares_to_buy_list.append(shares_to_buy)
    print(shares_to_buy)

    # Calculate the value of the stock of the purchased shares
    value_of_stock = shares_to_buy * combined_data_begin['Close_Value'][i]
    print(value_of_stock)
    
    # Add the value of the purchased share to the portfolio
    portfolio_value += value_of_stock
    print(portfolio_value)
    print("-----------------------------")

# Output of the total value of the portfolio at the end of the period to be predicted
print("Gesamtwert des Portfolios am Ende des ersten Monats von 2019:", portfolio_value)
print(shares_to_buy_list)

0.0
0.0
0.0
-----------------------------
15.143872666343063
220.82794786999577
220.82794786999577
-----------------------------
0.16531468482635675
20.307255278669206
241.13520314866497
-----------------------------
0.0
0.0
241.13520314866497
-----------------------------
0.0
0.0
241.13520314866497
-----------------------------
0.0
0.0
241.13520314866497
-----------------------------
0.0
0.0
241.13520314866497
-----------------------------
0.0
0.0
241.13520314866497
-----------------------------
0.0
0.0
241.13520314866497
-----------------------------
0.0
0.0
241.13520314866497
-----------------------------
61.59044094632611
793.5928456903451
1034.7280488390102
-----------------------------
1.2503357938006192
39.33556292825087
1074.0636117672611
-----------------------------
Gesamtwert des Portfolios am Ende des ersten Monats von 2019: 1074.0636117672611
[0.0, 15.143872666343063, 0.16531468482635675, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 61.59044094632611, 1.2503357938006192]


In [24]:
# Define portfolio value
new_portfolio_value = 0
# Loop over the indices of the two lists
for i in range(len(shares_to_buy_list)):
    # convert to float
    last_close_value = float(last_close_values[i][1])
    print(last_close_value)
    print(shares_to_buy_list[i])
    #Calculate the new value of the share and add the value to the list
    new_portfolio_value += (last_close_value * shares_to_buy_list[i])
    print(new_portfolio_value)
    print("----------------------------")

print("Wert des Portfolios:")
print(new_portfolio_value)

266.29998779296875
0.0
0.0
----------------------------
15.104000091552734
15.143872666343063
228.73305413890856
----------------------------
120.75
0.16531468482635675
248.69480233169114
----------------------------
106.8000030517578
0.0
248.69480233169114
----------------------------
232.3000030517578
0.0
248.69480233169114
----------------------------
139.4499969482422
0.0
248.69480233169114
----------------------------
21.71999931335449
0.0
248.69480233169114
----------------------------
171.4199981689453
0.0
248.69480233169114
----------------------------
47.54999923706055
0.0
248.69480233169114
----------------------------
49.05500030517578
0.0
248.69480233169114
----------------------------
12.46500015258789
61.59044094632611
1016.4196581256016
----------------------------
33.0
1.2503357938006192
1057.680739321022
----------------------------
Wert des Portfolios:
1057.680739321022


In [25]:
# just the profit, to store it
profit = new_portfolio_value - 1000

# path to JSON file
json_file_path = 'settings/profit_statistical.json'

# Create Dictionary with the value of new_portfolio_value
data = {'profit': profit}

# Write JSON file
with open(json_file_path, 'w') as json_file:
    json.dump(data, json_file)

print(f'Wert von profit wurde erfolgreich in {json_file_path} gespeichert.')

Wert von profit wurde erfolgreich in settings/profit_statistical.json gespeichert.
