In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import yfinance as yf
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [2]:
def find_stock_name(stock_ticker:str)->str:
    """return stock or asset name given the stock/asset ticker name"""
    for key, val in dax_assets.items():
        if val == stock_ticker:
            return key

In [3]:
# top 40 dax companies 
dax_assets = {
               'DAX': '^GDAXI',
               'Linde': 'LIN',
               'SAP': 'SAP',
               'Deutsche Telekom': 'DTE.DE',
               'Volkswagen': 'VOW3.DE',
               'Siemens': 'SIE.DE',
               'Merck': 'MRK.DE',
               'Airbus': 'AIR.PA',
               'Mercedes Benz': 'MBG.DE', 
               'Bayer': 'BAYZF',
               'BMW': 'BMW.DE',
               'Siemens Healthineers': 'SHL.DE',
               'Deutsche Post': 'DPW.DE',
               'BASF': 'BAS.DE',
               'Münchner Rück': 'MUV2.DE',
               'Infineon': 'IFX.DE',
               'Deutsche Börse': 'DB1:DE',
               'RWE': 'RWE.DE',
               'Henkel': 'HEN3.DE',
               'Adidas': 'ADS.DE',
               'Sartorius': 'SRT.DE',
               'Beiersdorf': 'BEI.DE',
               'Porsche': 'PAH3.DE',
               'E.ON': 'EOAN.DE',
               'Deutsche Bank': 'DB',
               'Vonovia': 'VNA.DE',
               'Fresenius': 'FRE.DE',
               'Symrise': 'SY1.DE',
               'Continental': 'CON.DE',
               'Delivery Hero': 'DHER.F',
               'Brenntag': 'BNR.DE',
               'Qiagen': 'QGEN',
               'Fresenius Medical Care': 'FMS',
               'Siemens Energy': 'ENR.F',
               'HeidelbergCement': 'HEI.DE',
               'Puma': 'PUM.DE',
               'MTU Aero Engines': 'MTX.DE',
               'Covestro': '1COV.F',
               'Zalando': 'ZAL.DE',
               'HelloFresh': 'HFG.DE'
            }

In [4]:
dax_top_40_companies = list(dax_assets.keys())#[1:]
dax_top_40_tickername = list(dax_assets.values())#[1:]

In [5]:
start_date = datetime(2019,1,1)
str(start_date.year)+'-'+str(start_date.month)+'-'+str(start_date.day)

'2019-1-1'

In [6]:
start_date = '2010-1-1'
end_date = '2022-9-15'
na_percentage = 0.6 # at least x percent rows must be none-nas

In [7]:
# download DAX top40
df = yf.download(dax_top_40_tickername, 
                start=start_date, 
                end=end_date,
                progress=True)
# drop columns
df = df['Close']
# get actual stock names
stock_names = [find_stock_name(stock_ticker) for stock_ticker in df.columns]
# rename columns
df.columns = stock_names
# make sure the index is datetime format
df.index = pd.to_datetime(df.index)
# drop nas
datetimeFormat = '%Y-%m-%d'
time_delta = datetime.strptime(end_date, datetimeFormat) - datetime.strptime(start_date,datetimeFormat)
df.dropna(axis=1, thresh=int(time_delta.days * na_percentage), inplace=True)
# show data
df.head()

[*********************100%***********************]  40 of 40 completed

1 Failed download:
- DB1:DE: No data found, symbol may be delisted


Unnamed: 0_level_0,Adidas,Airbus,BASF,Bayer,Beiersdorf,BMW,Brenntag,Continental,Deutsche Bank,Deutsche Post,...,Porsche,Puma,Qiagen,RWE,SAP,Siemens,Sartorius,Symrise,Volkswagen,DAX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-12-31,,,,80.400002,,,,,67.662216,,...,,,23.212057,,46.810001,,,,,
2010-01-04,38.505001,13.99,44.849998,80.400002,46.445,32.049999,,36.867805,69.875954,13.98,...,37.068539,24.09,23.555094,68.731125,47.110001,63.442383,3.85,15.515,64.114349,6048.299805
2010-01-05,39.720001,13.945,44.169998,80.400002,46.200001,32.310001,,39.17416,71.21183,14.0,...,38.269535,24.01,23.264032,68.292587,46.82,63.48114,3.9675,15.52,62.871826,6031.859863
2010-01-06,39.400002,14.07,44.450001,78.949997,46.165001,32.810001,,39.430424,69.980919,14.155,...,39.021763,23.625,23.513514,68.163017,47.91,63.713711,3.9675,16.02,64.611359,6034.330078
2010-01-07,39.744999,14.21,44.154999,78.949997,45.700001,33.099998,,44.483124,70.772903,14.125,...,41.27417,23.504999,23.617464,67.873978,49.169998,64.469551,3.955,15.91,65.35688,6019.359863


In [8]:
# check if any NAs 
df.isna().sum()

Adidas                     60
Airbus                     34
BASF                       60
Bayer                      88
Beiersdorf                 60
BMW                        60
Brenntag                  120
Continental                60
Deutsche Bank              88
Deutsche Post              60
Deutsche Telekom           60
E.ON                       60
Fresenius Medical Care     88
Fresenius                  60
HeidelbergCement           60
Henkel                     60
Infineon                   60
Linde                      88
Mercedes Benz              60
Merck                      60
MTU Aero Engines           60
Münchner Rück              60
Porsche                    60
Puma                       60
Qiagen                     88
RWE                        60
SAP                        88
Siemens                    60
Sartorius                  60
Symrise                    60
Volkswagen                 60
DAX                        65
dtype: int64

In [9]:
df = df.interpolate(method='time', limit=7).fillna(value=None, method='bfill', axis=0, inplace=False, limit=7, downcast=None)

In [10]:
# check if any NAs left
df.isna().sum()

Adidas                     0
Airbus                     0
BASF                       0
Bayer                      0
Beiersdorf                 0
BMW                        0
Brenntag                  55
Continental                0
Deutsche Bank              0
Deutsche Post              0
Deutsche Telekom           0
E.ON                       0
Fresenius Medical Care     0
Fresenius                  0
HeidelbergCement           0
Henkel                     0
Infineon                   0
Linde                      0
Mercedes Benz              0
Merck                      0
MTU Aero Engines           0
Münchner Rück              0
Porsche                    0
Puma                       0
Qiagen                     0
RWE                        0
SAP                        0
Siemens                    0
Sartorius                  0
Symrise                    0
Volkswagen                 0
DAX                        0
dtype: int64

In [11]:
df.head()

Unnamed: 0_level_0,Adidas,Airbus,BASF,Bayer,Beiersdorf,BMW,Brenntag,Continental,Deutsche Bank,Deutsche Post,...,Porsche,Puma,Qiagen,RWE,SAP,Siemens,Sartorius,Symrise,Volkswagen,DAX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-12-31,38.505001,13.99,44.849998,80.400002,46.445,32.049999,,36.867805,67.662216,13.98,...,37.068539,24.09,23.212057,68.731125,46.810001,63.442383,3.85,15.515,64.114349,6048.299805
2010-01-04,38.505001,13.99,44.849998,80.400002,46.445,32.049999,,36.867805,69.875954,13.98,...,37.068539,24.09,23.555094,68.731125,47.110001,63.442383,3.85,15.515,64.114349,6048.299805
2010-01-05,39.720001,13.945,44.169998,80.400002,46.200001,32.310001,,39.17416,71.21183,14.0,...,38.269535,24.01,23.264032,68.292587,46.82,63.48114,3.9675,15.52,62.871826,6031.859863
2010-01-06,39.400002,14.07,44.450001,78.949997,46.165001,32.810001,,39.430424,69.980919,14.155,...,39.021763,23.625,23.513514,68.163017,47.91,63.713711,3.9675,16.02,64.611359,6034.330078
2010-01-07,39.744999,14.21,44.154999,78.949997,45.700001,33.099998,,44.483124,70.772903,14.125,...,41.27417,23.504999,23.617464,67.873978,49.169998,64.469551,3.955,15.91,65.35688,6019.359863


In [12]:
# plot daily portfolio worth
fig = px.line(df,
              y="Linde", 
              title='Linde')
fig.show()

# Predict Single Stock Price using FB Neural Prophet

In [10]:
from neuralprophet import NeuralProphet

In [11]:
df_stock = df[['Linde']].copy()
df_stock = df_stock.rename(columns={'Linde':'y', 'Date':'ds'})
df_stock['ds'] = df_stock.index
df_stock = df_stock.reset_index()
df_stock = df_stock[['ds','y']]
df_stock

Unnamed: 0,ds,y
0,2009-12-31,38.505001
1,2010-01-04,38.505001
2,2010-01-05,39.720001
3,2010-01-06,39.400002
4,2010-01-07,39.744999
...,...,...
3281,2022-09-08,142.360001
3282,2022-09-09,145.360001
3283,2022-09-12,149.220001
3284,2022-09-13,142.899994


In [12]:
METRICS = ['MAE', 'RMSE']
params = {"n_forecasts": 180, "n_lags": 30}

In [13]:
# instantiate model object
m = NeuralProphet(**params)

In [15]:
# set up cross validation
folds = m.crossvalidation_split_df(df_stock, freq="D", k=5, fold_pct=0.20, fold_overlap_pct=0.5)

metrics_train = pd.DataFrame(columns=METRICS)
metrics_test = pd.DataFrame(columns=METRICS)

for df_train, df_test in folds:
    m = NeuralProphet(**params)
    train = m.fit(df=df_train, freq="D")
    test = m.test(df=df_test)
    metrics_train = metrics_train.append(train[METRICS].iloc[-1])
    metrics_test = metrics_test.append(test[METRICS].iloc[-1])

INFO - (NP.df_utils._infer_frequency) - Major frequency B corresponds to 99.026% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.forecaster.__handle_missing_data) - 1355 missing dates added.
INFO - (NP.forecaster.__handle_missing_data) - 1355 NaN values in column y were auto-imputed.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.948% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 165
 95%|█████████▌| 125/131 [00:04<00:00, 28.34it/s]
INFO - (NP.utils_torch.lr_range

In [16]:
metrics_test.describe().loc[["mean", "std", "min", "max"]]

Unnamed: 0,MAE,RMSE
mean,65.0153,74.123894
std,35.179847,39.654567
min,33.532257,36.954548
max,119.901978,134.406784


In [14]:
# make a two fold validation
m = NeuralProphet(**params)
df_train, df_val = m.split_df(df_stock, freq='D', valid_p=0.25)
#metrics = m.fit(df_train, validation_df=df_val, progress="plot-all")
metrics = m.fit(df_train, validation_df=df_val)

INFO - (NP.df_utils._infer_frequency) - Major frequency B corresponds to 99.026% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.forecaster.__handle_missing_data) - 1355 missing dates added.
INFO - (NP.forecaster.__handle_missing_data) - 1355 NaN values in column y were auto-imputed.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.971% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 139
100%|██████████| 137/137 [00:04<00:00, 30.51it/s]
INFO - (NP.utils_torch.lr_range_test) -

In [15]:
metrics

Unnamed: 0,SmoothL1Loss,MAE,RMSE,RegLoss,SmoothL1Loss_val,MAE_val,RMSE_val
0,1.211728,278.893386,340.327213,0.0,1.268804,287.922178,357.514023
1,0.771233,201.866807,242.903273,0.0,0.905581,223.734480,277.755940
2,0.449282,141.807039,170.238760,0.0,0.445072,138.133790,172.186494
3,0.162350,79.643815,94.812424,0.0,0.113063,65.004214,80.550765
4,0.031122,34.241994,40.760369,0.0,0.038069,37.297459,46.602156
...,...,...,...,...,...,...,...
134,0.000678,4.904515,6.210573,0.0,0.048734,46.576047,52.497677
135,0.000672,4.904515,6.180601,0.0,0.049033,46.775262,52.668467
136,0.000665,4.858998,6.147952,0.0,0.048705,46.605339,52.482212
137,0.000668,4.895664,6.162558,0.0,0.048553,46.501569,52.391814


In [16]:
#metrics[['MAE', 'MAE_val']].plot()

: 

: 

In [13]:
# train model on all data
m = NeuralProphet(**params)

# fit model
metrics = m.fit(df_stock)

# Predictions
future = m.make_future_dataframe(df=df_stock, periods=365, n_historic_predictions=len(df_stock)) #we need to specify the number of days in future
forecast = m.predict(future)

INFO - (NP.df_utils._infer_frequency) - Major frequency B corresponds to 99.026% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as B
INFO - (NP.forecaster.__handle_missing_data) - 29 missing dates added.
INFO - (NP.forecaster.__handle_missing_data) - 30 NaN values in column y were auto-imputed.
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 140
100%|██████████| 137/137 [00:03<00:00, 37.60it/s]
INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 3.56E-01, min: 2.29E-02
100%|██████████| 137/137 [00:03<00:00, 39.73it/s]
INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 3.06E-01, min

In [14]:
forecast.head()

Unnamed: 0,ds,y,yhat1,residual1,yhat2,residual2,yhat3,residual3,yhat4,residual4,...,ar174,ar175,ar176,ar177,ar178,ar179,ar180,trend,season_yearly,season_weekly
0,2009-12-31,38.505001,,,,,,,,,...,,,,,,,,,,
1,2010-01-01,38.505001,,,,,,,,,...,,,,,,,,,,
2,2010-01-04,38.505001,,,,,,,,,...,,,,,,,,,,
3,2010-01-05,39.720001,,,,,,,,,...,,,,,,,,,,
4,2010-01-06,39.400002,,,,,,,,,...,,,,,,,,,,


In [17]:
# plot the data
fig = go.Figure()
fig = fig.add_trace(go.Line(x = forecast['ds'],
                            y = forecast['y'], 
                            name = 'price'))

fig = fig.add_trace(go.Line(x = forecast['ds'],
                            y = forecast['yhat180'], 
                            name = 'forecast'))

fig = fig.add_trace(go.Line(x = forecast['ds'],
                            y = forecast['trend'], 
                            name = 'trend', 
                            line=dict(color='black', width=4)))       
fig.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.





In [None]:
# plot forecast
fig_forecast = m.plot(forecast)

In [16]:
for col in forecast.columns:
    print(col)

ds
y
yhat1
residual1
yhat2
residual2
yhat3
residual3
yhat4
residual4
yhat5
residual5
yhat6
residual6
yhat7
residual7
yhat8
residual8
yhat9
residual9
yhat10
residual10
yhat11
residual11
yhat12
residual12
yhat13
residual13
yhat14
residual14
yhat15
residual15
yhat16
residual16
yhat17
residual17
yhat18
residual18
yhat19
residual19
yhat20
residual20
yhat21
residual21
yhat22
residual22
yhat23
residual23
yhat24
residual24
yhat25
residual25
yhat26
residual26
yhat27
residual27
yhat28
residual28
yhat29
residual29
yhat30
residual30
yhat31
residual31
yhat32
residual32
yhat33
residual33
yhat34
residual34
yhat35
residual35
yhat36
residual36
yhat37
residual37
yhat38
residual38
yhat39
residual39
yhat40
residual40
yhat41
residual41
yhat42
residual42
yhat43
residual43
yhat44
residual44
yhat45
residual45
yhat46
residual46
yhat47
residual47
yhat48
residual48
yhat49
residual49
yhat50
residual50
yhat51
residual51
yhat52
residual52
yhat53
residual53
yhat54
residual54
yhat55
residual55
yhat56
residual56
yhat5

In [17]:
m.plot_components(forecast)
#plt.show()

KeyboardInterrupt: 

In [20]:
# normalized daily returns
def normalized_returns(df):
    return np.log(1 + df.pct_change(periods=1).fillna(value=None, method='bfill', axis=0, inplace=False, limit=7, downcast=None)) 

# create weights
def weight_creator(df):
    rand = np.random.random(len(df.columns))
    rand /= rand.sum()
    return rand

# calculate portfolio return
def portfolio_returns(df, weights):
    return np.dot(df.mean(), weights)

# calculate portfolios standard deviation
def portfolio_std(df, weights):
    return (np.dot(np.dot(df.cov(), weights), weights))**(1/2)*np.sqrt(250)

def portfolio_sharp_ratio(portfolio_return:float, portfolio_std:float, rfr:float)->float:
    """Calculate the sharp ratio for a given portfolio df and a given risk-free-return "rfr"."""
    ntd = 250 #  number of trading days
    return np.divide(portfolio_return - rfr/ntd, portfolio_std) 

In [21]:
# plot daily portfolio worth
fig = px.line(normalized_returns(df), 
              y="Linde", 
              title='Linde: Normalized Returns')
fig.show()

In [22]:
# testing the functions
df_returns = normalized_returns(df)
weights = weight_creator(df_returns)
sdev = portfolio_std(df_returns, weights)
returns = portfolio_returns(df_returns, weights)

print('portfolio return: {:.5f}'.format(returns))
print('portfolio standard deviation: {:.2f}'.format(sdev))

portfolio return: 0.00023
portfolio standard deviation: 0.18


In [23]:

def sim2weights_df(weights:list, stock_names:list)->pd.DataFrame:
    """Make dataframe given the portfolios´s weights from the simulations.  
    """
    # initialize weights dataframe with the weights from first simulation
    df_weights = pd.DataFrame(weights[0]).T
    for weights in weights[1:]:
        df_weights = df_weights.append(pd.DataFrame(weights).T, ignore_index=True) 
    # rename columns
    df_weights.columns = stock_names
    return df_weights

def sim2kpi_df(sim_returns:list, sim_standard_deviations:list, sim_sharp_ratios:list)->pd.DataFrame:
    """Generate key performance indicator dataframe from simulation returns, simulation standard deviations and simulation sharp ratios."""
    return pd.DataFrame(data={'portfolio return': sim_returns, 
                               'portfolio standard dev': sim_standard_deviations, 
                               'portfolio sharp ratio': sim_sharp_ratios})

def sim2_df(sim_returns:list, sim_standard_deviations:list, sim_sharp_ratios:list, weights:list, stock_names:list)->pd.DataFrame:
    """Save monte-carlo simulation results in dataframe"""
    sim_weights_df = sim2weights_df(weights, stock_names)
    sim_kpi_df = sim2kpi_df(sim_returns, sim_standard_deviations, sim_sharp_ratios) 
    return sim_weights_df.join(sim_kpi_df)

In [24]:
# Monte-Carlo Simulation
w = []
returns = []
stds = []
srs = []

# caclulate normalized daily returns 
df_returns = normalized_returns(df)

for n in range(1000):
    weights = weight_creator(df_returns)
    portfolio_return = portfolio_returns(df_returns, weights)
    portfolio_stdev = portfolio_std(df_returns, weights) 
    portfolio_sr = portfolio_sharp_ratio(portfolio_return, portfolio_stdev, rfr=0.02)
    w.append(weights)
    returns.append(portfolio_return)
    stds.append(portfolio_stdev)
    srs.append(portfolio_sr)

In [25]:
# save simulation results in dataframe
stock_names = list(df.columns)
df_simulation = sim2_df(returns, stds, srs, w, stock_names)
df_simulation.head()

Unnamed: 0,Linde,SAP,Deutsche Telekom,Volkswagen,Siemens,Merck,Airbus,Mercedes Benz,Bayer,Deutsche Post,...,Brenntag,Qiagen,Siemens Energy,HeidelbergCement,Puma,Covestro,HelloFresh,portfolio return,portfolio standard dev,portfolio sharp ratio
0,0.022147,0.055435,0.027889,0.009383,0.055904,0.056206,0.049335,0.04843,0.034178,0.040707,...,6.4e-05,0.010146,0.006141,0.032055,0.021422,0.024988,0.038204,0.000263,0.199006,0.000919
1,0.010755,0.011474,0.014587,0.014233,0.042171,0.05924,0.026014,0.017771,0.057596,0.013702,...,0.04907,0.011103,0.049459,0.04425,0.056036,0.029157,0.046977,0.000236,0.189861,0.00082
2,0.01625,0.019093,0.03633,0.05029,0.00041,0.019089,0.024687,0.029158,0.001273,0.029189,...,0.015669,0.003594,0.008616,0.053547,0.06852,0.053855,0.010108,0.000256,0.187519,0.000937
3,0.022065,0.042569,0.009032,0.002387,0.055074,0.019849,0.026224,0.005692,0.029986,0.042436,...,0.055907,0.055872,0.043904,0.020357,0.050904,0.032896,0.054449,0.000218,0.188548,0.00073
4,0.019628,0.041418,0.030965,0.002812,0.040363,0.036851,0.039006,0.056791,0.030794,0.05743,...,0.038288,0.013468,0.045853,0.014724,0.026684,0.02318,0.000608,0.000199,0.194401,0.00061


In [26]:
# plot returns vs risk
fig = px.scatter(x=df_simulation['portfolio standard dev'], 
                 y=df_simulation['portfolio return']*100,
                 color=df_simulation['portfolio sharp ratio']*100,
                 labels={'y': 'return [%]', 'x': 'standard deviation', 'color': 'sharp ratio'},
                 width=600, 
                 title='Portfolio´s Returns and Risks Monte-Carlo Simulation')
fig.show()

In [27]:
# find mimimum risk (i.e. variance) portfolio
def min_variance_portfolio(df_simulation:pd.DataFrame, stocknames:list)->pd.DataFrame:
    """Get weights of the portfolio with minimal variance or risk. Input dataframe of the simulation and a list of the stock names"""
    return df_simulation.sort_values(by='portfolio standard dev')[stocknames].iloc[0,:].values

In [28]:
weights_opt = min_variance_portfolio(df_simulation, stock_names)

In [29]:
# testing the functions
df_returns = normalized_returns(df)
sdev = portfolio_std(df_returns, weights_opt)
returns = portfolio_returns(df_returns, weights_opt)

print('portfolio return: {:.5f}'.format(returns*100))
print('portfolio standard deviation: {:.2f}'.format(sdev))

portfolio return: 0.02907
portfolio standard deviation: 0.17


In [30]:
# find optimal portfolio for given risk

# define risk bounds i.e. maximal acceptable risk
risk_bounds = (0, 0.2)
risk_indices = df_simulation['portfolio standard dev'].between(risk_bounds[0], risk_bounds[1])

# find optimal weights
weights_opt = df_simulation[risk_indices].sort_values(by='portfolio return', ascending=False)[stock_names].iloc[0,:].values
weights_opt

array([4.07808960e-02, 5.24877256e-02, 2.83977941e-02, 2.32498782e-02,
       2.28786662e-02, 1.74549133e-02, 6.93463028e-02, 3.76039422e-02,
       7.36553244e-03, 3.36245226e-02, 3.18751244e-02, 1.09758221e-02,
       5.89121959e-03, 4.38003507e-02, 2.92156483e-03, 2.57814979e-02,
       6.80334021e-02, 4.62167796e-02, 7.73642707e-05, 3.89168484e-02,
       3.20685689e-02, 6.84792038e-03, 3.13703343e-02, 6.57798283e-02,
       1.90466234e-02, 7.54566502e-03, 3.68673113e-02, 5.98940129e-02,
       5.64744766e-02, 2.22188960e-02, 3.19597811e-02, 2.22464344e-02])

In [31]:
# testing the functions
df_returns = normalized_returns(df)
sdev = portfolio_std(df_returns, weights_opt)
returns = portfolio_returns(df_returns, weights_opt)

print('portfolio return: {:.5f}'.format(returns*100))
print('portfolio standard deviation: {:.2f}'.format(sdev))

portfolio return: 0.03434
portfolio standard deviation: 0.19
