# 01. Librerías y Datos

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import yfinance as yf
from tqdm import tqdm

In [5]:
date_start= '2021-05-01'
date_end= '2024-05-01'
tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'PYPL', 'ADBE', 'NFLX']

In [6]:
df_adj_close = pd.DataFrame()
for ticker in tqdm(tickers):
    data = yf.download(ticker, start=date_start, end=date_end, progress=False)
    df_adj_close[ticker] = data['Adj Close']
df_adj_close

100%|██████████| 10/10 [00:00<00:00, 11.75it/s]


Unnamed: 0_level_0,AAPL,MSFT,GOOGL,AMZN,TSLA,META,NVDA,PYPL,ADBE,NFLX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-05-03,130.091766,245.384064,117.153999,169.324493,228.300003,322.238098,148.078003,259.089996,503.459991,509.109985
2021-05-04,125.488426,241.418716,115.341499,165.593506,224.533340,318.022552,143.232483,250.160004,490.700012,503.179993
2021-05-05,125.733810,240.132675,115.738503,163.526993,223.646667,314.686096,144.302917,247.399994,486.690002,496.079987
2021-05-06,127.343513,243.308838,116.867500,165.318497,221.179993,319.680817,144.946594,252.020004,483.609985,499.549988
2021-05-07,128.021896,245.968643,117.596497,164.580505,224.123337,318.741791,147.833496,253.360001,488.730011,503.839996
...,...,...,...,...,...,...,...,...,...,...
2024-04-24,168.791061,409.059998,159.130005,176.589996,162.130005,493.500000,796.770020,64.839996,477.119995,555.119995
2024-04-25,169.659882,399.040009,156.000000,173.669998,170.179993,441.380005,826.320007,64.099998,473.440002,564.799988
2024-04-26,169.070679,406.320007,171.949997,179.619995,168.289993,443.290009,877.349976,65.959999,477.559998,561.229980
2024-04-29,173.264984,402.250000,166.149994,180.960007,194.050003,432.619995,877.570007,66.989998,473.070007,559.489990


# 02. Exploración de datos

In [7]:
df_long= pd.DataFrame(df_adj_close.stack(0)).reset_index().rename(columns={'level_1': 'Stock', 0: 'Price'})
px.line(df_long, x='Date', y='Price', color='Stock', title='Stock Prices')

In [8]:
px.imshow(round(df_adj_close.corr(),2), color_continuous_scale='RdBu', range_color=(-1, 1),
        text_auto=True, width=600, height=600, title='Correlation of stocks')

In [9]:
df_pct_change= df_adj_close.pct_change()
df_pct_change

Unnamed: 0_level_0,AAPL,MSFT,GOOGL,AMZN,TSLA,META,NVDA,PYPL,ADBE,NFLX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-05-03,,,,,,,,,,
2021-05-04,-0.035385,-0.016160,-0.015471,-0.022035,-0.016499,-0.013082,-0.032723,-0.034467,-0.025345,-0.011648
2021-05-05,0.001955,-0.005327,0.003442,-0.012479,-0.003949,-0.010491,0.007473,-0.011033,-0.008172,-0.014110
2021-05-06,0.012802,0.013227,0.009755,0.010955,-0.011029,0.015872,0.004461,0.018674,-0.006328,0.006995
2021-05-07,0.005327,0.010932,0.006238,-0.004464,0.013307,-0.002937,0.019917,0.005317,0.010587,0.008588
...,...,...,...,...,...,...,...,...,...,...
2024-04-24,0.012702,0.003656,0.005497,-0.016431,0.120611,-0.005241,-0.033316,0.006363,0.008924,-0.039169
2024-04-25,0.005147,-0.024495,-0.019669,-0.016535,0.049651,-0.105613,0.037087,-0.011413,-0.007713,0.017438
2024-04-26,-0.003473,0.018244,0.102244,0.034260,-0.011106,0.004327,0.061756,0.029017,0.008702,-0.006321
2024-04-29,0.024808,-0.010017,-0.033731,0.007460,0.153069,-0.024070,0.000251,0.015616,-0.009402,-0.003100


In [13]:
df_pct_change_long= pd.DataFrame(df_pct_change.stack(0)).reset_index().rename(columns={'level_1': 'Stock', 0: 'Price'})
px.histogram(df_pct_change_long, x='Price', facet_col='Stock', nbins=50, title='Daily Percentage Change in Stock Prices', facet_col_wrap=3, height=800)

# 03. Markowitz Portfolio Optimization

In [20]:
np.random.seed(1)
weights = np.random.random((len(tickers),1))
weights /= np.sum(weights)
weights

array([[1.32543916e-01],
       [2.28943864e-01],
       [3.63522452e-05],
       [9.60916752e-02],
       [4.66440625e-02],
       [2.93483768e-02],
       [5.91998923e-02],
       [1.09831067e-01],
       [1.26106330e-01],
       [1.71254464e-01]])

In [21]:
df_log_return= np.log(df_adj_close/df_adj_close.shift(1))
df_log_return

Unnamed: 0_level_0,AAPL,MSFT,GOOGL,AMZN,TSLA,META,NVDA,PYPL,ADBE,NFLX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-05-03,,,,,,,,,,
2021-05-04,-0.036027,-0.016292,-0.015592,-0.022281,-0.016636,-0.013168,-0.033270,-0.035075,-0.025671,-0.011716
2021-05-05,0.001954,-0.005341,0.003436,-0.012558,-0.003957,-0.010547,0.007446,-0.011094,-0.008206,-0.014211
2021-05-06,0.012721,0.013140,0.009707,0.010896,-0.011091,0.015747,0.004451,0.018502,-0.006349,0.006970
2021-05-07,0.005313,0.010872,0.006218,-0.004474,0.013220,-0.002942,0.019721,0.005303,0.010531,0.008551
...,...,...,...,...,...,...,...,...,...,...
2024-04-24,0.012622,0.003649,0.005482,-0.016567,0.113874,-0.005255,-0.033884,0.006343,0.008884,-0.039957
2024-04-25,0.005134,-0.024800,-0.019866,-0.016674,0.048458,-0.111617,0.036416,-0.011478,-0.007743,0.017287
2024-04-26,-0.003479,0.018079,0.097348,0.033687,-0.011168,0.004318,0.059924,0.028604,0.008665,-0.006341
2024-04-29,0.024505,-0.010067,-0.034313,0.007433,0.142427,-0.024364,0.000251,0.015495,-0.009446,-0.003105


In [22]:
expected_return= df_log_return.mean().dot(weights)*252
expected_return

array([0.03441206])

In [23]:
expectec_volatility= np.sqrt(np.dot(weights.T, np.dot(df_log_return.cov()*252, weights)))
expectec_volatility

array([[0.29553863]])

In [24]:
sharpe_ratio= expected_return/expectec_volatility
sharpe_ratio

array([[0.11643846]])

# 04. Simulation

In [25]:
# number of simulation
n = 50_000
# n = 10

port_weights = np.zeros(shape=(n,len(df_adj_close.columns)))
port_volatility = np.zeros(n)
port_sr = np.zeros(n)
port_return = np.zeros(n)

num_securities = len(df_adj_close.columns)
# num_securities
for i in tqdm(range(n)):
    # Weight each security
    weights = np.random.random(len(tickers))
    # normalize it, so that some is one
    weights /= np.sum(weights)
    port_weights[i,:] = weights 
    #     print(f'Normalized Weights : {weights.flatten()}')

    # Expected return (weighted sum of mean returns). Mult by 252 as we always do annual calculation and year has 252 business days
    exp_ret = df_log_return.mean().dot(weights)*252 
    port_return[i] = exp_ret
#     print(f'\nExpected return is : {exp_ret[0]}')

    # Exp Volatility (Risk)
    exp_vol = np.sqrt(weights.T.dot(252*df_log_return.cov().dot(weights)))
    port_volatility[i] = exp_vol
#     print(f'\nVolatility : {exp_vol[0][0]}')

    # Sharpe ratio
    sr = exp_ret / exp_vol
    port_sr[i] = sr
#     print(f'\nSharpe ratio : {sr[0][0]}')

100%|██████████| 50000/50000 [01:45<00:00, 471.90it/s]


In [26]:
df_results= pd.DataFrame({'Return': port_return, 'Volatility': port_volatility, 'Sharpe Ratio': port_sr})
df_results

Unnamed: 0,Return,Volatility,Sharpe Ratio
0,0.056134,0.309394,0.181431
1,0.058870,0.308113,0.191066
2,0.040564,0.325345,0.124681
3,0.136544,0.304882,0.447858
4,0.101007,0.325935,0.309900
...,...,...,...
49995,-0.034261,0.296716,-0.115466
49996,0.025842,0.318007,0.081264
49997,-0.023094,0.306797,-0.075273
49998,0.122972,0.314486,0.391026


In [27]:
# Index of max Sharpe Ratio
max_sr = port_sr.max()
ind = port_sr.argmax()
# Return and Volatility at Max SR
max_sr_ret = port_return[ind]
max_sr_vol = port_volatility[ind]

In [28]:
fig= px.scatter(df_results, x='Volatility', y='Return', color='Sharpe Ratio', title='Portfolio Optimization', width=800, height=600)
#add the max SR point
fig.add_trace(px.scatter(x=[max_sr_vol], y=[max_sr_ret], color=[max_sr], size=[100]).data[0])
fig.show()

In [29]:
for weight, stock in zip(port_weights[ind],(df_adj_close.columns)):
    print(f'{round(weight * 100, 2)} % of {stock} should be bought.')
    
# best portfolio return
print(f'\nMarkowitz optimal portfolio return is : {round(max_sr_ret * 100, 2)}% with volatility \
{max_sr_vol}')

9.96 % of AAPL should be bought.
31.45 % of MSFT should be bought.
4.71 % of GOOGL should be bought.
11.94 % of AMZN should be bought.
0.38 % of TSLA should be bought.
10.77 % of META should be bought.
27.02 % of NVDA should be bought.
0.81 % of PYPL should be bought.
1.99 % of ADBE should be bought.
0.97 % of NFLX should be bought.

Markowitz optimal portfolio return is : 22.97% with volatility 0.32136964288910796
