In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from matplotlib import pyplot as plt

In [10]:
# Downloading S&P 500 data for the past 5 years
sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0] # Taking the ticker symbols from wikipedia
sp500['Symbol'] = sp500['Symbol'].str.replace('.', '-') # Yahoo Finance uses dashes instead of dots
tickers = sp500['Symbol'].unique().tolist() # Get all unique tickers
end_date = pd.to_datetime('2024-03-31') # End date is the last date of FY23-24
start_date = end_date - pd.DateOffset(years=5) # Taking 5 years of data
df = yf.download(tickers, start=start_date, end=end_date) # Downloading data

[*********************100%%**********************]  503 of 503 completed


8 Failed downloads:
['PSX', 'SWKS', 'XYL', 'IDXX', 'WRK', 'CZR']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2019-03-31 00:00:00 -> 2024-03-31 00:00:00)')
['SOLV', 'GEV']: Exception("%ticker%: Data doesn't exist for startDate = 1554004800, endDate = 1711857600")





In [11]:
adj_close = df['Adj Close'] # Taking the adjusted close prices
adj_close = adj_close.dropna(axis=1) # Dropping columns with all NaN values
adj_close.to_csv('sp500_adj_close.csv') # Saving the data to a csv file
print("Shape of the data in (Days, Companies):", adj_close.shape)

Shape of the data in (Days, Companies): (1258, 486)


In [27]:
# Finding lognormal daily returns
daily_returns = np.log(adj_close / adj_close.shift(1)).dropna() # Calculating lognormal daily returns
daily_returns.to_csv('sp500_daily_returns.csv') # Saving the data to a csv file
# Tickers used
tick_taken = daily_returns.columns
daily_returns

Unnamed: 0_level_0,A,AAL,AAPL,ABBV,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WST,WTW,WY,WYNN,XEL,XOM,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-04-02,-0.005163,0.019591,0.014432,0.027954,-0.000502,-0.007376,-0.005403,-0.003017,-0.001571,-0.009890,...,-0.004573,0.006171,0.002260,0.043052,0.000180,-0.004291,-0.003985,-0.002037,0.008037,0.004912
2019-04-03,0.009811,0.021590,0.006831,0.000120,-0.001508,0.001849,0.010325,0.000553,0.015875,-0.000947,...,0.004752,0.003492,0.003755,0.019072,-0.003785,-0.005916,0.003587,-0.008032,0.006625,0.000784
2019-04-04,-0.013639,0.006505,0.001739,-0.003255,-0.011131,-0.000308,-0.000169,-0.013386,0.006353,0.012007,...,0.000537,0.003873,0.005979,0.014714,-0.001627,0.014115,-0.000896,0.000632,-0.004801,-0.001372
2019-04-05,0.007887,0.003824,0.006672,0.007699,0.004822,0.003689,0.005573,-0.001644,0.008288,0.004670,...,0.008989,-0.002412,0.020285,0.009052,0.007748,0.005348,-0.004890,0.005123,0.019985,0.001372
2019-04-08,0.002697,-0.005299,0.015614,0.006331,-0.006094,0.002758,0.004257,0.005072,0.002509,0.003720,...,0.002301,0.002075,-0.011013,0.027365,-0.007206,0.006164,-0.003708,0.012111,0.014144,0.001077
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-22,-0.008711,0.002703,0.005296,0.005338,-0.008465,-0.000110,-0.022066,-0.023211,-0.008439,-0.007552,...,-0.004989,-0.004747,0.002249,-0.000701,-0.000574,0.000000,-0.002493,0.001023,-0.012345,0.003440
2024-03-25,-0.012901,0.006725,-0.008335,0.000448,-0.005077,0.003413,-0.010964,0.015317,-0.014995,0.008192,...,-0.014138,0.001865,-0.003376,-0.004817,0.004010,0.010169,-0.011889,0.005566,0.022058,-0.006474
2024-03-26,-0.007932,0.000000,-0.006695,0.003690,0.013453,-0.004406,0.007669,0.000729,-0.008799,-0.011100,...,-0.000438,0.001533,-0.001128,0.011701,-0.013042,-0.007529,0.001336,-0.000313,-0.022264,-0.012835
2024-03-27,0.020290,0.025150,0.020991,0.006453,0.017602,0.009777,0.013435,-0.006324,0.022863,0.018749,...,0.015033,0.009687,0.013449,0.011958,0.030046,0.010317,0.018522,0.038277,0.026350,0.016997


In [15]:
# Creating the covariance matrix
cov_matrix = daily_returns.cov() # Calculating the covariance matrix
cov_matrix.to_csv('sp500_cov_matrix.csv') # Saving the data to a csv file
cov_matrix

Unnamed: 0,A,AAL,AAPL,ABBV,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WST,WTW,WY,WYNN,XEL,XOM,YUM,ZBH,ZBRA,ZTS
A,0.000347,0.000216,0.000197,0.000108,0.000183,0.000165,0.000198,0.000226,0.000232,0.000139,...,0.000209,0.000148,0.000240,0.000232,0.000112,0.000129,0.000129,0.000161,0.000266,0.000205
AAL,0.000216,0.001448,0.000240,0.000097,0.000126,0.000324,0.000244,0.000203,0.000320,0.000253,...,0.000070,0.000220,0.000468,0.000720,0.000074,0.000324,0.000243,0.000326,0.000352,0.000168
AAPL,0.000197,0.000240,0.000400,0.000106,0.000167,0.000165,0.000227,0.000311,0.000275,0.000136,...,0.000191,0.000161,0.000261,0.000276,0.000127,0.000136,0.000143,0.000157,0.000288,0.000206
ABBV,0.000108,0.000097,0.000106,0.000254,0.000116,0.000125,0.000112,0.000111,0.000116,0.000095,...,0.000096,0.000092,0.000139,0.000136,0.000085,0.000111,0.000082,0.000111,0.000112,0.000114
ABT,0.000183,0.000126,0.000167,0.000116,0.000273,0.000141,0.000166,0.000185,0.000174,0.000113,...,0.000180,0.000137,0.000193,0.000130,0.000131,0.000085,0.000118,0.000147,0.000189,0.000178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XOM,0.000129,0.000324,0.000136,0.000111,0.000085,0.000227,0.000148,0.000105,0.000193,0.000229,...,0.000064,0.000127,0.000281,0.000350,0.000091,0.000467,0.000139,0.000187,0.000192,0.000108
YUM,0.000129,0.000243,0.000143,0.000082,0.000118,0.000175,0.000158,0.000138,0.000169,0.000127,...,0.000110,0.000141,0.000229,0.000257,0.000112,0.000139,0.000261,0.000172,0.000167,0.000151
ZBH,0.000161,0.000326,0.000157,0.000111,0.000147,0.000202,0.000184,0.000155,0.000202,0.000148,...,0.000133,0.000151,0.000249,0.000329,0.000102,0.000187,0.000172,0.000387,0.000210,0.000157
ZBRA,0.000266,0.000352,0.000288,0.000112,0.000189,0.000218,0.000266,0.000313,0.000339,0.000187,...,0.000211,0.000180,0.000349,0.000366,0.000110,0.000192,0.000167,0.000210,0.000701,0.000236


In [17]:
def expected_return(weights, daily_returns): # Function to calculate the returns
    return np.sum(daily_returns.mean() * weights) * 252 # Annualizing the returns (252 trading days in a year)

def expected_volatility(weights, cov_matrix): # Function to calculate the volatility
    return np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights))) # Annualizing the volatility

risk_free_rate = 0.0425 # Risk-free rate taken from average long term US treasury bond yield, usually considered risk-free

def neg_sharpe(weights, cov_matrix, daily_returns, risk_free_rate): # Function to calculate the negative Sharpe ratio
    return -((expected_return(weights, daily_returns) - risk_free_rate) / expected_volatility(weights, cov_matrix))

In [34]:
initial_weights = np.array([1/len(adj_close.columns)] * len(adj_close.columns)) # Initial weights for the optimization
bounds = [(0.0001, 0.1)] * len(adj_close.columns) # Bounds for the weights, 0 implies no shorting allowed
constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1}) # Constraints for the weights, that they should sum to 1

In [35]:
# Optimizing the portfolio
optimal_weights = minimize(neg_sharpe, initial_weights, args=(cov_matrix, daily_returns, risk_free_rate), method='SLSQP', bounds=bounds, constraints=constraints)



In [36]:
optimal_weights.x

array([0.0001    , 0.0001    , 0.0001    , 0.08372654, 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.05978355,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001    ,
       0.0001    , 0.0001    , 0.0001    , 0.0001    , 0.0001 

In [37]:
# Analysing the optimal portfolio
print("Portfolio Analysis\n")
exp_return = expected_return(optimal_weights.x, daily_returns)
exp_volatility = expected_volatility(optimal_weights.x, cov_matrix)
sharpe_ratio = (exp_return - risk_free_rate) / exp_volatility
print(f"Expected Return: {exp_return:.2%}")
print(f"Expected Volatility: {exp_volatility:.2%}")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

Portfolio Analysis

Expected Return: 37.45%
Expected Volatility: 1.40%
Sharpe Ratio: 23.74


In [39]:
# Creating a dataframe for the optimal portfolio
optimal_portfolio = pd.DataFrame(index=tick_taken)
optimal_portfolio['Optimal Weights (%)'] = optimal_weights.x * 100
optimal_portfolio.to_csv('optimal_portfolio.csv') # Saving the data to a csv file