In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

In [2]:
# Downloading S&P 500 data for the past 5 years

sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0] # Taking the ticker symbols from wikipedia
sp500['Symbol'] = sp500['Symbol'].str.replace('.', '-') # Yahoo Finance uses dashes instead of dots
tickers = sp500['Symbol'].unique().tolist() # Get all unique tickers
end_date = pd.to_datetime('2024-03-31') # End date is the last date of FY23-24
start_date = end_date - pd.DateOffset(months=61) # Taking 5 years of data
df = yf.download(tickers, start=start_date, end=end_date).stack() # Downloading data
df.index.names = ['date', 'ticker'] # Renaming index
df.columns = df.columns.str.lower() # Lowercasing columns
df


[*********************100%%**********************]  503 of 503 completed


2 Failed downloads:





['SOLV', 'GEV']: Exception("%ticker%: Data doesn't exist for startDate = 1551330000, endDate = 1711857600")


Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,close,high,low,open,volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-02-28,A,76.524223,79.440002,79.750000,78.889999,79.180000,1759100.0
2019-02-28,AAL,35.158627,35.630001,35.889999,35.169998,35.630001,7873800.0
2019-02-28,AAPL,41.672821,43.287498,43.727501,43.230000,43.580002,112861600.0
2019-02-28,ABBV,62.323521,79.239998,79.709999,78.750000,79.459999,6310800.0
2019-02-28,ABT,71.008911,77.620003,78.059998,77.010002,77.010002,6762700.0
...,...,...,...,...,...,...,...
2024-03-28,XYL,129.240005,129.240005,130.220001,129.149994,129.559998,953200.0
2024-03-28,YUM,138.649994,138.649994,138.830002,137.389999,137.389999,1770900.0
2024-03-28,ZBH,131.979996,131.979996,133.899994,131.600006,132.929993,1425300.0
2024-03-28,ZBRA,301.440002,301.440002,302.630005,298.040009,300.239990,376900.0


In [3]:
df.to_csv('sp500_data.csv') # Saving data to a CSV file

In [4]:
# Getting 5y beta for the stocks
beta_dict = {ticker:0 for ticker in tickers} # Initialize dictionary with 0s
for ticker in tickers:
    try:
        beta_dict[ticker] = yf.Ticker(ticker).info['beta'] # Get beta
    except:
        beta_dict[ticker] = np.nan # If beta is not available, set it to NaN

In [5]:
beta_df = pd.DataFrame.from_dict(beta_dict, orient='index', columns=['beta']) # Convert dictionary to DataFrame
beta_df.to_csv('sp500_beta.csv') # Saving data to a CSV file

In [6]:
# Downsampling data to monthly frequency and saving it to a CSV file
df.unstack().resample('M').last().stack()

Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,close,high,low,open,volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-02-28,A,76.524223,79.440002,79.750000,78.889999,79.180000,1759100.0
2019-02-28,AAL,35.158627,35.630001,35.889999,35.169998,35.630001,7873800.0
2019-02-28,AAPL,41.672821,43.287498,43.727501,43.230000,43.580002,112861600.0
2019-02-28,ABBV,62.323521,79.239998,79.709999,78.750000,79.459999,6310800.0
2019-02-28,ABT,71.008911,77.620003,78.059998,77.010002,77.010002,6762700.0
...,...,...,...,...,...,...,...
2024-03-31,XYL,129.240005,129.240005,130.220001,129.149994,129.559998,953200.0
2024-03-31,YUM,138.649994,138.649994,138.830002,137.389999,137.389999,1770900.0
2024-03-31,ZBH,131.979996,131.979996,133.899994,131.600006,132.929993,1425300.0
2024-03-31,ZBRA,301.440002,301.440002,302.630005,298.040009,300.239990,376900.0


In [7]:
df_ac = pd.DataFrame(df['adj close'])
df_ac.index.names = ['date', 'ticker']

In [8]:
# Getting monthly returns for each stock from adjusted close prices
df_ac['monthly_return'] = df_ac.groupby('ticker')['adj close'].pct_change()

In [9]:
# Removing the rows before 01-04-2019
df_ac = df_ac.loc[pd.Timestamp('2019-04-01'):].copy()
df_ac

Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,monthly_return
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-01,A,78.727051,0.016755
2019-04-01,AAL,31.922014,0.018577
2019-04-01,AAPL,46.026638,0.006791
2019-04-01,ABBV,63.534752,0.002358
2019-04-01,ABT,72.875153,-0.003503
...,...,...,...
2024-03-28,XYL,129.240005,-0.001082
2024-03-28,YUM,138.649994,0.009685
2024-03-28,ZBH,131.979996,-0.004751
2024-03-28,ZBRA,301.440002,0.007621


In [10]:
# Exporting the monthly returns to a CSV file
df_ac.to_csv('sp500_monthly_return.csv')