In [2]:
import yfinance as yf
import pandas as pd
import numpy as np

In [3]:
# Download 5 years of price data for the current S&P 500 constituents.

# Ticker symbols come from the first table on the Wikipedia constituents page.
sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
# Yahoo Finance uses dashes instead of dots (e.g. BRK.B -> BRK-B).
# regex=False forces a literal match: as a regular expression '.' means
# "any character", and the default value of `regex` differs between pandas versions.
sp500['Symbol'] = sp500['Symbol'].str.replace('.', '-', regex=False)
tickers = sp500['Symbol'].unique().tolist()  # All unique tickers
end_date = pd.to_datetime('2024-03-31')  # End date is the last date of FY23-24
start_date = end_date - pd.DateOffset(years=5)  # Taking 5 years of data
# stack() moves the ticker level of the downloaded columns into the row index,
# leaving one row per (date, ticker) pair and one column per price field.
df = yf.download(tickers, start=start_date, end=end_date).stack()
df.index.names = ['date', 'ticker']  # Renaming index
df.columns = df.columns.str.lower()  # Lowercasing columns
df


[*********************100%%**********************]  503 of 503 completed


2 Failed downloads:
['SOLV', 'GEV']: Exception("%ticker%: Data doesn't exist for startDate = 1554004800, endDate = 1711857600")





Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,close,high,low,open,volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-04-01,A,78.727043,81.559998,81.769997,80.959999,80.959999,1522700.0
2019-04-01,AAL,31.922014,32.349998,32.730000,31.700001,31.980000,7438200.0
2019-04-01,AAPL,46.026638,47.810001,47.919998,47.095001,47.910000,111448000.0
2019-04-01,ABBV,63.534752,80.779999,81.110001,79.970001,80.980003,4759400.0
2019-04-01,ABT,72.875168,79.660004,80.739998,79.209999,80.669998,4842000.0
...,...,...,...,...,...,...,...
2024-03-28,XYL,129.240005,129.240005,130.220001,129.149994,129.559998,953200.0
2024-03-28,YUM,138.649994,138.649994,138.830002,137.389999,137.389999,1770900.0
2024-03-28,ZBH,131.979996,131.979996,133.899994,131.600006,132.929993,1425300.0
2024-03-28,ZBRA,301.440002,301.440002,302.630005,298.040009,300.239990,376900.0


In [4]:
# Write the (date, ticker)-indexed price table to a CSV file in the working directory.
df.to_csv(path_or_buf='sp500_data.csv')

In [5]:
# Getting beta for each stock from the 'beta' field of its Yahoo Finance info
# (presumably the 5-year beta -- TODO confirm against Yahoo's definition).

# Start every ticker at NaN so an interrupted run never leaves a
# plausible-looking 0 in place of a beta that was never fetched.
beta_dict = dict.fromkeys(tickers, np.nan)
for ticker in tickers:
    try:
        beta_dict[ticker] = yf.Ticker(ticker).info['beta']  # Get beta
    except Exception:
        # Best-effort lookup: a missing 'beta' key or a failed request is
        # recorded as NaN. Catching Exception rather than using a bare
        # `except:` lets KeyboardInterrupt through, so interrupting the
        # kernel actually stops this long loop.
        beta_dict[ticker] = np.nan
    print(f'{ticker}: {beta_dict[ticker]}')
beta_dict

MMM: 1.035
AOS: 1.253
ABT: 0.74
ABBV: 0.564
ACN: 1.181
ADBE: 1.272
AMD: 1.635
AES: 1.087
AFL: 0.914
A: 1.122
APD: 0.812
ABNB: 1.254
AKAM: 0.703
ALB: 1.646
ARE: 1.068
ALGN: 1.642
ALLE: 1.134
LNT: 0.54
ALL: 0.513
GOOGL: 1.054
GOOG: 1.054
MO: 0.695
AMZN: 1.16
AMCR: 0.844
AEE: 0.449
AAL: 1.569
AEP: 0.508
AXP: 1.227
AIG: 1.028
AMT: 0.674
AWK: 0.624
AMP: 1.347
AME: 1.216
AMGN: 0.595
APH: 1.31
ADI: 1.164
ANSS: 1.15
AON: 0.853
APA: 3.306
AAPL: 1.276
AMAT: 1.583
APTV: 1.904
ACGL: 0.573
ADM: 0.769
ANET: 1.108
AJG: 0.68
AIZ: 0.508
T: 0.701
ATO: 0.663
ADSK: 1.417
ADP: 0.785
AZO: 0.705
AVB: 0.933
AVY: 0.921
AXON: 0.911
BKR: 1.445
BALL: 0.893
BAC: 1.388
BK: 1.113
BBWI: 1.876
BAX: 0.634
BDX: 0.458
BRK-B: 0.888
BBY: 1.506
BIO: 0.88
TECH: 1.216
BIIB: -0.009
BLK: 1.37
BX: 1.486
BA: 1.527
BKNG: 1.402
BWA: 1.285
BXP: 1.165
BSX: 0.793
BMY: 0.39
AVGO: 1.268
BR: 0.982
BRO: 0.781
BF-B: 0.77
BLDR: 2.057
BG: 0.651
CDNS: 1.032
CZR: 2.813
CPT: 0.851
CPB: 0.23
COF: 1.471
CAH: 0.697
KMX: 1.598
CCL: 2.551
CARR: 1.25

{'MMM': 1.035,
 'AOS': 1.253,
 'ABT': 0.74,
 'ABBV': 0.564,
 'ACN': 1.181,
 'ADBE': 1.272,
 'AMD': 1.635,
 'AES': 1.087,
 'AFL': 0.914,
 'A': 1.122,
 'APD': 0.812,
 'ABNB': 1.254,
 'AKAM': 0.703,
 'ALB': 1.646,
 'ARE': 1.068,
 'ALGN': 1.642,
 'ALLE': 1.134,
 'LNT': 0.54,
 'ALL': 0.513,
 'GOOGL': 1.054,
 'GOOG': 1.054,
 'MO': 0.695,
 'AMZN': 1.16,
 'AMCR': 0.844,
 'AEE': 0.449,
 'AAL': 1.569,
 'AEP': 0.508,
 'AXP': 1.227,
 'AIG': 1.028,
 'AMT': 0.674,
 'AWK': 0.624,
 'AMP': 1.347,
 'AME': 1.216,
 'AMGN': 0.595,
 'APH': 1.31,
 'ADI': 1.164,
 'ANSS': 1.15,
 'AON': 0.853,
 'APA': 3.306,
 'AAPL': 1.276,
 'AMAT': 1.583,
 'APTV': 1.904,
 'ACGL': 0.573,
 'ADM': 0.769,
 'ANET': 1.108,
 'AJG': 0.68,
 'AIZ': 0.508,
 'T': 0.701,
 'ATO': 0.663,
 'ADSK': 1.417,
 'ADP': 0.785,
 'AZO': 0.705,
 'AVB': 0.933,
 'AVY': 0.921,
 'AXON': 0.911,
 'BKR': 1.445,
 'BALL': 0.893,
 'BAC': 1.388,
 'BK': 1.113,
 'BBWI': 1.876,
 'BAX': 0.634,
 'BDX': 0.458,
 'BRK-B': 0.888,
 'BBY': 1.506,
 'BIO': 0.88,
 'TECH': 1.216

In [7]:
# Turn the ticker -> beta mapping into a one-column frame:
# tickers become the row index, values go into the 'beta' column.
beta_df = pd.DataFrame.from_dict(
    data=beta_dict,
    orient='index',
    columns=['beta'],
)
# Write the beta table to a CSV file in the working directory.
beta_df.to_csv(path_or_buf='sp500_beta.csv')