In [1]:
from io import StringIO
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import yfinance as yf

In [2]:
def get_sp500_tickers():
    """Scrape the S&P 500 constituent symbols from Wikipedia.

    Downloads the "List of S&P 500 companies" page, parses its HTML tables
    with pandas and reads the 'Symbol' column of the first table.

    Returns:
        list: ticker symbols with every '.' replaced by '-'
        (e.g. 'BRK.B' becomes 'BRK-B').

    Raises:
        requests.exceptions.HTTPError: if the page answers with a 4xx/5xx
            status. The error is printed and then re-raised so that an
            error page is never handed to the HTML parser.
        requests.exceptions.RequestException: for connection problems or
            when the request exceeds the timeout.
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

    # makes the request pretend to be a regular Chrome browser on macOS
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }

    # a timeout keeps a stalled connection from hanging the kernel forever
    response = requests.get(url, headers=headers, timeout=30)

    # check if the request is successful; stop here if it is not, because
    # parsing an error page would only fail later with a misleading message
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        print('Request is unsuccessful:', e)
        raise
    print('Request is successful!')

    # response.text contains the full HTML of the webpage as a string
    # StringIO(response.text) makes the HTML string look like a file that pandas can read
    # read_html scans the HTML for <table> tags and returns one DataFrame per table
    tables = pd.read_html(StringIO(response.text))

    # the constituents table is the first table on the page
    sp500_table = tables[0]

    # regex=False tells pandas to treat '.' literally instead of as a regex
    # wildcard (which would replace every character)
    tickers = sp500_table['Symbol'].str.replace('.', '-', regex=False).tolist()

    print('Successfully retrieved {} S&P 500 tickers.'.format(len(tickers)))
    return tickers

In [3]:
# Fetch the constituent list once; the download cell below reuses it.
sp500_tickers = get_sp500_tickers()

# Sanity check: show only the head of the list rather than every symbol.
print('First 10 tickers:', sp500_tickers[0:10])

Request is successful!
Successfully retrieved 503 S&P 500 tickers.
First 10 tickers: ['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A']


In [4]:
# Make sure the output folder exists *before* the long download, so a missing
# 'data/' directory cannot make to_csv fail after all tickers were fetched.
Path('data').mkdir(parents=True, exist_ok=True)

# Daily 'Close' prices for every constituent, one column per ticker.
# auto_adjust=True asks yfinance for adjusted OHLC prices
# (NOTE(review): yfinance treats `end` as exclusive — confirm the last row is 2025-10-17).
data = yf.download(sp500_tickers, start='2022-10-17', end='2025-10-18', auto_adjust=True)['Close']
data.to_csv('data/sp500_3yrs.csv')

[*********************100%***********************]  503 of 503 completed


In [12]:
# Make sure the output folder exists so to_csv below cannot fail on a fresh checkout.
Path('data').mkdir(parents=True, exist_ok=True)

# Price history of the S&P 500 index itself (^GSPC) over the same date window
# as the constituent download.
index_data = yf.download('^GSPC', start='2022-10-17', end='2025-10-18', auto_adjust=True)

# yf.download may return two-level columns even for a single symbol;
# keep only the first level so the CSV gets a flat, single-row header.
if isinstance(index_data.columns, pd.MultiIndex):
    index_data.columns = index_data.columns.get_level_values(0)

index_data.to_csv('data/index_sp500_3yrs.csv')

[*********************100%***********************]  1 of 1 completed
