In [83]:
import yfinance as yf
import os, contextlib
import pandas as pd
from pathlib import Path
import tqdm

**Get SP500 list**

In [None]:
def get_sp500_tickers_alternative():

    # fetch SP500
    csv_url = 'https://raw.githubusercontent.com/datasets/s-and-p-500-companies/master/data/constituents.csv'
    df = pd.read_csv(csv_url)

    tickers = df['Symbol'].tolist()
    tickers = [ticker.replace('.', '-') for ticker in tickers]
    
    return tickers


sp500_symbols = get_sp500_tickers_alternative()

# 打印结果进行验证
print("length of sp500_symbols:", len(sp500_symbols))
print(sp500_symbols[:10])

length of sp500_symbols: 503
['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A']


**Use yfinance API to get the stock data in SP500 list from 2021 to now**

In [86]:
outfile = Path('sp500_yfinance_data.csv')
if outfile.exists():
    outfile.unlink()

In [None]:
# force silencing of verbose API
with open(os.devnull, 'w') as devnull:
    with contextlib.redirect_stdout(devnull):
        for i in tqdm.tqdm(range(3)):
            s = sp500_symbols[i]
            # avoid multi-level index
            data = yf.download(s, start='2021-01-01', end='2025-11-07', multi_level_index = False)
            if len(data.index) == 0:
                continue
            df = data.reset_index()
            # insert code column
            df.insert(0, "code", s)
            
            # append to csv, only write header if file does not exist
            df.to_csv(outfile, index=False, mode="a", header=not outfile.exists())
            
print(f"All data saved to: {outfile.resolve()}")

  data = yf.download(s, start='2021-01-01', end='2025-11-07', multi_level_index = False)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(s, start='2021-01-01', end='2025-11-07', multi_level_index = False)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(s, start='2021-01-01', end='2025-11-07', multi_level_index = False)
[*********************100%***********************]  1 of 1 completed
100%|██████████| 3/3 [00:00<00:00,  4.30it/s]

All data saved to: /Users/zhangyici/Desktop/研一 Fall/DB/Final_Project/StockScope/etl/sp500_yfinance_data.csv





In [None]:
test = sp500_symbols[0]
data = yf.download(test, start='2021-01-01', end='2025-11-07', multi_level_index = False)
df = data.reset_index()
df.insert(0, "code", test)
df.to_csv('test.csv', index = False, header=True)

  data = yf.download(test, start='2021-01-01', end='2025-11-07', multi_level_index = False)
[*********************100%***********************]  1 of 1 completed
