In [14]:
import requests
import pandas as pd
from datetime import datetime, timedelta

In [15]:
# 1. Request with a browser-like User-Agent
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}

response = requests.get(url, headers=headers)
response.raise_for_status()  # check for errors

# 2. Parse HTML into pandas
tables = pd.read_html(response.text)
df_sp500 = tables[0]  # first table has the constituent list
df_sp500.sample(20)

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
7,AES,AES Corporation,Utilities,Independent Power Producers & Energy Traders,"Arlington, Virginia",1998-10-02,874761,1981
25,AEP,American Electric Power,Utilities,Electric Utilities,"Columbus, Ohio",1957-03-04,4904,1906
222,GDDY,GoDaddy,Information Technology,Internet Services & Infrastructure,"Tempe, Arizona",2024-06-24,1609711,1997
97,CRL,Charles River Laboratories,Health Care,Life Sciences Tools & Services,"Wilmington, Massachusetts",2021-05-14,1100682,1947
469,VRSN,Verisign,Information Technology,Internet Services & Infrastructure,"Reston, Virginia",2006-02-01,1014473,1995
334,NEE,NextEra Energy,Utilities,Multi-Utilities,"Juno Beach, Florida",1976-06-30,753308,1984 (1925)
361,PAYX,Paychex,Industrials,Human Resource & Employment Services,"Penfield, New York",1998-10-01,723531,1971
108,C,Citigroup,Financials,Diversified Banks,"New York City, New York",1988-05-31,831001,1998
455,TSN,Tyson Foods,Consumer Staples,Packaged Foods & Meats,"Springdale, Arkansas",2005-08-10,100493,1935
319,TAP,Molson Coors Beverage Company,Consumer Staples,Brewers,"Chicago, Illinois",1976-06-30,24545,"2005 (Molson 1786, Coors 1873)"


In [30]:
tickers = df_sp500["Symbol"].tolist()
print(f"Fetched {len(tickers)} tickers!")
tickers[:25]

Fetched 503 tickers!


['MMM',
 'AOS',
 'ABT',
 'ABBV',
 'ACN',
 'ADBE',
 'AMD',
 'AES',
 'AFL',
 'A',
 'APD',
 'ABNB',
 'AKAM',
 'ALB',
 'ARE',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE']

In [31]:
import pandas_datareader.data as web
from datetime import datetime

# Stooq requires '.US' suffix for American stocks
tickers = [ticker.replace('.', '-') + '.US' for ticker in tickers]
start = datetime(2020, 1, 1)
end = datetime.now()

In [32]:
tickers[:25]

['MMM.US',
 'AOS.US',
 'ABT.US',
 'ABBV.US',
 'ACN.US',
 'ADBE.US',
 'AMD.US',
 'AES.US',
 'AFL.US',
 'A.US',
 'APD.US',
 'ABNB.US',
 'AKAM.US',
 'ALB.US',
 'ARE.US',
 'ALGN.US',
 'ALLE.US',
 'LNT.US',
 'ALL.US',
 'GOOGL.US',
 'GOOG.US',
 'MO.US',
 'AMZN.US',
 'AMCR.US',
 'AEE.US']

In [35]:
dfs = []
failed_tickers = []

print("Downloading from Stooq...\n")

for ticker in tickers:
    try:
        df = web.DataReader(ticker, "stooq", start, end)

        # Stooq returns newest → oldest
        df = df.sort_index()

        # Add ticker column
        df["ticker"] = ticker

        dfs.append(df)
        print(f"✅ Success: {ticker}")

    except Exception as e:
        failed_tickers.append(ticker)
        print(f"❌ Failed: {ticker} | {type(e).__name__}")

# Concatenate by rows
if dfs:
    final_df = pd.concat(dfs, axis=0)
else:
    final_df = pd.DataFrame()

Downloading from Stooq...

✅ Success: MMM.US
✅ Success: AOS.US
✅ Success: ABT.US
✅ Success: ABBV.US
✅ Success: ACN.US
✅ Success: ADBE.US
✅ Success: AMD.US
✅ Success: AES.US
✅ Success: AFL.US
✅ Success: A.US
✅ Success: APD.US
❌ Failed: ABNB.US | ReadTimeout
✅ Success: AKAM.US
✅ Success: ALB.US
✅ Success: ARE.US
✅ Success: ALGN.US
✅ Success: ALLE.US
✅ Success: LNT.US
✅ Success: ALL.US
✅ Success: GOOGL.US
✅ Success: GOOG.US
❌ Failed: MO.US | ReadTimeout
✅ Success: AMZN.US
✅ Success: AMCR.US
✅ Success: AEE.US
✅ Success: AEP.US
✅ Success: AXP.US
✅ Success: AIG.US
✅ Success: AMT.US
✅ Success: AWK.US
✅ Success: AMP.US
✅ Success: AME.US
✅ Success: AMGN.US
✅ Success: APH.US
✅ Success: ADI.US
✅ Success: AON.US
✅ Success: APA.US
✅ Success: APO.US
✅ Success: AAPL.US
✅ Success: AMAT.US
✅ Success: APP.US
✅ Success: APTV.US
✅ Success: ACGL.US
✅ Success: ADM.US
✅ Success: ARES.US
❌ Failed: ANET.US | ReadTimeout
✅ Success: AJG.US
✅ Success: AIZ.US
✅ Success: T.US
✅ Success: ATO.US
✅ Success: ADSK.US
✅ 

In [36]:
print("\nSummary")
print("--------")
print(f"Successful: {len(dfs)}")
print(f"Failed: {len(failed_tickers)}")

final_df


Summary
--------
Successful: 494
Failed: 9


Unnamed: 0,ticker,Open,High,Low,Close,Volume
2020-01-02,AES.US,18.3302,18.4467,18.2463,18.3835,3.254986e+06
2020-01-03,AES.US,18.2551,18.3933,18.1180,18.1732,5.828317e+06
2020-01-06,AES.US,18.1269,18.3933,18.1180,18.3835,4.741658e+06
2020-01-07,AES.US,18.3766,18.4664,18.2463,18.4467,3.530769e+06
2020-01-08,AES.US,18.4852,18.5859,18.2551,18.4131,4.877053e+06
...,...,...,...,...,...,...
2026-01-21,ZTS.US,123.6750,127.1200,123.6750,125.0800,5.201868e+06
2026-01-22,ZTS.US,124.2800,126.4700,123.7300,124.4000,4.715418e+06
2026-01-23,ZTS.US,124.3900,124.8100,123.5100,124.0500,3.339665e+06
2026-01-26,ZTS.US,123.9100,124.9800,122.9700,123.9200,4.262127e+06


In [None]:
# TODO: fix why most tickers are not here!
final_df.ticker.value_counts()

AES.US     1525
ALLE.US    1525
GOOG.US    1525
T.US       1525
BSX.US     1525
BXP.US     1525
CVNA.US    1525
KMB.US     1525
POOL.US    1525
PFG.US     1525
STX.US     1525
TJX.US     1525
GWW.US     1525
WAT.US     1525
ZTS.US     1525
APP.US     1202
Name: ticker, dtype: int64