In [None]:
import os
from pathlib import Path

# Project layout, resolved from the kernel's current working directory:
# the project root is taken to be the directory one level above it.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = NOTEBOOK_DIR.parent

# Data locations are kept as plain strings (os.path.join returns str).
DATA_DIR = os.path.join(os.fspath(PROJECT_ROOT), "data")
RAW_EQUITIES_DIR = os.path.join(os.fspath(PROJECT_ROOT), "data", "raw", "equities")

# Echo the resolved locations, one per line, as a quick sanity check.
print(PROJECT_ROOT, RAW_EQUITIES_DIR, sep="\n")

In [None]:
import sys
# Printing the interpreter path first means it is shown even if one of the
# third-party imports below fails.
print(sys.executable)
import requests
import pandas as pd
from io import StringIO

url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
}

# Without a timeout, requests waits indefinitely on a stalled connection and
# the cell would never finish; fail after 30 seconds instead.
resp = requests.get(url, headers=headers, timeout=30)
resp.raise_for_status()

html = resp.text
tables = pd.read_html(StringIO(html))

# Select the table by content rather than by position: the first parsed table
# that has a "Symbol" column. A positional index breaks silently whenever a
# table is added to or removed from the page.
constituents = next((table for table in tables if "Symbol" in table.columns), None)
if constituents is None:
    raise ValueError(f"No table with a 'Symbol' column found at {url}")

# Normalise the symbols: upper-case, trimmed, and with "." replaced by "-"
# (presumably to match the ticker format the download step expects - confirm).
tickers = (
    constituents["Symbol"]
    .astype(str)
    .str.upper()
    .str.strip()
    .str.replace(".", "-", regex=False)
)

print(list(tickers))


In [None]:
import yfinance as yf

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists before the first write.
os.makedirs(RAW_EQUITIES_DIR, exist_ok=True)

# Download each ticker separately and write it to its own CSV file.
for ticker in tickers:
    # 20 years of daily rows, requested with auto_adjust=False.
    df = yf.download(
        ticker,
        period="20y",
        interval="1d",
        auto_adjust=False,
        progress=False,
    )

    # An empty result is reported and skipped; no file is written for it.
    if df.empty:
        print(f"Warning: no data for {ticker}")
        continue

    out_path = os.path.join(RAW_EQUITIES_DIR, f"{ticker}.csv")
    df.to_csv(out_path)
    print(f"Saved {ticker} to {out_path}")
    
