In [1]:
import os
import time
import pandas as pd
import requests
from datetime import datetime

# === Config ===
API_KEY = "68294627319de7.73423601"
SAVE_DIR = r"C:\Users\flass\OneDrive\AI Financial Model\S&P 500 Chatgpt Version"
os.makedirs(SAVE_DIR, exist_ok=True)

# === Step 1: Get S&P 500 Tickers ===
print("🔍 Fetching S&P 500 tickers from Wikipedia...")
sp500_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
sp500 = pd.read_html(sp500_url)[0]
sp500["Symbol"] = sp500["Symbol"].str.replace(".", "-", regex=False)  # EODHD format
sp500_tickers = sp500[["Symbol", "Security", "GICS Sector", "GICS Sub-Industry"]]
sp500_tickers.columns = ["ticker", "name", "sector", "industry"]
sp500_tickers.to_csv(os.path.join(SAVE_DIR, "sp500_tickers.csv"), index=False)

# === Step 2: Download Daily Price Data ===
print("📈 Downloading daily prices for all tickers...")
df_prices = []
for i, row in sp500_tickers.iterrows():
    ticker = row["ticker"]
    url = f"https://eodhistoricaldata.com/api/eod/{ticker}.US?api_token={API_KEY}&period=d&from=1994-01-01&fmt=json"
    try:
        r = requests.get(url)
        r.raise_for_status()
        data = pd.DataFrame(r.json())
        data["ticker"] = ticker
        df_prices.append(data)
        print(f"✅ {ticker} ({i+1}/{len(sp500_tickers)})")
    except Exception as e:
        print(f"❌ {ticker} - {e}")
    time.sleep(1.2)  # Respect API rate limit

prices = pd.concat(df_prices, ignore_index=True)
prices.to_parquet(os.path.join(SAVE_DIR, "daily_prices.parquet"), index=False)

# === Step 3: Download Fundamentals ===
print("📊 Downloading fundamentals...")
fundamentals = []
metadata = []
for i, row in sp500_tickers.iterrows():
    ticker = row["ticker"]
    url = f"https://eodhistoricaldata.com/api/fundamentals/{ticker}.US?api_token={API_KEY}&fmt=json"
    try:
        r = requests.get(url)
        r.raise_for_status()
        data = r.json()
        general = data.get("General", {})
        financials = data.get("Financials", {})

        # Extract sector/industry from General
        metadata.append({
            "ticker": ticker,
            "sector": general.get("Sector", "Unknown"),
            "industry": general.get("Industry", "Unknown")
        })

        # Flatten fundamentals
        for report_type in ["Income_Statement", "Balance_Sheet", "Cash_Flow"]:
            reports = financials.get(report_type, {}).get("quarterly", {})
            for date, metrics in reports.items():
                record = {"ticker": ticker, "report_type": report_type, "date": date}
                record.update(metrics)
                fundamentals.append(record)

        print(f"✅ {ticker} ({i+1}/{len(sp500_tickers)})")
    except Exception as e:
        print(f"❌ {ticker} - {e}")
    time.sleep(1.2)

funds = pd.DataFrame(fundamentals)
funds["date"] = pd.to_datetime(funds["date"], errors="coerce")
funds.to_parquet(os.path.join(SAVE_DIR, "fundamentals.parquet"), index=False)

meta = pd.DataFrame(metadata).drop_duplicates("ticker")
meta.to_csv(os.path.join(SAVE_DIR, "ticker_metadata.csv"), index=False)

print("✅ Data download complete. All files saved to:", SAVE_DIR)


🔍 Fetching S&P 500 tickers from Wikipedia...
📈 Downloading daily prices for all tickers...
✅ MMM (1/503)
✅ AOS (2/503)
✅ ABT (3/503)
✅ ABBV (4/503)
✅ ACN (5/503)
✅ ADBE (6/503)
✅ AMD (7/503)
✅ AES (8/503)
✅ AFL (9/503)
✅ A (10/503)
✅ APD (11/503)
✅ ABNB (12/503)
✅ AKAM (13/503)
✅ ALB (14/503)
✅ ARE (15/503)
✅ ALGN (16/503)
✅ ALLE (17/503)
✅ LNT (18/503)
✅ ALL (19/503)
✅ GOOGL (20/503)
✅ GOOG (21/503)
✅ MO (22/503)
✅ AMZN (23/503)
✅ AMCR (24/503)
✅ AEE (25/503)
✅ AEP (26/503)
✅ AXP (27/503)
✅ AIG (28/503)
✅ AMT (29/503)
✅ AWK (30/503)
✅ AMP (31/503)
✅ AME (32/503)
✅ AMGN (33/503)
✅ APH (34/503)
✅ ADI (35/503)
✅ ANSS (36/503)
✅ AON (37/503)
✅ APA (38/503)
✅ APO (39/503)
✅ AAPL (40/503)
✅ AMAT (41/503)
✅ APTV (42/503)
✅ ACGL (43/503)
✅ ADM (44/503)
✅ ANET (45/503)
✅ AJG (46/503)
✅ AIZ (47/503)
✅ T (48/503)
✅ ATO (49/503)
✅ ADSK (50/503)
✅ ADP (51/503)
✅ AZO (52/503)
✅ AVB (53/503)
✅ AVY (54/503)
✅ AXON (55/503)
✅ BKR (56/503)
✅ BALL (57/503)
✅ BAC (58/503)
✅ BAX (59/503)
✅ BDX (60/503)
✅ 

ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.

In [2]:
prices.to_parquet(os.path.join(SAVE_DIR, "daily_prices.parquet"), index=False)


In [3]:
import pandas as pd
df = pd.read_parquet("C:/Users/flass/OneDrive/AI Financial Model/S&P 500 Chatgpt Version/daily_prices.parquet")
df.head()


Unnamed: 0,date,open,high,low,close,adjusted_close,volume,ticker
0,1994-01-03,108.5002,108.625,106.0,106.875,9.0438,1563411,MMM
1,1994-01-04,106.5,106.9999,105.7498,106.0,8.9698,1668181,MMM
2,1994-01-05,106.1249,106.2498,105.0002,105.7498,8.9486,1731808,MMM
3,1994-01-06,105.625,107.75,105.625,107.2501,9.0756,1777734,MMM
4,1994-01-07,107.375,108.25,107.2501,108.1251,9.1496,2627851,MMM


In [4]:
funds = pd.DataFrame(fundamentals)
funds["date"] = pd.to_datetime(funds["date"], errors="coerce")
funds.to_parquet(os.path.join(SAVE_DIR, "fundamentals.parquet"), index=False)

meta = pd.DataFrame(metadata).drop_duplicates("ticker")
meta.to_csv(os.path.join(SAVE_DIR, "ticker_metadata.csv"), index=False)

print("✅ Data download complete. All files saved to:", SAVE_DIR)

NameError: name 'fundamentals' is not defined