In [None]:
"""This script fetches OHLCV for all 503 S&P tickers and saves to a json"""

In [None]:
import datetime
import json
import time

import pandas as pd
from polygon.rest import RESTClient

In [None]:
import os
from dotenv import load_dotenv

# Pull any settings defined in a local .env file into the process environment
load_dotenv()

# Read the Polygon key from the environment and build the REST client with it
POLYGON_API_KEY = os.environ.get("POLYGON_API_KEY")
client = RESTClient(api_key=POLYGON_API_KEY)

In [None]:
def get_sp500_tickers():
    """Return the ticker symbols listed on Wikipedia's S&P 500 companies page.

    Every HTML table on the page is parsed; the ``Symbol`` column of the first
    table is returned as a plain Python list.
    """
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    constituents = pd.read_html(url)[0]
    return constituents["Symbol"].tolist()

def _previous_weekday(today):
    """Return the latest Monday-Friday date strictly before ``today``.

    Weekends are skipped because no daily bars exist for them. Exchange
    holidays are not detected, so the result can still be a closed session.
    """
    day = today - datetime.timedelta(days=1)
    while day.weekday() >= 5:  # 5 = Saturday, 6 = Sunday
        day -= datetime.timedelta(days=1)
    return day


def _bar_field(bar, short_name, long_name):
    """Read one value from an aggregate bar under either attribute spelling."""
    if hasattr(bar, short_name):
        return getattr(bar, short_name)
    return getattr(bar, long_name)


# Fetch data for a single day with rate limiting
def fetch_sp500_data(date=None, delay_seconds=12):
    """Fetch one daily OHLCV bar per S&P 500 ticker and return them as a DataFrame.

    Args:
        date: Trading day to request, as a ``"YYYY-MM-DD"`` string. When
            omitted, the most recent weekday before today is used. (The
            previous hard-coded ``2025-01-01`` was a US market holiday, so
            every request came back empty.)
        delay_seconds: Pause between consecutive requests, to stay under the
            API rate limit. No pause is made after the final ticker.

    Returns:
        pandas.DataFrame with columns ``ticker``, ``open``, ``high``, ``low``,
        ``close`` and ``volume``; tickers that return no bar or raise an error
        are reported on stdout and left out.
    """
    if date is None:
        date = _previous_weekday(datetime.date.today()).isoformat()

    tickers = get_sp500_tickers()
    total = len(tickers)
    records = []

    for i, ticker in enumerate(tickers):
        try:
            bars = client.get_aggs(ticker, 1, "day", date, date)
            if bars:
                bar = bars[0]
                # Accept both the raw short keys (o/h/l/c/v) and the long
                # attribute names (open/high/...) so either client model works.
                # TODO: switch to the same indicators as the other script (EMA, SMA, etc)
                records.append(
                    {
                        "ticker": ticker,
                        "open": _bar_field(bar, "o", "open"),
                        "high": _bar_field(bar, "h", "high"),
                        "low": _bar_field(bar, "l", "low"),
                        "close": _bar_field(bar, "c", "close"),
                        "volume": _bar_field(bar, "v", "volume"),
                    }
                )
                print(f"Fetched data for {ticker} ({i+1}/{total})")
            else:
                # Make the gap visible instead of reporting a successful fetch
                print(f"No data for {ticker} on {date} ({i+1}/{total})")

        except Exception as e:
            # Best effort: one failing ticker must not abort the whole run
            print(f"Error fetching data for {ticker}: {e}")

        # Rate limit handling: wait before the next request, but not after the last one
        if i < total - 1:
            time.sleep(delay_seconds)

    return pd.DataFrame(records)

In [None]:
# Run the full fetch. fetch_sp500_data() pauses between tickers, so this cell is slow.
sp500_data = fetch_sp500_data()

In [None]:
# Mount Google Drive at /content/drive so files under it can be written.
# NOTE(review): google.colab is presumably only importable inside Colab — confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

def save_to_json(data, filename="sp500_data.json", directory="/content/drive/MyDrive/RoboInvesting"):
    """Write ``data`` as indented JSON to ``directory/filename``.

    Args:
        data: A pandas DataFrame (converted to a list of row dicts) or any
            object ``json.dump`` can serialise.
        filename: Name of the output file. It was previously ignored in favour
            of a hard-coded path; it is now honoured.
        directory: Folder to write into. The default is the Google Drive
            folder the hard-coded path pointed at, so calling with no extra
            arguments writes to the same file as before.

    Failures are reported with ``print`` rather than raised, so a failed save
    does not stop the notebook.
    """
    try:
        # Convert DataFrame to a list of per-row dictionaries if necessary
        if isinstance(data, pd.DataFrame):
            data = data.to_dict(orient="records")

        # Build the destination from the caller-supplied folder and file name
        file_path = os.path.join(directory, filename)

        # Save the JSON file
        with open(file_path, "w") as f:
            json.dump(data, f, indent=4)

        print(f"Data saved to {file_path}")

    except Exception as e:
        print(f"An error occurred while saving the data: {e}")

# Persist the fetched DataFrame using save_to_json's default arguments
save_to_json(sp500_data)