In [7]:
import os, time
# SECURITY: never hardcode the Polygon API key in the notebook. Supply POLYGON_API_KEY
# through the process environment or the project's .env file (read by load_dotenv in the
# configuration cell). A key that was ever committed in this file should be revoked and
# rotated, since it remains visible in the notebook's history.
from pathlib import Path
from datetime import datetime, timezone, timedelta
from typing import List, Dict, Any, Optional

import httpx
import pandas as pd
from dateutil import parser as dtparse
from dotenv import load_dotenv

In [9]:
# Locate the project's .env file (e.g. ~/finreport/.env). The current working directory
# is checked first, then its parent and grandparent, so the notebook may live up to two
# levels below the project root. Adjust the search if your layout differs.
_cwd = Path.cwd()
# list(...)[:2] copes with a cwd that has fewer than two ancestors (no IndexError at "/").
_search_dirs = [_cwd, *list(_cwd.parents)[:2]]
env_path = next(
    (folder / ".env" for folder in _search_dirs if (folder / ".env").exists()),
    _cwd / ".env",  # nothing found: keep the default location so the assert below reports it
)

# By default load_dotenv does not override variables that are already set in the
# process environment, so an exported POLYGON_API_KEY takes precedence over the file.
load_dotenv(dotenv_path=env_path)

API_KEY = os.getenv("POLYGON_API_KEY")
assert API_KEY, "POLYGON_API_KEY missing. Put it in your .env or set it here."

# News endpoint queried by the fetch helper.
BASE = "https://api.polygon.io/v2/reference/news"


In [10]:
def iso_utc(dt: datetime) -> str:
    """Render ``dt`` as an ISO-8601 string in UTC, using a trailing ``Z`` for the offset.

    The datetime is converted to UTC first, so aware values in other zones are
    shifted accordingly; microseconds appear only when non-zero (``isoformat`` rules).
    """
    as_utc = dt.astimezone(timezone.utc)
    stamp = as_utc.isoformat()
    # isoformat() spells the UTC offset as "+00:00"; swap it for the compact "Z" form.
    return stamp.replace("+00:00", "Z")

def fetch_all_news_for_utc_day(day_str: str,
                               per_page: int = 1000,
                               sleep_s: float = 0.2) -> List[Dict[str, Any]]:
    """
    Download every Polygon news item published within one UTC calendar day.

    The window is half-open, [day 00:00 UTC, next day 00:00 UTC), expressed with the
    published_utc.gte / published_utc.lt filters and no ticker filter. Pages are
    walked by following each response's next_url until none is returned.

    Args:
        day_str: ISO date string (YYYY-MM-DD); only its date part is used.
        per_page: value sent as the "limit" query parameter on the first request.
        sleep_s: pause, in seconds, before each follow-up page request.

    Returns:
        The concatenated "results" entries of all pages, in the order received.

    Raises:
        ValueError: if day_str is not parseable by datetime.fromisoformat.
        httpx.HTTPStatusError: if any page responds with an error status.
    """
    # Build the UTC midnight-to-midnight window for the requested day.
    day = datetime.fromisoformat(day_str).date()
    window_start = datetime(day.year, day.month, day.day, tzinfo=timezone.utc)
    window_end = window_start + timedelta(days=1)

    # Query string for the first request only; next_url is requested without extra params.
    query = {
        "order": "asc",
        "sort": "published_utc",
        "limit": per_page,                         # Polygon max is 1000
        "published_utc.gte": iso_utc(window_start),
        "published_utc.lt": iso_utc(window_end),
    }
    # Header-based auth is sent with every request, including next_url pages.
    auth_headers = {"Authorization": f"Bearer {API_KEY}"}

    collected: List[Dict[str, Any]] = []
    page_url = BASE
    with httpx.Client(timeout=60) as client:
        while page_url:
            response = client.get(page_url, params=query, headers=auth_headers)
            response.raise_for_status()
            payload = response.json()
            collected += payload.get("results") or []

            # Advance to the next page; from here on the URL carries its own query.
            query = None
            page_url = payload.get("next_url")
            if not page_url:
                break
            time.sleep(sleep_s)

    return collected


In [13]:
# UTC calendar day to download, written as YYYY-MM-DD. Edit this value to pull another day.
DAY = "2025-10-03"

# Fetch every article for that day (limit of 1000 per page) and report how many came back.
rows = fetch_all_news_for_utc_day(day_str=DAY, per_page=1000)
print(f"TOTAL for {DAY} (UTC): {len(rows)}")


TOTAL for 2025-10-03 (UTC): 213


In [14]:
def normalize_row(x: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten one raw news item into the flat record used for the DataFrame.

    Args:
        x: a single entry from the API "results" list.

    Returns:
        A dict with keys published_utc (parsed datetime or None), title, url
        (taken from "article_url"), publisher (the nested publisher's "name", or
        None when "publisher" is not a dict), tickers (always a list), id and
        description. Missing fields come back as None.
    """
    raw_published = x.get("published_utc")
    publisher_info = x.get("publisher")
    return {
        # Parse only when a timestamp string is present; otherwise leave it empty.
        "published_utc": dtparse.isoparse(raw_published) if raw_published else None,
        "title": x.get("title"),
        "url": x.get("article_url"),
        "publisher": publisher_info.get("name") if isinstance(publisher_info, dict) else None,
        "tickers": x.get("tickers") or [],
        "id": x.get("id"),
        "description": x.get("description"),
    }

# Fixed column layout, mirroring the keys produced by normalize_row. Passing it
# explicitly keeps the schema intact when `rows` is empty, so the "tickers" handling
# below and the CSV header still work on a day with no news (otherwise: KeyError).
NEWS_COLUMNS = ["published_utc", "title", "url", "publisher", "tickers", "id", "description"]

df = pd.DataFrame([normalize_row(x) for x in rows], columns=NEWS_COLUMNS)
print("Shape:", df.shape)

# Show first 10 rows
with pd.option_context("display.max_colwidth", 120):
    display(df.head(10))

# Save full dump to CSV under build/ (relative to the current working directory)
out_dir = Path.cwd() / "build"
out_dir.mkdir(exist_ok=True, parents=True)
out_csv = out_dir / f"polygon_news_{DAY}.csv"
# Work on a copy so `df` keeps real lists in "tickers" for later cells.
df_out = df.copy()
# make tickers CSV-friendly (comma-separated string)
df_out["tickers"] = df_out["tickers"].map(lambda t: ",".join(t) if isinstance(t, list) else t)
df_out.to_csv(out_csv, index=False)
print("Saved:", out_csv)


Shape: (213, 7)


Unnamed: 0,published_utc,title,url,publisher,tickers,id,description
0,2025-10-03 00:05:00+00:00,"Univest Securities, LLC Announces Closing of $15 Million Registered Direct Offering for its Client Chijet Motor Comp...",https://www.globenewswire.com/news-release/2025/10/03/3160759/0/en/Univest-Securities-LLC-Announces-Closing-of-15-Mi...,GlobeNewswire Inc.,[CJET],a48be4aa5f4f278960d2fbf4731a672febddcb51a4821aa93ede219f5bbc8e41,"Univest Securities completed a $15 million registered direct offering for Chijet Motor Company, selling 100 million ..."
1,2025-10-03 00:12:45+00:00,Why Symbotic Stock Triumphed on Thursday,https://www.fool.com/investing/2025/10/02/why-symbotic-stock-triumphed-on-thursday/?source=iedfolrf0000001,The Motley Fool,"[SYM, WMT]",74568333482e039c56f88d09446e5e408b1d15489414a225007ed5b914276a98,Industrial robotics company Symbotic saw its stock price increase nearly 10% after Northcoast Research initiated cov...
2,2025-10-03 00:20:12+00:00,Why MercadoLibre Stock Is Sinking This Week,https://www.fool.com/investing/2025/10/02/why-mercadolibre-stock-is-sinking-this-week/?source=iedfolrf0000001,The Motley Fool,"[MELI, AMZN]",70ba1a548fae9e16c5b079cb4fa9c19de007a8a3e2fab80e902bdcb0fb0949e8,"MercadoLibre experienced a 10% stock decline as Amazon aggressively expands into Brazil, waiving fulfillment fees fo..."
3,2025-10-03 00:30:00+00:00,"Ceramic Tiles Market worth $227.8 billion by 2030 at 3.0 %, says MarketsandMarkets™",https://www.globenewswire.com/news-release/2025/10/03/3160761/0/en/Ceramic-Tiles-Market-worth-227-8-billion-by-2030-...,GlobeNewswire Inc.,[MHK],c2b3660d915f3e86c1b14cd5a73c34c509fb7cef50d9c483b1dfdba8b81601ab,"The global ceramic tiles market is expected to grow from $196.2 billion in 2025 to $227.8 billion by 2030, with a 3...."
4,2025-10-03 00:33:41+00:00,"CoreWeave's Valuation Soars on Meta Partnership, But Is It Overheating?",https://www.fool.com/investing/2025/10/02/coreweaves-valuation-soars-on-meta-partnership-but/?source=iedfolrf0000001,The Motley Fool,"[CRWV, META, NVDA, MSFT]",66c3ecd77fe2ef3ce76eb3076a04f252b19cfbe5132a22ca5fdaa6cd006478c4,AI cloud infrastructure company CoreWeave secured a $14.2 billion cloud computing deal with Meta Platforms through 2...
5,2025-10-03 01:11:00+00:00,"ZenaTech anuncia el establecimiento de su sede global de negocio Drone como Servicio (DaaS) en Orlando, aprovechando...",https://www.globenewswire.com/news-release/2025/10/03/3160765/0/es/ZenaTech-anuncia-el-establecimiento-de-su-sede-gl...,GlobeNewswire Inc.,[ZENA],d4743e787ab062ed939077aad4a26793e590adf8e79344548b25439c03e13b94,"ZenaTech is establishing its global Drone as a Service (DaaS) headquarters in Orlando, Florida, aiming to expand its..."
6,2025-10-03 02:29:00+00:00,"ROSEN, RECOGNIZED INVESTOR COUNSEL, Encourages Spirit Aviation Holdings, Inc. Investors to Secure Counsel Before Imp...",https://www.globenewswire.com/news-release/2025/10/03/3160767/673/en/ROSEN-RECOGNIZED-INVESTOR-COUNSEL-Encourages-Sp...,GlobeNewswire Inc.,[FLYYQ],2ea2c6460d90ad58dd8c1c9269ae479ae2dc40ed33c9fb66dbc8946fc88d3008,Rosen Law Firm filed a class action lawsuit against Spirit Aviation Holdings for allegedly making false statements a...
7,2025-10-03 04:57:00+00:00,Aduro Clean Technologies participera à des événements majeurs du secteur en octobre,https://www.globenewswire.com/news-release/2025/10/03/3160771/0/fr/Aduro-Clean-Technologies-participera-%C3%A0-des-%...,GlobeNewswire Inc.,[ADUR],b4bef2812f2a7465e1ce7de358b01cac37471ff3e293090efc5a9cae1d7bf4fc,"Aduro Clean Technologies will participate in three major industry events in October 2025, including K 2025, Sustaina..."
8,2025-10-03 04:57:00+00:00,Aduro Clean Technologies präsentiert sich im Oktober auf führenden Branchenveranstaltungen,https://www.globenewswire.com/news-release/2025/10/03/3160771/0/de/Aduro-Clean-Technologies-pr%C3%A4sentiert-sich-im...,GlobeNewswire Inc.,[ADUR],ec8f7e7ccf3b1e636f8b231a62232764b9d583e403990668047b7f31229f498b,"Aduro Clean Technologies will participate in three major industry events in October 2025, including K 2025, Sustaina..."
9,2025-10-03 05:05:00+00:00,FDA approves Roche’s Tecentriq plus lurbinectedin as first-line maintenance therapy for extensive-stage small cell l...,https://www.globenewswire.com/news-release/2025/10/03/3160776/0/en/FDA-approves-Roche-s-Tecentriq-plus-lurbinectedin...,GlobeNewswire Inc.,"[JAZZ, RHHBY]",cec0fb9147b2c046d1c491f15c6b272e214565863c227bcfcc9eb1859e21084f,The FDA approved Tecentriq and lurbinectedin as the first combination therapy for maintenance treatment of extensive...


Saved: /home/philippe/build/polygon_news_2025-10-03.csv


In [15]:
import pandas as pd

# If you already have df loaded from your CSV or the API:
# df = pd.read_csv("build/polygon_news_2025-10-03.csv")

# Make sure tickers are proper lists
def ensure_list(x):
    """Coerce a "tickers" cell into a list of ticker symbols.

    Args:
        x: a list (returned unchanged), a string such as "[AAPL, MSFT]",
           "['AAPL', 'MSFT']" or "AAPL,MSFT", or anything else (e.g. None/NaN).

    Returns:
        A list of symbols. Strings are split on commas and each piece is stripped
        of whitespace, square brackets and quotes; pieces that are empty *after*
        stripping are dropped, so "[]" and "[AAPL, ]" do not yield "" entries.
        Values that are neither list nor str give an empty list.
    """
    if isinstance(x, list):
        return x
    if isinstance(x, str):
        junk = " \t\r\n[]'\""
        # Strip first, then filter: filtering before stripping let "[]" through as "".
        cleaned = (piece.strip(junk) for piece in x.split(","))
        return [symbol for symbol in cleaned if symbol]
    return []

# Normalise the "tickers" column so every cell holds a list of symbols.
df["tickers"] = df["tickers"].map(ensure_list)

# One row per (article, ticker) pair.
exploded = df.explode("tickers")

# Tally mentions per ticker and keep the ten most frequent.
ticker_tally = exploded["tickers"].value_counts()
top_counts = ticker_tally.head(10).to_frame("mentions")

top_counts

Unnamed: 0_level_0,mentions
tickers,Unnamed: 1_level_1
NVDA,14
TSLA,10
AMZN,10
MSFT,9
PLTR,6
GOOGL,5
AAPL,5
META,5
USAR,5
USARW,5
