In [21]:
# If you see "No module named psycopg2" or "dotenv", install inside your venv:
# %pip install psycopg2-binary python-dotenv pandas

import os
from pathlib import Path
import pandas as pd
import psycopg2
from dotenv import load_dotenv

# Read environment variables from the .env file kept under ~/finreport.
env_file = Path.home().joinpath("finreport", ".env")
load_dotenv(env_file)

# Connection string: PG_DSN from the environment, falling back to the
# "finreport" database addressed with libpq defaults.
PG_DSN = os.environ.get("PG_DSN", "postgresql:///finreport")

# Open the database session; the bare name echoes the connection as cell output.
conn = psycopg2.connect(dsn=PG_DSN)
conn


<connection object at 0x799ccf047ec0; dsn: 'postgresql:///finreport', closed: 0>

In [22]:
# Query parameters: the calendar day to pull (UTC) plus optional filters.
DAY = "2025-10-02"                  # <-- edit to pick another day
TICKERS = ["NVDA", "TSLA"]          # use [] to match any ticker
SOURCE_ONLY = "finance.yahoo.com"   # use None to match any source

# Half-open window [DAY_START, DAY_END): the chosen day up to, but not
# including, the following day. Both bounds are ISO "YYYY-MM-DD" strings.
DAY_START = DAY
_next_day = pd.Timestamp(DAY) + pd.Timedelta(1, unit="D")
DAY_END = _next_day.strftime("%Y-%m-%d")

(DAY, DAY_START, DAY_END)


('2025-10-02', '2025-10-02', '2025-10-03')

In [23]:
# Assemble the WHERE clause from fixed SQL fragments. Every user-chosen value
# is passed separately through `params` as a %s placeholder, so nothing
# configurable is ever formatted into the SQL text itself.
where = [
    "published_date_utc >= %s::date",   # window start (inclusive)
    "published_date_utc <  %s::date",   # window end (exclusive)
    "fetch_status = 'ok'",
    "full_body IS NOT NULL"
]
params = [DAY_START, DAY_END]

# Optional filter: restrict to a single publisher name.
if SOURCE_ONLY:
    where.append("publisher->>'name' = %s")
    params.append(SOURCE_ONLY)

# Optional filter: keep rows whose `tickers` array shares at least one entry
# with TICKERS (psycopg2 adapts the Python list to a PostgreSQL array).
if TICKERS:
    # e.g., TICKERS = ['NVDA','TSLA']
    where.append("EXISTS (SELECT 1 FROM unnest(tickers) t WHERE t = ANY(%s))")
    params.append(TICKERS)

sql = f"""
SELECT
    id,
    published_utc,
    title,
    article_url AS url,
    description,
    image_url,
    publisher->>'name' AS source,
    tickers,
    keywords,
    published_date_utc,
    full_body,
    full_body_chars,
    fetched_at,
    fetch_status,
    body_extractor,
    summary
FROM news_raw
WHERE {' AND '.join(where)}
ORDER BY published_utc;
"""

# Fetch through a psycopg2 cursor instead of handing the raw connection to
# pd.read_sql: pandas only supports SQLAlchemy connectables (or sqlite3) there
# and emits a UserWarning for any other DBAPI2 connection.
try:
    with conn.cursor() as cur:
        cur.execute(sql, params)
        columns = [col[0] for col in cur.description]   # first field = column name
        rows = cur.fetchall()
except psycopg2.Error:
    # A failed statement leaves the session in an aborted transaction; roll
    # back so later cells can keep using `conn`, then surface the error.
    conn.rollback()
    raise

# coerce_float=True converts Decimal values to float, as pd.read_sql does by default.
df = pd.DataFrame.from_records(rows, columns=columns, coerce_float=True)
print("Fetched rows:", len(df))


Fetched rows: 36


  df = pd.read_sql(sql, conn, params=params)


In [37]:
# Trim the query result to the columns inspected in the following cells.
review_columns = [
    "id",
    "published_utc",
    "title",
    "tickers",
    "description",
    "keywords",
    "summary",
    "url",
    "full_body",
    "full_body_chars",
]
df2 = df[review_columns]

In [38]:
# Preview the first five rows of the trimmed frame.
df2.head(n=5)

Unnamed: 0,id,published_utc,title,tickers,description,keywords,summary,url,full_body,full_body_chars
0,tiingo:88275289,2025-10-02 00:37:00+00:00,Amazon’s new Alexa feature impacts Nvidia,[NVDA],Amazon's decision has consequences for suppliers.,"[alexa free, alexa available, alexa just, alexa amazon, backend alexa, alexa latest, tvs alexa, alexa rollout]","Amazon (AMZN) is adding generative AI to daily usage as a basic, permanent layer. This won't just improve voice quer...",https://finance.yahoo.com/news/amazon-alexa-feature-impacts-nvidia-003700735.html,Alexa+ is now available for early access on certain Echo devices. It will also be included in future Fire TV devices...,2675
1,tiingo:88284459,2025-10-02 09:59:02+00:00,Nvidia CEO Jensen Huang calls Elon Musk the 'ultimate GPU',[NVDA],"Nvidia CEO Jensen Huang said that ""all of the interdependencies"" of supercomputing reside in one head: Elon Musk's.","[supercomputer huang, ai supercomputers, gigawatt ai, ceo musk, million gpus, nvidia ceo, build ai, xai colossus]","Huang has good reason to praise the Tesla CEO — Musk's companies are major Nvidia customers, after all. The Colossus...",https://finance.yahoo.com/news/nvidia-ceo-jensen-huang-calls-095902235.html,"-\nJensen Huang said that Elon Musk is capable of keeping ""all of the interdependencies"" of supercomputing in his he...",2441
2,tiingo:88285492,2025-10-02 10:33:19+00:00,Tesla's Sales Decline Expected to Slow in Delivery Report,[TSLA],"The pace of Tesla Inc.'s sales decline is expected to slow, as analysts expect the company likely delivered around 4...","[tesla sales, tesla, sales decline, pace tesla, ago tesla, quarterly decrease, 600 vehicles, slow analysts]","The pace of Tesla Inc.'s sales decline is expected to slow, as analysts expect the company likely delivered around 4...",https://finance.yahoo.com/video/teslas-sales-decline-expected-slow-103319957.html,"The pace of Tesla Inc.'s sales decline is expected to slow, as analysts expect the company likely delivered around 4...",335
3,tiingo:88285119,2025-10-02 10:52:05+00:00,"OpenAI’s Stargate locks in Korean chipmakers, boosting shares",[NVDA],SK Hynix and Samsung are poised to benefit from unprecedented demand for high-bandwidth memory as OpenAI embarks on ...,"[memory market, samsung electronics, memory hbm, global dram, hbm stargate, chip sector, supercomputers, korea biggest]",High-bandwidth memory (HBM) is crucial for AI because it allows chips like Nvidia’s to move massive amounts of data ...,https://finance.yahoo.com/news/openai-stargate-locks-korean-chipmakers-105205593.html,Two of South Korea’s biggest technology firms saw their shares jump in Seoul trading on Thursday after OpenAI announ...,2798
4,tiingo:88285129,2025-10-02 11:00:00+00:00,DigitalOcean Launches New Products and Innovations at Deploy Conference,[NVDA],"BROOMFIELD, Colo., October 02, 2025--DigitalOcean, the simplest scalable cloud for digital native enterprises, today...","[cloud digital, cloud experience, ai cloud, digitalocean announced, capabilities platform, ai product, soon expandin...","At the conference, the company showcased the improvements in its growing AI product portfolio. DigitalOcean’s Gradie...",https://finance.yahoo.com/news/digitalocean-launches-products-innovations-deploy-110000125.html,"In London, DigitalOcean raises the bar for cloud experience for digital native enterprises and developers\nBROOMFIEL...",2557


In [31]:
# Print the full summary text of the article at row position 1.
# The column is selected by name rather than by position: a positional column
# index silently points at a different field whenever df2's column list
# changes (position 4 is "description" when "summary" sits at position 6).
print(df2["summary"].iloc[1])

Huang has good reason to praise the Tesla CEO — Musk's companies are major Nvidia customers, after all. The Colossus II comprises at least 200,000 Nvidia GPUs. The supercomputer aims to expand to at least 1 million GPUs. That being said, Nvidia's chips have quickly become a hot commodity, with the company supplying much of computing systems in Big Tech's AI race. Last week, Nvidia announced a $100 billion into OpenAI — helmed by Musk's frenemy Sam Altman — to support the build-out of AI data centers.

Musk is building xAI's Colossus II data center outside of Memphis, Tennessee. He has already spent at least $400 million on the world's largest supercomputer, per documents reviewed by Business Insider.


In [4]:
# Let text columns show up to 120 characters when frames are rendered.
pd.set_option("display.max_colwidth", 120)

print("Counts by fetch_status")
status_counts = df["fetch_status"].value_counts(dropna=False)
display(status_counts)

print("\nSources:")
top_sources = df["source"].value_counts().head(15)
display(top_sources)

print("\nBody presence:")
has_body = df["full_body"].notna()
display(has_body.value_counts())


def _text_length(value):
    """Return len(value) for a string and 0 for anything else (e.g. None)."""
    return len(value) if isinstance(value, str) else 0


# Size metrics derived from the article body; the divisors are rough
# characters-per-word (6) and characters-per-token (4) heuristics.
df["body_chars"] = df["full_body"].map(_text_length)
df["word_est"] = df["body_chars"].div(6).round().astype(int)    # rough words
df["token_est"] = df["body_chars"].div(4).round().astype(int)   # rough tokens

size_columns = ["body_chars", "word_est", "token_est"]
df[size_columns].describe()


Counts by fetch_status


fetch_status
ok    36
Name: count, dtype: int64


Sources:


source
finance.yahoo.com    36
Name: count, dtype: int64


Body presence:


full_body
True    36
Name: count, dtype: int64

Unnamed: 0,body_chars,word_est,token_est
count,36.0,36.0,36.0
mean,2051.833333,342.0,513.083333
std,868.115414,144.726145,216.973254
min,266.0,44.0,66.0
25%,1514.75,252.0,379.0
50%,2455.5,409.5,614.0
75%,2720.5,453.5,680.5
max,3071.0,512.0,768.0
