## Setup

In [1]:
# standard libs
import os
import json
from datetime import datetime
import time


# third-party libs
import requests
import pandas as pd
from openai import OpenAI


AV_KEY = os.environ["AV_KEY"]
BASE_URL = "https://www.alphavantage.co/query"
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
client = OpenAI()


TICKERS = {
    "TSLA": "Tesla",
    "F": "Ford",
    "GM": "GM",
    "RIVN": "Rivian",
    "LCID": "Lucid",
    "TM": "Toyota",
    "HMC": "Honda",
    "NIO": "NIO",
    "XPEV": "XPeng",
    "STLA": "Stellantis",
    "PSNY": "Polestar",
    "LI": "Li Auto",
    "RACE": "Ferrari",
    "LCII": "LCI Industries",
    "ALV": "Autoliv"
}


## Fetching Data

In [2]:
OUT_DIR = "agent3_jsons"  # keeps JSONs out of the root
os.makedirs(OUT_DIR, exist_ok=True)  # creating once


def fetch_news_sentiment(symbol: str,
                         company: str,
                         api_key: str,
                         top_n: int = 10) -> list[dict]:
    params = {
        "function": "NEWS_SENTIMENT",
        "tickers": symbol,
        "apikey": api_key
    }
    resp = requests.get(BASE_URL, params=params, timeout=30)


    if resp.status_code != 200:
        raise RuntimeError(f"Alpha Vantage error {resp.status_code}: {resp.text}")


    articles = resp.json().get("feed", [])[:top_n]


    cleaned = [
        {
            "company": company,
            "text": art["title"],
            "timestamp": art["time_published"]
        }
        for art in articles
        if len(art["title"]) > 30  # filter ultra-short headlines
    ]


    fname = f"{company.lower().replace(' ', '_')}_agent3_news.json"
    out_file = os.path.join(OUT_DIR, fname)  # new path
    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(cleaned, f, indent=2)


    print(f"Saved {len(cleaned)} items → {out_file}")
    return cleaned




## Sentiment Score

In [3]:
def call_gpt_sentiment(
        text: str,
        company_name: str,
        model: str = "gpt-4o-mini",
        temperature: float = 0.3,
        max_retries: int = 3
    ) -> tuple[float | None, str | None]:
    """
    Query OpenAI to obtain a sentiment score in the range [-1, 1]
    and a one-word rationale for `text` regarding `company_name`.
    Returns (score, reason).  None is returned on failure.
    """
    prompt = f"""
You are an investor sentiment analyst.

Company: {company_name}
Text: "{text}"

Task: On a scale of -1 (very negative) to +1 (very positive), rate the sentiment toward this company.
Also return a one-word reason.

Respond in this exact JSON format:
{{"score": <number>, "reason": "<word>"}}
""".strip()


    for attempt in range(1, max_retries + 1):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature
            )
            reply = response.choices[0].message.content.strip()
            parsed = json.loads(reply)


            score = float(parsed.get("score"))
            reason = str(parsed.get("reason"))
            return score, reason


        except Exception as err:
            print(f"[attempt {attempt}/{max_retries}] Error: {err}")
            time.sleep(2 * attempt)  # simple back-off


    # all retries exhausted
    return None, None


## Agent 3 function

In [4]:
def run_agent3(
        symbol: str,
        company: str,
        api_key: str,
        top_n: int = 10,
        pause: float = 1.0
    ) -> dict:
    """
    End-to-end pipeline for a single company:
    1. Download recent headlines.
    2. Score each headline with GPT.
    3. Aggregate into a dashboard-style dictionary.
    """
    # 1 ▸ data collection
    data = fetch_news_sentiment(symbol, company, api_key, top_n=top_n)


    # 2 ▸ sentiment scoring
    for item in data:
        score, reason = call_gpt_sentiment(item["text"], company)
        item["score"] = score
        item["reason"] = reason
        time.sleep(pause)  # light throttle


    # 3 ▸ tidy DataFrame
    df = pd.DataFrame([d for d in data if d["score"] is not None])
    df["timestamp"] = pd.to_datetime(df["timestamp"], format="%Y%m%dT%H%M%S")
    df["date"] = df["timestamp"].dt.date
    df["score"] = df["score"].astype(float)


    if df.empty:  # fallback in rare no-data case
        return {"ticker": symbol, "error": "no valid scores"}


    # 4 ▸ summary statistics
    avg = df["score"].mean()
    stdev = df["score"].std()
    conf = max(0.0, 1.0 - stdev)


    top_pos = df.loc[df["score"].idxmax()]
    top_neg = df.loc[df["score"].idxmin()]


    implication = (
        "Strong upside potential"  if avg > 0.6 else
        "Moderate upside potential" if avg > 0.3 else
        "Neutral to mild optimism"  if avg > 0.1 else
        "Neutral"  if -0.1 <= avg <= 0.1 else
        "Mild downside pressure"  if avg > -0.3 else
        "Moderate downside pressure"if avg > -0.6 else
        "Strong downside pressure"
    )


    return {
        "ticker": symbol,
        "sentiment_score": round(avg, 3),
        "sentiment_label": (
            "Bullish" if avg > 0.25 else
            "Neutral" if -0.25 <= avg <= 0.25 else
            "Bearish"
        ),
        "confidence": round(conf, 3),
        "n_docs": int(df.shape[0]),
        "weighted_stdev": round(stdev, 3),
        "value_implication": implication,
        "top_positive": {
            "score": round(top_pos["score"], 3),
            "snippet": top_pos["text"]
        },
        "top_negative": {
            "score": round(top_neg["score"], 3),
            "snippet": top_neg["text"]
        }
    }


## Single company test

In [5]:
result = run_agent3("TSLA", "Tesla", AV_KEY, top_n=5)
print(json.dumps(result, indent=2))


Saved 5 items → tesla_agent3_news.json
{
  "ticker": "TSLA",
  "sentiment_score": 0.2,
  "sentiment_label": "Neutral",
  "confidence": 0.654,
  "n_docs": 5,
  "weighted_stdev": 0.346,
  "value_implication": "Neutral to mild optimism",
  "top_positive": {
    "score": 0.8,
    "snippet": "Tesla Valuation 'Could Far Exceed Current Levels': Analyst Sees 2 Segments Driving Most Upside  ( And It's Not Cars )  - Tesla  ( NASDAQ:TSLA ) "
  },
  "top_negative": {
    "score": 0.0,
    "snippet": "What's Going On With Li Auto Stock Tuesday? - Li Auto  ( NASDAQ:LI ) "
  }
}


## All companies

In [6]:
def run_full_universe(api_key: str = AV_KEY,
                      top_n: int = 10,
                      pause: float = 1.0) -> pd.DataFrame:
    """
    Execute Agent 3 across the full 15-ticker universe.
    Returns a DataFrame and writes a consolidated JSON output.
    """
    results = []


    for symbol, company in TICKERS.items():
        print(f"\n▶ Processing {company} ({symbol})")
        res = run_agent3(symbol, company, api_key, top_n=top_n, pause=pause)
        results.append(res)


    df_all = pd.DataFrame(results)


    # save to disk
    out_file = f"agent3_universe_{datetime.utcnow().date()}.json"
    df_all.to_json(out_file, orient="records", indent=2)
    print(f"\nSaved universe summary → {out_file}")


    return df_all


df_summary = run_full_universe(top_n=5)  # quick smoke test
df_summary.head()



▶ Processing Tesla (TSLA)
Saved 5 items → tesla_agent3_news.json

▶ Processing Ford (F)
Saved 4 items → ford_agent3_news.json

▶ Processing GM (GM)
Saved 5 items → gm_agent3_news.json

▶ Processing Rivian (RIVN)
Saved 4 items → rivian_agent3_news.json

▶ Processing Lucid (LCID)
Saved 5 items → lucid_agent3_news.json

▶ Processing Toyota (TM)
Saved 4 items → toyota_agent3_news.json

▶ Processing Honda (HMC)
Saved 5 items → honda_agent3_news.json

▶ Processing NIO (NIO)
Saved 5 items → nio_agent3_news.json

▶ Processing XPeng (XPEV)
Saved 5 items → xpeng_agent3_news.json

▶ Processing Stellantis (STLA)
Saved 5 items → stellantis_agent3_news.json

▶ Processing Polestar (PSNY)
Saved 5 items → polestar_agent3_news.json

▶ Processing Li Auto (LI)
Saved 5 items → li auto_agent3_news.json

▶ Processing Ferrari (RACE)
Saved 5 items → ferrari_agent3_news.json

▶ Processing LCI Industries (LCII)
Saved 5 items → lci industries_agent3_news.json

▶ Processing Autoliv (ALV)
Saved 5 items → autoliv_a

Unnamed: 0,ticker,sentiment_score,sentiment_label,confidence,n_docs,weighted_stdev,value_implication,top_positive,top_negative
0,TSLA,0.16,Neutral,0.642,5,0.358,Neutral to mild optimism,"{'score': 0.8, 'snippet': 'Tesla Valuation 'Co...","{'score': 0.0, 'snippet': 'What's Going On Wit..."
1,F,-0.075,Neutral,0.35,4,0.65,Neutral,"{'score': 0.5, 'snippet': 'Insights Into Ford ...","{'score': -1.0, 'snippet': 'DTE Energy Misses ..."
2,GM,0.24,Neutral,0.512,5,0.488,Neutral to mild optimism,"{'score': 0.7, 'snippet': 'Mary Kay Has GM Thi...","{'score': -0.5, 'snippet': 'Ford Q2 Earnings C..."
3,RIVN,0.075,Neutral,0.432,4,0.568,Neutral,"{'score': 0.5, 'snippet': 'Rivian Automotive ...","{'score': -0.7, 'snippet': 'Why Is Wall Street..."
4,LCID,0.18,Neutral,0.483,5,0.517,Neutral to mild optimism,"{'score': 0.7, 'snippet': 'Lucid and Timothée ...","{'score': -0.5, 'snippet': 'Lucid Is Sinking T..."


## Evaluations

## Loading the headline data

In [7]:
import os, glob, json, pandas as pd


OUT_DIR = "agent3_jsons"


def load_agent3_jsons(folder: str = OUT_DIR) -> pd.DataFrame:
    """Read every *_agent3_news.json file and return one DataFrame."""
    rows = []
    for path in glob.glob(os.path.join(folder, "*_agent3_news.json")):
        with open(path, "r", encoding="utf-8") as f:
            rows.extend(json.load(f))
    return pd.DataFrame(rows)


df_all = load_agent3_jsons()
print(f"{df_all.shape[0]} headlines loaded across {df_all['company'].nunique()} companies.")
df_all.head()


72 headlines loaded across 15 companies.


Unnamed: 0,company,text,timestamp
0,Autoliv,"GPC Tops Q2 Earnings Estimates, Slashes 2025 V...",20250722T141200
1,Autoliv,Autoliv Hit Sales and Margin Records in Q2,20250718T234607
2,Autoliv,Autoliv ( ALV ) Q2 2025 Earnings Call Transc...,20250718T163906
3,Autoliv,Are You a Momentum Investor? This 1 Stock Coul...,20250718T135001
4,Autoliv,Autoliv Q2 Earnings - Autoliv ( NYSE:ALV ),20250718T123702


## Adding model scores + internal consistency check

In [8]:
from tqdm.auto import tqdm  # progress bar

def score_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply call_gpt_sentiment row-wise and append 'score' & 'reason'.
    Existing scores are overwritten so you can re-run any time.
    """
    scores, reasons = [], []
    for txt, comp in tqdm(zip(df["text"], df["company"]), total=len(df)):
        s, r = call_gpt_sentiment(txt, comp)
        scores.append(s); reasons.append(r)
    df = df.copy()
    df["score"] = scores
    df["reason"] = reasons
    return df


df_scored = score_dataframe(df_all)


# reliability: variance when rescoring the SAME headline k times
def calc_consistency(sample_df: pd.DataFrame, k: int = 3) -> float:
    stdevs = []
    for txt in sample_df["text"]:
        vals = [call_gpt_sentiment(txt, "dummy")[0] for _ in range(k)]
        stdevs.append(pd.Series(vals).std())
    return float(pd.Series(stdevs).mean())


reliab = calc_consistency(df_scored.sample(50, random_state=1))
print(f"Average intra-headline σ (k=3) = {reliab:.3f}")


  0%|          | 0/72 [00:00<?, ?it/s]

Average intra-headline σ (k=3) = 0.036


In [10]:
# determine a feasible sample size
sample_n = min(len(df_scored), 200)


(df_scored[["company", "text", "score"]]
 .sample(sample_n, random_state=0)
 .assign(expected_score="")  # column for manual labels
 .to_csv("agent3_eval_template.csv", index=False))


print(f"Template saved → agent3_eval_template.csv  ({sample_n} rows)")




Template saved → agent3_eval_template.csv  (72 rows)


In [12]:
from sklearn.metrics import mean_squared_error, r2_score


df_lab = pd.read_csv("agent3_eval_template.csv")
df_lab = df_lab.dropna(subset=["expected_score"]).astype({"expected_score": float})


rmse = mean_squared_error(df_lab["expected_score"], df_lab["score"])
r2 = r2_score(df_lab["expected_score"], df_lab["score"])
rho = df_lab[["expected_score", "score"]].corr().iloc[0, 1]


print(f"RMSE = {rmse:.3f}  |  R² = {r2:.3f}  |  ρ = {rho:.3f}")


RMSE = 0.175  |  R² = 0.382  |  ρ = 0.747


In [13]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch


_tok = AutoTokenizer.from_pretrained("ProsusAI/finbert")
_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")


def finbert_score(txt: str) -> float:
    inputs = _tok(txt, return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():
        logits = _model(**inputs).logits.squeeze()
    probs = torch.softmax(logits, dim=0).numpy()  # [neg, neu, pos]
    return float(probs[2] - probs[0])


df_scored["finbert"] = df_scored["text"].apply(finbert_score)
corr = df_scored[["score", "finbert"]].corr().iloc[0, 1]
print(f"GPT-4o vs FinBERT correlation ρ = {corr:.3f}")


GPT-4o vs FinBERT correlation ρ = -0.042
