## Imports

In [77]:
from dotenv import load_dotenv
import os
import numpy as np
import pandas as pd
import requests
from autogluon.tabular import TabularPredictor
from dotenv import load_dotenv
import os, time, datetime as dt
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from openai import OpenAI
import json


load_dotenv()
AV_KEY = os.environ["AV_KEY"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]


client = OpenAI()
BASE_URL = "https://www.alphavantage.co/query"


pd.set_option("display.max_colwidth", 120)
pd.set_option("display.expand_frame_repr", False)




## Agent 1 prediction

In [78]:
# Agent 1 (prediction-only)


# silence fastai pickle warning once
warnings.filterwarnings("ignore", category=UserWarning, module="fastai.learner")


# helper
def get_market_cap_now(ticker: str) -> float | None:
    """Current market-cap from Alpha Vantage ‘OVERVIEW’."""
    data = _av_get("OVERVIEW", symbol=ticker)
    cap = data.get("MarketCapitalization")
    return None if cap in (None, "") else float(cap)


def _top_k_models(predictor: TabularPredictor, k: int = 5) -> list[str]:
    lb = predictor.leaderboard(silent=True)  # DataFrame
    return lb.sort_values("score_val", ascending=False).head(k)["model"].tolist()


_TOP_MODELS = _top_k_models(predictor, k=5)


def _ensemble_stats_k(feat_df: pd.DataFrame) -> tuple[float, float]:
    """Mean & std using only the selected _TOP_MODELS."""
    preds = [np.expm1(predictor.predict(feat_df, model=m).iloc[0])
             for m in _TOP_MODELS]
    preds = np.asarray(preds)
    return preds.mean(), preds.std(ddof=0)


# Loading once
predictor = TabularPredictor.load("agent1_model")


# Alpha Vantage helper
def _av_get(function, **params):
    params |= {"apikey": AV_KEY, "function": function}
    url = "https://www.alphavantage.co/query"
    r = requests.get(url, params=params, timeout=10)
    r.raise_for_status()
    return r.json()


def pull_quarterly_fundamentals(ticker):
    income = _av_get("INCOME_STATEMENT", symbol=ticker)
    bal = _av_get("BALANCE_SHEET", symbol=ticker)


    try:
        inc_row = income["quarterlyReports"][0]
        bal_row = bal["quarterlyReports"][0]
    except (KeyError, IndexError):
        return pd.DataFrame()  # nothing returned → skip ticker


    def f(row, key):
        v = row.get(key)
        return np.nan if v in (None, '') else float(v)


    df = pd.DataFrame([{
        "Quarter"  : pd.to_datetime(inc_row["fiscalDateEnding"]),
        "Total_Revenue" : f(inc_row, "totalRevenue"),
        "Net_Income"  : f(inc_row, "netIncome"),
        "EPS"  : f(inc_row, "eps"),
        "Total_Debt"  : f(bal_row, "totalLiabilities"),
        "Total_Assets"  : f(bal_row, "totalAssets"),
    }])


    df["Cash_to_Debt"] = df["Total_Assets"] / df["Total_Debt"].replace(0, np.nan)
    df["NetMargin"] = df["Net_Income"]  / df["Total_Revenue"]
    df["Debt_Ratio"] = df["Total_Debt"]  / df["Total_Assets"]
    df["Asset_Turnover"] = df["Total_Revenue"] / df["Total_Assets"]
    df[["Revenue_pctchg","EPS_pctchg","Income_Growth"]] = 0  # placeholders
    return df


In [79]:
# company metadata copied from the training notebook
ticker_to_name = {
    "TSLA": "Tesla", "F": "Ford", "GM": "GM", "RIVN": "Rivian", "LCID": "Lucid",
    "TM": "Toyota", "HMC": "Honda", "NIO": "NIO", "XPEV": "XPeng", "STLA": "Stellantis",
    "PSNY": "Polestar", "LI": "Li Auto", "RACE": "Ferrari", "LCII": "LCI Industries",
    "ALV": "Autoliv",
}


# Columns the model expects, in order
features = predictor.feature_metadata.get_features()
unique_companies = [c[len("Company_"):] for c in features if c.startswith("Company_")]


def predict_agent1(tickers: list[str]) -> dict:
    res = {}
    for tk in tickers:
        feat_df = pull_quarterly_fundamentals(tk)
        if feat_df.empty:
            continue


        # one-hot prep (unchanged) …
        for comp in unique_companies:
            feat_df[f"Company_{comp}"] = int(ticker_to_name[tk] == comp)
        for col in set(features) - set(feat_df.columns):
            feat_df[col] = 0 if col.startswith("Company_") else np.nan
        feat_df = feat_df[features]


        # ▼ use only top-k models
        pred_cap, spread = _ensemble_stats_k(feat_df)


        cur_cap = get_market_cap_now(tk)
        pct_diff = None if cur_cap in (None, 0) else (pred_cap - cur_cap) / cur_cap * 100


        # confidence: 1 − CV
        conf = 1.0 - (spread / pred_cap if pred_cap else 1.0)
        conf = float(np.clip(conf, 0.0, 1.0))


        res[tk] = {
            "pred_cap" : pred_cap,
            "cur_cap"  : cur_cap,
            "pct_diff" : pct_diff,
            "confidence": conf,
        }
    return res




In [80]:
TICKERS = ["TSLA", "NIO", "RIVN"]
agent1_out = predict_agent1(TICKERS)
agent1_out


If you only need to load model weights and optimizer state, use the safe `Learner.load` instead.
  warn("load_learner` uses Python's insecure pickle module, which can execute malicious arbitrary code when loading. Only load files you trust.\nIf you only need to load model weights and optimizer state, use the safe `Learner.load` instead.")
If you only need to load model weights and optimizer state, use the safe `Learner.load` instead.
  warn("load_learner` uses Python's insecure pickle module, which can execute malicious arbitrary code when loading. Only load files you trust.\nIf you only need to load model weights and optimizer state, use the safe `Learner.load` instead.")
If you only need to load model weights and optimizer state, use the safe `Learner.load` instead.
  warn("load_learner` uses Python's insecure pickle module, which can execute malicious arbitrary code when loading. Only load files you trust.\nIf you only need to load model weights and optimizer state, use the safe `Le

{'TSLA': {'pred_cap': 330483500000.0,
  'cur_cap': 1036014584000.0,
  'pct_diff': -68.10049741461941,
  'confidence': 0.8945824354887009},
 'NIO': {'pred_cap': 28980259000.0,
  'cur_cap': 10901484000.0,
  'pct_diff': 165.83774113689475,
  'confidence': 0.7006491720676422},
 'RIVN': {'pred_cap': 23019170000.0,
  'cur_cap': 15644966000.0,
  'pct_diff': 47.13467445055489,
  'confidence': 0.7216319441795349}}

## Agent 2 Prediction

In [18]:
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
finbert = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")
finbert.eval()
labels = ["negative", "neutral", "positive"]
label_to_score = {"negative": -1, "neutral": 0, "positive": 1}


def alpha_news_window(ticker: str,
                      from_dt: dt.datetime,
                      to_dt: dt.datetime,
                      limit: int = 5) -> list[str]:
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "NEWS_SENTIMENT",
        "tickers" : ticker,
        "time_from": from_dt.strftime("%Y%m%dT%H%M"),
        "time_to"  : to_dt.strftime("%Y%m%dT%H%M"),
        "sort"  : "LATEST",
        "apikey"  : AV_KEY,
    }
    r = requests.get(url, params=params, timeout=10)
    if r.status_code == 503:
        raise RuntimeError("Alpha Vantage news quota exhausted")
    r.raise_for_status()
    return [item["title"] for item in r.json().get("feed", [])[:limit]]


def classify_sentiment(text: str) -> dict:
    inputs = tokenizer(text, return_tensors="pt",
                       truncation=True, max_length=512)
    with torch.no_grad():
        out = finbert(**inputs)
    probs = torch.nn.functional.softmax(out.logits, dim=1).numpy()[0]
    idx = int(np.argmax(probs))
    return {"score": label_to_score[labels[idx]],
            "conf" : float(probs[idx])}


def predict_agent2(tickers: list[str],
                   lookback_days: int = 7,
                   top_n: int = 5) -> dict:
    """
    Return {ticker: {'sentiment': float, 'mean_conf': float, 'n': int}}
    where *sentiment* is a weighted mean score (−1 … +1).
    """
    today = dt.datetime.utcnow()
    window_start = today - dt.timedelta(days=lookback_days)


    results = {}
    for tk in tickers:
        try:
            headlines = alpha_news_window(tk, window_start, today, limit=top_n)
        except RuntimeError as e:
            print(e)
            continue
        if not headlines:
            continue
        scores, confs = [], []
        for h in headlines:
            r = classify_sentiment(h)
            scores.append(r["score"])
            confs.append(r["conf"])
            time.sleep(0.05)  # stays well below AV 75 req/min
        s_score = float(np.average(scores, weights=np.square(confs)))
        results[tk] = {"sentiment": s_score,
                       "mean_conf": float(np.mean(confs)),
                       "n": len(headlines)}
    return results




In [19]:
agent2_out = predict_agent2(["TSLA", "NIO", "RIVN"])
agent2_out


{'TSLA': {'sentiment': -0.5470298366800755,
  'mean_conf': 0.9033325910568237,
  'n': 5},
 'NIO': {'sentiment': -0.45896035052789785,
  'mean_conf': 0.7741093635559082,
  'n': 5},
 'RIVN': {'sentiment': -0.39968447834273,
  'mean_conf': 0.8698145508766174,
  'n': 5}}

## Agent 3 prediction

In [52]:
def alpha_headlines(ticker: str,
                    top_n: int = 10) -> list[str]:
    """
    Return up to `top_n` latest headlines for a single ticker.
    Very short titles (<30 chars) are skipped.
    """
    params = {"function": "NEWS_SENTIMENT",
              "tickers": ticker,
              "apikey": AV_KEY}
    resp = requests.get(BASE_URL, params=params, timeout=20)
    resp.raise_for_status()
    return [item["title"]
            for item in resp.json().get("feed", [])[:top_n]
            if len(item["title"]) > 30]


def gpt_sentiment(headline: str,
                  company: str,
                  temperature: float = 0.3) -> tuple[float, str]:
    """
    Return (score, reason) where score ∈ [-1, 1].
    Falls back to (np.nan, 'fail') on any API error.
    """
    prompt = f"""
You are an investor-sentiment analyst.

Company: {company}
Text: "{headline}"

On a scale of -1 (very negative) to +1 (very positive) give a numeric score
and a one-word rationale, as JSON:
{{"score": <number>, "reason": "<word>"}}
""".strip()
    try:
        rsp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role":"user","content":prompt}],
            temperature=temperature
        )
        data = json.loads(rsp.choices[0].message.content)
        return float(data["score"]), str(data["reason"])
    except Exception:
        return np.nan, "fail"  # ← surface the real error




def predict_agent3(tickers: list[str],
                   ticker_to_name: dict,
                   top_n: int = 8,
                   pause: float = 1.0) -> dict:
    """
    Return {ticker:
        {"sentiment": float, "confidence": float, "n": int}}
    Sentiment = mean(score); confidence = 1 – stdev(score) (clipped 0-1).
    """
    results = {}
    for tk in tickers:
        headlines = alpha_headlines(tk, top_n=top_n)
        if not headlines:
            continue


        scores = []
        for h in headlines:
            sc, _ = gpt_sentiment(h, ticker_to_name[tk])
            scores.append(sc)
            time.sleep(pause)  # 60 calls/min < AV limit


        scores = np.array([s for s in scores if not np.isnan(s)])
        if scores.size == 0:
            continue
        avg = float(scores.mean())
        stdev = float(scores.std(ddof=0))
        conf = max(0.0, 1.0 - stdev)  # simple dispersion proxy
        results[tk] = {"sentiment": avg,
                       "confidence": conf,
                       "n": int(scores.size)}
    return results


In [53]:
TICKERS = ["TSLA", "NIO", "RIVN"]
TICKER_TO_NAME = {
    "TSLA": "Tesla", "NIO": "NIO", "RIVN": "Rivian"
}


agent3_out = predict_agent3(TICKERS, TICKER_TO_NAME, top_n=5)
agent3_out


{'TSLA': {'sentiment': -0.07999999999999999,
  'confidence': 0.5004001601281282,
  'n': 5},
 'NIO': {'sentiment': 0.24000000000000005,
  'confidence': 0.5682593371015419,
  'n': 5},
 'RIVN': {'sentiment': 0.06000000000000001,
  'confidence': 0.60706234591223,
  'n': 5}}

## Unified Agent 

In [72]:
def build_panel(tickers: list[str], lookback: int = 7, top_n: int = 5) -> pd.DataFrame:
    """Merge the three agents into one DataFrame (rows dropped if any agent missing)."""
    a1 = predict_agent1(tickers)
    a2 = predict_agent2(tickers, lookback_days=lookback, top_n=top_n)
    a3 = predict_agent3(tickers, ticker_to_name, top_n=top_n)


    rows = []
    for tk in tickers:
        if tk not in a1 or tk not in a2 or tk not in a3:
            continue  # skip if any signal missing
        rows.append({
            "ticker"  : tk,
            "pred_cap"  : a1[tk]["pred_cap"],
            "cur_cap"  : a1[tk]["cur_cap"],
            "pct_diff"  : a1[tk]["pct_diff"],
            "news_sent" : a2[tk]["sentiment"],
            "gpt_sent"  : a3[tk]["sentiment"],
        })
    return pd.DataFrame(rows)


In [75]:
import json


def rank_companies(df: pd.DataFrame,
                   model: str = "gpt-4o",
                   temperature: float = 0.2) -> list[dict]:
    if df.empty:
        raise RuntimeError("Panel is empty – nothing to rank.")


    # build prompt
    lines = [
        (f"{r.ticker}: Δcap={r.pct_diff:+.1f}% (conf={a1_conf:.2f}) "
         f"news={r.news_sent:+.2f} (n={a2_n}) "
         f"gpt={r.gpt_sent:+.2f} (conf={a3_conf:.2f})")
        for r, a1_conf, a2_n, a3_conf in zip(
            df.itertuples(),
            df["confidence"],  # from Agent-1
            df["news_n"],  # add these cols in build_panel
            df["gpt_conf"],  # idem
        )
    ]


    prompt = (
        f"You are an equity meta-analyst.\n\n"
        f"For each ticker you receive six signals:\n"
        f"  • Δcap  – predicted vs current market-cap % (positive = upside)\n"
        f"  • Δcap_conf – confidence 0-1 from ensemble dispersion\n"
        f"  • news_sent – FinBERT sentiment (−1..+1) plus headline count\n"
        f"  • gpt_sent  – GPT-based sentiment (−1..+1) with its confidence\n\n"
        f"Rank ALL {len(df)} companies from best investment to worst. "
        f"Weight higher-confidence signals more heavily.\n\n"
        "Return JSON strictly in this form:\n"
        '{ "results": [ '
        '{"ticker":"AAA","rank":1,"rationale":"…"}, '
        '{"ticker":"BBB","rank":2,"rationale":"…"} ] }\n\n"
        "Signals:\n" + "\n".join(lines)
    )


    # OpenAI call
    rsp = client.chat.completions.create(
        model=model,
        temperature=temperature,
        response_format={"type": "json_object"},  # requires an *object*
        messages=[
            {"role": "system", "content": "You are an equity analyst."},
            {"role": "user",  "content": prompt}
        ]
    ).choices[0].message.content


    # robust load
    obj = json.loads(rsp)  # always a dict because of json_object


    if "results" not in obj or not isinstance(obj["results"], list):
        raise ValueError(f'JSON lacked "results" array: {obj}')


    rows = obj["results"]
    for idx, d in enumerate(rows, 1):  # ensure rank present
        d.setdefault("rank", idx)


    return sorted(rows, key=lambda d: d["rank"])


In [76]:
pd.set_option("display.max_colwidth", None)  # stop truncating long text


TICKERS = ["TSLA", "NIO", "RIVN"]
panel = build_panel(TICKERS, lookback=7, top_n=5)
ranking = rank_companies(panel, model="gpt-4o")


display(pd.DataFrame(ranking))


Unnamed: 0,ticker,rank,rationale
0,NIO,1,"NIO has shown the highest market cap growth at +249.2% and has a positive gpt sentiment of +0.30, despite slightly negative news sentiment."
1,RIVN,2,"RIVN has a strong market cap growth of +115.5% and a slightly positive gpt sentiment of +0.04, with a less negative news sentiment compared to others."
2,TSLA,3,"TSLA has experienced a significant market cap decline of -56.3% and has neutral gpt sentiment, with the most negative news sentiment among the three."
