# In [None]:
# visualize_sentiment.py

import os
import sys
import time
from pathlib import Path

import pandas as pd
import torch
import requests
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ─── resolve script_dir ───────────────────────────────────────────────────────
try:
    # When run as a .py file. Make the path absolute first: a relative
    # __file__ such as "visualize_sentiment.py" has parent ".", and the parent
    # of "." is again ".", which would make proj_root point at the wrong folder.
    script_dir = Path(os.path.abspath(__file__)).parent
except NameError:
    # __file__ is undefined in interactive sessions (VSCode interactive /
    # run-file); cwd should already be the NLP folder, and Path.cwd() is absolute.
    script_dir = Path.cwd()

# Make sure MQSMaster (parent of NLP) is on sys.path.
proj_root = script_dir.parent
if str(proj_root) not in sys.path:
    sys.path.insert(0, str(proj_root))

# ─── CONFIG ───────────────────────────────────────────────────────────────────
# NOTE(review): an API key is committed in source. It is kept only as a
# fallback so existing runs keep working; prefer exporting FMP_API_KEY in the
# environment, then rotate the key and remove the literal.
API_KEY    = os.environ.get("FMP_API_KEY") or "IPSiO49jnQdx4Y1XqyghXojVFzJnwtKb"  # your FMP key
MODEL_PATH = script_dir / "finbert-finetuned-final"   # model/tokenizer directory
ART_DIR    = script_dir / "articles"                  # one <TICKER>.csv per ticker

# ─── HELPERS ──────────────────────────────────────────────────────────────────

def load_articles(ticker: str) -> pd.DataFrame:
    """
    Load the saved articles for `ticker` from ART_DIR / "<ticker>.csv".

    The CSV must contain a 'publishedDate' column. The returned frame holds
    every CSV column plus a 'date' column with the calendar date
    (datetime.date) of each article.

    Raises whatever pandas raises when the file is missing or the dates
    cannot be parsed.
    """
    path = ART_DIR / f"{ticker}.csv"
    df = pd.read_csv(path, parse_dates=["publishedDate"])
    # read_csv returns the column unaltered (object dtype) when it cannot be
    # represented as datetimes, which would make the `.dt` accessor below raise
    # AttributeError. Converting explicitly is a no-op for an already-parsed
    # column and guarantees a datetime dtype (or a clear parsing error).
    df["publishedDate"] = pd.to_datetime(df["publishedDate"])
    df["date"] = df["publishedDate"].dt.date
    return df

def _sentiment_label_indices(model):
    """
    Return (positive_index, negative_index) into the model's output classes.

    The indices are looked up by name in `model.config.id2label`. If that
    mapping is missing or does not name both "positive" and "negative"
    (case-insensitive), the historical hard-coded positions (0, 2) are used.
    """
    config = getattr(model, "config", None)
    id2label = getattr(config, "id2label", None) or {}
    index_by_label = {str(label).lower(): int(idx) for idx, label in id2label.items()}
    if "positive" in index_by_label and "negative" in index_by_label:
        return index_by_label["positive"], index_by_label["negative"]
    return 0, 2


def analyze_sentiment_batched(
    df: pd.DataFrame, tokenizer, model, *, batch_size: int = 32
) -> pd.DataFrame:
    """
    Score every article in `df` and return one sentiment value per row.

    Each text is the row's 'content' followed by its 'title' (missing values
    become empty strings), truncated to 512 tokens. Texts are tokenized and
    run through the model in chunks of `batch_size` so memory use stays
    bounded regardless of how many articles there are.

    Args:
        df: frame with 'content', 'title' and 'date' columns.
        tokenizer: callable used as tokenizer(texts, return_tensors="pt", ...).
        model: callable used as model(**inputs) whose result has `.logits`.
        batch_size: number of texts per forward pass; must be >= 1.

    Returns:
        DataFrame with 'date' and 'sentiment', where
        sentiment = positive_prob - negative_prob.

    Raises:
        ValueError: if `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    texts = (df["content"].fillna("") + " " + df["title"].fillna("")).tolist()
    dates = pd.to_datetime(df["date"]).tolist()

    if not texts:
        # Nothing to score: return an empty frame with the usual columns
        # instead of handing an empty batch to the tokenizer.
        return pd.DataFrame({
            "date": pd.Series(dtype="datetime64[ns]"),
            "sentiment": pd.Series(dtype="float32"),
        })

    # Class order differs between checkpoints, so resolve it from the model
    # config rather than assuming fixed positions.
    pos_idx, neg_idx = _sentiment_label_indices(model)

    score_chunks = []
    for start in range(0, len(texts), batch_size):
        inputs = tokenizer(
            texts[start:start + batch_size],
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        )
        with torch.no_grad():
            logits = model(**inputs).logits
            probs = torch.softmax(logits, dim=1).cpu()
        # sentiment = positive_prob − negative_prob
        score_chunks.append(probs[:, pos_idx] - probs[:, neg_idx])

    scores = torch.cat(score_chunks).numpy()

    return pd.DataFrame({ "date": dates, "sentiment": scores })

def fetch_last_7_days(ticker: str) -> pd.DataFrame:
    """
    Fetch the last 7 trading days' close prices using FMP's timeseries parameter.

    Returns a DataFrame with columns 'date' (datetime) and 'close', in the
    order the API returned the records. If the response carries no
    'historical' records, an empty DataFrame with those two columns is
    returned instead of failing on a missing column.

    Raises:
        requests.HTTPError: on a non-success HTTP status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    url = (
        f"https://financialmodelingprep.com/api/v3/"
        f"historical-price-full/{ticker}"
    )
    # Let requests build and encode the query string, and always set a
    # timeout: without one a stalled connection blocks the script forever.
    resp = requests.get(
        url,
        params={"timeseries": 7, "apikey": API_KEY},
        timeout=30,
    )
    resp.raise_for_status()
    json_resp = resp.json()

    # pull out the 'historical' list
    records = json_resp.get("historical", []) if isinstance(json_resp, dict) else []
    if not records:
        return pd.DataFrame({
            "date": pd.Series(dtype="datetime64[ns]"),
            "close": pd.Series(dtype="float64"),
        })

    prices = pd.DataFrame(records)
    prices["date"] = pd.to_datetime(prices["date"])
    return prices[["date", "close"]]

# ─── MAIN ─────────────────────────────────────────────────────────────────────
def _plot_price_vs_sentiment(ticker: str, merged: pd.DataFrame) -> None:
    """
    Plot close price (left axis) against the sentiment moving average (right axis).

    `merged` must provide 'date', 'close' and 'sentiment_ma' columns.
    """
    _, ax1 = plt.subplots(figsize=(14, 7))
    ax1.plot(merged["date"], merged["close"], label="Close Price")
    ax1.set_xlabel("Date")
    ax1.set_ylabel("Price")
    ax1.grid(True, linestyle="--", alpha=0.5)

    # Sentiment lives on its own y-axis because its scale differs from price.
    ax2 = ax1.twinx()
    ax2.plot(
        merged["date"],
        merged["sentiment_ma"],
        label="7-Day Sentiment MA"
    )
    ax2.set_ylabel("Sentiment Score")
    ax2.axhline(0, linestyle="--", linewidth=0.8)

    # combine legends from both axes into a single box
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1 + h2, l1 + l2, loc="upper left")

    plt.title(f"{ticker}: Price vs. Sentiment (Last 7 Days)")
    plt.tight_layout()
    plt.show()


def main(ticker: str):
    """
    Score the saved articles for `ticker`, join the daily mean sentiment onto
    the last 7 days of close prices, and plot both.

    Prints timing information for each stage. Returns early (after printing a
    message) when there are no articles or no price rows to plot.
    """
    # 1. Load model & tokenizer
    t0 = time.perf_counter()
    tokenizer = AutoTokenizer.from_pretrained(str(MODEL_PATH))
    model     = AutoModelForSequenceClassification.from_pretrained(str(MODEL_PATH))
    model.eval()
    print(f"Model load time: {time.perf_counter() - t0:.2f}s")

    # 2. Load articles
    t1 = time.perf_counter()
    art_df = load_articles(ticker)
    print(f"Loaded {len(art_df)} articles in {time.perf_counter() - t1:.2f}s")

    # 3. Analyze sentiment (batched)
    if art_df.empty:
        print(f"No articles found for {ticker}.")
        return

    t2 = time.perf_counter()
    sent_df = analyze_sentiment_batched(art_df, tokenizer, model)
    print(f"Sentiment analysis time: {time.perf_counter() - t2:.2f}s")

    # 4. Aggregate daily
    daily = sent_df.groupby("date")["sentiment"].mean().reset_index()
    daily["date"] = pd.to_datetime(daily["date"])

    # 5. Fetch last 7 days of prices
    t3 = time.perf_counter()
    price_df = fetch_last_7_days(ticker)
    print(f"Price fetch time: {time.perf_counter() - t3:.2f}s")

    if price_df.empty:
        print(f"No price data found for {ticker}.")
        return

    # 6. Merge and fill missing sentiment
    # rolling() walks rows in stored order, so sort chronologically first;
    # otherwise the moving average follows whatever order the API returned.
    merged = (
        price_df.merge(daily, on="date", how="left")
        .sort_values("date")
        .reset_index(drop=True)
    )
    # Assign the result back: an in-place fillna on a column selection is
    # chained assignment and does not update `merged` under Copy-on-Write.
    merged["sentiment"] = merged["sentiment"].fillna(0)
    merged["sentiment_ma"] = merged["sentiment"].rolling(7, min_periods=1).mean()

    # 7. Plot
    _plot_price_vs_sentiment(ticker, merged)

# ─── ENTRY POINT ──────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Prompt for the ticker; it is normalized to upper case with surrounding
    # whitespace removed before being passed to main().
    ticker = input("Enter ticker (e.g. AAPL): ").strip().upper()
    if not ticker:
        # An empty ticker would otherwise load the model and only then fail
        # on a missing ".csv" article file.
        raise SystemExit("No ticker entered.")
    main(ticker)
