# In [None]:
# visualize_sentiment.py

import os
import sys
from pathlib import Path

import pandas as pd
import torch
import requests
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ─── resolve script_dir ───────────────────────────────────────────────────────
try:
    # when run as a .py file; resolve() makes the path absolute, because a
    # relative __file__ (e.g. "visualize_sentiment.py") has parent "." and
    # Path(".").parent is still ".", which would leave proj_root pointing at
    # the script folder instead of its parent
    script_dir = Path(__file__).resolve().parent
except NameError:
    # when run in VSCode interactive/run-file: cwd should already be the NLP folder
    script_dir = Path.cwd()

# make sure MQSMaster (parent of NLP) is on sys.path
proj_root = script_dir.parent
if str(proj_root) not in sys.path:
    sys.path.insert(0, str(proj_root))

# ─── CONFIG ───────────────────────────────────────────────────────────────────
# FMP API key: prefer the FMP_API_KEY environment variable so a real key never
# has to be committed to the source; falls back to the original placeholder.
API_KEY    = os.environ.get("FMP_API_KEY", "my_api")
# folder passed to from_pretrained() for both tokenizer and model
MODEL_PATH = script_dir / "finbert-finetuned-final"
# folder holding one "<TICKER>.csv" file of articles per ticker
ART_DIR    = script_dir / "articles"

# ─── HELPERS ──────────────────────────────────────────────────────────────────
def load_articles(ticker: str) -> pd.DataFrame:
    """Read the saved article CSV for *ticker* and attach a ``date`` column.

    The file is looked up as ``ART_DIR / "<ticker>.csv"``; its
    ``publishedDate`` column is parsed as datetimes and the calendar-date
    part is stored in a new ``date`` column.

    Args:
        ticker: Symbol used as the CSV file stem.

    Returns:
        The article frame with the additional ``date`` column.
    """
    csv_file = ART_DIR / f"{ticker}.csv"
    return pd.read_csv(csv_file, parse_dates=["publishedDate"]).assign(
        date=lambda frame: frame["publishedDate"].dt.date
    )

def analyze_sentiment(df: pd.DataFrame, tokenizer, model) -> pd.DataFrame:
    """Score each article and return one sentiment value per scored row.

    For every row the ``content`` text is used when it is a non-blank string;
    otherwise the ``title`` is used. Rows with no usable text are skipped.
    The score is ``probs[0] - probs[2]`` of the softmaxed model logits.

    NOTE(review): this assumes class index 0 is "positive" and index 2 is
    "negative" for the fine-tuned model -- confirm against
    ``model.config.id2label``.

    Args:
        df: Article frame with a ``date`` column and ``content`` and/or
            ``title`` columns.
        tokenizer: Callable turning text into keyword inputs for ``model``.
        model: Callable returning an object with a ``logits`` tensor of
            shape (1, num_classes).

    Returns:
        DataFrame with columns ``date`` and ``sentiment`` (empty, but with
        those columns, when nothing could be scored).
    """
    recs = []
    for _, row in df.iterrows():
        # pandas stores missing cells as NaN, which is truthy, so a plain
        # `content or title` never falls back to the title. Pick the first
        # candidate that is actually a non-blank string instead.
        text = next(
            (
                candidate
                for candidate in (row.get("content"), row.get("title"))
                if isinstance(candidate, str) and candidate.strip()
            ),
            None,
        )
        if text is None:
            continue
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():  # inference only, no gradients needed
            logits = model(**inputs).logits
        probs = torch.softmax(logits, dim=1)[0]
        recs.append({
            "date": row["date"],
            "sentiment": probs[0].item() - probs[2].item(),
        })
    # explicit columns keep the schema stable even when no row was scored
    return pd.DataFrame(recs, columns=["date", "sentiment"])

def fetch_prices(ticker: str) -> pd.DataFrame:
    """Download daily closing prices for *ticker* from Financial Modeling Prep.

    Args:
        ticker: Symbol inserted into the ``historical-price-full`` endpoint.

    Returns:
        DataFrame with a datetime ``date`` column and a ``close`` column,
        sorted by ascending date. Empty (with the same columns) when the
        response carries no ``historical`` records.

    Raises:
        requests.RequestException: On network errors, timeouts, or a
            non-2xx HTTP status.
    """
    url = (
        "https://financialmodelingprep.com/api/v3/"
        f"historical-price-full/{ticker}"
    )
    # Pass the key via params so it is URL-encoded, and set a timeout so a
    # stalled connection cannot hang the script forever.
    resp = requests.get(url, params={"apikey": API_KEY}, timeout=30)
    resp.raise_for_status()
    data = resp.json().get("historical", [])
    if not data:
        # No records (unknown ticker, bad key, ...): return an empty frame
        # with the expected schema instead of failing with a KeyError below.
        return pd.DataFrame({
            "date": pd.Series(dtype="datetime64[ns]"),
            "close": pd.Series(dtype="float64"),
        })
    prices = pd.DataFrame(data)
    prices["date"] = pd.to_datetime(prices["date"])
    # Callers compute rolling statistics, so guarantee chronological order
    # rather than relying on whatever order the API returned.
    return prices[["date", "close"]].sort_values("date").reset_index(drop=True)

# ─── MAIN ─────────────────────────────────────────────────────────────────────
def main(ticker: str):
    """Plot the closing price of *ticker* against a smoothed sentiment score.

    Loads the local model and tokenizer, scores the saved articles, averages
    the scores per day, left-joins them onto the price history, and shows a
    dual-axis chart of price and the rolling sentiment mean.

    Args:
        ticker: Symbol used both for the article CSV and the price request.
    """
    # load model/tokenizer from local folder
    tokenizer = AutoTokenizer.from_pretrained(str(MODEL_PATH))
    model     = AutoModelForSequenceClassification.from_pretrained(str(MODEL_PATH))
    model.eval()

    # score articles
    art_df  = load_articles(ticker)
    sent_df = analyze_sentiment(art_df, tokenizer, model)
    if sent_df.empty:
        print(f"No articles or valid text found for {ticker}.")
        return

    # daily aggregation
    daily = sent_df.groupby("date")["sentiment"].mean().reset_index()
    daily["date"] = pd.to_datetime(daily["date"])

    # fetch prices & merge
    price_df = fetch_prices(ticker)
    if price_df.empty:
        print(f"No price data found for {ticker}.")
        return
    # Sort chronologically: the rolling window below is only a trailing
    # average when rows are in ascending date order.
    merged = (
        price_df.merge(daily, on="date", how="left")
        .sort_values("date")
        .reset_index(drop=True)
    )
    # Assign the result back instead of fillna(inplace=True) on a column
    # selection, which is chained assignment and is a no-op under pandas
    # Copy-on-Write.
    merged["sentiment"] = merged["sentiment"].fillna(0)
    # window of 7 rows, i.e. 7 price dates (days without a price row are absent)
    merged["sentiment_ma"] = merged["sentiment"].rolling(7, min_periods=1).mean()

    # plot
    fig, ax1 = plt.subplots(figsize=(14,7))
    ax1.plot(merged["date"], merged["close"], label="Close Price")
    ax1.set_xlabel("Date"); ax1.set_ylabel("Price")
    ax1.grid(True, linestyle="--", alpha=0.5)

    # second y-axis sharing the same dates for the sentiment curve
    ax2 = ax1.twinx()
    ax2.plot(
        merged["date"],
        merged["sentiment_ma"],
        color="tab:red",
        label="7-Day Sentiment MA"
    )
    ax2.set_ylabel("Sentiment Score")
    ax2.axhline(0, color="gray", linestyle="--", linewidth=0.8)

    # legend & title (combine handles from both axes into one legend)
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1+h2, l1+l2, loc="upper left")
    plt.title(f"{ticker}: Price vs. Sentiment")
    plt.tight_layout()
    plt.show()

# ─── ENTRY POINT ──────────────────────────────────────────────────────────────
if __name__ == "__main__":
    ticker = input("Enter ticker (e.g. AAPL): ").strip().upper()
    if not ticker:
        # An empty ticker would otherwise look for "articles/.csv" and fail
        # with a confusing FileNotFoundError.
        raise SystemExit("No ticker entered.")
    main(ticker)
