In [5]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# CSV read by main(); its "Title" and "Description" columns supply the text to score.
INPUT_CSV = "us_headlines.csv"
# Destination CSV: the input rows plus the "vader_compound" and "vader_label" columns.
OUTPUT_CSV = "us_headlines_scored_vader.csv"

def vader_label(compound: float) -> str:
    """Bucket a compound sentiment score into a three-way label.

    Args:
        compound: Compound score to classify.

    Returns:
        "positive" when the score is at least 0.05, "negative" when it is at
        most -0.05, and "neutral" for anything else (including NaN, which
        fails both comparisons).
    """
    threshold = 0.05
    if compound >= threshold:
        sentiment = "positive"
    elif compound <= -threshold:
        sentiment = "negative"
    else:
        sentiment = "neutral"
    return sentiment

def main():
    """Score every row of INPUT_CSV with VADER and save the result to OUTPUT_CSV.

    Reads the input CSV, joins the "Title" and "Description" columns into one
    text per row (missing values become empty strings), and adds two columns:
    "vader_compound" (the analyzer's compound score for that text) and
    "vader_label" (that score bucketed by vader_label). The augmented frame
    is written without its index.

    Raises:
        KeyError: if the input CSV lacks a "Title" or "Description" column.
        PermissionError: if OUTPUT_CSV cannot be written; a hint is printed
            before the original error is re-raised.
    """
    df = pd.read_csv(INPUT_CSV)

    # Fail early, naming every missing column at once, instead of surfacing a
    # bare KeyError for whichever column happens to be accessed first.
    required = ("Title", "Description")
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"{INPUT_CSV} is missing required column(s): {missing}")

    # Combine title + description safely (NaN -> "" so the concat never yields NaN)
    text = (
        df["Title"].fillna("").astype(str)
        + ". "
        + df["Description"].fillna("").astype(str)
    )

    analyzer = SentimentIntensityAnalyzer()

    df["vader_compound"] = text.apply(
        lambda t: analyzer.polarity_scores(t)["compound"]
    )
    df["vader_label"] = df["vader_compound"].apply(vader_label)

    try:
        df.to_csv(OUTPUT_CSV, index=False)
    except PermissionError:
        # The exception itself is re-raised untouched; the hint only explains
        # the usual ways out so the run is not abandoned without guidance.
        print(
            f"Could not write {OUTPUT_CSV}: permission denied. The file may be "
            "open in another program or be read-only; close it or change "
            "OUTPUT_CSV, then re-run."
        )
        raise
    print(f"Saved {OUTPUT_CSV} with {len(df)} rows")

# Run the VADER scoring when this code is executed as the main module.
if __name__ == "__main__":
    main()


PermissionError: [Errno 13] Permission denied: 'us_headlines_scored_vader.csv'

In [None]:
import pandas as pd
from transformers import pipeline

# CSV read by main(); its "Title" and "Description" columns supply the text to score.
INPUT_CSV = "us_headlines.csv"
# Destination CSV: the input rows plus the "roberta_label" and "roberta_score" columns.
OUTPUT_CSV = "us_headlines_scored_roberta.csv"

# Model identifier passed to the transformers pipeline as both model and tokenizer.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Batch size handed to the pipeline call.
BATCH_SIZE = 32

def normalize_label(label: str) -> str:
    """Map a raw classifier label onto "negative", "neutral" or "positive".

    The label is stripped and lower-cased, then matched by substring in the
    order "neg", "neu", "pos"; the first fragment found decides the result.

    Args:
        label: Label string as emitted by the classifier.

    Returns:
        The canonical label, or the stripped, lower-cased input when none of
        the fragments occurs in it.
    """
    cleaned = label.strip().lower()
    # Order matters: earlier fragments win when several occur in one label.
    fragments = (
        ("neg", "negative"),
        ("neu", "neutral"),
        ("pos", "positive"),
    )
    for fragment, canonical in fragments:
        if fragment in cleaned:
            return canonical
    return cleaned

def main():
    """Score every row of INPUT_CSV with the RoBERTa model and save to OUTPUT_CSV.

    Reads the input CSV, joins the "Title" and "Description" columns into one
    text per row (missing values become empty strings), classifies the texts
    with a transformers sentiment pipeline built from MODEL_NAME, and adds two
    columns: "roberta_label" (the predicted label passed through
    normalize_label) and "roberta_score" (the pipeline's score for that label,
    as a float). The augmented frame is written without its index.

    Raises:
        KeyError: if the input CSV lacks a "Title" or "Description" column.
        PermissionError: if OUTPUT_CSV cannot be written; a hint is printed
            before the original error is re-raised.
    """
    df = pd.read_csv(INPUT_CSV)

    # Fail early, naming every missing column at once, instead of surfacing a
    # bare KeyError for whichever column happens to be accessed first.
    required = ("Title", "Description")
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"{INPUT_CSV} is missing required column(s): {missing}")

    # Combine Title + Description (NaN -> "" so the concat never yields NaN)
    texts = (
        df["Title"].fillna("").astype(str)
        + ". "
        + df["Description"].fillna("").astype(str)
    ).tolist()

    # 512 tokens is the RoBERTa-base sequence limit. Passing it explicitly makes
    # truncation=True effective even when the tokenizer config does not declare
    # a model maximum length, so an over-long text cannot overflow the model.
    max_tokens = 512
    clf = pipeline(
        "sentiment-analysis",
        model=MODEL_NAME,
        tokenizer=MODEL_NAME,
        truncation=True,
        max_length=max_tokens,
    )

    # Nothing to classify for an empty input file; skip the pipeline call.
    outputs = clf(texts, batch_size=BATCH_SIZE) if texts else []

    df["roberta_label"] = [normalize_label(o["label"]) for o in outputs]
    df["roberta_score"] = [float(o["score"]) for o in outputs]

    try:
        df.to_csv(OUTPUT_CSV, index=False)
    except PermissionError:
        # The exception itself is re-raised untouched; the hint only explains
        # the usual ways out so the inference run is not abandoned without
        # guidance.
        print(
            f"Could not write {OUTPUT_CSV}: permission denied. The file may be "
            "open in another program or be read-only; close it or change "
            "OUTPUT_CSV, then re-run."
        )
        raise
    print(f"Saved {OUTPUT_CSV} with {len(df)} rows")

# Run the RoBERTa scoring when this code is executed as the main module.
if __name__ == "__main__":
    main()

