# YouTube Trailer Sentiment for WBD Titles

Pull trailer comments via the YouTube Data API and score them with a DistilBERT sentiment classifier.

**Contact:** Pablo Monteros — [GitHub](https://github.com/Pmonteros8) • [LinkedIn](https://www.linkedin.com/in/pmonteros/) • [Email](mailto:Pablo.monterosj@gmail.com)

In [None]:
import os, pandas as pd
from googleapiclient.discovery import build
from transformers import pipeline
from dotenv import load_dotenv
# Load the API key from the local env file rather than hard-coding it here.
load_dotenv("config/.env")
YOUTUBE_KEY = os.getenv("YOUTUBE_API_KEY")
if not YOUTUBE_KEY:
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`,
    # which would let a missing key surface later as an opaque API error.
    raise RuntimeError("Missing YOUTUBE_API_KEY in config/.env")
# DistilBERT checkpoint fine-tuned on SST-2; created once and reused for all comments.
clf = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

def fetch_comments(video_id, cap=300, *, client=None):
    """Fetch up to ``cap`` top-level comments for one YouTube video.

    Pages through ``commentThreads().list`` until ``cap`` comments have been
    collected or the API reports no further page.

    Args:
        video_id: ID of the video whose comment threads are requested.
        cap: Maximum number of comments to return. A value <= 0 returns an
            empty frame without calling the API.
        client: Optional, already-built YouTube API client. When omitted a new
            one is built from ``YOUTUBE_KEY``; pass one in to reuse a single
            client across several videos.

    Returns:
        A ``pandas.DataFrame`` with columns ``video_id``, ``author`` and
        ``text`` (one row per top-level comment). The columns are present even
        when no comments were found.

    Raises:
        Whatever the API client raises for a failed request; errors are not
        caught here.
    """
    columns = ["video_id", "author", "text"]
    rows = []
    if cap <= 0:
        return pd.DataFrame(rows, columns=columns)

    yt = client if client is not None else build("youtube", "v3", developerKey=YOUTUBE_KEY)
    page_token = None
    while len(rows) < cap:
        params = {
            "part": "snippet",
            "videoId": video_id,
            # The original requested 100 per page; never ask for more than is still needed.
            "maxResults": min(100, cap - len(rows)),
            "textFormat": "plainText",
        }
        if page_token:
            params["pageToken"] = page_token
        res = yt.commentThreads().list(**params).execute()

        for item in res.get("items", []):
            top = item["snippet"]["topLevelComment"]["snippet"]
            rows.append({
                "video_id": video_id,
                "author": top.get("authorDisplayName"),
                "text": top.get("textDisplay"),
            })
            # Defensive: stop even if a page carries more items than requested.
            if len(rows) >= cap:
                break

        page_token = res.get("nextPageToken")
        if not page_token:
            break
    return pd.DataFrame(rows, columns=columns)

# Replace with real trailer IDs; fallback to sample file for offline testing
video_ids = ["dQw4w9WgXcQ"]
dfs = []
for vid in video_ids:
    try:
        frame = fetch_comments(vid, cap=300)
    except Exception as exc:
        # Fetching stays best-effort (one failing video must not abort the run),
        # but report the failure so the offline fallback is never taken silently.
        print(f"Could not fetch comments for {vid}: {type(exc).__name__}: {exc}")
        continue
    # Skip videos that returned no comments: an empty frame may lack a "text" column.
    if not frame.empty:
        dfs.append(frame)

if dfs:
    data = pd.concat(dfs, ignore_index=True)
else:
    print("No comments fetched; using data/youtube_comments_sample.csv")
    data = pd.read_csv("data/youtube_comments_sample.csv")

# Drop rows with missing text; astype(str) would otherwise turn them into the
# literal strings "None"/"nan" and score those as if they were comments.
data = data.dropna(subset=["text"]).reset_index(drop=True)
texts = data["text"].astype(str).tolist()
# Only invoke the classifier when there is something to classify.
preds = clf(texts, truncation=True) if texts else []
data["sentiment"] = [p["label"] for p in preds]
data["score"] = [p["score"] for p in preds]
# Last expression of the cell: number of comments per predicted label.
data.groupby("sentiment").size()