In [30]:
# ============================================
# YouTube Emotion Scraper (WORKING IN 2025)
# No API key • No yt-dlp search
# ============================================

import requests
import re
import csv
import itertools
from bs4 import BeautifulSoup
import yt_dlp

In [31]:
# ------------------------------
# 1. HTML SEARCH (no API key)
# ------------------------------

def search_youtube(keyword, max_results=40):
    """Search YouTube's public results page and return the video IDs found.

    The results page HTML is fetched without an API key and scanned with a
    regular expression for ``watch?v=<11-character id>`` links.

    Args:
        keyword: Free-text search phrase. It is passed through ``params`` so
            that spaces and special characters (``&``, ``#``, quotes,
            non-ASCII text) are URL-encoded instead of corrupting the query.
        max_results: Maximum number of IDs to return. A value of zero or
            less returns an empty list without touching the network.

    Returns:
        A list of unique video ID strings in first-seen order, at most
        ``max_results`` long. An empty list is returned when the request
        fails; the error is printed rather than raised.
    """
    # A non-positive limit can never yield results (and a negative slice
    # bound would silently drop items from the end instead), so stop early.
    if max_results <= 0:
        return []

    print(f"Searching YouTube: {keyword}")

    try:
        response = requests.get(
            "https://www.youtube.com/results",
            params={"search_query": keyword},
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=15,  # without a timeout a stalled connection hangs the cell
        )
        # Treat HTTP error pages as failures instead of parsing them as results
        response.raise_for_status()

        # Extract video IDs from HTML
        video_ids = re.findall(r"watch\?v=([a-zA-Z0-9_-]{11})", response.text)

        # Remove duplicates; dict keys keep insertion (first-seen) order
        unique_ids = list(dict.fromkeys(video_ids))

        print(f" -> Found {len(unique_ids)} videos")

        return unique_ids[:max_results]

    except Exception as e:
        # Best-effort contract: report the problem and return no results
        print("Search error:", e)
        return []


In [32]:
# ------------------------------
# 2. SCRAPE COMMENTS
# ------------------------------

def scrape_comments(video_id, max_comments=10000):
    """Collect comment texts for one YouTube video via yt-dlp.

    Args:
        video_id: The video's ID, inserted into a ``watch?v=`` URL.
        max_comments: Upper bound on the number of comment texts returned.
            Zero or less returns an empty list without running yt-dlp.

    Returns:
        A list with the ``"text"`` value of each comment entry that has
        one, at most ``max_comments`` long. Extraction errors are printed,
        not raised; whatever was collected before the error is returned.
    """
    url = f"https://www.youtube.com/watch?v={video_id}"

    ydl_opts = {
        "quiet": True,
        "skip_download": True,
        "extract_flat": True,
        "getcomments": True,
    }

    print(f"   Scraping comments from video {video_id} ...")

    comments = []

    # Nothing to collect for a non-positive limit: skip the extraction entirely
    if max_comments > 0:
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)

            # `info` itself or its "comments" entry may be None; treat both
            # as "no comments" rather than reporting a TypeError as a failure.
            # NOTE(review): the cap below only truncates what yt-dlp returned;
            # presumably yt-dlp still fetches every comment first - confirm.
            for entry in (info or {}).get("comments") or []:
                # Check the cap before appending so the limit is never exceeded
                if len(comments) >= max_comments:
                    break
                if "text" in entry:
                    comments.append(entry["text"])

        except Exception as e:
            print(f"   Error scraping comments for {video_id}: {e}")

    print(f"   -> Collected {len(comments)} comments")
    return comments

In [33]:
# ------------------------------
# 3. SAVE CSV
# ------------------------------

def save_csv(filename, rows):
    """Write ``rows`` to ``filename`` as a single-column CSV.

    The file is opened for writing as UTF-8, so an existing file at that
    path is replaced. The first line is the header ``comment``; each item
    of ``rows`` then becomes one record. A summary line is printed once
    the file has been written.

    Args:
        filename: Path of the CSV file to create.
        rows: Sized collection of values, one per output record.
    """
    with open(filename, "w", newline="", encoding="utf-8") as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(["comment"])
        for value in rows:
            csv_out.writerow([value])
    print(f"Saved {len(rows)} comments to {filename}")

In [34]:
# ------------------------------
# 4. EMOTION KEYWORDS
# ------------------------------

# Maps each emotion label to the list of search phrases associated with it.
emotion_keywords = {
    "sadness": [
        "sad song",
        "broken heart",
        "sad story",
        "emotional music",
    ],
    "anger": [
        "angry rant",
        "rage compilation",
        "angry moments",
    ],
    "fear": [
        "scary story",
        "horror story",
        "creepy videos",
    ],
    "joy": [
        "happy moments",
        "funny video",
        "joyful moments",
    ],
    "love": [
        "love song",
        "romantic story",
        "relationship advice",
    ],
}

In [35]:
def collect_emotion_comments(title, keywords=None, target_count=10000,
                             video_id="vcsSc2iksC0"):
    """Scrape the comments of a single video and save them to ``<title>.csv``.

    Args:
        title: Label printed in the banner and used as the CSV file name
            (``f"{title}.csv"``).
        keywords: Accepted for call compatibility but not used; no keyword
            search is performed here.
        target_count: Maximum number of comments to collect, forwarded to
            ``scrape_comments`` as ``max_comments``.
        video_id: ID of the video to scrape. Defaults to the video that was
            previously hard-coded, so existing calls behave as before.

    Returns:
        None. The comments are written to disk by ``save_csv``.
    """
    print("\n============================")
    print(f" Collecting EMOTION: {title}")
    print("============================")

    # ------------------------------------
    # 1. Use a single video only
    # ------------------------------------
    print(f"Scraping comments from video: {video_id}")

    # Fetch as many comments as possible, up to target_count
    all_comments = scrape_comments(video_id, max_comments=target_count)

    print(f"Total comments collected: {len(all_comments)}")

    # ------------------------------------
    # 2. Save to CSV
    # ------------------------------------
    file_name = f"{title}.csv"
    # save_csv prints its own "Saved N comments to <file>" line, so it is not
    # repeated here (it used to appear twice in the cell output).
    save_csv(file_name, all_comments)


In [36]:
# ============================================
# RUN ALL EMOTIONS
# ============================================

# Label for this run; the collector names its CSV output after it.
title = "Life_in_a_Day_2020_Official_Documentary"

# Single collection run, capped at 10000 comments.
collect_emotion_comments(title=title, target_count=10000)

print("\n=== ALL DONE! ===")


 Collecting EMOTION: Life_in_a_Day_2020_Official_Documentary
Scraping comments from video: vcsSc2iksC0
   Scraping comments from video vcsSc2iksC0 ...




   -> Collected 509 comments
Total comments collected: 509
Saved 509 comments to Life_in_a_Day_2020_Official_Documentary.csv
Saved 509 comments to Life_in_a_Day_2020_Official_Documentary.csv

=== ALL DONE! ===


In [None]:
# Keyword queries for each emotion

# Uncomment to run the scrape
# emotion = "sadness"
# emotion = "anger"
# emotion = "fear"
# emotion = "joy"
# emotion = "love"

# queries = {
#     "sadness":  '"I feel sad" OR "I am sad" OR "so sad" OR "heartbroken" OR "feeling down" OR "I feel depressed"',
#     "anger":    '"I am angry" OR "so mad" OR "pissed off" OR "furious" OR "so angry" OR "I hate this"',
#     "fear":     '"I am scared" OR "I feel anxious" OR "terrified" OR "I am afraid" OR "panic attack"',
#     "joy":      '"I feel happy" OR "so happy" OR "excited" OR "grateful" OR "I am joyful" OR "good news"',
#     "love":     '"I love you" OR "I love this" OR "so in love" OR "I adore" OR "I really love"'
# }
# keyword = queries[emotion]