In [2]:
%pip install yt-dlp pandas tqdm

Note: you may need to restart the kernel to use updated packages.


In [3]:
import yt_dlp
import pandas as pd
from tqdm import tqdm

In [4]:
# Text file read by the scraping cell: each non-blank line is used as one YouTube video ID.
INPUT_FILE = "ids.txt"
# CSV file the scraping cell writes the collected comment rows to.
OUTPUT_FILE = "youtube_comments.csv"

In [None]:

def get_comments_ytdlp(video_id):
    """Fetch the comments of one YouTube video as a list of flat row dicts.

    Args:
        video_id: YouTube video ID; it is interpolated into a
            ``https://www.youtube.com/watch?v=<video_id>`` URL.

    Returns:
        A list with one dict per non-empty comment, with the keys ``id``,
        ``video_title``, ``comment`` (newlines replaced by spaces, stripped),
        ``author``, ``like_count``, ``published`` (the comment's
        ``timestamp`` field as reported by yt-dlp) and ``url``.
        The list is empty when no comments are available.

    Raises:
        Any exception raised by ``yt_dlp.YoutubeDL.extract_info`` propagates
        unchanged; the caller decides how to handle a failed video.
    """
    url = f"https://www.youtube.com/watch?v={video_id}"
    ydl_opts = {
        "skip_download": True,
        "quiet": True,
        "extract_flat": False,
        "extractor_args": {"youtube": {"player_client": ["android"]}},
        "getcomments": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)

    # `.get(key, default)` only covers a *missing* key. Normalise with `or`
    # so that a key that is present but None (or a None result) yields no
    # rows instead of raising and losing the whole video.
    info = info or {}
    comments = info.get("comments") or []
    title = info.get("title") or "Unknown title"

    rows = []
    for c in comments:
        # A comment whose text is None is treated like an empty comment.
        text = (c.get("text") or "").replace("\n", " ").strip()
        if not text:
            continue
        rows.append({
            "id": video_id,
            "video_title": title,
            "comment": text,
            "author": c.get("author", ""),
            "like_count": c.get("like_count", 0),
            "published": c.get("timestamp", ""),
            "url": url,
        })
    return rows

In [7]:
# Read the video IDs, one per non-blank line. "utf-8-sig" decodes plain
# ASCII/UTF-8 and also drops a leading byte-order mark, so the first ID is
# not corrupted and the result does not depend on the platform's locale.
with open(INPUT_FILE, "r", encoding="utf-8-sig") as f:
    video_ids = [line.strip() for line in f if line.strip()]

all_comments = []
failed_ids = []  # IDs whose scrape raised; kept so they can be retried later
for vid in tqdm(video_ids, desc="Scraping videos"):
    try:
        all_comments.extend(get_comments_ytdlp(vid))
    except Exception as e:
        # Best-effort on purpose: one failing video must not abort the run.
        failed_ids.append(vid)
        print(f"Error scraping {vid}: {e}")

# Build the frame once from the accumulated row dicts and persist it.
df = pd.DataFrame(all_comments)
df.to_csv(OUTPUT_FILE, index=False)
print(f"Saved {len(df)} comments to {OUTPUT_FILE}")
if failed_ids:
    # Repeat the failures after the progress output so they are not overlooked.
    print(f"Failed to scrape {len(failed_ids)} of {len(video_ids)} videos: {failed_ids}")

Scraping videos: 100%|██████████| 34/34 [37:04<00:00, 65.44s/it]


Saved 77912 comments to youtube_comments.csv
