In [1]:
"""
Download every public video from ONE YouTube channel → CSV.

Save this as fetch_channel_videos.py
Run with:  python fetch_channel_videos.py  UCxxxxxxxxxxxxxxxxxxxx
"""

'\nDownload every public video from ONE YouTube channel → CSV.\n\nSave this as fetch_channel_videos.py\nRun with:  python fetch_channel_videos.py  UCxxxxxxxxxxxxxxxxxxxx\n'

In [2]:
import os, sys, time, math, itertools, json
from pathlib import Path
import pandas as pd
from googleapiclient.discovery import build
from tqdm import tqdm   # progress bar

In [3]:
print(os.getenv("YOUTUBE_API_KEY") )

None


In [4]:
API_KEY = os.getenv("YOUTUBE_API_KEY")          # export beforehand or load via dotenv
if not API_KEY:
    raise SystemExit("Set YOUTUBE_API_KEY environment variable first!")

SystemExit: Set YOUTUBE_API_KEY environment variable first!

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
def chunks(seq, n):
    """Yield successive n-sized chunks from seq (used for video-id batching)."""
    for i in range(0, len(seq), n):
        yield seq[i:i + n]

In [None]:
def safe_get(item, path, default=None):
    """Safely drill into nested dicts."""
    for key in path:
        item = item.get(key, {})
    return item or default

In [None]:
def build_service():
    return build("youtube", "v3", developerKey=API_KEY, cache_discovery=False)

In [None]:
def get_uploads_playlist_id(youtube, channel_id):
    """Step 1: one cheap call → uploads playlistId."""
    resp = youtube.channels().list(
        part="contentDetails",
        id=channel_id,
        maxResults=1
    ).execute()
    try:
        return resp["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
    except (KeyError, IndexError):
        raise ValueError("Channel ID not found or no public uploads.")

In [None]:
def get_all_video_ids(youtube, uploads_playlist_id):
    """Step 2: page through playlistItems; collect videoIds."""
    video_ids = []
    next_page = None
    pbar = tqdm(desc="Fetching playlist pages", unit="page")
    while True:
        resp = youtube.playlistItems().list(
            part="contentDetails",
            playlistId=uploads_playlist_id,
            maxResults=50,      # API max
            pageToken=next_page
        ).execute()
        ids = [item["contentDetails"]["videoId"] for item in resp["items"]]
        video_ids.extend(ids)
        pbar.update(1)
        next_page = resp.get("nextPageToken")
        if not next_page:
            break
    pbar.close()
    return video_ids

In [None]:
def fetch_video_metadata(youtube, video_ids):
    """Step 3: batch-fetch videos.list in groups of ≤50 ids."""
    rows = []
    for batch in tqdm(list(chunks(video_ids, 50)), desc="Downloading metadata", unit="batch"):
        resp = youtube.videos().list(
            part="snippet,statistics,contentDetails",
            id=",".join(batch),
            maxResults=50
        ).execute()
        for v in resp["items"]:
            sni, stats, cd = v["snippet"], v["statistics"], v["contentDetails"]
            rows.append({
                "video_id"      : v["id"],
                "title"         : sni.get("title"),
                "published_at"  : sni.get("publishedAt"),
                "description"   : sni.get("description"),
                "duration_ISO"  : cd.get("duration"),       # e.g. PT13M20S
                "tags"          : "|".join(sni.get("tags", [])),
                "view_count"    : int(stats.get("viewCount", 0)),
                "like_count"    : int(stats.get("likeCount", 0)),
                "comment_count" : int(stats.get("commentCount", 0)),
                "favorite_count": int(stats.get("favoriteCount", 0)),
                "channel_title" : sni.get("channelTitle")
            })
        # polite pause – keeps you well below quota & QPS limits
        time.sleep(0.1)
    return pd.DataFrame(rows)

In [None]:
def main(channel_id):
    youtube = build_service()
    uploads_id = get_uploads_playlist_id(youtube, channel_id)
    print(f"Uploads playlist ID: {uploads_id}")
    ids = get_all_video_ids(youtube, uploads_id)
    print(f"Total videos: {len(ids):,}")
    df = fetch_video_metadata(youtube, ids)
    outfile = Path(f"{channel_id}_videos.csv")
    df.to_csv(outfile, index=False)
    print(f"Saved → {outfile.resolve()}")

In [None]:

if __name__ == "__main__":
    if len(sys.argv) != 2:
        raise SystemExit("Usage: python fetch_channel_videos.py <CHANNEL_ID>")
    main(sys.argv[1])