In [1]:
! pip install yt_dlp



In [12]:
import json
import os
import re
from yt_dlp import YoutubeDL

# Utility to extract YouTube video ID from URL
def extract_video_id(url):
    m = re.search(r"v=([^&]+)", url)
    if m:
        return m.group(1)
    m = re.search(r"youtu\.be/([^?&]+)", url)
    if m:
        return m.group(1)
    return None

# Sanitize titles for filenames
def sanitize_filename(name):
    return re.sub(r"[\\\\/:*?\"<>|]", "_", name)

# Load video URLs from JSON search results
def load_video_urls(path):
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return [item.get("link") for item in data if item.get("link")]

# Build parent-reply hierarchy in comments
def structure_comments(comments):
    cmap = {c['id']: c for c in comments if c.get('id')}
    roots = []
    for c in comments:
        pid = c.get('parent_id')
        if pid and pid in cmap:
            cmap[pid].setdefault('replies', []).append(c)
        else:
            roots.append(c)
    return roots


def main():
    # Ensure output directory exists
    os.makedirs("output", exist_ok=True)

    # Read URLs
    urls = load_video_urls('videossearch-snowwhite.json')

    for url in urls:
        vid = extract_video_id(url)
        if not vid:
            print(f"Invalid URL skipped: {url}")
            continue
        video_url = f"https://www.youtube.com/watch?v={vid}"

        # Scrape metadata + comments
        ydl_opts = {"getcomments": True}
        with YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(video_url, download=False)

        # Meta fields
        meta = {
            "Title": info.get("title"),
            "Description": info.get("description"),
            "Uploader": info.get("uploader"),
            "View Count": info.get("view_count"),
            "Likes": info.get("like_count"),
            "Dislikes": info.get("dislike_count"),
            "Thumbnail": info.get("thumbnail"),
            "Tags": info.get("tags"),
        }

        # Process comments
        raw = info.get("comments") or []
        comments = []
        for c in raw:
            comments.append({
                "id": c.get("id"),
                "parent_id": c.get("parent"),
                "author": c.get("author"),
                "text": c.get("text"),
                "timestamp": c.get("timestamp"),
            })
        structured = structure_comments(comments)
        meta["Comments"] = structured

        # Statistics
        top_comments = len(structured)
        replies = sum(len(c.get("replies", [])) for c in structured)
        unique_authors = len({c.get("author") for c in comments if c.get("author")})

        # Save JSON
        title_safe = sanitize_filename(info.get("title", vid))
        outfile = os.path.join("output", f"{title_safe}_{vid}.json")
        with open(outfile, "w", encoding="utf-8") as fout:
            json.dump(meta, fout, ensure_ascii=False, indent=2)

        print(f"{outfile}: {top_comments} comments, {replies} replies, {unique_authors} unique authors")

if __name__ == '__main__':
    main()


[youtube] Extracting URL: https://www.youtube.com/watch?v=r5r_FS6wLhY
[youtube] r5r_FS6wLhY: Downloading webpage
[youtube] r5r_FS6wLhY: Downloading tv client config
[youtube] r5r_FS6wLhY: Downloading player 59b252b9-main
[youtube] r5r_FS6wLhY: Downloading tv player API JSON
[youtube] r5r_FS6wLhY: Downloading ios player API JSON


         n = u26DCKiPHgHROCfRmU0v ; player = https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = wni0-zYagv2iKQgBhddh ; player = https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = WDSY6_a0jpuqWFCDBJO3 ; player = https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


[youtube] r5r_FS6wLhY: Downloading m3u8 information
[youtube] Downloading comment section API JSON
[youtube] Downloading ~8326 comments
[youtube] Sorting comments by newest first
[youtube] Downloading comment API JSON page 1 (0/~8326)
[youtube] Downloading comment API JSON page 2 (20/~8326)
[youtube] Downloading comment API JSON page 3 (40/~8326)
[youtube] Downloading comment API JSON page 4 (60/~8326)
[youtube] Downloading comment API JSON page 5 (80/~8326)
[youtube] Downloading comment API JSON page 6 (100/~8326)
[youtube]     Downloading comment API JSON reply thread 1 (120/~8326)
[youtube] Downloading comment API JSON page 7 (121/~8326)
[youtube]     Downloading comment API JSON reply thread 1 (125/~8326)
[youtube]     Downloading comment API JSON reply thread 2 (127/~8326)
[youtube]     Downloading comment API JSON reply thread 3 (132/~8326)
[youtube] Downloading comment API JSON page 8 (144/~8326)
[youtube]     Downloading comment API JSON reply thread 1 (157/~8326)
[youtube]    



[youtube] Downloading comment API JSON page 44 (941/~8326)
[youtube]     Downloading comment API JSON reply thread 1 (950/~8326)
[youtube] Downloading comment API JSON page 45 (962/~8326)
[youtube]     Downloading comment API JSON reply thread 1 (965/~8326)
[youtube]     Downloading comment API JSON reply thread 2 (981/~8326)
[youtube] Downloading comment API JSON page 46 (984/~8326)
[youtube]     Downloading comment API JSON reply thread 1 (993/~8326)
[youtube] Downloading comment API JSON page 47 (1005/~8326)
[youtube]     Downloading comment API JSON reply thread 1 (1008/~8326)
[youtube]     Downloading comment API JSON reply thread 2 (1013/~8326)
[youtube]     Downloading comment API JSON reply thread 3 (1018/~8326)
[youtube]     Downloading comment API JSON reply thread 4 (1022/~8326)
[youtube] Downloading comment API JSON page 48 (1031/~8326)
[youtube] Downloading comment API JSON page 49 (1051/~8326)
[youtube] Downloading comment API JSON page 50 (1071/~8326)
[youtube] Downloadi

         n = TVCZDSUP0Twed5BBD35v ; player = https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = KUegpNj8KE_O2SRTV5YJ ; player = https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = 7T7_eLdKs8VdGceBJw1W ; player = https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


[youtube] TbiPcMCz0Ek: Downloading m3u8 information
[youtube] Downloading comment section API JSON
[youtube] Downloading ~75320 comments
[youtube] Sorting comments by newest first
[youtube] Downloading comment API JSON page 1 (0/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (9/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (16/~75320)
[youtube] Downloading comment API JSON page 2 (22/~75320)
[youtube] Downloading comment API JSON page 3 (42/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (43/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (49/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (54/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (59/~75320)
[youtube]     Downloading comment API JSON reply thread 5 (61/~75320)
[youtube]     Downloading comment API JSON reply thread 6 (65/~75320)
[youtube] Downloading comment API JSON page 4 (68/~75320)
[youtube] Downloading comment 



[youtube]     Downloading comment API JSON reply thread 2 (2639/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (2645/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (2650/~75320)
[youtube]     Downloading comment API JSON reply thread 5 (2658/~75320)
[youtube] Downloading comment API JSON page 118 (2659/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (2661/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (2663/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (2669/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (2671/~75320)
[youtube] Downloading comment API JSON page 119 (2683/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (2684/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (2686/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (2688/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (2704/~75320)
[youtube] Do



[youtube] Downloading comment API JSON page 256 (6199/~75320)
[youtube] Downloading comment API JSON page 257 (6219/~75320)
[youtube] Downloading comment API JSON page 258 (6239/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (6249/~75320)
[youtube] Downloading comment API JSON page 259 (6260/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (6263/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (6277/~75320)
[youtube] Downloading comment API JSON page 260 (6282/~75320)
[youtube] Downloading comment API JSON page 261 (6302/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (6305/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (6312/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (6317/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (6319/~75320)
[youtube] Downloading comment API JSON page 262 (6328/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (633



[youtube] Downloading comment API JSON page 409 (10460/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (10463/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (10473/~75320)
[youtube] Downloading comment API JSON page 410 (10482/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (10485/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (10491/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (10500/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (10505/~75320)
[youtube] Downloading comment API JSON page 411 (10510/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (10511/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (10514/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (10519/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (10522/~75320)
[youtube]     Downloading comment API JSON reply thread 5 (10528/~75320)
[youtube



[youtube] Downloading comment API JSON page 416 (10640/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (10642/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (10650/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (10664/~75320)
[youtube] Downloading comment API JSON page 417 (10667/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (10668/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (10672/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (10675/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (10680/~75320)
[youtube]     Downloading comment API JSON reply thread 5 (10688/~75320)
[youtube] Downloading comment API JSON page 418 (10693/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (10705/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (10709/~75320)
[youtube] Downloading comment API JSON page 419 (10721/~75320)
[youtube]     Down



[youtube] Downloading comment API JSON page 617 (15635/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (15646/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (15655/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (15661/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (15672/~75320)
[youtube] Downloading comment API JSON page 618 (15676/~75320)
[youtube] Downloading comment API JSON page 619 (15696/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (15703/~75320)
[youtube] Downloading comment API JSON page 620 (15717/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (15731/~75320)
[youtube] Downloading comment API JSON page 621 (15740/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (15741/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (15753/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (15755/~75320)
[youtube] Downloading commen



[youtube] Downloading comment API JSON page 751 (18789/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (18806/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (18810/~75320)
[youtube] Downloading comment API JSON page 752 (18814/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (18828/~75320)
[youtube] Downloading comment API JSON page 753 (18835/~75320)
[youtube] Downloading comment API JSON page 754 (18855/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (18865/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (18867/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (18870/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (18876/~75320)
[youtube] Downloading comment API JSON page 755 (18883/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (18890/~75320)
[youtube] Downloading comment API JSON page 756 (18906/~75320)
[youtube]     Downloading comment API 



[youtube] Downloading comment API JSON page 845 (21706/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (21712/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (21719/~75320)
[youtube]        Downloading comment replies API JSON page 1 (21729/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (21746/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (21749/~75320)
[youtube]     Downloading comment API JSON reply thread 5 (21752/~75320)
[youtube] Downloading comment API JSON page 846 (21754/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (21756/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (21762/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (21769/~75320)
[youtube] Downloading comment API JSON page 847 (21778/~75320)
[youtube] Downloading comment API JSON page 848 (21798/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (21818/~75320)
[youtube] Downl



[youtube]     Downloading comment API JSON reply thread 4 (30632/~75320)
[youtube]     Downloading comment API JSON reply thread 5 (30645/~75320)
[youtube]     Downloading comment API JSON reply thread 6 (30648/~75320)
[youtube]     Downloading comment API JSON reply thread 7 (30650/~75320)
[youtube] Downloading comment API JSON page 1188 (30654/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (30668/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (30670/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (30672/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (30677/~75320)
[youtube] Downloading comment API JSON page 1189 (30679/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (30680/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (30683/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (30688/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (30694/~753



[youtube] Downloading comment API JSON page 1202 (31031/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (31032/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (31036/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (31049/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (31052/~75320)
[youtube] Downloading comment API JSON page 1203 (31060/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (31069/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (31071/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (31077/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (31079/~75320)
[youtube]     Downloading comment API JSON reply thread 5 (31081/~75320)
[youtube]     Downloading comment API JSON reply thread 6 (31084/~75320)
[youtube]     Downloading comment API JSON reply thread 7 (31086/~75320)
[youtube] Downloading comment API JSON page 1204 (31088/~75320)
[yout



[youtube]     Downloading comment API JSON reply thread 5 (31517/~75320)
[youtube]     Downloading comment API JSON reply thread 6 (31520/~75320)
[youtube]     Downloading comment API JSON reply thread 7 (31522/~75320)
[youtube]     Downloading comment API JSON reply thread 8 (31524/~75320)
[youtube]     Downloading comment API JSON reply thread 9 (31526/~75320)
[youtube] Downloading comment API JSON page 1214 (31534/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (31537/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (31540/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (31543/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (31556/~75320)
[youtube] Downloading comment API JSON page 1215 (31558/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (31571/~75320)
[youtube] Downloading comment API JSON page 1216 (31583/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (31598/~75320)
[yout



[youtube]     Downloading comment API JSON reply thread 2 (38612/~75320)
[youtube] Downloading comment API JSON page 1476 (38617/~75320)
[youtube] Downloading comment API JSON page 1477 (38637/~75320)
[youtube] Downloading comment API JSON page 1478 (38657/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (38664/~75320)
[youtube] Downloading comment API JSON page 1479 (38678/~75320)
[youtube] Downloading comment API JSON page 1480 (38698/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (38709/~75320)
[youtube] Downloading comment API JSON page 1481 (38719/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (38728/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (38737/~75320)
[youtube] Downloading comment API JSON page 1482 (38742/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (38745/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (38752/~75320)
[youtube] Downloading comment API JSON pa



[youtube] Downloading comment API JSON page 1510 (39855/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (39862/~75320)
[youtube]        Downloading comment replies API JSON page 1 (39872/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (39895/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (39900/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (39906/~75320)
[youtube] Downloading comment API JSON page 1511 (39910/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (39913/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (39915/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (39922/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (39924/~75320)
[youtube]     Downloading comment API JSON reply thread 5 (39926/~75320)
[youtube]     Downloading comment API JSON reply thread 6 (39929/~75320)
[youtube] Downloading comment API JSON page 1512 (39949/~75320)
[y



[youtube]     Downloading comment API JSON reply thread 3 (48429/~75320)
[youtube] Downloading comment API JSON page 1749 (48431/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (48443/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (48445/~75320)
[youtube] Downloading comment API JSON page 1750 (48453/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (48461/~75320)
[youtube] Downloading comment API JSON page 1751 (48475/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (48488/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (48496/~75320)
[youtube] Downloading comment API JSON page 1752 (48499/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (48501/~75320)
[youtube] Downloading comment API JSON page 1753 (48520/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (48539/~75320)
[youtube] Downloading comment API JSON page 1754 (48543/~75320)
[youtube] Downloading comment AP



[youtube]     Downloading comment API JSON reply thread 2 (51204/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (51221/~75320)
[youtube] Downloading comment API JSON page 1830 (51224/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (51236/~75320)
[youtube] Downloading comment API JSON page 1831 (51245/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (51265/~75320)
[youtube] Downloading comment API JSON page 1832 (51267/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (51268/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (51278/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (51291/~75320)
[youtube] Downloading comment API JSON page 1833 (51292/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (51307/~75320)
[youtube] Downloading comment API JSON page 1834 (51314/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (51327/~75320)
[youtube] Downloading c



[youtube] Downloading comment API JSON page 1882 (52704/~75320)
[youtube] Downloading comment API JSON page 1883 (52724/~75320)
[youtube] Downloading comment API JSON page 1884 (52744/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (52758/~75320)
[youtube] Downloading comment API JSON page 1885 (52765/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (52769/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (52785/~75320)
[youtube] Downloading comment API JSON page 1886 (52788/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (52791/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (52794/~75320)
[youtube] Downloading comment API JSON page 1887 (52810/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (52820/~75320)
[youtube] Downloading comment API JSON page 1888 (52832/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (52844/~75320)
[youtube]     Downloading comment API JSO



[youtube]     Downloading comment API JSON reply thread 1 (56063/~75320)
[youtube] Downloading comment API JSON page 1998 (56073/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (56077/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (56081/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (56092/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (56095/~75320)
[youtube] Downloading comment API JSON page 1999 (56097/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (56098/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (56109/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (56113/~75320)
[youtube] Downloading comment API JSON page 2000 (56122/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (56125/~75320)
[youtube] Downloading comment API JSON page 2001 (56144/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (56148/~75320)
[youtube]     



[youtube]        Downloading comment replies API JSON page 1 (56944/~75320)
[youtube] Downloading comment API JSON page 2028 (56962/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (56972/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (56978/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (56983/~75320)
[youtube] Downloading comment API JSON page 2029 (56985/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (56990/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (56992/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (56996/~75320)
[youtube]     Downloading comment API JSON reply thread 4 (57000/~75320)
[youtube]     Downloading comment API JSON reply thread 5 (57008/~75320)
[youtube] Downloading comment API JSON page 2030 (57010/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (57012/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (57018/~75320)
[y



[youtube]     Downloading comment API JSON reply thread 1 (58302/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (58304/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (58306/~75320)
[youtube] Downloading comment API JSON page 2075 (58318/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (58320/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (58323/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (58339/~75320)
[youtube] Downloading comment API JSON page 2076 (58342/~75320)
[youtube] Downloading comment API JSON page 2077 (58362/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (58372/~75320)
[youtube]        Downloading comment replies API JSON page 1 (58382/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (58392/~75320)
[youtube]        Downloading comment replies API JSON page 1 (58402/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (58412/~75320)



[youtube] Downloading comment API JSON page 2418 (70274/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (70293/~75320)
[youtube] Downloading comment API JSON page 2419 (70295/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (70309/~75320)
[youtube] Downloading comment API JSON page 2420 (70317/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (70336/~75320)
[youtube] Downloading comment API JSON page 2421 (70338/~75320)
[youtube] Downloading comment API JSON page 2422 (70358/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (70363/~75320)
[youtube] Downloading comment API JSON page 2423 (70379/~75320)
[youtube]     Downloading comment API JSON reply thread 1 (70383/~75320)
[youtube]     Downloading comment API JSON reply thread 2 (70396/~75320)
[youtube]     Downloading comment API JSON reply thread 3 (70401/~75320)
[youtube] Downloading comment API JSON page 2424 (70409/~75320)
[youtube]     Downloading comment API JSO

         n = 5ScdbmaJ27JJBzeOsGvO ; player = https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = ptwSwihP6m9XbrBfB2LN ; player = https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = PUnxrYkCOhB-RUBkApm0 ; player = https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


[youtube] UApAmUTUCbg: Downloading m3u8 information
[youtube] Downloading comment section API JSON
[youtube] Downloading ~15665 comments
[youtube] Sorting comments by newest first
[youtube] Downloading comment API JSON page 1 (0/~15665)
[youtube] Downloading comment API JSON page 2 (20/~15665)
[youtube] Downloading comment API JSON page 3 (40/~15665)
[youtube] Downloading comment API JSON page 4 (60/~15665)
[youtube] Downloading comment API JSON page 5 (80/~15665)
[youtube] Downloading comment API JSON page 6 (100/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (119/~15665)
[youtube] Downloading comment API JSON page 7 (121/~15665)
[youtube] Downloading comment API JSON page 8 (141/~15665)
[youtube] Downloading comment API JSON page 9 (161/~15665)
[youtube] Downloading comment API JSON page 10 (181/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (198/~15665)
[youtube] Downloading comment API JSON page 11 (202/~15665)
[youtube]     Downloading comment



[youtube] Downloading comment API JSON page 399 (9360/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (9372/~15665)
[youtube]     Downloading comment API JSON reply thread 2 (9377/~15665)
[youtube]     Downloading comment API JSON reply thread 3 (9380/~15665)
[youtube] Downloading comment API JSON page 400 (9384/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (9399/~15665)
[youtube] Downloading comment API JSON page 401 (9405/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (9425/~15665)
[youtube] Downloading comment API JSON page 402 (9426/~15665)
[youtube] Downloading comment API JSON page 403 (9446/~15665)
[youtube] Downloading comment API JSON page 404 (9466/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (9468/~15665)
[youtube]     Downloading comment API JSON reply thread 2 (9487/~15665)
[youtube] Downloading comment API JSON page 405 (9493/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (949



[youtube] Downloading comment API JSON page 498 (11722/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (11724/~15665)
[youtube] Downloading comment API JSON page 499 (11743/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (11744/~15665)
[youtube]     Downloading comment API JSON reply thread 2 (11751/~15665)
[youtube] Downloading comment API JSON page 500 (11766/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (11776/~15665)
[youtube]     Downloading comment API JSON reply thread 2 (11788/~15665)
[youtube]        Downloading comment replies API JSON page 1 (11798/~15665)
[youtube]        Downloading comment replies API JSON page 2 (11848/~15665)
[youtube] Downloading comment API JSON page 501 (11864/~15665)
[youtube] Downloading comment API JSON page 502 (11884/~15665)
[youtube] Downloading comment API JSON page 503 (11904/~15665)
[youtube]     Downloading comment API JSON reply thread 1 (11923/~15665)
[youtube] Downloading comment AP

## Sample Data

In [21]:
import json
import os
import random

input_file = "Youtube_combined_Full.json"
output_file = "Youtube_combined_Final.json"
target_size_bytes = 7 * 1024 * 1024  # 7MB

with open(input_file, "r", encoding="utf-8") as f:
    all_videos = json.load(f)

random.shuffle(all_videos)

sample = []
current_size = 0

for video in all_videos:
    video_encoded = json.dumps(video, ensure_ascii=False).encode("utf-8")
    video_size = len(video_encoded)

    if video_size > target_size_bytes:
        continue  

    if current_size + video_size > target_size_bytes:
        continue  

    sample.append(video)
    current_size += video_size

with open(output_file, "w", encoding="utf-8") as f:
    json.dump(sample, f, indent=2, ensure_ascii=False)

actual_size = os.path.getsize(output_file) / (1024 * 1024)
print(f"Sample created: {output_file} ({actual_size:.2f} MB)")
print(f"Total videos included: {len(sample)}")


Sample created: Youtube_combined_Final.json (7.73 MB)
Total videos included: 2
