In [1]:
#Video 1. Trump’s Tariff King Punches: Hitting China, the U.S. Dollar, and America — A Reshaping of Global Trade Is Inevitable
import os
import re
import requests
import json
import hashlib
import urllib.parse
import time
import csv
import pandas as pd

# === 1. Securely read cookies ===
def get_header(cookie_path='bili_cookie1.txt'):
    """Assemble the HTTP headers sent with every request.

    Args:
        cookie_path: UTF-8 text file whose entire content is the Cookie value.

    Returns:
        dict with two entries: "Cookie" (file content with leading/trailing
        whitespace removed) and a fixed desktop-browser "User-Agent".

    Raises:
        FileNotFoundError: If nothing exists at ``cookie_path``.
    """
    # Fail early with an actionable message instead of the bare open() error.
    if not os.path.exists(cookie_path):
        raise FileNotFoundError(f"Cookie file '{cookie_path}' not found. Please check the path.")

    with open(cookie_path, 'r', encoding='utf-8') as cookie_file:
        raw_cookie = cookie_file.read()

    return {
        "Cookie": raw_cookie.strip(),
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
    }

# === 2. Retrieve video information (OID + title) ===
def get_video_info(bv):
    """Download the video page and extract its numeric id and title.

    Args:
        bv: BV id of the video, e.g. "BV1CDdWYHEtU".

    Returns:
        (oid, title). ``oid`` is the digit string captured from the page's
        ``"aid":<digits>,"bvid":"<bv>"`` fragment. ``title`` is the stripped
        ``<title>`` text with every character that is invalid in a file name
        replaced by ``_`` (the caller uses it to name the CSV).

    Raises:
        requests.HTTPError: On a non-success HTTP status.
        ValueError: If the id or the title cannot be found in the page.
    """
    url = f"https://www.bilibili.com/video/{bv}"
    # A timeout keeps the cell from hanging forever on a stalled connection.
    resp = requests.get(url, headers=get_header(), timeout=10)
    resp.raise_for_status()
    text = resp.text

    # re.escape: the user-supplied id must be matched literally, not as a pattern.
    oid_match = re.search(r'"aid":(\d+),"bvid":"{}"'.format(re.escape(bv)), text)
    title_match = re.search(r'<title.*?>(.*?)</title>', text)

    if not oid_match or not title_match:
        raise ValueError("Failed to extract OID or Title from the video page.")

    oid = oid_match.group(1)
    # Replace all characters that are illegal in file names (not only '/'),
    # otherwise open() on the derived CSV name can fail, notably on Windows.
    title = re.sub(r'[\\/:*?"<>|]', '_', title_match.group(1).strip())
    return oid, title

# === 3. Scrape comments ===
def crawl_comments(bv, oid, csv_writer, is_second=True, page_cursor=''):
    """Fetch one page of top-level comments and write them to the CSV.

    Args:
        bv: BV id of the video. Not used here; kept so existing callers work.
        oid: Numeric video id the comment API is queried with.
        csv_writer: Writer handed through to ``write_comment``.
        is_second: When true, nested replies of every comment reporting
            ``rcount > 0`` are fetched with ``crawl_sub_comments``.
        page_cursor: Cursor returned by the previous call; empty for page one.

    Returns:
        The cursor for the next page, or ``None`` when the page holds no
        replies or no usable next cursor (0, empty or missing) is reported.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Fixed query parameters. NOTE(review): presumably mode=2 selects
    # time-ordered comments — confirm against the API.
    mode, plat, type_, web_location = 2, 1, 1, 1315875
    timestamp = int(time.time())

    if page_cursor:
        offset = json.dumps({"type": 3, "direction": 1, "Data": {"cursor": page_cursor}})
    else:
        offset = ""
    pagination_str = json.dumps({"offset": offset}, separators=(',', ':'))

    # Generate w_rid: MD5 of the query string (keys in alphabetical order)
    # followed by a fixed hex suffix.
    # NOTE(review): the suffix looks like a hard-coded signing key; if the
    # service rotates it, requests will start failing — confirm.
    params = f"mode={mode}&oid={oid}&pagination_str={urllib.parse.quote(pagination_str)}&plat={plat}&seek_rpid=&type={type_}&web_location={web_location}&wts={timestamp}ea1db124af3c7062474693fa704f4ff8"
    w_rid = hashlib.md5(params.encode('utf-8')).hexdigest()

    url = (
        f"https://api.bilibili.com/x/v2/reply/wbi/main?"
        f"oid={oid}&type={type_}&mode={mode}&pagination_str={urllib.parse.quote(pagination_str, safe=':')}"
        f"&plat={plat}&seek_rpid=&web_location={web_location}&w_rid={w_rid}&wts={timestamp}"
    )

    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, headers=get_header(), timeout=10)
    response.raise_for_status()
    data = response.json()

    # "data" / "replies" may be present but null; `or` also covers that case,
    # which a .get() default alone does not.
    payload = data.get('data') or {}
    replies = payload.get('replies') or []
    if not replies:
        # NOTE(review): assumes the response envelope carries code/message
        # fields — confirm. Reported so an API error is not mistaken for
        # "no more comments".
        if data.get('code', 0) != 0:
            print(f"Comment API returned code {data.get('code')}: {data.get('message')}")
        return None  # No more replies

    for reply in replies:
        write_comment(reply, csv_writer)

        # If secondary (nested) comments are enabled
        if is_second and reply.get('rcount', 0) > 0:
            crawl_sub_comments(oid, reply['rpid'], csv_writer)

    # Treat 0, empty and missing cursors alike as "last page"; returning an
    # empty cursor would make the caller start over from page one.
    next_cursor = (payload.get('cursor') or {}).get('next')
    return next_cursor if next_cursor else None

# === 4. Scrape secondary (nested) comments ===
def crawl_sub_comments(oid, root_rpid, csv_writer):
    """Write every nested reply under one top-level comment to the CSV.

    Requests pages of 10 replies starting at page 1 and stops at the first
    page that comes back without replies.

    Args:
        oid: Numeric video id.
        root_rpid: Id of the top-level comment whose replies are fetched.
        csv_writer: Writer handed through to ``write_comment``.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Read the cookie file once instead of on every page request.
    headers = get_header()
    page = 1
    while True:
        url = f"https://api.bilibili.com/x/v2/reply/reply?oid={oid}&type=1&root={root_rpid}&ps=10&pn={page}&web_location=333.788"
        # A timeout keeps the crawl from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        # "data" / "replies" may be present but null; `or` covers that too.
        replies = (data.get('data') or {}).get('replies') or []
        if not replies:
            break

        for reply in replies:
            write_comment(reply, csv_writer)

        page += 1
        time.sleep(0.2)  # Avoid getting banned by scraping too quickly

# === 5. Write a single comment to CSV ===
def write_comment(reply, csv_writer):
    """Append one comment (top-level or nested) as a CSV row.

    Columns, in order: parent id, reply id, user id, user name, user level,
    sex, message (newlines replaced by spaces), creation time, reply-count
    text (``sub_reply_entry_text``, default '0'), like count, signature,
    IP location, premium-member flag ('是'/'否'), avatar URL.

    Optional nested objects (``member``, ``level_info``, ``vip``, ``content``,
    ``reply_control``) that are missing or null fall back to defaults, so a
    sparsely populated reply is still written instead of being dropped.

    Args:
        reply: dict describing one reply.
        csv_writer: Object exposing ``writerow`` (e.g. ``csv.writer``).

    Any remaining error (for example a missing ``ctime``) is printed and the
    row is skipped; nothing is raised.
    """
    try:
        member = reply.get('member') or {}
        level_info = member.get('level_info') or {}
        vip = member.get('vip') or {}
        content = reply.get('content') or {}
        reply_control = reply.get('reply_control') or {}

        if 'location' in reply_control:
            # Drops the first 5 characters — presumably a fixed "IP属地：" label;
            # TODO confirm against real responses.
            location = reply_control['location'][5:]
        else:
            location = '未知'

        csv_writer.writerow([
            reply.get('parent', ''),
            reply.get('rpid', ''),
            reply.get('mid', ''),
            member.get('uname', ''),
            level_info.get('current_level', ''),
            member.get('sex', ''),
            content.get('message', '').replace('\n', ' '),
            # Epoch seconds -> naive timestamp (no timezone conversion applied).
            pd.to_datetime(reply['ctime'], unit='s'),
            reply_control.get('sub_reply_entry_text', '0').strip(),
            reply.get('like', 0),
            member.get('sign', ''),
            location,
            '是' if vip.get('vipStatus', 0) != 0 else '否',
            member.get('avatar', '')
        ])
    except Exception as e:
        # Deliberate best effort: one malformed reply must not abort the crawl.
        print(f"写入评论出错: {e}")

# === 6. Main program entry point ===
def main():
    """Ask for a BV id and dump all comments of that video into a CSV file.

    The file is named after the first 20 characters of the page title and is
    created (or overwritten) in the current working directory.
    """
    bv = input("请输入B站BV号（如BV1CDdWYHEtU）: ").strip()
    oid, title = get_video_info(bv)
    output_file = f"{title[:20]}_评论.csv"
    header_row = ['上级评论ID', '评论ID', '用户ID', '用户名', '用户等级', '性别', '评论内容', '评论时间', '回复数', '点赞数', '个性签名', 'IP属地', '是否是大会员', '头像']

    # utf-8-sig prepends a BOM to the file.
    with open(output_file, 'w', newline='', encoding='utf-8-sig') as out:
        writer = csv.writer(out)
        writer.writerow(header_row)

        print(f"Starting to scrape comments for {title}...")
        cursor = ''
        # Fetch page after page until crawl_comments signals the end with None.
        while True:
            cursor = crawl_comments(bv, oid, writer, is_second=True, page_cursor=cursor)
            time.sleep(0.5)
            if cursor is None:
                break

        print(f"Scraping complete! Results saved as {output_file}")


# Entry point: start the scraper only when this code runs as the top-level program.
if __name__ == "__main__":
    main()


Starting to scrape comments for 特朗普关税王八拳，打中国，打美元，国际贸易必然重塑_哔哩哔哩_bilibili...
Scraping complete! Results saved as 特朗普关税王八拳，打中国，打美元，国际贸_评论.csv


In [2]:
#Video 2: [Hilarious Daily Show] Americans Roast Trump for Raising Tariffs — Packed with High-Energy Sarcasm
import os
import re
import requests
import json
import hashlib
import urllib.parse
import time
import csv
import pandas as pd

# === 1. Securely read cookies ===
def get_header(cookie_path='bili_cookie2.txt'):
    """Assemble the HTTP headers sent with every request.

    Args:
        cookie_path: UTF-8 text file whose entire content is the Cookie value.

    Returns:
        dict with two entries: "Cookie" (file content with leading/trailing
        whitespace removed) and a fixed desktop-browser "User-Agent".

    Raises:
        FileNotFoundError: If nothing exists at ``cookie_path``.
    """
    # Fail early with an actionable message instead of the bare open() error.
    if not os.path.exists(cookie_path):
        raise FileNotFoundError(f"Cookie file '{cookie_path}' not found. Please check the path.")

    with open(cookie_path, 'r', encoding='utf-8') as cookie_file:
        raw_cookie = cookie_file.read()

    return {
        "Cookie": raw_cookie.strip(),
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
    }

# === 2. Retrieve video information (OID + title) ===
def get_video_info(bv):
    """Download the video page and extract its numeric id and title.

    Args:
        bv: BV id of the video, e.g. "BV1CDdWYHEtU".

    Returns:
        (oid, title). ``oid`` is the digit string captured from the page's
        ``"aid":<digits>,"bvid":"<bv>"`` fragment. ``title`` is the stripped
        ``<title>`` text with every character that is invalid in a file name
        replaced by ``_`` (the caller uses it to name the CSV).

    Raises:
        requests.HTTPError: On a non-success HTTP status.
        ValueError: If the id or the title cannot be found in the page.
    """
    url = f"https://www.bilibili.com/video/{bv}"
    # A timeout keeps the cell from hanging forever on a stalled connection.
    resp = requests.get(url, headers=get_header(), timeout=10)
    resp.raise_for_status()
    text = resp.text

    # re.escape: the user-supplied id must be matched literally, not as a pattern.
    oid_match = re.search(r'"aid":(\d+),"bvid":"{}"'.format(re.escape(bv)), text)
    title_match = re.search(r'<title.*?>(.*?)</title>', text)

    if not oid_match or not title_match:
        raise ValueError("Failed to extract OID or Title from the video page.")

    oid = oid_match.group(1)
    # Replace all characters that are illegal in file names (not only '/'),
    # otherwise open() on the derived CSV name can fail, notably on Windows.
    title = re.sub(r'[\\/:*?"<>|]', '_', title_match.group(1).strip())
    return oid, title

# === 3. Scrape comments ===
def crawl_comments(bv, oid, csv_writer, is_second=True, page_cursor=''):
    """Fetch one page of top-level comments and write them to the CSV.

    Args:
        bv: BV id of the video. Not used here; kept so existing callers work.
        oid: Numeric video id the comment API is queried with.
        csv_writer: Writer handed through to ``write_comment``.
        is_second: When true, nested replies of every comment reporting
            ``rcount > 0`` are fetched with ``crawl_sub_comments``.
        page_cursor: Cursor returned by the previous call; empty for page one.

    Returns:
        The cursor for the next page, or ``None`` when the page holds no
        replies or no usable next cursor (0, empty or missing) is reported.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Fixed query parameters. NOTE(review): presumably mode=2 selects
    # time-ordered comments — confirm against the API.
    mode, plat, type_, web_location = 2, 1, 1, 1315875
    timestamp = int(time.time())

    if page_cursor:
        offset = json.dumps({"type": 3, "direction": 1, "Data": {"cursor": page_cursor}})
    else:
        offset = ""
    pagination_str = json.dumps({"offset": offset}, separators=(',', ':'))

    # Generate w_rid: MD5 of the query string (keys in alphabetical order)
    # followed by a fixed hex suffix.
    # NOTE(review): the suffix looks like a hard-coded signing key; if the
    # service rotates it, requests will start failing — confirm.
    params = f"mode={mode}&oid={oid}&pagination_str={urllib.parse.quote(pagination_str)}&plat={plat}&seek_rpid=&type={type_}&web_location={web_location}&wts={timestamp}ea1db124af3c7062474693fa704f4ff8"
    w_rid = hashlib.md5(params.encode('utf-8')).hexdigest()

    url = (
        f"https://api.bilibili.com/x/v2/reply/wbi/main?"
        f"oid={oid}&type={type_}&mode={mode}&pagination_str={urllib.parse.quote(pagination_str, safe=':')}"
        f"&plat={plat}&seek_rpid=&web_location={web_location}&w_rid={w_rid}&wts={timestamp}"
    )

    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, headers=get_header(), timeout=10)
    response.raise_for_status()
    data = response.json()

    # "data" / "replies" may be present but null; `or` also covers that case,
    # which a .get() default alone does not.
    payload = data.get('data') or {}
    replies = payload.get('replies') or []
    if not replies:
        # NOTE(review): assumes the response envelope carries code/message
        # fields — confirm. Reported so an API error is not mistaken for
        # "no more comments".
        if data.get('code', 0) != 0:
            print(f"Comment API returned code {data.get('code')}: {data.get('message')}")
        return None  # No more replies

    for reply in replies:
        write_comment(reply, csv_writer)

        # If secondary (nested) comments are enabled
        if is_second and reply.get('rcount', 0) > 0:
            crawl_sub_comments(oid, reply['rpid'], csv_writer)

    # Treat 0, empty and missing cursors alike as "last page"; returning an
    # empty cursor would make the caller start over from page one.
    next_cursor = (payload.get('cursor') or {}).get('next')
    return next_cursor if next_cursor else None

# === 4. Scrape secondary (nested) comments ===
def crawl_sub_comments(oid, root_rpid, csv_writer):
    """Write every nested reply under one top-level comment to the CSV.

    Requests pages of 10 replies starting at page 1 and stops at the first
    page that comes back without replies.

    Args:
        oid: Numeric video id.
        root_rpid: Id of the top-level comment whose replies are fetched.
        csv_writer: Writer handed through to ``write_comment``.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Read the cookie file once instead of on every page request.
    headers = get_header()
    page = 1
    while True:
        url = f"https://api.bilibili.com/x/v2/reply/reply?oid={oid}&type=1&root={root_rpid}&ps=10&pn={page}&web_location=333.788"
        # A timeout keeps the crawl from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        # "data" / "replies" may be present but null; `or` covers that too.
        replies = (data.get('data') or {}).get('replies') or []
        if not replies:
            break

        for reply in replies:
            write_comment(reply, csv_writer)

        page += 1
        time.sleep(0.2)  # Avoid getting banned by scraping too quickly

# === 5. Write a single comment to CSV ===
def write_comment(reply, csv_writer):
    """Append one comment (top-level or nested) as a CSV row.

    Columns, in order: parent id, reply id, user id, user name, user level,
    sex, message (newlines replaced by spaces), creation time, reply-count
    text (``sub_reply_entry_text``, default '0'), like count, signature,
    IP location, premium-member flag ('是'/'否'), avatar URL.

    Optional nested objects (``member``, ``level_info``, ``vip``, ``content``,
    ``reply_control``) that are missing or null fall back to defaults, so a
    sparsely populated reply is still written instead of being dropped.

    Args:
        reply: dict describing one reply.
        csv_writer: Object exposing ``writerow`` (e.g. ``csv.writer``).

    Any remaining error (for example a missing ``ctime``) is printed and the
    row is skipped; nothing is raised.
    """
    try:
        member = reply.get('member') or {}
        level_info = member.get('level_info') or {}
        vip = member.get('vip') or {}
        content = reply.get('content') or {}
        reply_control = reply.get('reply_control') or {}

        if 'location' in reply_control:
            # Drops the first 5 characters — presumably a fixed "IP属地：" label;
            # TODO confirm against real responses.
            location = reply_control['location'][5:]
        else:
            location = '未知'

        csv_writer.writerow([
            reply.get('parent', ''),
            reply.get('rpid', ''),
            reply.get('mid', ''),
            member.get('uname', ''),
            level_info.get('current_level', ''),
            member.get('sex', ''),
            content.get('message', '').replace('\n', ' '),
            # Epoch seconds -> naive timestamp (no timezone conversion applied).
            pd.to_datetime(reply['ctime'], unit='s'),
            reply_control.get('sub_reply_entry_text', '0').strip(),
            reply.get('like', 0),
            member.get('sign', ''),
            location,
            '是' if vip.get('vipStatus', 0) != 0 else '否',
            member.get('avatar', '')
        ])
    except Exception as e:
        # Deliberate best effort: one malformed reply must not abort the crawl.
        print(f"写入评论出错: {e}")

# === 6. Main program entry point ===
def main():
    """Ask for a BV id and dump all comments of that video into a CSV file.

    The file is named after the first 20 characters of the page title and is
    created (or overwritten) in the current working directory.
    """
    bv = input("请输入B站BV号（如BV1CDdWYHEtU）: ").strip()
    oid, title = get_video_info(bv)
    output_file = f"{title[:20]}_评论.csv"
    header_row = ['上级评论ID', '评论ID', '用户ID', '用户名', '用户等级', '性别', '评论内容', '评论时间', '回复数', '点赞数', '个性签名', 'IP属地', '是否是大会员', '头像']

    # utf-8-sig prepends a BOM to the file.
    with open(output_file, 'w', newline='', encoding='utf-8-sig') as out:
        writer = csv.writer(out)
        writer.writerow(header_row)

        print(f"Starting to scrape comments for {title}...")
        cursor = ''
        # Fetch page after page until crawl_comments signals the end with None.
        while True:
            cursor = crawl_comments(bv, oid, writer, is_second=True, page_cursor=cursor)
            time.sleep(0.5)
            if cursor is None:
                break

        print(f"Scraping complete! Results saved as {output_file}")

# Entry point: start the scraper only when this code runs as the top-level program.
if __name__ == "__main__":
    main()


Starting to scrape comments for 【爆笑每日秀】美国人吐槽特朗普加征关税，全程高能讽刺_哔哩哔哩_bilibili...
Scraping complete! Results saved as 【爆笑每日秀】美国人吐槽特朗普加征关税，_评论.csv


In [3]:
#Video 3: Trump Implements Reciprocal Tariffs — Do You Even Have Bullets in Your Guns?
import os
import re
import requests
import json
import hashlib
import urllib.parse
import time
import csv
import pandas as pd

# === 1. Securely read cookies ===
def get_header(cookie_path='bili_cookie3.txt'):
    """Assemble the HTTP headers sent with every request.

    Args:
        cookie_path: UTF-8 text file whose entire content is the Cookie value.

    Returns:
        dict with two entries: "Cookie" (file content with leading/trailing
        whitespace removed) and a fixed desktop-browser "User-Agent".

    Raises:
        FileNotFoundError: If nothing exists at ``cookie_path``.
    """
    # Fail early with an actionable message instead of the bare open() error.
    if not os.path.exists(cookie_path):
        raise FileNotFoundError(f"Cookie file '{cookie_path}' not found. Please check the path.")

    with open(cookie_path, 'r', encoding='utf-8') as cookie_file:
        raw_cookie = cookie_file.read()

    return {
        "Cookie": raw_cookie.strip(),
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
    }

# === 2. Retrieve video information (OID + title) ===
def get_video_info(bv):
    """Download the video page and extract its numeric id and title.

    Args:
        bv: BV id of the video, e.g. "BV1CDdWYHEtU".

    Returns:
        (oid, title). ``oid`` is the digit string captured from the page's
        ``"aid":<digits>,"bvid":"<bv>"`` fragment. ``title`` is the stripped
        ``<title>`` text with every character that is invalid in a file name
        replaced by ``_`` (the caller uses it to name the CSV).

    Raises:
        requests.HTTPError: On a non-success HTTP status.
        ValueError: If the id or the title cannot be found in the page.
    """
    url = f"https://www.bilibili.com/video/{bv}"
    # A timeout keeps the cell from hanging forever on a stalled connection.
    resp = requests.get(url, headers=get_header(), timeout=10)
    resp.raise_for_status()
    text = resp.text

    # re.escape: the user-supplied id must be matched literally, not as a pattern.
    oid_match = re.search(r'"aid":(\d+),"bvid":"{}"'.format(re.escape(bv)), text)
    title_match = re.search(r'<title.*?>(.*?)</title>', text)

    if not oid_match or not title_match:
        raise ValueError("Failed to extract OID or Title from the video page.")

    oid = oid_match.group(1)
    # Replace all characters that are illegal in file names (not only '/'),
    # otherwise open() on the derived CSV name can fail, notably on Windows.
    title = re.sub(r'[\\/:*?"<>|]', '_', title_match.group(1).strip())
    return oid, title

# === 3. Scrape comments ===
def crawl_comments(bv, oid, csv_writer, is_second=True, page_cursor=''):
    """Fetch one page of top-level comments and write them to the CSV.

    Args:
        bv: BV id of the video. Not used here; kept so existing callers work.
        oid: Numeric video id the comment API is queried with.
        csv_writer: Writer handed through to ``write_comment``.
        is_second: When true, nested replies of every comment reporting
            ``rcount > 0`` are fetched with ``crawl_sub_comments``.
        page_cursor: Cursor returned by the previous call; empty for page one.

    Returns:
        The cursor for the next page, or ``None`` when the page holds no
        replies or no usable next cursor (0, empty or missing) is reported.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Fixed query parameters. NOTE(review): presumably mode=2 selects
    # time-ordered comments — confirm against the API.
    mode, plat, type_, web_location = 2, 1, 1, 1315875
    timestamp = int(time.time())

    if page_cursor:
        offset = json.dumps({"type": 3, "direction": 1, "Data": {"cursor": page_cursor}})
    else:
        offset = ""
    pagination_str = json.dumps({"offset": offset}, separators=(',', ':'))

    # Generate w_rid: MD5 of the query string (keys in alphabetical order)
    # followed by a fixed hex suffix.
    # NOTE(review): the suffix looks like a hard-coded signing key; if the
    # service rotates it, requests will start failing — confirm.
    params = f"mode={mode}&oid={oid}&pagination_str={urllib.parse.quote(pagination_str)}&plat={plat}&seek_rpid=&type={type_}&web_location={web_location}&wts={timestamp}ea1db124af3c7062474693fa704f4ff8"
    w_rid = hashlib.md5(params.encode('utf-8')).hexdigest()

    url = (
        f"https://api.bilibili.com/x/v2/reply/wbi/main?"
        f"oid={oid}&type={type_}&mode={mode}&pagination_str={urllib.parse.quote(pagination_str, safe=':')}"
        f"&plat={plat}&seek_rpid=&web_location={web_location}&w_rid={w_rid}&wts={timestamp}"
    )

    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, headers=get_header(), timeout=10)
    response.raise_for_status()
    data = response.json()

    # "data" / "replies" may be present but null; `or` also covers that case,
    # which a .get() default alone does not.
    payload = data.get('data') or {}
    replies = payload.get('replies') or []
    if not replies:
        # NOTE(review): assumes the response envelope carries code/message
        # fields — confirm. Reported so an API error is not mistaken for
        # "no more comments".
        if data.get('code', 0) != 0:
            print(f"Comment API returned code {data.get('code')}: {data.get('message')}")
        return None  # No more replies

    for reply in replies:
        write_comment(reply, csv_writer)

        # If secondary (nested) comments are enabled
        if is_second and reply.get('rcount', 0) > 0:
            crawl_sub_comments(oid, reply['rpid'], csv_writer)

    # Treat 0, empty and missing cursors alike as "last page"; returning an
    # empty cursor would make the caller start over from page one.
    next_cursor = (payload.get('cursor') or {}).get('next')
    return next_cursor if next_cursor else None

# === 4. Scrape secondary (nested) comments ===
def crawl_sub_comments(oid, root_rpid, csv_writer):
    """Write every nested reply under one top-level comment to the CSV.

    Requests pages of 10 replies starting at page 1 and stops at the first
    page that comes back without replies.

    Args:
        oid: Numeric video id.
        root_rpid: Id of the top-level comment whose replies are fetched.
        csv_writer: Writer handed through to ``write_comment``.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Read the cookie file once instead of on every page request.
    headers = get_header()
    page = 1
    while True:
        url = f"https://api.bilibili.com/x/v2/reply/reply?oid={oid}&type=1&root={root_rpid}&ps=10&pn={page}&web_location=333.788"
        # A timeout keeps the crawl from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        # "data" / "replies" may be present but null; `or` covers that too.
        replies = (data.get('data') or {}).get('replies') or []
        if not replies:
            break

        for reply in replies:
            write_comment(reply, csv_writer)

        page += 1
        time.sleep(0.2)  # Avoid getting banned by scraping too quickly

# === 5. Write a single comment to CSV ===
def write_comment(reply, csv_writer):
    """Append one comment (top-level or nested) as a CSV row.

    Columns, in order: parent id, reply id, user id, user name, user level,
    sex, message (newlines replaced by spaces), creation time, reply-count
    text (``sub_reply_entry_text``, default '0'), like count, signature,
    IP location, premium-member flag ('是'/'否'), avatar URL.

    Optional nested objects (``member``, ``level_info``, ``vip``, ``content``,
    ``reply_control``) that are missing or null fall back to defaults, so a
    sparsely populated reply is still written instead of being dropped.

    Args:
        reply: dict describing one reply.
        csv_writer: Object exposing ``writerow`` (e.g. ``csv.writer``).

    Any remaining error (for example a missing ``ctime``) is printed and the
    row is skipped; nothing is raised.
    """
    try:
        member = reply.get('member') or {}
        level_info = member.get('level_info') or {}
        vip = member.get('vip') or {}
        content = reply.get('content') or {}
        reply_control = reply.get('reply_control') or {}

        if 'location' in reply_control:
            # Drops the first 5 characters — presumably a fixed "IP属地：" label;
            # TODO confirm against real responses.
            location = reply_control['location'][5:]
        else:
            location = '未知'

        csv_writer.writerow([
            reply.get('parent', ''),
            reply.get('rpid', ''),
            reply.get('mid', ''),
            member.get('uname', ''),
            level_info.get('current_level', ''),
            member.get('sex', ''),
            content.get('message', '').replace('\n', ' '),
            # Epoch seconds -> naive timestamp (no timezone conversion applied).
            pd.to_datetime(reply['ctime'], unit='s'),
            reply_control.get('sub_reply_entry_text', '0').strip(),
            reply.get('like', 0),
            member.get('sign', ''),
            location,
            '是' if vip.get('vipStatus', 0) != 0 else '否',
            member.get('avatar', '')
        ])
    except Exception as e:
        # Deliberate best effort: one malformed reply must not abort the crawl.
        print(f"写入评论出错: {e}")

# === 6. Main program entry point ===
def main():
    """Ask for a BV id and dump all comments of that video into a CSV file.

    The file is named after the first 20 characters of the page title and is
    created (or overwritten) in the current working directory.
    """
    bv = input("请输入B站BV号（如BV1CDdWYHEtU）: ").strip()
    oid, title = get_video_info(bv)
    output_file = f"{title[:20]}_评论.csv"
    header_row = ['上级评论ID', '评论ID', '用户ID', '用户名', '用户等级', '性别', '评论内容', '评论时间', '回复数', '点赞数', '个性签名', 'IP属地', '是否是大会员', '头像']

    # utf-8-sig prepends a BOM to the file.
    with open(output_file, 'w', newline='', encoding='utf-8-sig') as out:
        writer = csv.writer(out)
        writer.writerow(header_row)

        print(f"Starting to scrape comments for {title}...")
        cursor = ''
        # Fetch page after page until crawl_comments signals the end with None.
        while True:
            cursor = crawl_comments(bv, oid, writer, is_second=True, page_cursor=cursor)
            time.sleep(0.5)
            if cursor is None:
                break

        print(f"Scraping complete! Results saved as {output_file}")

# Entry point: start the scraper only when this code runs as the top-level program.
if __name__ == "__main__":
    main()


Starting to scrape comments for 特朗普实施对等关税，赌你们的枪里没有子弹？_哔哩哔哩_bilibili...
Scraping complete! Results saved as 特朗普实施对等关税，赌你们的枪里没有子弹_评论.csv


In [4]:
#Video 4: [Wu Shi] Behind the Global Tariff Chaos Lies the Final Battle That Must Not Be Lost 
import os
import re
import requests
import json
import hashlib
import urllib.parse
import time
import csv
import pandas as pd

# === 1. Securely read cookies ===
def get_header(cookie_path='bili_cookie4.txt'):
    """Assemble the HTTP headers sent with every request.

    Args:
        cookie_path: UTF-8 text file whose entire content is the Cookie value.

    Returns:
        dict with two entries: "Cookie" (file content with leading/trailing
        whitespace removed) and a fixed desktop-browser "User-Agent".

    Raises:
        FileNotFoundError: If nothing exists at ``cookie_path``.
    """
    # Fail early with an actionable message instead of the bare open() error.
    if not os.path.exists(cookie_path):
        raise FileNotFoundError(f"Cookie file '{cookie_path}' not found. Please check the path.")

    with open(cookie_path, 'r', encoding='utf-8') as cookie_file:
        raw_cookie = cookie_file.read()

    return {
        "Cookie": raw_cookie.strip(),
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
    }

# === 2. Retrieve video information (OID + title) ===
def get_video_info(bv):
    """Download the video page and extract its numeric id and title.

    Args:
        bv: BV id of the video, e.g. "BV1CDdWYHEtU".

    Returns:
        (oid, title). ``oid`` is the digit string captured from the page's
        ``"aid":<digits>,"bvid":"<bv>"`` fragment. ``title`` is the stripped
        ``<title>`` text with every character that is invalid in a file name
        replaced by ``_`` (the caller uses it to name the CSV).

    Raises:
        requests.HTTPError: On a non-success HTTP status.
        ValueError: If the id or the title cannot be found in the page.
    """
    url = f"https://www.bilibili.com/video/{bv}"
    # A timeout keeps the cell from hanging forever on a stalled connection.
    resp = requests.get(url, headers=get_header(), timeout=10)
    resp.raise_for_status()
    text = resp.text

    # re.escape: the user-supplied id must be matched literally, not as a pattern.
    oid_match = re.search(r'"aid":(\d+),"bvid":"{}"'.format(re.escape(bv)), text)
    title_match = re.search(r'<title.*?>(.*?)</title>', text)

    if not oid_match or not title_match:
        raise ValueError("Failed to extract OID or Title from the video page.")

    oid = oid_match.group(1)
    # Replace all characters that are illegal in file names (not only '/'),
    # otherwise open() on the derived CSV name can fail, notably on Windows.
    title = re.sub(r'[\\/:*?"<>|]', '_', title_match.group(1).strip())
    return oid, title

# === 3. Scrape comments ===
def crawl_comments(bv, oid, csv_writer, is_second=True, page_cursor=''):
    """Fetch one page of top-level comments and write them to the CSV.

    Args:
        bv: BV id of the video. Not used here; kept so existing callers work.
        oid: Numeric video id the comment API is queried with.
        csv_writer: Writer handed through to ``write_comment``.
        is_second: When true, nested replies of every comment reporting
            ``rcount > 0`` are fetched with ``crawl_sub_comments``.
        page_cursor: Cursor returned by the previous call; empty for page one.

    Returns:
        The cursor for the next page, or ``None`` when the page holds no
        replies or no usable next cursor (0, empty or missing) is reported.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Fixed query parameters. NOTE(review): presumably mode=2 selects
    # time-ordered comments — confirm against the API.
    mode, plat, type_, web_location = 2, 1, 1, 1315875
    timestamp = int(time.time())

    if page_cursor:
        offset = json.dumps({"type": 3, "direction": 1, "Data": {"cursor": page_cursor}})
    else:
        offset = ""
    pagination_str = json.dumps({"offset": offset}, separators=(',', ':'))

    # Generate w_rid: MD5 of the query string (keys in alphabetical order)
    # followed by a fixed hex suffix.
    # NOTE(review): the suffix looks like a hard-coded signing key; if the
    # service rotates it, requests will start failing — confirm.
    params = f"mode={mode}&oid={oid}&pagination_str={urllib.parse.quote(pagination_str)}&plat={plat}&seek_rpid=&type={type_}&web_location={web_location}&wts={timestamp}ea1db124af3c7062474693fa704f4ff8"
    w_rid = hashlib.md5(params.encode('utf-8')).hexdigest()

    url = (
        f"https://api.bilibili.com/x/v2/reply/wbi/main?"
        f"oid={oid}&type={type_}&mode={mode}&pagination_str={urllib.parse.quote(pagination_str, safe=':')}"
        f"&plat={plat}&seek_rpid=&web_location={web_location}&w_rid={w_rid}&wts={timestamp}"
    )

    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, headers=get_header(), timeout=10)
    response.raise_for_status()
    data = response.json()

    # "data" / "replies" may be present but null; `or` also covers that case,
    # which a .get() default alone does not.
    payload = data.get('data') or {}
    replies = payload.get('replies') or []
    if not replies:
        # NOTE(review): assumes the response envelope carries code/message
        # fields — confirm. Reported so an API error is not mistaken for
        # "no more comments".
        if data.get('code', 0) != 0:
            print(f"Comment API returned code {data.get('code')}: {data.get('message')}")
        return None  # No more replies

    for reply in replies:
        write_comment(reply, csv_writer)

        # If secondary (nested) comments are enabled
        if is_second and reply.get('rcount', 0) > 0:
            crawl_sub_comments(oid, reply['rpid'], csv_writer)

    # Treat 0, empty and missing cursors alike as "last page"; returning an
    # empty cursor would make the caller start over from page one.
    next_cursor = (payload.get('cursor') or {}).get('next')
    return next_cursor if next_cursor else None

# === 4. Scrape secondary (nested) comments ===
def crawl_sub_comments(oid, root_rpid, csv_writer):
    """Write every nested reply under one top-level comment to the CSV.

    Requests pages of 10 replies starting at page 1 and stops at the first
    page that comes back without replies.

    Args:
        oid: Numeric video id.
        root_rpid: Id of the top-level comment whose replies are fetched.
        csv_writer: Writer handed through to ``write_comment``.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Read the cookie file once instead of on every page request.
    headers = get_header()
    page = 1
    while True:
        url = f"https://api.bilibili.com/x/v2/reply/reply?oid={oid}&type=1&root={root_rpid}&ps=10&pn={page}&web_location=333.788"
        # A timeout keeps the crawl from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        # "data" / "replies" may be present but null; `or` covers that too.
        replies = (data.get('data') or {}).get('replies') or []
        if not replies:
            break

        for reply in replies:
            write_comment(reply, csv_writer)

        page += 1
        time.sleep(0.2)  # Avoid getting banned by scraping too quickly

# === 5. Write a single comment to CSV ===
def write_comment(reply, csv_writer):
    """Append one comment (top-level or nested) as a CSV row.

    Columns, in order: parent id, reply id, user id, user name, user level,
    sex, message (newlines replaced by spaces), creation time, reply-count
    text (``sub_reply_entry_text``, default '0'), like count, signature,
    IP location, premium-member flag ('是'/'否'), avatar URL.

    Optional nested objects (``member``, ``level_info``, ``vip``, ``content``,
    ``reply_control``) that are missing or null fall back to defaults, so a
    sparsely populated reply is still written instead of being dropped.

    Args:
        reply: dict describing one reply.
        csv_writer: Object exposing ``writerow`` (e.g. ``csv.writer``).

    Any remaining error (for example a missing ``ctime``) is printed and the
    row is skipped; nothing is raised.
    """
    try:
        member = reply.get('member') or {}
        level_info = member.get('level_info') or {}
        vip = member.get('vip') or {}
        content = reply.get('content') or {}
        reply_control = reply.get('reply_control') or {}

        if 'location' in reply_control:
            # Drops the first 5 characters — presumably a fixed "IP属地：" label;
            # TODO confirm against real responses.
            location = reply_control['location'][5:]
        else:
            location = '未知'

        csv_writer.writerow([
            reply.get('parent', ''),
            reply.get('rpid', ''),
            reply.get('mid', ''),
            member.get('uname', ''),
            level_info.get('current_level', ''),
            member.get('sex', ''),
            content.get('message', '').replace('\n', ' '),
            # Epoch seconds -> naive timestamp (no timezone conversion applied).
            pd.to_datetime(reply['ctime'], unit='s'),
            reply_control.get('sub_reply_entry_text', '0').strip(),
            reply.get('like', 0),
            member.get('sign', ''),
            location,
            '是' if vip.get('vipStatus', 0) != 0 else '否',
            member.get('avatar', '')
        ])
    except Exception as e:
        # Deliberate best effort: one malformed reply must not abort the crawl.
        print(f"写入评论出错: {e}")

# === 6. Main program entry point ===
def main():
    """Ask for a BV id and dump all comments of that video into a CSV file.

    The file is named after the first 20 characters of the page title and is
    created (or overwritten) in the current working directory.
    """
    bv = input("请输入B站BV号（如BV1CDdWYHEtU）: ").strip()
    oid, title = get_video_info(bv)
    output_file = f"{title[:20]}_评论.csv"
    header_row = ['上级评论ID', '评论ID', '用户ID', '用户名', '用户等级', '性别', '评论内容', '评论时间', '回复数', '点赞数', '个性签名', 'IP属地', '是否是大会员', '头像']

    # utf-8-sig prepends a BOM to the file.
    with open(output_file, 'w', newline='', encoding='utf-8-sig') as out:
        writer = csv.writer(out)
        writer.writerow(header_row)

        print(f"开始爬取 {title} 的评论...")
        cursor = ''
        # Fetch page after page until crawl_comments signals the end with None.
        while True:
            cursor = crawl_comments(bv, oid, writer, is_second=True, page_cursor=cursor)
            time.sleep(0.5)
            if cursor is None:
                break

        print(f"爬取完成！结果保存为 {output_file}")

# Entry point: start the scraper only when this code runs as the top-level program.
if __name__ == "__main__":
    main()


开始爬取 【巫师】全球关税混战背后，是不能输的终局之战_哔哩哔哩_bilibili 的评论...
爬取完成！结果保存为 【巫师】全球关税混战背后，是不能输的终局_评论.csv


In [5]:
#Video 5. [Li Hai] Mar-a-Lago Agreement: What Is the Endgame Behind Trump’s Tariff Frenzy?
import os
import re
import requests
import json
import hashlib
import urllib.parse
import time
import csv
import pandas as pd

# === 1. Securely read cookies ===
def get_header(cookie_path='bili_cookie5.txt'):
    """Assemble the HTTP headers sent with every request.

    Args:
        cookie_path: UTF-8 text file whose entire content is the Cookie value.

    Returns:
        dict with two entries: "Cookie" (file content with leading/trailing
        whitespace removed) and a fixed desktop-browser "User-Agent".

    Raises:
        FileNotFoundError: If nothing exists at ``cookie_path``.
    """
    # Fail early with an actionable message instead of the bare open() error.
    if not os.path.exists(cookie_path):
        raise FileNotFoundError(f"Cookie file '{cookie_path}' not found. Please check the path.")

    with open(cookie_path, 'r', encoding='utf-8') as cookie_file:
        raw_cookie = cookie_file.read()

    return {
        "Cookie": raw_cookie.strip(),
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
    }

# === 2. Retrieve video information (OID + title) ===
def get_video_info(bv):
    """Download the video page and extract its numeric id and title.

    Args:
        bv: BV id of the video, e.g. "BV1CDdWYHEtU".

    Returns:
        (oid, title). ``oid`` is the digit string captured from the page's
        ``"aid":<digits>,"bvid":"<bv>"`` fragment. ``title`` is the stripped
        ``<title>`` text with every character that is invalid in a file name
        replaced by ``_`` (the caller uses it to name the CSV).

    Raises:
        requests.HTTPError: On a non-success HTTP status.
        ValueError: If the id or the title cannot be found in the page.
    """
    url = f"https://www.bilibili.com/video/{bv}"
    # A timeout keeps the cell from hanging forever on a stalled connection.
    resp = requests.get(url, headers=get_header(), timeout=10)
    resp.raise_for_status()
    text = resp.text

    # re.escape: the user-supplied id must be matched literally, not as a pattern.
    oid_match = re.search(r'"aid":(\d+),"bvid":"{}"'.format(re.escape(bv)), text)
    title_match = re.search(r'<title.*?>(.*?)</title>', text)

    if not oid_match or not title_match:
        raise ValueError("Failed to extract OID or Title from the video page.")

    oid = oid_match.group(1)
    # Replace all characters that are illegal in file names (not only '/'),
    # otherwise open() on the derived CSV name can fail, notably on Windows.
    title = re.sub(r'[\\/:*?"<>|]', '_', title_match.group(1).strip())
    return oid, title

# === 3. Scrape comments ===
def crawl_comments(bv, oid, csv_writer, is_second=True, page_cursor=''):
    """Fetch one page of top-level comments and write them to the CSV.

    Args:
        bv: BV id of the video. Not used here; kept so existing callers work.
        oid: Numeric video id the comment API is queried with.
        csv_writer: Writer handed through to ``write_comment``.
        is_second: When true, nested replies of every comment reporting
            ``rcount > 0`` are fetched with ``crawl_sub_comments``.
        page_cursor: Cursor returned by the previous call; empty for page one.

    Returns:
        The cursor for the next page, or ``None`` when the page holds no
        replies or no usable next cursor (0, empty or missing) is reported.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Fixed query parameters. NOTE(review): presumably mode=2 selects
    # time-ordered comments — confirm against the API.
    mode, plat, type_, web_location = 2, 1, 1, 1315875
    timestamp = int(time.time())

    if page_cursor:
        offset = json.dumps({"type": 3, "direction": 1, "Data": {"cursor": page_cursor}})
    else:
        offset = ""
    pagination_str = json.dumps({"offset": offset}, separators=(',', ':'))

    # Generate w_rid: MD5 of the query string (keys in alphabetical order)
    # followed by a fixed hex suffix.
    # NOTE(review): the suffix looks like a hard-coded signing key; if the
    # service rotates it, requests will start failing — confirm.
    params = f"mode={mode}&oid={oid}&pagination_str={urllib.parse.quote(pagination_str)}&plat={plat}&seek_rpid=&type={type_}&web_location={web_location}&wts={timestamp}ea1db124af3c7062474693fa704f4ff8"
    w_rid = hashlib.md5(params.encode('utf-8')).hexdigest()

    url = (
        f"https://api.bilibili.com/x/v2/reply/wbi/main?"
        f"oid={oid}&type={type_}&mode={mode}&pagination_str={urllib.parse.quote(pagination_str, safe=':')}"
        f"&plat={plat}&seek_rpid=&web_location={web_location}&w_rid={w_rid}&wts={timestamp}"
    )

    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, headers=get_header(), timeout=10)
    response.raise_for_status()
    data = response.json()

    # "data" / "replies" may be present but null; `or` also covers that case,
    # which a .get() default alone does not.
    payload = data.get('data') or {}
    replies = payload.get('replies') or []
    if not replies:
        # NOTE(review): assumes the response envelope carries code/message
        # fields — confirm. Reported so an API error is not mistaken for
        # "no more comments".
        if data.get('code', 0) != 0:
            print(f"Comment API returned code {data.get('code')}: {data.get('message')}")
        return None  # No more replies

    for reply in replies:
        write_comment(reply, csv_writer)

        # If secondary (nested) comments are enabled
        if is_second and reply.get('rcount', 0) > 0:
            crawl_sub_comments(oid, reply['rpid'], csv_writer)

    # Treat 0, empty and missing cursors alike as "last page"; returning an
    # empty cursor would make the caller start over from page one.
    next_cursor = (payload.get('cursor') or {}).get('next')
    return next_cursor if next_cursor else None

# === 4. Scrape secondary (nested) comments ===
def crawl_sub_comments(oid, root_rpid, csv_writer):
    """Write every nested reply under one top-level comment to the CSV.

    Requests pages of 10 replies starting at page 1 and stops at the first
    page that comes back without replies.

    Args:
        oid: Numeric video id.
        root_rpid: Id of the top-level comment whose replies are fetched.
        csv_writer: Writer handed through to ``write_comment``.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    # Read the cookie file once instead of on every page request.
    headers = get_header()
    page = 1
    while True:
        url = f"https://api.bilibili.com/x/v2/reply/reply?oid={oid}&type=1&root={root_rpid}&ps=10&pn={page}&web_location=333.788"
        # A timeout keeps the crawl from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        # "data" / "replies" may be present but null; `or` covers that too.
        replies = (data.get('data') or {}).get('replies') or []
        if not replies:
            break

        for reply in replies:
            write_comment(reply, csv_writer)

        page += 1
        time.sleep(0.2)  # Avoid getting banned by scraping too quickly

# === 5. Write a single comment to CSV ===
def write_comment(reply, csv_writer):
    """Append one comment (top-level or nested) as a CSV row.

    Columns, in order: parent id, reply id, user id, user name, user level,
    sex, message (newlines replaced by spaces), creation time, reply-count
    text (``sub_reply_entry_text``, default '0'), like count, signature,
    IP location, premium-member flag ('是'/'否'), avatar URL.

    Optional nested objects (``member``, ``level_info``, ``vip``, ``content``,
    ``reply_control``) that are missing or null fall back to defaults, so a
    sparsely populated reply is still written instead of being dropped.

    Args:
        reply: dict describing one reply.
        csv_writer: Object exposing ``writerow`` (e.g. ``csv.writer``).

    Any remaining error (for example a missing ``ctime``) is printed and the
    row is skipped; nothing is raised.
    """
    try:
        member = reply.get('member') or {}
        level_info = member.get('level_info') or {}
        vip = member.get('vip') or {}
        content = reply.get('content') or {}
        reply_control = reply.get('reply_control') or {}

        if 'location' in reply_control:
            # Drops the first 5 characters — presumably a fixed "IP属地：" label;
            # TODO confirm against real responses.
            location = reply_control['location'][5:]
        else:
            location = '未知'

        csv_writer.writerow([
            reply.get('parent', ''),
            reply.get('rpid', ''),
            reply.get('mid', ''),
            member.get('uname', ''),
            level_info.get('current_level', ''),
            member.get('sex', ''),
            content.get('message', '').replace('\n', ' '),
            # Epoch seconds -> naive timestamp (no timezone conversion applied).
            pd.to_datetime(reply['ctime'], unit='s'),
            reply_control.get('sub_reply_entry_text', '0').strip(),
            reply.get('like', 0),
            member.get('sign', ''),
            location,
            '是' if vip.get('vipStatus', 0) != 0 else '否',
            member.get('avatar', '')
        ])
    except Exception as e:
        # Deliberate best effort: one malformed reply must not abort the crawl.
        print(f"写入评论出错: {e}")

# === 6. Main program entry point ===
def main():
    """Ask for a BV id and dump all comments of that video into a CSV file.

    The file is named after the first 20 characters of the page title and is
    created (or overwritten) in the current working directory.
    """
    bv = input("请输入B站BV号（如BV1CDdWYHEtU）: ").strip()
    oid, title = get_video_info(bv)
    output_file = f"{title[:20]}_评论.csv"
    header_row = ['上级评论ID', '评论ID', '用户ID', '用户名', '用户等级', '性别', '评论内容', '评论时间', '回复数', '点赞数', '个性签名', 'IP属地', '是否是大会员', '头像']

    # utf-8-sig prepends a BOM to the file.
    with open(output_file, 'w', newline='', encoding='utf-8-sig') as out:
        writer = csv.writer(out)
        writer.writerow(header_row)

        print(f"Starting to scrape comments for {title}...")
        cursor = ''
        # Fetch page after page until crawl_comments signals the end with None.
        while True:
            cursor = crawl_comments(bv, oid, writer, is_second=True, page_cursor=cursor)
            time.sleep(0.5)
            if cursor is None:
                break

        print(f"Scraping complete! Results saved as {output_file}")

# Entry point: start the scraper only when this code runs as the top-level program.
if __name__ == "__main__":
    main()

Starting to scrape comments for 【厉害】海湖庄园协议：特朗普疯狂关税背后，最终棋局是什么？_哔哩哔哩_bilibili...
Scraping complete! Results saved as 【厉害】海湖庄园协议：特朗普疯狂关税背后_评论.csv
