# Code headers for the given code:

**Purpose:**
This Python script extracts comments from Bilibili videos using the Bilibili reply API and saves them to a CSV file. Specifically, it reads video URLs and titles from a CSV file, sends requests to the Bilibili API to retrieve comments, extracts the text of each comment from the JSON response, and collects the results in a Pandas DataFrame. The resulting data is then saved to a CSV file.


**Inputs:**
This script requires a CSV file named 'data.csv', containing two columns: 'link' and 'title'. The 'link' column contains the URLs of Bilibili videos, and the 'title' column contains their titles. Additionally, it requires the installation of several libraries, such as pandas and requests.


**Outputs:**
This script writes the extracted data to a CSV file named 'comments.csv' (the first, three-page version of the scraping cell writes 'comment.csv' instead). The CSV file contains three columns: 'link', 'title', and 'comment'. Each row holds one comment: the 'link' column contains the URL of the video, the 'title' column contains the title of the video, and the 'comment' column contains the text of that comment.


**Machine used:**
This script was developed and tested on a laptop computer running an unspecified operating system.
**Operating system:**
The operating system used to develop and test this script is not specified.


**Expected runtime:**
The expected runtime of this script depends on the number of videos being processed and the number of comments associated with each video. Typically, it takes several seconds to extract comments from each video. However, if the number of videos is large or the number of comments associated with each video is high, the expected runtime can increase significantly.





In [None]:
!pip install fake-useragent
from fake_useragent import UserAgent
ua = UserAgent()
print(ua.chrome)

Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.71 Safari/534.24


In [None]:
import pandas as pd
import requests

# Read the input CSV, which contains two columns: link and title.
df = pd.read_csv("/Users/sundaylu/Desktop/个人/BUsemester2/Wed7340/group/data.csv", dtype={"link": str, "title": str})

# Browser-like User-Agent sent with every request; it never changes, so it is
# built once instead of once per video.
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.71 Safari/534.24"}

# Request up to three comment pages per video and collect one row per comment.
data = []
for _, row in df.iterrows():
    video_url = row["link"]
    # Assumes the link looks like ".../<video id>/" (optionally followed by a
    # query string), so the id is the second-to-last "/"-separated segment.
    # NOTE(review): the reply API's "oid" is presumably the numeric av id -
    # confirm that the ids taken from the links are accepted as-is.
    video_id = video_url.split("/")[-2]
    for page in range(1, 4):
        api_url = f"https://api.bilibili.com/x/v2/reply?pn={page}&type=1&oid={video_id}"
        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(api_url, headers=headers, timeout=10)
        except requests.RequestException as e:
            print(f"Error: {e}. Request failed for {api_url}")
            continue
        try:
            response_data = response.json()
            if "data" in response_data and response_data["code"] == 0:
                # The API sends "replies": null for videos without comments
                # and for pages past the last one; treat that as "no comments"
                # instead of failing with "'NoneType' object is not iterable".
                replies = response_data["data"].get("replies") or []
                data.extend([{
                    "link": video_url,
                    "title": row["title"],
                    "comment": comment["content"]["message"]
                } for comment in replies])
                if not replies:
                    # An empty page means there is nothing further to fetch.
                    break
            else:
                print(f"Error: Cannot extract comments from response data {response_data}")
        except KeyError as e:
            print(f"Error: {e}. Cannot extract comments")
        except Exception as e:
            print(f"Error: {e}. Response content: {response.content}")

# Save the result to a CSV file. Naming the columns explicitly keeps the header
# row in the file even when no comment was collected.
result_df = pd.DataFrame(data, columns=["link", "title", "comment"])
result_df.to_csv("/Users/sundaylu/Desktop/个人/BUsemester2/Wed7340/group/comment.csv", index=False, encoding="utf-8-sig")


Error: 'NoneType' object is not iterable. Response content: b'{"code":0,"message":"0","ttl":1,"data":{"page":{"num":2,"size":20,"count":2,"acount":2},"config":{"showtopic":1,"show_up_flag":true,"read_only":false},"replies":null,"upper":{"mid":263577015,"top":null,"vote":null},"top":null,"vote":0,"blacklist":0,"assist":0,"mode":3,"support_mode":[2,3],"control":{"input_disable":false,"root_input_text":"\xe5\x8f\x91\xe4\xb8\x80\xe6\x9d\xa1\xe5\x8f\x8b\xe5\x96\x84\xe7\x9a\x84\xe8\xaf\x84\xe8\xae\xba","child_input_text":"","giveup_input_text":"\xe4\xb8\x8d\xe5\x8f\x91\xe6\xb2\xa1\xe5\x85\xb3\xe7\xb3\xbb\xef\xbc\x8c\xe8\xaf\xb7\xe7\xbb\xa7\xe7\xbb\xad\xe5\x8f\x8b\xe5\x96\x84\xe5\x93\xa6~","screenshot_icon_state":3,"upload_picture_icon_state":3,"answer_guide_text":"\xe9\x9c\x80\xe8\xa6\x81\xe5\x8d\x87\xe7\xba\xa7\xe6\x88\x90\xe4\xb8\xbalv2\xe4\xbc\x9a\xe5\x91\x98\xe5\x90\x8e\xe6\x89\x8d\xe5\x8f\xaf\xe4\xbb\xa5\xe8\xaf\x84\xe8\xae\xba\xef\xbc\x8c\xe5\x85\x88\xe5\x8e\xbb\xe7\xad\x94\xe9\xa2\x98

In [None]:
import pandas as pd
import requests

# Read the input CSV, which contains two columns: link and title.
df = pd.read_csv("/Users/sundaylu/Desktop/个人/BUsemester2/Wed7340/group/data.csv", dtype={"link": str, "title": str})

# Browser-like User-Agent sent with every request; it never changes, so it is
# built once instead of once per video.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"}

# Request the first comment page of every video and collect one row per comment.
data = []
for _, row in df.iterrows():
    video_url = row["link"]
    # Assumes the link looks like ".../<video id>/" (optionally followed by a
    # query string), so the id is the second-to-last "/"-separated segment.
    # NOTE(review): the reply API's "oid" is presumably the numeric av id -
    # confirm that the ids taken from the links are accepted as-is.
    video_id = video_url.split("/")[-2]
    api_url = f"https://api.bilibili.com/x/v2/reply?pn=1&type=1&oid={video_id}"
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(api_url, headers=headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error: {e}. Request failed for {api_url}")
        continue
    try:
        response_data = response.json()
        if "data" in response_data and response_data["code"] == 0:
            # The API sends "replies": null for videos without comments; treat
            # that as "no comments" instead of failing with
            # "'NoneType' object is not iterable".
            replies = response_data["data"].get("replies") or []
            data.extend([{
                "link": video_url,
                "title": row["title"],
                "comment": comment["content"]["message"]
            } for comment in replies])
        else:
            print(f"Error: Cannot extract comments from response data {response_data}")
    except KeyError as e:
        print(f"Error: {e}. Cannot extract comments")
    except Exception as e:
        print(f"Error: {e}. Response content: {response.content}")

# Save the result to a CSV file. Naming the columns explicitly keeps the header
# row in the file even when no comment was collected.
result_df = pd.DataFrame(data, columns=["link", "title", "comment"])
result_df.to_csv("/Users/sundaylu/Desktop/个人/BUsemester2/Wed7340/group/comments.csv", index=False, encoding="utf-8-sig")


Error: 'NoneType' object is not iterable. Response content: b'{"code":0,"message":"0","ttl":1,"data":{"page":{"num":1,"size":20,"count":0,"acount":0},"config":{"showtopic":1,"show_up_flag":true,"read_only":false},"replies":null,"upper":{"mid":295538256,"top":null,"vote":null},"top":null,"vote":0,"blacklist":0,"assist":0,"mode":3,"support_mode":[2,3],"control":{"input_disable":false,"root_input_text":"\xe5\x8f\x91\xe4\xb8\x80\xe6\x9d\xa1\xe5\x8f\x8b\xe5\x96\x84\xe7\x9a\x84\xe8\xaf\x84\xe8\xae\xba","child_input_text":"","giveup_input_text":"\xe4\xb8\x8d\xe5\x8f\x91\xe6\xb2\xa1\xe5\x85\xb3\xe7\xb3\xbb\xef\xbc\x8c\xe8\xaf\xb7\xe7\xbb\xa7\xe7\xbb\xad\xe5\x8f\x8b\xe5\x96\x84\xe5\x93\xa6~","screenshot_icon_state":3,"upload_picture_icon_state":3,"answer_guide_text":"\xe9\x9c\x80\xe8\xa6\x81\xe5\x8d\x87\xe7\xba\xa7\xe6\x88\x90\xe4\xb8\xbalv2\xe4\xbc\x9a\xe5\x91\x98\xe5\x90\x8e\xe6\x89\x8d\xe5\x8f\xaf\xe4\xbb\xa5\xe8\xaf\x84\xe8\xae\xba\xef\xbc\x8c\xe5\x85\x88\xe5\x8e\xbb\xe7\xad\x94\xe9\xa2\x98