In [None]:
import requests
import re

# Download the Bilibili episode page, pull the first "cid" out of the HTML,
# and save that cid's danmaku (comment) XML next to the notebook.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
# Without a timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10  # seconds

res = requests.get(
    "https://www.bilibili.com/bangumi/play/ep63860",
    headers=headers,
    timeout=REQUEST_TIMEOUT,
)
# Fail loudly on 4xx/5xx instead of searching an error page for a cid.
res.raise_for_status()

# Capture only the digits that follow "cid": so the match stays a clean
# numeric id no matter which character comes after it in the page.
cid_matches = re.findall(r'"cid":\s*(\d+)', res.text)
if cid_matches:
    # Use the first cid found
    first_cid = cid_matches[0]
    print("First cid:", first_cid)

    # Construct the comment XML URL
    comment_url = f'https://comment.bilibili.com/{first_cid}.xml'

    # Fetch the comment XML with the same browser-like headers as the page request
    res_comment = requests.get(comment_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Do not write an HTTP error body to disk as if it were the danmaku XML.
    res_comment.raise_for_status()

    # Save the raw response bytes to <cid>.xml
    with open(f'{first_cid}.xml', 'wb') as f:
        f.write(res_comment.content)
else:
    print("No cid found.")


In [None]:
import re

# Convert the saved danmaku XML into danmus.csv (one comma-joined row per danmaku).
from xml.sax.saxutils import unescape

# NOTE(review): the input file name is hard-coded; confirm it matches the
# <cid>.xml file produced by the download step.
with open('228242151.xml', encoding='utf-8') as f:
    data = f.read()

# Each danmaku looks like <d p="attr1,attr2,...">text</d>; capture both parts.
comments = re.findall('<d p="(.*?)">(.*?)</d>', data)

# The text was captured from raw XML, so characters such as & < > are still
# escaped (&amp; &lt; &gt;). Decode them so the CSV holds the text as written.
extra_entities = {'&quot;': '"', '&apos;': "'"}
danmus = [
    ','.join((attrs, unescape(body, extra_entities)))
    for attrs, body in comments
]

# The header row goes first. The column layout is left unchanged because the
# later cells split each row on ','.
headers = ['stime', 'mode', 'size', 'color', 'date', 'pool', 'author', 'dbid', 'text']
headers = ','.join(headers)
danmus.insert(0, headers)

# utf_8_sig writes a BOM at the start of the file.
with open('danmus.csv', 'w', encoding='utf_8_sig') as f:
    f.writelines([line+'\n' for line in danmus])

In [None]:
from pyecharts import options as opts
from pyecharts.charts import WordCloud
import jieba

# Build a word cloud from the last column (danmaku text) of danmus.csv.
# utf_8_sig is the encoding the CSV is written with; reading with it drops
# the leading BOM instead of leaving it in the first field.
with open('danmus.csv', encoding='utf_8_sig') as f:
    # Skip the header row, otherwise the column name "text" is counted as a word.
    rows = f.readlines()[1:]

text = " ".join(row.rstrip('\n').split(',')[-1] for row in rows)

# Segment the joined text and count every token of two or more characters.
words = jieba.cut(text)
_dict = {}
for word in words:
    if len(word) >= 2:
        _dict[word] = _dict.get(word, 0) + 1

# (word, count) pairs, most frequent first.
items = sorted(_dict.items(), key=lambda pair: pair[1], reverse=True)

c = (
    WordCloud()
    .add(
        "",
        items,
        word_size_range=[20, 120],
        textstyle_opts=opts.TextStyleOpts(font_family="cursive"),
    )
    .render("wordcloud.html")
)

In [None]:
from snownlp import SnowNLP
from pyecharts import options as opts
from pyecharts.charts import Pie

# Classify every danmaku as positive / negative / neutral and draw a pie chart.
with open('danmus.csv', encoding='utf-8') as f:
    # Last CSV column is the danmaku text; [1:] skips the header row.
    text = [line.split(',')[-1] for line in f.readlines()[1:]]

emotions = {
    'positive': 0,
    'negative': 0,
    'neutral': 0
}
for item in text:
    # Score each comment once; the original built a second SnowNLP object and
    # re-read .sentiments for every comment that was not positive.
    score = SnowNLP(item).sentiments
    if score > 0.6:
        emotions['positive'] += 1
    elif score < 0.4:
        emotions['negative'] += 1
    else:
        emotions['neutral'] += 1
print(emotions)


# Colors are assigned in dict order: positive, negative, neutral.
c = (
    Pie()
    .add("", list(emotions.items()))
    .set_colors(["blue", "purple", "orange"])
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c} ({d}%)"))
    .render("emotion.html")
)

In [None]:
from pyecharts.commons.utils import JsCode
from pyecharts.charts import Line
from pyecharts.charts import Line, Grid
import pyecharts.options as opts


# Plot how many danmaku fall into each minute of the video as a smoothed line chart.
with open('danmus.csv', encoding='utf-8') as f:
    # First CSV column is `stime`, treated here as seconds; [1:] skips the header row.
    seconds = sorted(int(float(line.split(',')[0])) for line in f.readlines()[1:])

# Count danmaku per minute. The sorted input keeps the dict in ascending minute order.
data = {}
for second in seconds:
    minute = second // 60
    data[minute] = data.get(minute, 0) + 1

# The x-axis below is a category axis, and ECharts reads a numeric value on a
# category axis as an index into the category list. Integer minutes therefore
# misplace points whenever the minutes have gaps or do not start at 0.
# String labels avoid that.
x_data = [str(minute) for minute in data]
y_data = list(data.values())

# Gradient definitions evaluated by ECharts in the browser.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#333'}, {offset: 1, color: '#333'}], false)"
)
area_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#eb64fb'}, {offset: 1, color: '#3fbbff0d'}], false)"
)
c = (
    Line(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))
    .add_xaxis(xaxis_data=x_data)
    .add_yaxis(
        series_name="弹幕数量",
        y_axis=y_data,
        is_smooth=True,
        symbol="circle",
        symbol_size=6,
        linestyle_opts=opts.LineStyleOpts(color="#fff"),
        label_opts=opts.LabelOpts(is_show=True, position="top", color="white"),
        itemstyle_opts=opts.ItemStyleOpts(
            color="red", border_color="#fff", border_width=3
        ),
        tooltip_opts=opts.TooltipOpts(is_show=True),
        areastyle_opts=opts.AreaStyleOpts(
            color=JsCode(area_color_js), opacity=1),
        # Mark the minute with the highest danmaku count.
        markpoint_opts=opts.MarkPointOpts(
            data=[opts.MarkPointItem(type_="max")])
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="",
            pos_bottom="5%",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(
                color="#fff", font_size=16),
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            boundary_gap=False,
            axislabel_opts=opts.LabelOpts(margin=30, color="#ffffff63"),
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(width=2, color="#fff")
            ),
            axistick_opts=opts.AxisTickOpts(
                is_show=True,
                length=25,
                linestyle_opts=opts.LineStyleOpts(color="#ffffff1f"),
            ),
            splitline_opts=opts.SplitLineOpts(
                is_show=True, linestyle_opts=opts.LineStyleOpts(color="#ffffff1f")
            )
        ),
        yaxis_opts=opts.AxisOpts(
            type_="value",
            position="left",
            axislabel_opts=opts.LabelOpts(margin=20, color="#ffffff63"),
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(width=2, color="#fff")
            ),
            axistick_opts=opts.AxisTickOpts(
                is_show=True,
                length=15,
                linestyle_opts=opts.LineStyleOpts(color="#ffffff1f"),
            ),
            splitline_opts=opts.SplitLineOpts(
                is_show=True, linestyle_opts=opts.LineStyleOpts(color="#ffffff1f")
            ),
        ),
        legend_opts=opts.LegendOpts(is_show=False),
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="line")
    )
    .render("highlights.html")
)

In [None]:
import re

# Write every danmaku containing "高能" to moment.txt, followed by the total
# count and the three timestamps at which it appears most often.
with open('moment.txt', 'w', encoding='utf-8') as output_file:

    # Load [second, text] pairs from the CSV: the first column truncated to a
    # whole number, the last column with its newline removed; [1:] skips the header.
    with open('danmus.csv', encoding='utf-8') as f:
        danmus = [
            [int(float(row.split(',')[0])), row.split(',')[-1].replace('\n', '')]
            for row in f.readlines()[1:]
        ]

    # Chronological order
    danmus.sort(key=lambda entry: entry[0])

    # Keep only the danmaku whose text contains the keyword
    hits = [(stamp, body) for stamp, body in danmus if '高能' in body]
    high_energy_times = [stamp for stamp, _ in hits]
    high_energy_count = len(hits)

    # One "<minutes>m<seconds>s <text>" line per matching danmaku
    for stamp, body in hits:
        output_file.write(f'{int(stamp/60)}m{stamp%60}s {body}\n')

    output_file.write(f"高能出现总次数: {high_energy_count}\n")

    if high_energy_count == 0:
        output_file.write("没有找到包含 '高能' 的弹幕。\n")
    else:
        # Tally how many matches share the same formatted timestamp
        high_energy_freq = {}
        for stamp in sorted(high_energy_times):
            time_str = f"{int(stamp / 60)}m{stamp % 60}s"
            high_energy_freq[time_str] = high_energy_freq.get(time_str, 0) + 1

        # The three most frequent timestamps; ties keep chronological order
        top_three = sorted(
            high_energy_freq.items(), key=lambda pair: pair[1], reverse=True
        )[:3]

        # Append the top-three summary to the report
        output_file.write("高频出现的前三个时间点和次数:\n")
        for time_str, freq in top_three:
            output_file.write(f"{time_str}: {freq}次\n")
