### 热搜（request，爬不了详情页）

In [18]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
爬取微博热搜前50条：排名、标题、热度、阅读量、讨论量、详情页URL，
并保存为 CSV（weibo_hot.csv）
"""

import requests
from bs4 import BeautifulSoup
import time
import random
import urllib.parse
import csv
import sys

# 全局请求头，模拟浏览器
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 ",
    "cookie": "SCF=AvSKhFc03l4Rh_j2xSX7gPrzpY_yODV2-61lZhom9tPuwknwCFkXdReRlqoKjMmde6M4GnVvZyHIOVBHBC8-wko.; SINAGLOBAL=4918828378775.037.1734511921189; UOR=,,www.google.com; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWxuIxHTcvTlbN0RPpN9Wrv5JpX5KMhUgL.FoMEeh5pSoece052dJLoIp9jIg_Li--Ni-82iKn4i--4i-20iKy8; ALF=1749259237; SUB=_2A25FGHa1DeRhGeFM61IQ9i3KyDyIHXVmVPZ9rDV8PUJbkNAYLXXwkW1NQOJQREFBoWuCRYSs3KTFsZ2QH_IENf1m; _s_tentry=www.google.com; Apache=9360931081077.062.1746667312560; ULV=1746667312573:6:1:1:9360931081077.062.1746667312560:1742028222529",
}

def get_hot_search_list(limit=50):
    """ 获取微博热搜列表页前 N 条数据（遍历所有 tr，遇到符合格式的就解析） """
    url = "https://s.weibo.com/top/summary?cate=realtimehot"
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.encoding = resp.apparent_encoding
    soup = BeautifulSoup(resp.text, "lxml")

    hot_list = []
    for tr in soup.find_all("tr"):
        rank_td = tr.find("td", class_="td-01")
        title_td = tr.find("td", class_="td-02")
        if not rank_td or not title_td:
            continue

        rank_text = rank_td.get_text(strip=True)
        if not rank_text.isdigit():
            continue

        a_tag = title_td.find("a")
        title = a_tag.get_text(strip=True) if a_tag else ""
        heat_span = title_td.find("span")
        heat = heat_span.get_text(strip=True) if heat_span else ""

        href = a_tag.get("href", "") if a_tag else ""
        full_link = urllib.parse.urljoin("https://s.weibo.com", href)
        params = urllib.parse.parse_qs(urllib.parse.urlparse(full_link).query)
        q = params.get("q", [None])[0]
        if q:
            q_enc = urllib.parse.quote(q, safe="")
            detail_url = f"https://m.s.weibo.com/vtopic/detail_new?click_from=searchpc&q={q_enc}"
        else:
            detail_url = None

        hot_list.append({
            "rank": int(rank_text),
            "title": title,
            "heat": heat,
            "detail_url": detail_url,
        })

        if len(hot_list) >= limit:
            break

    if not hot_list:
        print("⚠️ 没有抓到任何热搜，可能被反爬或页面结构再改动，请检查网络／Cookie／Headers。")
        sys.exit(1)

    return hot_list

def fetch_detail_stats(detail_url):
    """ 根据详情页 HTML，提取阅读量、讨论量、互动量、原创量 """
    if not detail_url:
        return None, None, None, None
    resp = requests.get(detail_url, headers=HEADERS, timeout=10)
    resp.encoding = resp.apparent_encoding
    soup = BeautifulSoup(resp.text, "lxml")

    detail_data = soup.find("div", class_="detail-data")
    if not detail_data:
        return None, None, None, None

    reading = discussion = interaction = original = None
    for item in detail_data.find_all("div", class_="item-col"):
        label = item.find("div", class_="des").get_text(strip=True)
        num_div = item.find("div", class_="num")
        num_text = "".join(num_div.stripped_strings)
        if label == "阅读量":
            reading = num_text
        elif label == "讨论量":
            discussion = num_text
        elif label == "互动量":
            interaction = num_text
        elif label == "原创量":
            original = num_text

    return reading, discussion, interaction, original

def save_to_csv(data_list, filename="weibo_hot.csv"):
    """ 将结果列表保存为 CSV 文件 """
    fieldnames = [
        "rank", "title", "heat",
        "reading", "discussion", "interaction", "original",
        "detail_url"
    ]
    with open(filename, mode="w", encoding="utf-8-sig", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for item in data_list:
            writer.writerow({
                "rank": item.get("rank", ""),
                "title": item.get("title", ""),
                "heat": item.get("heat", ""),
                "reading": item.get("reading", ""),
                "discussion": item.get("discussion", ""),
                "interaction": item.get("interaction", ""),
                "original": item.get("original", ""),
                "detail_url": item.get("detail_url", ""),
            })
    print(f"✅ 已保存到文件：{filename}")

def main():
    # 1. 抓列表
    hot_list = get_hot_search_list(50)

    # 2. 遍历抓详情
    for item in hot_list:
        time.sleep(random.uniform(0.5, 1.5))
        reading, discussion, interaction, original = fetch_detail_stats(item["detail_url"])
        item["reading"] = reading
        item["discussion"] = discussion
        item["interaction"] = interaction
        item["original"] = original
        print(
            f"排{item['rank']:>2} | {item['title']:<30} | 热度 {item['heat']:<7} | "
            f"阅读 {reading or '-':<8} | 讨论 {discussion or '-':<6} | "
            f"互动 {interaction or '-':<6} | 原创 {original or '-'}"
        )

    # 3. 保存 CSV
    save_to_csv(hot_list)

if __name__ == "__main__":
    main()


排 1 | 淘宝免单答案清一色                      | 热度 1296606 | 阅读 -        | 讨论 -      | 互动 -      | 原创 -
排 2 | 全红婵家新房盖好可拎包入住                  | 热度 688267  | 阅读 -        | 讨论 -      | 互动 -      | 原创 -
排 3 | 一图速览一揽子金融增量政策                  | 热度 579872  | 阅读 -        | 讨论 -      | 互动 -      | 原创 -


KeyboardInterrupt: 

### 热搜榜

In [17]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
爬取微博热搜前50条：排名、标题、热度，
用 Selenium（手机 UA+无界面）渲染详情页，提取阅读量、讨论量、互动量、原创量，
并保存为 CSV（weibo_hot.csv）
"""

import csv
import random
import sys
import time
import urllib.parse

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# —— 全局配置 —— #

# PC 端列表页用的请求头
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 ",
    "cookie": "SCF=AvSKhFc03l4Rh_j2xSX7gPrzpY_yODV2-61lZhom9tPuwknwCFkXdReRlqoKjMmde6M4GnVvZyHIOVBHBC8-wko.; SINAGLOBAL=4918828378775.037.1734511921189; UOR=,,www.google.com; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWxuIxHTcvTlbN0RPpN9Wrv5JpX5KMhUgL.FoMEeh5pSoece052dJLoIp9jIg_Li--Ni-82iKn4i--4i-20iKy8; ALF=1749259237; SUB=_2A25FGHa1DeRhGeFM61IQ9i3KyDyIHXVmVPZ9rDV8PUJbkNAYLXXwkW1NQOJQREFBoWuCRYSs3KTFsZ2QH_IENf1m; _s_tentry=www.google.com; Apache=9360931081077.062.1746667312560; ULV=1746667312573:6:1:1:9360931081077.062.1746667312560:1742028222529",
}

# 移动端 
MOBILE_UA = (
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) "
    "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 "
    "Mobile/15E148 Safari/604.1"
)

# Selenium 无界面 
chrome_opts = Options()
chrome_opts.add_argument("--headless")
chrome_opts.add_argument("--disable-gpu")
chrome_opts.add_argument("--no-sandbox")
chrome_opts.add_argument(f"--user-agent={MOBILE_UA}")

driver = webdriver.Chrome(options=chrome_opts)


def get_hot_search_list(limit=50):
    """ 用 requests 抓 PC 端热搜列表，返回前 N 条的 rank/title/heat/detail_url """
    url = "https://s.weibo.com/top/summary?cate=realtimehot"
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.encoding = resp.apparent_encoding
    soup = BeautifulSoup(resp.text, "lxml")

    hot = []
    for tr in soup.find_all("tr"):
        td1 = tr.find("td", class_="td-01")
        td2 = tr.find("td", class_="td-02")
        if not td1 or not td2:
            continue
        rank = td1.get_text(strip=True)
        if not rank.isdigit():
            continue

        a = td2.find("a")
        title = a.get_text(strip=True) if a else ""
        heat = td2.find("span").get_text(strip=True) if td2.find("span") else ""

        href = a.get("href", "") if a else ""
        # PC 列表里 href 是 /weibo?q=...，直接跳到移动端渲染页面
        params = urllib.parse.parse_qs(urllib.parse.urlparse(href).query)
        q = params.get("q", [None])[0]
        if q:
            q_enc = urllib.parse.quote(q, safe="")
            detail_url = f"https://m.s.weibo.com/vtopic/detail_new?click_from=searchpc&q={q_enc}"
        else:
            detail_url = None

        hot.append({
            "rank": int(rank),
            "title": title,
            "heat": heat,
            "detail_url": detail_url
        })
        if len(hot) >= limit:
            break

    if not hot:
        print("⚠️ 热搜列表为空，请检查网络或被反爬拦截。")
        sys.exit(1)

    return hot


def fetch_detail_stats(detail_url):
    """
    用 Selenium（移动端 UA）渲染 detail_url，
    提取：阅读量、讨论量、互动量、原创量
    """
    driver.get(detail_url)
    time.sleep(2)  # 等待 JS 渲染

    soup = BeautifulSoup(driver.page_source, "lxml")
    box = soup.find("div", class_="detail-data")
    if not box:
        return None, None, None, None

    reading = discussion = interaction = original = None
    for item in box.find_all("div", class_="item-col"):
        label = item.find("div", class_="des").get_text(strip=True)
        num = "".join(item.find("div", class_="num").stripped_strings)
        if label == "阅读量":
            reading = num
        elif label == "讨论量":
            discussion = num
        elif label == "互动量":
            interaction = num
        elif label == "原创量":
            original = num

    return reading, discussion, interaction, original


def save_to_csv(data, filename="weibo_hot.csv"):
    """ 保存到 CSV """
    fields = [
        "rank", "title", "heat",
        "reading", "discussion", "interaction", "original",
        "detail_url"
    ]
    with open(filename, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fields)
        writer.writeheader()
        for row in data:
            writer.writerow(row)
    print(f"✅ 数据已保存到 {filename}")


def main():
    hot_list = get_hot_search_list(50)
    for item in hot_list:
        time.sleep(random.uniform(0.5, 1.5))
        r, d, i, o = fetch_detail_stats(item["detail_url"])
        item["reading"], item["discussion"], item["interaction"], item["original"] = r, d, i, o
        print(f"排{item['rank']:>2} | {item['title']:<20} | 热度 {item['heat']:<7} | "
              f"阅{r or '-':<8} 讨{d or '-':<6} 互{i or '-':<6} 原{o or '-'}")

    save_to_csv(hot_list)
    driver.quit()


if __name__ == "__main__":
    main()


排 1 | 淘宝免单答案清一色            | 热度 1306413 | 阅671.5万   讨4922   互7114   原3358
排 2 | 全红婵家新房盖好可拎包入住        | 热度 681177  | 阅1087.2万  讨486    互3176   原114
排 3 | 一图速览一揽子金融增量政策        | 热度 662678  | 阅2582万    讨2121   互4148   原467
排 4 | 泡泡玛特股份被创始股东高位清仓      | 热度 653847  | 阅3373.7万  讨3208   互1.4万   原876
排 5 | 中央气象台连发3预警           | 热度 590091  | 阅1561.8万  讨3625   互7256   原779
排 6 | 团队曝异瞳少年治沙为摆拍         | 热度 577478  | 阅4035.3万  讨3481   互2.6万   原1117
排 7 | 特朗普将宣布波斯湾改称阿拉伯湾      | 热度 512196  | 阅2249.5万  讨2074   互1.3万   原733
排 8 | 小女孩拿自家金饰去卖老板直接扣下     | 热度 390157  | 阅1674.6万  讨2461   互1.1万   原458
排 9 | 学生班级搞孤立老师坚定停课惩治      | 热度 356428  | 阅6665.8万  讨1.3万   互7万     原2533
排10 | 被刘嘉玲认证超级帅哥的含金量       | 热度 354191  | 阅2365.1万  讨2.1万   互7.5万   原6121
排11 | 杜淳妻子发长文给未婚女生十点建议     | 热度 346319  | 阅2940.7万  讨2245   互1.1万   原547
排12 | 印巴125架战机激战1小时        | 热度 342338  | 阅866.6万   讨1025   互7894   原281
排13 | 悟空                   | 热度 336351  | 阅2.6亿     讨17.1万  互30.7万  原2.3万
排14 | 美联储主席发布会提及关税超20次     | 热度 334026  | 阅95.

In [29]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
爬取微博热搜前50条：排名、标题、热度，
用 Selenium（手机 UA+无界面）渲染详情页，提取阅读量、讨论量、互动量、原创量，
并保存为 CSV（weibo_hot.csv）
"""

import csv
import random
import sys
import time
import urllib.parse

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# —— 全局配置 —— #

# PC 端列表页用的请求头
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 ",
    "cookie": "SCF=AvSKhFc03l4Rh_j2xSX7gPrzpY_yODV2-61lZhom9tPuwknwCFkXdReRlqoKjMmde6M4GnVvZyHIOVBHBC8-wko.; SINAGLOBAL=4918828378775.037.1734511921189; UOR=,,www.google.com; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWxuIxHTcvTlbN0RPpN9Wrv5JpX5KMhUgL.FoMEeh5pSoece052dJLoIp9jIg_Li--Ni-82iKn4i--4i-20iKy8; ALF=1749259237; SUB=_2A25FGHa1DeRhGeFM61IQ9i3KyDyIHXVmVPZ9rDV8PUJbkNAYLXXwkW1NQOJQREFBoWuCRYSs3KTFsZ2QH_IENf1m; _s_tentry=www.google.com; Apache=9360931081077.062.1746667312560; ULV=1746667312573:6:1:1:9360931081077.062.1746667312560:1742028222529",
}

# 移动端 
MOBILE_UA = (
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) "
    "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 "
    "Mobile/15E148 Safari/604.1"
)

# Selenium 无界面 
chrome_opts = Options()
chrome_opts.add_argument("--headless")
chrome_opts.add_argument("--disable-gpu")
chrome_opts.add_argument("--no-sandbox")
chrome_opts.add_argument(f"--user-agent={MOBILE_UA}")

driver = webdriver.Chrome(options=chrome_opts)


def get_hot_search_list(limit=50):
    """ 用 requests 抓 PC 端热搜列表，返回前 N 条的 rank/title/heat/detail_url """
    url = "https://s.weibo.com/top/summary?cate=entrank"
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.encoding = resp.apparent_encoding
    soup = BeautifulSoup(resp.text, "lxml")

    hot = []
    for tr in soup.find_all("tr"):
        td1 = tr.find("td", class_="td-01")
        td2 = tr.find("td", class_="td-02")
        if not td1 or not td2:
            continue
        rank = td1.get_text(strip=True)
        if not rank.isdigit():
            continue

        a = td2.find("a")
        title = a.get_text(strip=True) if a else ""
        heat = td2.find("span").get_text(strip=True) if td2.find("span") else ""

        href = a.get("href", "") if a else ""
        # PC 列表里 href 是 /weibo?q=...，直接跳到移动端渲染页面
        params = urllib.parse.parse_qs(urllib.parse.urlparse(href).query)
        q = params.get("q", [None])[0]
        if q:
            q_enc = urllib.parse.quote(q, safe="")
            detail_url = f"https://m.s.weibo.com/vtopic/detail_new?click_from=searchpc&q={q_enc}"
        else:
            detail_url = None

        hot.append({
            "rank": int(rank),
            "title": title,
            "heat": heat,
            "detail_url": detail_url
        })
        if len(hot) >= limit:
            break

    if not hot:
        print("⚠️ 热搜列表为空，请检查网络或被反爬拦截。")
        sys.exit(1)

    return hot


def fetch_detail_stats(detail_url):
    """
    用 Selenium（移动端 UA）渲染 detail_url，
    提取：阅读量、讨论量、互动量、原创量
    """
    driver.get(detail_url)
    time.sleep(2)  # 等待 JS 渲染

    soup = BeautifulSoup(driver.page_source, "lxml")
    box = soup.find("div", class_="detail-data")
    if not box:
        return None, None, None, None

    reading = discussion = interaction = original = None
    for item in box.find_all("div", class_="item-col"):
        label = item.find("div", class_="des").get_text(strip=True)
        num = "".join(item.find("div", class_="num").stripped_strings)
        if label == "阅读量":
            reading = num
        elif label == "讨论量":
            discussion = num
        elif label == "互动量":
            interaction = num
        elif label == "原创量":
            original = num

    return reading, discussion, interaction, original


def save_to_csv(data, filename="weibo_entrank.csv"):
    """ 保存到 CSV """
    fields = [
        "rank", "title", "heat",
        "reading", "discussion", "interaction", "original",
        "detail_url"
    ]
    with open(filename, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fields)
        writer.writeheader()
        for row in data:
            writer.writerow(row)
    print(f"✅ 数据已保存到 {filename}")


def main():
    hot_list = get_hot_search_list(50)
    for item in hot_list:
        time.sleep(random.uniform(0.5, 1.5))
        r, d, i, o = fetch_detail_stats(item["detail_url"])
        item["reading"], item["discussion"], item["interaction"], item["original"] = r, d, i, o
        print(f"排{item['rank']:>2} | {item['title']:<20} | 热度 {item['heat']:<7} | "
              f"阅{r or '-':<8} 讨{d or '-':<6} 互{i or '-':<6} 原{o or '-'}")

    save_to_csv(hot_list)
    driver.quit()


if __name__ == "__main__":
    main()


排 1 | 杜淳妻子发长文给未婚女生十点建议     | 热度 1185513 | 阅4122.8万  讨3074   互1.6万   原822
排 2 | 悟空                   | 热度 676709  | 阅2.6亿     讨17.3万  互32万    原2.5万


KeyboardInterrupt: 

In [69]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
爬取微博热搜前50条：标题、详情页 URL（移动端），
详情页使用 Selenium 渲染并提取阅读量、讨论量、互动量、原创量，
并保存为 CSV（weibo_socialevent.csv）
"""

import csv
import time
import random
import urllib.parse
import sys
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import requests

# —— 全局配置 —— #

# 请求头，仅用于列表页
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
    "Cookie": (
        "SCF=AvSKhFc03l4Rh_j2xSX7gPrzpY_yODV2-61lZhom9tPuwknwCFkXdReRlqoKjMmde6M4GnVvZyHIOVBHBC8-wko.; SINAGLOBAL=4918828378775.037.1734511921189; UOR=,,www.google.com; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWxuIxHTcvTlbN0RPpN9Wrv5JpX5KMhUgL.FoMEeh5pSoece052dJLoIp9jIg_Li--Ni-82iKn4i--4i-20iKy8; ALF=1749259237; SUB=_2A25FGHa1DeRhGeFM61IQ9i3KyDyIHXVmVPZ9rDV8PUJbkNAYLXXwkW1NQOJQREFBoWuCRYSs3KTFsZ2QH_IENf1m; _s_tentry=www.google.com; Apache=9360931081077.062.1746667312560; ULV=1746667312573:6:1:1:9360931081077.062.1746667312560:1742028222529"
    ),
}

# Selenium 配置：无界面 + 移动端 UA
MOBILE_UA = (
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) "
    "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 "
    "Mobile/15E148 Safari/604.1"
)
chrome_opts = Options()
chrome_opts.add_argument("--headless")
chrome_opts.add_argument("--disable-gpu")
chrome_opts.add_argument("--no-sandbox")
chrome_opts.add_argument(f"--user-agent={MOBILE_UA}")
# 启动 WebDriver
driver = webdriver.Chrome(options=chrome_opts)

def get_hot_search_list(limit=50):
    """获取热搜列表，返回标题与移动端详情 URL"""
    url = "https://s.weibo.com/top/summary?cate=socialevent"
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.encoding = resp.apparent_encoding
    soup = BeautifulSoup(resp.text, "lxml")

    hot_list = []
    for tr in soup.find_all("tr"):
        a_tag = tr.select_one("td.td-02 > a")
        if not a_tag or not a_tag.has_attr("href"):
            continue
        title = a_tag.get_text(strip=True)
        href = a_tag["href"]
        # 构造移动端详情页
        params = urllib.parse.parse_qs(urllib.parse.urlparse(href).query)
        q = params.get("q", [None])[0]
        if not q:
            continue
        q_enc = urllib.parse.quote(q, safe="")
        detail_url = f"https://m.s.weibo.com/vtopic/detail_new?click_from=searchpc&q={q_enc}"
        hot_list.append({"title": title, "detail_url": detail_url})
        if len(hot_list) >= limit:
            break

    if not hot_list:
        print("⚠️ 未获取到热搜列表，请检查网络或 headers。")
        sys.exit(1)
    return hot_list


def fetch_detail_stats(detail_url):
    """使用 Selenium 渲染详情页，并提取阅读量、讨论量、互动量、原创量"""
    driver.get(detail_url)
    # 等待页面 JS 渲染
    time.sleep(2)
    soup = BeautifulSoup(driver.page_source, "lxml")
    # 遍历 .item-col
    reading = discussion = interaction = original = None
    for item in soup.find_all("div", class_="item-col"):
        label_tag = item.find("div", class_="des")
        num_tag = item.find("div", class_="num")
        if not label_tag or not num_tag:
            continue
        label = label_tag.get_text(strip=True)
        num = "".join(num_tag.stripped_strings)
        if label == "阅读量":
            reading = num
        elif label == "讨论量":
            discussion = num
        elif label == "互动量":
            interaction = num
        elif label == "原创量":
            original = num
    return reading, discussion, interaction, original


def save_to_csv(data_list, filename="weibo_socialevent.csv"):
    """保存数据到 CSV 文件"""
    fieldnames = [
        "title", "detail_url",
        "reading", "discussion", "interaction", "original"
    ]
    with open(filename, mode="w", encoding="utf-8-sig", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for item in data_list:
            writer.writerow({
                "title": item.get("title", ""),
                "detail_url": item.get("detail_url", ""),
                "reading": item.get("reading", ""),
                "discussion": item.get("discussion", ""),
                "interaction": item.get("interaction", ""),
                "original": item.get("original", ""),
            })
    print(f"✅ 已保存到文件：{filename}")


def main():
    # 1. 获取热搜列表
    hot_list = get_hot_search_list(50)
    # 2. 遍历详情页
    for item in hot_list:
        time.sleep(random.uniform(0.5, 1.5))
        r, d, i, o = fetch_detail_stats(item["detail_url"])
        item.update({"reading": r, "discussion": d, "interaction": i, "original": o})
        print(f"{item['title']:<30} | 阅读:{r or '-'} 讨论:{d or '-'} 互动:{i or '-'} 原创:{o or '-'}")
    # 3. 保存 CSV
    save_to_csv(hot_list)
    # 4. 退出浏览器
    driver.quit()


if __name__ == "__main__":
    main()

#一图速览一揽子金融增量政策#                | 阅读:2915.2万 讨论:2412 互动:4816 原创:529
#泡泡玛特股份被创始股东高位清仓#              | 阅读:4186.7万 讨论:4175 互动:1.8万 原创:1045
#中央气象台连发3预警#                   | 阅读:2263.2万 讨论:5737 互动:1.2万 原创:1234
#乘务员错把酒当水倒给3岁小孩#               | 阅读:698.3万 讨论:305 互动:1674 原创:86
#今天世界微笑日#                      | 阅读:1.2亿 讨论:7.9万 互动:18.2万 原创:5456
#阳光灿烂的笑容有多治愈#                  | 阅读:541.4万 讨论:6692 互动:1.7万 原创:573
#小女孩拿自家金饰去卖老板直接扣下#             | 阅读:2223.4万 讨论:3141 互动:1.5万 原创:608
#学生班级搞孤立老师坚定停课惩治#              | 阅读:7197.5万 讨论:1.5万 互动:7.6万 原创:2662
#江苏将有8级以上阵风#                   | 阅读:503.2万 讨论:468 互动:960 原创:161
#特朗普号召美国民众勒紧裤腰带#               | 阅读:1526.9万 讨论:2956 互动:1.8万 原创:713
#施工方回应全红婵老家盖别墅#                | 阅读:278.8万 讨论:81 互动:697 原创:25
#美联储主席发布会提及关税超20次#             | 阅读:427.3万 讨论:351 互动:1235 原创:89
#北京公积金贷款利率下调#                  | 阅读:481.1万 讨论:279 互动:483 原创:132
#奈雪内部人士回应改名#                   | 阅读:357.3万 讨论:118 互动:390 原创:30
#中国留学生在马来西亚遭绑架后获救#             | 阅读:584.8万 讨论:310 互动:2069 原创:72
#美联储宣布维持基准利率不变#   

In [43]:
url = "https://s.weibo.com/top/summary?cate=socialevent"
resp = requests.get(url, headers=HEADERS, timeout=10)
resp.encoding = resp.apparent_encoding
soup = BeautifulSoup(resp.text, "lxml")

hot_list = []
for tr in soup.find_all("tr"):

    a_tag = tr.select_one("td.td-02 > a")
    title = a_tag.get_text(strip=True) if a_tag else ""

    href = a_tag["href"] if a_tag else ""

In [48]:
href

'/weibo?t=60&q=%23%E4%B8%AD%E5%BE%B7%E4%B8%A4%E5%9B%BD%E7%90%86%E5%BD%93%E6%90%BA%E6%89%8B%E6%8A%B5%E5%BE%A1%E7%96%BE%E9%A3%8E%E9%AA%A4%E9%9B%A8%23'

In [51]:
full_link = urllib.parse.urljoin("https://s.weibo.com", href)
params = urllib.parse.parse_qs(urllib.parse.urlparse(full_link).query)
q = params.get("q", [None])[0]
if q:
    q_enc = urllib.parse.quote(q, safe="")
    detail_url = f"https://m.s.weibo.com/vtopic/detail_new?click_from=searchpc&q={q_enc}"
else:
    detail_url = None

In [57]:
detail_url

'https://m.s.weibo.com/vtopic/detail_new?click_from=searchpc&q=%23%E4%B8%AD%E5%BE%B7%E4%B8%A4%E5%9B%BD%E7%90%86%E5%BD%93%E6%90%BA%E6%89%8B%E6%8A%B5%E5%BE%A1%E7%96%BE%E9%A3%8E%E9%AA%A4%E9%9B%A8%23'

In [68]:
resp = requests.get(detail_url, headers=HEADERS, timeout=10).json()
resp.encoding = resp.apparent_encoding
soup = BeautifulSoup(resp.text, "lxml")

reading = discussion = interaction = original = None
# 3. 遍历所有 .item-col，按 .des 文本分类
for item in soup.find_all("div", class_="item-col"):
    label = item.find("div", class_="des").get_text(strip=True)
    # num 中可能包含 <span> 万，需要拼接所有文本
    num = "".join(item.find("div", class_="num").stripped_strings)
    if label == "阅读量":
        reading = num
    elif label == "讨论量":
        discussion = num
    elif label == "互动量":
        interaction = num
    elif label == "原创量":
        original = num

# 4. 打印结果
print("阅读量：", reading)        # 460.7万
print("讨论量：", discussion)      # 512
print("互动量：", interaction)      # 2094
print("原创量：", original)        # 43

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [67]:
import requests, time
url = "https://m.s.weibo.com/vtopic/detail_new?click_from=searchpc&q=%23%E4%B8%AD%E5%BE%B7%E4%B8%A4%E5%9B%BD%E7%90%86%E5%BD%93%E6%90%BA%E6%89%8B%E6%8A%B5%E5%BE%A1%E7%96%BE%E9%A3%8E%E9%AA%A4%E9%9B%A8%23"
params = {"cate": "socialevent", "_": int(time.time()*1000)}
headers = {"User-Agent": "...", "Cookie": "..."}  # 如有需要
data = requests.get(url, params=params, headers=headers).json()
# data["data"]["socialevent"] 就是一列表
data

JSONDecodeError: Expecting value: line 1 column 1 (char 0)