# In [1]:
# 根据用户ID获得指定时间段的视频信息
import requests
import hashlib
import time
import datetime
import json
import os
import re
import jsonpath
import pandas as pd
from functools import reduce
from urllib.parse import urlencode
from openpyxl import Workbook, load_workbook
import urllib.parse
import traceback
import random
import threading

# --- Configuration ---
# Be sure to adjust the settings below before running.
CONFIG = {
    # Make sure this Excel path is correct.
    # The workbook holds the MIDs of the uploaders (UP) to crawl; they are read
    # from the first column (index 0).
    "excel_path": r"F:\Code\爬虫\UP主ID.xlsx",  # <--- replace with the path of your own Excel file

    # Pool of Bilibili login Cookie strings, used to keep the session stable and
    # to get past anti-crawling checks. Provide at least one valid Cookie.
    # How to obtain one: log in to Bilibili -> F12 developer tools -> Network ->
    # refresh the page -> pick any request -> Headers -> Request Headers ->
    # copy the full value of the 'Cookie' field.
    # NOTE(review): the entry below is a complete session cookie (it contains
    # SESSDATA and bili_jct) hard-coded in the source file; consider loading it
    # from an environment variable or an untracked file instead.
    "cookie_strings_pool": [
        "buvid3=384BBEAA-858B-A2E9-5B83-849BC33FA9C630622infoc; b_nut=1741237430; _uuid=4BBF9EA4-1FE10-3ECA-B936-D37E18A4104B730789infoc; enable_web_push=DISABLE; buvid4=3EC2F346-3DDE-A9E7-79EC-DD96376E9C5931240-025030605-UEW7z%2Frhc9FUd5uaNwO%2FDQ%3D%3D; buvid_fp=0f85c81c4fa8403529178b71f34e4055; rpdid=0zbfVJ1vzQ|7Qh8tt0I|2lF|3w1U6854; enable_feed_channel=ENABLE; theme-tip-show=SHOWED; theme-avatar-tip-show=SHOWED; theme-switch-show=SHOWED; bp_t_offset_355987571=1087855640585437184; header_theme_version=OPEN; bp_t_offset_435641086=1092808085770076160; b_lsid=C5FCC3FF_1987818959F; bsource=search_bing; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NTQ2MjA3MzIsImlhdCI6MTc1NDM2MTQ3MiwicGx0IjotMX0.HthfuyxINbFh1eFX5wF4oQ4exeKxXVeadrliEFJQJs4; bili_ticket_expires=1754620672; bp_t_offset_3493111795812748=1097463538261164032; CURRENT_FNVAL=2000; csrf_state=a2a7469d11ecb38b0cf9dc6600a52207; SESSDATA=720472af%2C1769913559%2Ccf013%2A82CjDtXnMmG5j4aZlVtvGckgP6ZiLzw9NpGJNtXFdWCasXC0Gi-LK17RfYB5OJH4EGhzESVmlPaEVUQW1rc0pLc0VLdDVtc2p2Tlp6ZVR5c25EQXZnc1NjRVFOcU0yOGtTQV9BVldzdUFvbGtDODZEWjZ6VC1OUW5uMl9ZQ1Z3a2Y5UU93VWE1UHFBIIEC; bili_jct=71f8500b3f14f993f7c50a54de0638cd; DedeUserID=3546919328549020; DedeUserID__ckMd5=34f98e6ac83665d9; sid=5awtpcjx; bp_t_offset_3546919328549020=1097463632750444544; home_feed_column=4; browser_resolution=763-834"# more Cookie strings can be added here
    ],
    "cookie_rotate_interval_seconds": 900,  # Cookie rotation interval in seconds (900 s = 15 minutes)

    # Publication-date filter for videos: 2023-07-01 through 2024-01-31.
    "start_date": "2023-07-01",  # first day (inclusive), format YYYY-MM-DD
    "end_date": "2024-01-31",  # last day (inclusive), format YYYY-MM-DD

    # Slice of the uploader list (as read from the Excel file) to process in this run.
    "start_index": 750,  # first position to process (0-based)
    "end_index": None,  # end position (None means "up to the end of the list")

    # Maximum number of list pages crawled per uploader; the video list is
    # requested with 40 entries per page, so tune this to the expected volume.
    "max_pages_per_up": 20,
}

def sanitize_filename(name):
    """Return ``name`` with every character that is illegal in a file name removed.

    The stripped characters are the backslash, slash, asterisk, question mark,
    colon, double quote, angle brackets and the pipe.
    """
    illegal_chars = re.compile(r'[\\/*?:"<>|]')
    return illegal_chars.sub("", name)

def _normalize_mid(value):
    """Render one spreadsheet cell as a MID string without a spurious ``.0`` suffix."""
    # pandas stores an integer column as float as soon as it contains a blank
    # cell, so 12345 comes back as 12345.0; convert such values back to "12345".
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def read_up_list_from_excel(file_path, start_index=0, end_index=None):
    """Read uploader MIDs from the first column of an Excel file.

    Args:
        file_path: Path of the workbook to read.
        start_index: 0-based position of the first MID to return; negative
            values are clamped to 0.
        end_index: Position one past the last MID to return; ``None`` (or a
            value beyond the list) means "up to the end".

    Returns:
        The selected MIDs as a list of strings. An empty list is returned when
        the file is missing, holds no usable MID, ``start_index`` is past the
        end, or reading fails for any reason (the error is printed, not raised).
    """
    try:
        if not os.path.exists(file_path):
            print(f"❌ 错误: '{file_path}' 文件不存在!")
            return []
        print(f"📊 正在读取Excel文件: {file_path}")
        df = pd.read_excel(file_path)

        # First column only; skip blank cells and cells that are empty after stripping.
        raw_cells = df.iloc[:, 0].tolist()
        up_mids = [_normalize_mid(cell) for cell in raw_cells if pd.notna(cell)]
        up_mids = [mid for mid in up_mids if mid]

        # Drop a leftover header cell (first entry is text rather than a numeric MID).
        if up_mids and not up_mids[0].isdigit():
            up_mids = up_mids[1:]

        total_count = len(up_mids)
        print(f"✅ 成功读取到 {total_count} 个UP主ID。")

        # Clamp the requested range to the available data.
        if end_index is None:
            end_index = total_count
        else:
            end_index = min(end_index, total_count)

        start_index = max(0, start_index)

        if total_count == 0:
            print("⚠️ Excel文件中没有找到有效的UP主ID。")
            return []
        if start_index >= total_count:
            print(f"⚠️ 指定的开始索引 {start_index} 超出Excel中UP主ID的总数 {total_count}。")
            return []

        selected_up_mids = up_mids[start_index:end_index]

        print(f"🎯 选择处理范围: [{start_index}:{end_index}]，共 {len(selected_up_mids)} 个UP主。")
        return selected_up_mids
    except Exception as e:
        # Best effort: a broken workbook must not abort the whole run.
        print(f"❌ 读取Excel文件失败: {str(e)}")
        return []

def write_results_to_excel(df: pd.DataFrame, file_path: str):
    """Write ``df`` to ``file_path`` as an Excel sheet without the index column.

    Any failure is reported on stdout instead of being raised.
    """
    try:
        df.to_excel(file_path, index=False)
        saved_message = f"💾 数据已保存到 '{file_path}'"
        print(saved_message)
    except Exception as exc:
        print("❌ 写入Excel出错: " + str(exc))

class BilibiliCrawler:
    DEFAULT_REQUEST_TIMEOUT = 60  # 请求超时时间
    DEFAULT_RETRY_COUNT = 5  # 重试次数
    DEFAULT_BACKOFF_FACTOR = 9  # 重试间隔因子 (1s, 3s, 9s, ...)

    def __init__(self, config):
        self.config = config

        self.cookie_strings_pool = config["cookie_strings_pool"]
        self.cookie_rotate_interval_seconds = config["cookie_rotate_interval_seconds"]
        self.current_cookie_index = 0
        self.last_cookie_change_time = time.time()
        self.cookies = {}  # requests library cookie jar
        
        # User-Agent池
        self.user_agent_pool = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:128.0) Gecko/20100101 Firefox/128.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/126.0.2592.56',
            'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.140 Mobile Safari/537.36',
            'Mozilla/5.0 (iPhone; CPU iPhone OS 17_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Mobile/15E148 Safari/604.1',
        ]

        self.headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Referer': 'https://space.bilibili.com/',
            'Origin': 'https://space.bilibili.com',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'Cookie': ''  # 初始为空，由_set_current_cookie设置
        }
        
        self.wbi_keys = None  # 用于视频列表API的WBI密钥
        self.mixinKeyEncTab = [  # WBI签名用到的固定数组
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
            33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
            61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
            36, 20, 34, 44, 52
        ]

        # 初始化时设置第一个 Cookie
        self._set_current_cookie()  

        # 统计数据
        self.batch_stats = {
            'total_processed': 0,
            'success_count': 0,
            'failed_count': 0,
            'total_videos': 0,
        }

        # 时间筛选范围
        self.start_timestamp = int(datetime.datetime.strptime(config["start_date"], "%Y-%m-%d").timestamp())
        self.end_timestamp = int(datetime.datetime.strptime(config["end_date"] + " 23:59:59", "%Y-%m-%d %H:%M:%S").timestamp())
        self.start_date = config["start_date"]
        self.end_date = config["end_date"]

    def _parse_cookies(self, cookie_string):
        """解析cookie字符串为字典"""
        cookies = {}
        for item in cookie_string.split(';'):
            if '=' in item:
                key, value = item.strip().split('=', 1)
                cookies[key] = value
        return cookies

    def _set_current_cookie(self):
        """设置当前使用的 Cookie"""
        if not self.cookie_strings_pool:
            print("❌ Cookie池为空，将无法进行Cookie轮换。")
            self.cookies = {}  # requests library cookie jar
            self.headers['Cookie'] = ""  # http header cookie string
            return

        cookie_string = self.cookie_strings_pool[self.current_cookie_index]
        self.cookies = self._parse_cookies(cookie_string)
        self.headers['Cookie'] = cookie_string  
        self.last_cookie_change_time = time.time()
        print(f"✅ Cookie已切换至池中索引 {self.current_cookie_index} 的Cookie。")

    def _rotate_cookie_if_needed(self):
        """根据时间间隔轮换Cookie"""
        if not self.cookie_strings_pool or len(self.cookie_strings_pool) <= 1:
            return

        if (time.time() - self.last_cookie_change_time) >= self.cookie_rotate_interval_seconds:
            self.current_cookie_index = (self.current_cookie_index + 1) % len(self.cookie_strings_pool)
            self._set_current_cookie()
            print(f"🔄 Cookie 达到 {self.cookie_rotate_interval_seconds} 秒轮换周期，已切换到新 Cookie (索引: {self.current_cookie_index})。")

    def _make_request(self, url, method="GET", params=None, data=None, json_data=None, extra_headers=None):
        self._rotate_cookie_if_needed()  # 每次请求前检查并轮换Cookie

        current_attempt = 0
        request_headers = self.headers.copy()  # headers已包含User-Agent和当前Cookie

        request_headers['User-Agent'] = random.choice(self.user_agent_pool)  # 随机选择User-Agent

        if extra_headers:
            request_headers.update(extra_headers)

        while current_attempt < self.DEFAULT_RETRY_COUNT:  # 使用类属性
            try:
                if method.upper() == "GET":
                    response = requests.get(
                        url,
                        headers=request_headers,
                        cookies=self.cookies,  # 使用self.cookies对象，它会在_set_current_cookie中更新
                        params=params,
                        timeout=self.DEFAULT_REQUEST_TIMEOUT  # 使用类属性
                    )
                elif method.upper() == "POST":
                    response = requests.post(
                        url,
                        headers=request_headers,
                        cookies=self.cookies,  # 使用self.cookies对象
                        params=params,
                        data=data,
                        json=json_data,
                        timeout=self.DEFAULT_REQUEST_TIMEOUT  # 使用类属性
                    )
                else:
                    raise ValueError(f"不支持的HTTP方法: {method}")

                if response.status_code == 200:
                    return response
                elif response.status_code in [403, 412, 429, 500, 502, 503, 504]:
                    print(f"HTTP错误: {response.status_code}. 尝试重试...")
                    time.sleep(self.DEFAULT_BACKOFF_FACTOR ** current_attempt)  # 使用类属性
                else:
                    print(f"非200 HTTP状态码: {response.status_code}.")
                    return response

            except requests.exceptions.Timeout:
                print("请求超时。尝试重试...")
                time.sleep(self.DEFAULT_BACKOFF_FACTOR ** current_attempt)
            except requests.exceptions.RequestException as e:
                print(f"请求异常: {e}. 尝试重试...")
                time.sleep(self.DEFAULT_BACKOFF_FACTOR ** current_attempt)
            except Exception as e:
                print(f"未知错误: {e}.")
                return None

            current_attempt += 1

        print(f"❌ 达到最大重试次数，请求失败: {url}")
        return None

    def test_cookie_validity(self):
        """测试Cookie池中当前Cookie的有效性"""
        test_url = "https://api.bilibili.com/x/web-interface/nav"
        
        # 使用当前加载到self.cookies和self.headers['Cookie']的Cookie进行测试
        response = self._make_request(test_url)

        if response and response.status_code == 200:
            try:
                data = response.json()
                if data.get('code') == 0:
                    user_info = data.get('data', {})
                    return True, {
                        'username': user_info.get('uname', '未知'),
                        'uid': user_info.get('mid', '未知'),
                        'level': user_info.get('level_info', {}).get('current_level', 0),
                        'coins': user_info.get('money', 0),
                        'vip_status': user_info.get('vipStatus', 0)
                    }
                else:
                    return False, f"API返回错误: {data.get('message', '未知错误')} (Code: {data.get('code')})"
            except json.JSONDecodeError:
                return False, "无法解析Cookie测试API的响应为JSON。"
        else:
            status_code = response.status_code if response else 'N/A'
            return False, f"HTTP请求失败或无响应，状态码: {status_code}"

    # --- B站WBI签名算法 (用于视频列表和详情) ---
    def get_mixin_key(self, orig: str) -> str:
        return ''.join([orig[i] for i in self.mixinKeyEncTab])[:32]

    def enc_wbi(self, params: dict, img_key: str, sub_key: str) -> dict:
        mixin_key = self.get_mixin_key(img_key + sub_key)
        curr_time = round(time.time())
        params['wts'] = curr_time

        sorted_params = sorted(params.items())
        query = urlencode(sorted_params)

        wbi_sign = hashlib.md5((query + mixin_key).encode()).hexdigest()
        params['w_rid'] = wbi_sign
        return params

    def get_wbi_keys(self):
        # 优化：每次只在需要时才获取 WBI 密钥，并缓存
        if hasattr(self, '_cached_wbi_keys') and self._cached_wbi_keys:
            return self._cached_wbi_keys

        response = self._make_request(
            'https://api.bilibili.com/x/web-interface/nav'
        )

        if response and response.status_code == 200:
            try:
                json_content = response.json()
                if json_content.get('code') == 0:
                    wbi_img = json_content['data']['wbi_img']
                    img_url: str = wbi_img['img_url']
                    sub_url: str = wbi_img['sub_url']
                    img_key = img_url.rsplit('/', 1)[1].split('.')[0]
                    sub_key = sub_url.rsplit('/', 1)[1].split('.')[0]
                    self._cached_wbi_keys = (img_key, sub_key)  # 缓存密钥
                    return img_key, sub_key
                else:
                    print(f"获取WBI密钥失败（API错误）: {json_content.get('message')} (Code: {json_content.get('code')})")
            except json.JSONDecodeError:
                print("获取WBI密钥响应JSON解析失败。")
        else:
            status_code = response.status_code if response else 'N/A'
            print(f"获取WBI密钥HTTP请求失败，状态码: {status_code}")

        return None, None

    def generate_api_url(self, mid, page=1, ps=40):
        if not self.wbi_keys:
            img_key, sub_key = self.get_wbi_keys()
            if not img_key or not sub_key:
                print("❌ 无法生成视频列表API的wbi密钥")
                return None
            self.wbi_keys = (img_key, sub_key)
        else:
            img_key, sub_key = self.wbi_keys

        params = {
            'mid': str(mid),
            'ps': str(ps),
            'tid': '0',
            'pn': str(page),
            'keyword': '',
            'order': 'pubdate',
            'platform': 'web',
            'web_location': '333.1387',
            'order_avoided': 'true'
        }

        signed_params = self.enc_wbi(params, img_key, sub_key)
        base_url = "https://api.bilibili.com/x/space/wbi/arc/search"
        url = f"{base_url}?{urlencode(signed_params)}"

        return url

    # --- 获取UP主信息 ---
    def get_up_info(self, mid):
        # 方案1: 尝试获取详细信息 (需要wbi)
        url = f"https://api.bilibili.com/x/space/wbi/acc/info?mid={mid}"
        response = self._make_request(url)

        if response and response.status_code == 200:
            try:
                data = response.json()
                if data.get('code') == 0:
                    return data['data']
                else:
                    print(f"详细信息获取失败: {data.get('message', '未知错误')} (Code: {data.get('code')})")
            except json.JSONDecodeError:
                print("详细信息响应JSON解析失败。")

        # 方案2: 尝试获取基础信息 (可能不需要wbi)
        print("🔄 尝试获取基础UP主信息...")
        basic_url = f"https://api.bilibili.com/x/space/acc/info?mid={mid}"
        response = self._make_request(basic_url)

        if response and response.status_code == 200:
            try:
                data = response.json()
                if data.get('code') == 0:
                    return data['data']
                else:
                    print(f"基础信息获取失败: {data.get('message', '未知错误')} (Code: {data.get('code')})")
            except json.JSONDecodeError:
                print("基础信息响应JSON解析失败。")

        # 方案3: 从视频页面获取信息 (如果前两者都失败)
        try:
            print("🔄 尝试从视频页面获取UP主信息...")
            first_url = self.generate_api_url(mid, 1)
            if first_url:
                response = self._make_request(first_url)
                if response and response.status_code == 200:
                    data = response.json()
                    if data.get('code') == 0:
                        vlist = data.get('data', {}).get('list', {}).get('vlist', [])
                        if vlist:
                            first_video = vlist[0]
                            return {
                                'name': first_video.get('author', f'UP主{mid}'),
                                'mid': mid,
                                'follower': 0,
                                'video': len(vlist)
                            }
            print("❌ 从视频页面获取UP主信息失败。")
        except Exception as e:
            print(f"从视频页面获取信息时发生异常: {e}")

        return None

    # --- 获取视频详细信息（包含播放、点赞、收藏等） ---
    def get_video_detail(self, aid):
        if not aid:
            return None

        url = "https://api.bilibili.com/x/web-interface/wbi/view/detail"
        if not self.wbi_keys:
            self.get_wbi_keys()
        if not self.wbi_keys:
            print("❌ 无法获取视频详情API的wbi密钥，跳过详情获取。")
            return None

        img_key, sub_key = self.wbi_keys
        params = {"aid": aid}
        signed_params = self.enc_wbi(params, img_key, sub_key)

        response = self._make_request(
            url,
            params=signed_params
        )

        if not response or response.status_code != 200:
            print(f"获取视频 {aid} 详情失败，HTTP状态码: {response.status_code if response else 'N/A'}")
            return None

        try:
            json_data = response.json()
            if json_data.get('code') != 0:
                print(f"详情API返回错误: {json_data.get('message', '未知错误')} (Code: {json_data.get('code')})")
                return None

        except json.JSONDecodeError:
            print("详情JSON解析失败")
            return None

        view = jsonpath.jsonpath(json_data, '$..View.stat')
        tags = jsonpath.jsonpath(json_data, '$..Tags')
        Cards = jsonpath.jsonpath(json_data, '$..Card..card')
        duration_seconds_list = jsonpath.jsonpath(json_data, '$..View.duration')
        duration_seconds = duration_seconds_list[0] if duration_seconds_list else 'N/A'

        pages_list = jsonpath.jsonpath(json_data, '$..View.pages')
        pages_count = len(pages_list[0]) if pages_list and pages_list[0] else 0

        copyright_val = jsonpath.jsonpath(json_data, '$..View.copyright')
        copyright = '原创' if copyright_val and copyright_val[0] == 1 else '转载'

        video_state_val = jsonpath.jsonpath(json_data, '$..View.state')
        video_state = video_state_val[0] if video_state_val else 'N/A'

        related_list = jsonpath.jsonpath(json_data, '$..Related')
        related_count = len(related_list[0]) if related_list and related_list[0] else 0

        mid = name = fans = sign = Official = level_info = 'N/A'
        sex = vip_type = vip_status = nameplate = 'N/A'

        if Cards:
            card_data = Cards[0]
            mid = card_data.get('mid', 'N/A')
            name = card_data.get('name', 'N/A')
            fans = card_data.get('fans', 'N/A')
            sign = card_data.get('sign', 'N/A')
            sex = card_data.get('sex', 'N/A')

            if card_data.get('Official'):
                Official = card_data.get('Official').get('title', '')
            if card_data.get('level_info'):
                level_info = f"{Official}/{card_data.get('level_info').get('current_level')}"

            vip_data = card_data.get('vip', {})
            vip_type_val = vip_data.get('type')
            if vip_type_val == 1:
                vip_type = '月度大会员'
            elif vip_type_val == 2:
                vip_type = '年度及以上大会员'
            else:
                vip_type = '无'
            vip_status = '有效' if vip_data.get('status') == 1 else '无效'

            nameplate_data = card_data.get('nameplate', {})
            nameplate = nameplate_data.get('name', '无')

        region = 'N/A'
        if tags and tags[0]:
            region_list = [tag.get('tag_name') for tag in tags[0]]
            region = '、'.join(region_list)

        like = collect = coin = share = 'N/A'
        if view:
            view_data = view[0]
            like = view_data.get('like', 'N/A')
            collect = view_data.get('favorite', 'N/A')
            coin = view_data.get('coin', 'N/A')
            share = view_data.get('share', 'N/A')

        return (like, collect, coin, share, region, mid, name, fans, sign, level_info, duration_seconds,
                pages_count, copyright, video_state, related_count, sex, vip_type, vip_status, nameplate)

    # --- 时间筛选相关功能 ---
    def is_video_in_time_range(self, created_timestamp):
        return self.start_timestamp <= created_timestamp <= self.end_timestamp

    def format_timestamp(self, timestamp):
        return datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")

    def timestamp_to_datetime(self, timestamp):
        dt_object = datetime.datetime.fromtimestamp(timestamp)
        return dt_object.strftime('%Y-%m-%d %H:%M:%S')

    # --- 视频列表爬取逻辑 ---
    def get_filtered_videos_from_url(self, url):
        response = self._make_request(url)

        if not response or response.status_code != 200:
            return {
                'success': False,
                'message': f"❌ HTTP错误或无响应: {response.status_code if response else 'N/A'}"
            }

        try:
            data = response.json()
            if data.get('code') == 0:
                vlist = data.get('data', {}).get('list', {}).get('vlist', [])
                page_info = data.get('data', {}).get('page', {})

                filtered_videos = []
                all_videos_info = []

                for video in vlist:
                    created_time = video.get('created', 0)
                    video_info = {
                        'created_timestamp': created_time,
                        'created_date': self.format_timestamp(created_time),
                        'in_range': self.is_video_in_time_range(created_time),
                        'video_data': video
                    }
                    all_videos_info.append(video_info)

                    if self.is_video_in_time_range(created_time):
                        filtered_videos.append(video)

                return {
                    'success': True,
                    'all_videos': vlist,
                    'all_videos_info': all_videos_info,
                    'filtered_videos': filtered_videos,
                    'total_count': page_info.get('count', 0),
                    'current_page': page_info.get('pn', 1),
                    'message': f"✅ 页面获取{len(vlist)}个视频，筛选后{len(filtered_videos)}个"
                }
            else:
                return {
                    'success': False,
                    'message': f"❌ API返回错误: {data.get('message', '未知错误')} (Code: {data.get('code')})"
                }
        except json.JSONDecodeError:
            return {
                'success': False,
                'message': "❌ JSON解析失败"
            }
        except Exception as e:
            return {
                'success': False,
                'message': f"❌ 请求处理异常: {str(e)}"
            }

    def generate_api_urls(self, mid, max_pages=50):
        print(f"🎯 为UP主 {mid} 生成API URL（时间范围: {self.start_date} 到 {self.end_date}）...")

        first_url = self.generate_api_url(mid, 1)
        if not first_url:
            print("❌ 无法生成第一页URL")
            return {}

        print(f"📋 测试第一页URL...")
        result = self.get_filtered_videos_from_url(first_url)
        if not result['success']:
            print(f"❌ 第1页测试失败: {result['message']}")
            print("🔄 尝试重新获取wbi密钥...")
            self.wbi_keys = None
            first_url = self.generate_api_url(mid, 1)
            if first_url:
                result = self.get_filtered_videos_from_url(first_url)
                if not result['success']:
                    print(f"❌ 重试后仍然失败: {result['message']}")
                    return {}
            else:
                return {}

        total_videos = result.get('total_count', 0)
        filtered_count = len(result.get('filtered_videos', []))
        videos_per_page = 40

        print(f"📊 第一页统计: 总视频 {len(result.get('all_videos', []))} 个，筛选后 {filtered_count} 个")

        if total_videos > 0:
            actual_pages = (total_videos + videos_per_page - 1) // videos_per_page
            print(f"📊 UP主统计: 总视频数 {total_videos}, 预计页数 {actual_pages}")
        else:
            print("📊 无法获取视频总数，默认生成前几页")
            actual_pages = max_pages

        pages_to_generate = min(actual_pages, max_pages)

        api_urls = {}
        api_urls[1] = first_url

        for page in range(2, pages_to_generate + 1):
            url = self.generate_api_url(mid, page)
            if url:
                api_urls[page] = url
                print(f"✅ 生成第{page}页API URL")
                time.sleep(7)
            else:
                print(f"❌ 生成第{page}页URL失败")
                break

        return api_urls

    def crawl_videos_from_page_with_time_filter(self, page):
        if page not in self.api_urls:
            print(f"第 {page} 页的URL不存在")
            return [], False

        url = self.api_urls[page]

        result = self.get_filtered_videos_from_url(url)
        if not result['success']:
            print(f"第 {page} 页获取失败: {result['message']}")
            return [], False

        filtered_vlist = result['filtered_videos']
        all_videos_info = result['all_videos_info']

        print(result['message'])

        should_continue = False
        if all_videos_info:
            oldest_video_time = min(info['created_timestamp'] for info in all_videos_info)
            newest_video_time = max(info['created_timestamp'] for info in all_videos_info)

            print(f"📅 当前页视频发布时间范围: {self.format_timestamp(oldest_video_time)} 到 {self.format_timestamp(newest_video_time)}")

            if newest_video_time < self.start_timestamp:
                print(f"⏹️  当前页面最新视频发布时间早于筛选开始时间，停止搜索后续页面。")
                should_continue = False
            else:
                should_continue = True

        if not filtered_vlist and should_continue:
            print(f'⚠️ 第 {page} 页没有符合时间范围的视频，但根据时间判断可能需要继续。')
        elif not filtered_vlist and not should_continue:
            print(f'⏹️ 第 {page} 页没有符合时间范围的视频，且已超出时间范围，停止搜索。')
            return [], False

        videos = []
        print(f"第 {page} 页开始处理 {len(filtered_vlist)} 个符合时间范围的视频")

        # 调整每个视频处理后的延时
        DEFAULT_VIDEO_PROCESS_SLEEP = 2.5  # 从3秒减少到2秒

        for i, video_data in enumerate(filtered_vlist):
            title = video_data.get('title', 'N/A')
            aid = video_data.get('aid')
            description = video_data.get('description', 'N/A')
            bvid = video_data.get('bvid', 'N/A')
            video_url = f"https://www.bilibili.com/video/{bvid}"
            length = video_data.get('length', 'N/A')
            play = video_data.get('play', 0)
            comment = video_data.get('comment', 0)
            review = video_data.get('video_review', 0)
            created = self.timestamp_to_datetime(video_data.get('created', 0))
            pic_url = video_data.get('pic', 'N/A')
            is_union_video_val = video_data.get('is_union_video', 0)
            is_union_video = '是' if is_union_video_val == 1 else '否'

            detail_info = self.get_video_detail(aid)
            if detail_info:
                (like, collect, coin, share, region, mid, name, fans, sign, level_info, duration_seconds,
                 pages_count, copyright, video_state, related_count, sex, vip_type, vip_status, nameplate) = detail_info
            else:
                (like, collect, coin, share, region, mid, name, fans, sign, level_info, duration_seconds,
                 pages_count, copyright, video_state, related_count, sex, vip_type, vip_status, nameplate) = (
                    'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
                    'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A'
                )

            video_info = {
                'title': title, 'description': description, 'bvid': bvid, 'video_url': video_url,
                'length': length, 'duration_seconds': duration_seconds,
                'play': play, 'comment': comment, 'like': like, 'review': review, 'collect': collect,
                'coin': coin, 'share': share, 'region': region, 'created': created, 'mid': mid,
                'name': name, 'fans': fans, 'sign': sign, 'level_info': level_info,
                'pic_url': pic_url, 'is_union_video': is_union_video, 'pages_count': pages_count,
                'copyright': copyright, 'video_state': video_state, 'related_count': related_count,
                'sex': sex, 'vip_type': vip_type, 'vip_status': vip_status, 'nameplate': nameplate,
            }

            videos.append(video_info)
            print(f"✅ 处理视频 {i+1}/{len(filtered_vlist)}: {title} (发布于: {created})")

            time.sleep(DEFAULT_VIDEO_PROCESS_SLEEP)

        return videos, should_continue

    def save_to_excel(self, videos, filename):
        """Write the collected video dicts to a new workbook at ``filename``.

        The sheet starts with a header row; each following row holds one
        video, with columns in the order of the table below.
        """
        # (column header, key in the video dict) in output order.
        columns = [
            ('视频标题', 'title'),
            ('视频描述', 'description'),
            ('视频BV号', 'bvid'),
            ('视频URL', 'video_url'),
            ('封面URL', 'pic_url'),
            ('视频时长', 'length'),
            ('视频时长秒', 'duration_seconds'),
            ('是否合作视频', 'is_union_video'),
            ('多P数量', 'pages_count'),
            ('版权', 'copyright'),
            ('视频状态', 'video_state'),
            ('相关视频数量', 'related_count'),
            ('播放', 'play'),
            ('评论', 'comment'),
            ('点赞', 'like'),
            ('弹幕', 'review'),
            ('收藏', 'collect'),
            ('投币', 'coin'),
            ('分享', 'share'),
            ('所属分区', 'region'),
            ('发布时间', 'created'),
            ('UID', 'mid'),
            ('昵称', 'name'),
            ('性别', 'sex'),
            ('粉丝数', 'fans'),
            ('签名', 'sign'),
            ('是否认证/等级', 'level_info'),
            ('会员类型', 'vip_type'),
            ('会员状态', 'vip_status'),
            ('粉丝勋章', 'nameplate'),
        ]

        wb = Workbook()
        ws = wb.active
        ws.title = "视频列表"

        ws.append([header for header, _ in columns])
        for video in videos:
            ws.append([video.get(field) for _, field in columns])

        wb.save(filename)
        print(f"💾 数据已保存到 {filename}")

    # --- 单个UP主完整处理 ---
    def process_single_up_complete(self, mid, max_pages=50):  # 移除include_ai_summary参数
        try:
            print(f"\n{'='*60}")
            print(f"🎯 开始处理UP主: {mid}")
            print(f"{'='*60}")

            self.wbi_keys = None  # 重置WBI密钥

            up_info = self.get_up_info(mid)
            if not up_info:
                up_name = f'UP主{mid}'
                up_info = {'name': up_name, 'follower': 0, 'video': 0}
            else:
                up_name = up_info.get('name', f'UP主{mid}')

            print(f"👤 UP主昵称: {up_name}")
            print(f"👥 粉丝数: {up_info.get('follower', 0):,}")
            print(f"📺 投稿视频数: {up_info.get('video', 0):,}")

            print(f"\n📋 生成API URL列表...")
            self.api_urls = self.generate_api_urls(mid, max_pages)

            if not self.api_urls:
                return {
                    'success': False,
                    'up_name': up_name,
                    'up_mid': mid,
                    'video_count': 0,
                    'error': '无法生成API URLs（可能WBI密钥获取失败或UP主无视频）'
                }

            print(f"✅ 成功生成 {len(self.api_urls)} 个API URL")

            print(f"\n🚀 开始爬取指定时间范围内的视频数据...")
            all_videos = []

            # 调整这里的页面间延时
            DEFAULT_PAGE_INTERVAL_SLEEP = 6  # 从8秒减少到5秒

            for page in sorted(self.api_urls.keys()):
                print(f"\n🔄 正在爬取第 {page} 页...")
                videos_on_page, should_continue = self.crawl_videos_from_page_with_time_filter(page)

                if videos_on_page:
                    all_videos.extend(videos_on_page)
                    print(f"✅ 第 {page} 页完成，获取 {len(videos_on_page)} 个符合时间范围的视频")
                else:
                    print(f"⚠️ 第 {page} 页没有符合时间范围的视频。")

                if not should_continue:
                    print(f"🛑 已超出时间范围或所有视频均已处理，停止搜索后续页面。共搜索了 {page} 页")
                    break

                if page < max(self.api_urls.keys()):
                    print(f"⏰ 页面间延时 {DEFAULT_PAGE_INTERVAL_SLEEP} 秒...")
                    time.sleep(DEFAULT_PAGE_INTERVAL_SLEEP)

            if not all_videos:
                return {
                    'success': False,
                    'up_name': up_name,
                    'up_mid': mid,
                    'video_count': 0,
                    'error': '没有找到符合时间范围的视频，或视频列表API获取失败'
                }

            safe_up_name = sanitize_filename(up_name)  # 使用全局函数
            time_range_str = f"{self.start_date.replace('-', '')}_to_{self.end_date.replace('-', '')}"
            video_filename = f"{safe_up_name}_{mid}_{time_range_str}_视频列表.xlsx"
            self.save_to_excel(all_videos, video_filename)

            print(f"\n🎉 视频爬取完成！")
            print(f"📊 时间范围内的视频总数: {len(all_videos)} 个")
            print(f"📁 视频文件保存为: {video_filename}")

            return {
                'success': True,
                'up_name': up_name,
                'up_mid': mid,
                'video_count': len(all_videos),
                'video_filename': video_filename,
            }

        except Exception as e:
            error_msg = f"处理UP主{mid}时发生异常: {str(e)}"
            print(f"❌ {error_msg}")
            print(traceback.format_exc())
            return {
                'success': False,
                'up_name': f'UP主{mid}',
                'up_mid': mid,
                'video_count': 0,
                'error': error_msg
            }

    # --- 批量处理主函数 ---
    def batch_crawl_up_videos_complete(self, excel_path, start_index=0, end_index=None, max_pages_per_up=50):  # 移除include_ai_summary参数
        print("="*100)
        print("🚀 B站UP主批量完整爬取工具（视频信息版）")
        print("="*100)

        # 检查Cookie池
        if not self.cookie_strings_pool:
            print("❌ Cookie池为空！请在config中提供有效的Cookie字符串。程序将退出。")
            return None
        print(f"✅ Cookie池已加载，共 {len(self.cookie_strings_pool)} 个Cookie。")

        print("🔍 测试Cookie有效性 (使用池中第一个Cookie)...")
        is_valid, result = self.test_cookie_validity()

        if not is_valid:
            print(f"❌ 第一个Cookie无效或无法连接到B站API: {result}")
            print("请检查您的网络连接或更新 Cookie 字符串。程序将退出。")
            return None
        else:
            print(f"✅ Cookie有效！当前登录用户: {result['username']} (UID: {result['uid']})")


        up_mids = read_up_list_from_excel(excel_path, start_index, end_index)  # 使用全局函数
        if not up_mids:
            print("❌ 没有有效的UP主ID可处理")
            return None

        print(f"📅 时间筛选范围: {self.start_date} 到 {self.end_date}")
        print(f"📄 每个UP主最大爬取页数: {max_pages_per_up}")

        total_ups = len(up_mids)
        overall_results = []
        failed_ups = []

        print(f"\n🎯 开始批量处理 {total_ups} 个UP主...")

        # 调整这里的UP主间延时
        DEFAULT_UP_INTERVAL_SLEEP = 10  # 从15秒减少到10秒

        for i, mid in enumerate(up_mids, 1):
            print(f"\n{'🔥' * 20}")
            print(f"进度: [{i}/{total_ups}] 处理UP主: {mid}")
            print(f"{'🔥' * 20}")

            try:
                result = self.process_single_up_complete(mid, max_pages_per_up)  # 移除include_ai_summary参数
                overall_results.append(result)

                if result['success']:
                    self.batch_stats['success_count'] += 1
                    self.batch_stats['total_videos'] += result['video_count']
                    print(f"✅ 成功! 获取 {result['video_count']} 个视频")
                else:
                    self.batch_stats['failed_count'] += 1
                    failed_ups.append({
                        'mid': mid,
                        'error': result.get('error', '未知错误')
                    })
                    print(f"❌ 失败: {result.get('error', '未知错误')}")

                self.batch_stats['total_processed'] += 1

                print(f"📊 当前统计: 成功 {self.batch_stats['success_count']}/{i}, "
                      f"失败 {self.batch_stats['failed_count']}, "
                      f"总视频 {self.batch_stats['total_videos']}")

                if i < total_ups:
                    print(f"⏰ UP主间延时 {DEFAULT_UP_INTERVAL_SLEEP} 秒...")
                    time.sleep(DEFAULT_UP_INTERVAL_SLEEP)

            except Exception as e:
                error_msg = f"处理UP主{mid}时发生严重错误: {str(e)}"
                print(f"❌ {error_msg}")
                self.batch_stats['failed_count'] += 1
                failed_ups.append({
                    'mid': mid,
                    'error': error_msg
                })
                overall_results.append({
                    'success': False,
                    'up_name': f'UP主{mid}',
                    'up_mid': mid,
                    'video_count': 0,
                    'error': error_msg
                })

        self.generate_batch_report_complete(overall_results, failed_ups)

        return {
            'total_processed': self.batch_stats['total_processed'],
            'success_count': self.batch_stats['success_count'],
            'failed_count': self.batch_stats['failed_count'],
            'total_videos': self.batch_stats['total_videos'],
            'results': overall_results,
            'failed_ups': failed_ups
        }

    def generate_batch_report_complete(self, results, failed_ups):
        print("\n" + "="*100)
        print("📊 批量处理完成报告（视频信息版）")
        print("="*100)

        print(f"✅ 处理完成: {self.batch_stats['total_processed']} 个UP主")
        print(f"🎉 成功: {self.batch_stats['success_count']} 个")
        print(f"❌ 失败: {self.batch_stats['failed_count']} 个")
        print(f"📺 总视频数: {self.batch_stats['total_videos']} 个")

        if self.batch_stats['success_count'] > 0:
            print(f"\n✅ 成功处理的UP主:")
            for result in results:
                if result['success']:
                    print(f"  - {result['up_name']} ({result.get('up_mid', 'N/A')}): "
                          f"{result['video_count']} 个视频，文件: {result['video_filename']}")

        if failed_ups:
            print(f"\n❌ 失败的UP主:")
            for failed in failed_ups:
                print(f"  - {failed['mid']}: {failed['error']}")

        report_filename = f"批量处理报告_视频信息版_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
        try:
            with open(report_filename, 'w', encoding='utf-8') as f:
                f.write("B站UP主批量视频信息爬取处理报告\n")
                f.write("="*50 + "\n")
                f.write(f"处理时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"时间范围: {self.start_date} 到 {self.end_date}\n")
                f.write(f"总处理数: {self.batch_stats['total_processed']}\n")
                f.write(f"成功数: {self.batch_stats['success_count']}\n")
                f.write(f"失败数: {self.batch_stats['failed_count']}\n")
                f.write(f"总视频数: {self.batch_stats['total_videos']}\n\n")

                if self.batch_stats['success_count'] > 0:
                    f.write("成功处理的UP主:\n")
                    for result in results:
                        if result['success']:
                            f.write(f"  - {result['up_name']} ({result.get('up_mid', 'N/A')}): "
                                    f"{result['video_count']} 个视频，文件: {result['video_filename']}\n")

                if failed_ups:
                    f.write("\n失败的UP主:\n")
                    for failed in failed_ups:
                        f.write(f"  - {failed['mid']}: {failed['error']}\n")

            print(f"📄 处理报告已保存到: {report_filename}")

        except Exception as e:
            print(f"⚠️ 保存报告文件失败: {str(e)}")


def main():
    """Validate CONFIG, check the cookie, run the batch crawl and print totals.

    Returns early (with a printed message) when no non-empty cookie string or
    no Excel path is configured, or when the cookie validity check fails.
    """
    banner = "="*100
    print(banner)
    print("🎯 B站UP主视频信息批量爬取工具")
    print(banner)

    # Validate the configuration: at least one non-empty cookie and an Excel path.
    cookie_pool = CONFIG['cookie_strings_pool']
    if not (cookie_pool and any(cookie_pool)):
        print("❌ 请在CONFIG字典中设置至少一个有效的Cookie字符串到 'cookie_strings_pool'!")
        return
    if not CONFIG['excel_path']:
        print("❌ 请在CONFIG字典中设置你的excel文件路径!")
        return

    # Echo the configuration; cookie strings are summarised by count only.
    print("\n--- 配置信息 ---")
    for key, value in CONFIG.items():
        shown = f"已配置 {len(value)} 个Cookie" if key == 'cookie_strings_pool' else value
        print(f"  {key}: {shown}")
    print("------------------\n")

    crawler = BilibiliCrawler(CONFIG)

    # Cookie validity check before any crawling starts.
    print("\n🚀 正在测试Cookie有效性 (使用池中第一个Cookie)...")
    cookie_ok, cookie_info = crawler.test_cookie_validity()

    if not cookie_ok:
        print(f"❌ 第一个Cookie无效或无法连接到B站API: {cookie_info}")
        print("请检查您的网络连接或更新 'cookie_strings_pool' 中的Cookie字符串。程序将退出。")
        return
    print(f"✅ Cookie有效！当前登录用户: {cookie_info.get('username', '未知')} (UID: {cookie_info.get('uid', '未知')})")

    excel_path = CONFIG['excel_path']
    first_index = CONFIG['start_index']
    last_index = CONFIG['end_index']
    last_label = last_index if last_index is not None else '末尾'
    print(f"\n📊 正在读取UP主MID列表从 {excel_path} (索引范围: {first_index} 到 {last_label})...")

    result_stats = crawler.batch_crawl_up_videos_complete(
        excel_path=excel_path,
        start_index=first_index,
        end_index=last_index,
        max_pages_per_up=CONFIG['max_pages_per_up'],
    )

    if not result_stats:
        print("\n❌ 批量处理失败或被中止！")
        return

    print("\n🎉 批量处理完成！")
    print("📊 最终统计:")
    print(f"  总计处理UP主: {result_stats['total_processed']} 个")
    print(f"  成功处理UP主: {result_stats['success_count']} 个")
    print(f"  失败处理UP主: {result_stats['failed_count']} 个")
    print(f"  获取视频总数: {result_stats['total_videos']} 个")

    failed_entries = result_stats['failed_ups']
    if failed_entries:
        print("\n  以下UP主处理失败，请查看报告获取详情:")
        for entry in failed_entries:
            print(f"    - MID: {entry['mid']}, 错误: {entry['error']}")


# Script entry point: call main() only when this module is executed directly,
# not when it is imported by other code.
if __name__ == "__main__":
    main()

🎯 B站UP主视频信息批量爬取工具

--- 配置信息 ---
  excel_path: F:\Code\爬虫\UP主ID.xlsx
  cookie_strings_pool: 已配置 1 个Cookie
  cookie_rotate_interval_seconds: 900
  start_date: 2023-07-01
  end_date: 2024-01-31
  start_index: 750
  end_index: None
  max_pages_per_up: 20
------------------

✅ Cookie已切换至池中索引 0 的Cookie。

🚀 正在测试Cookie有效性 (使用池中第一个Cookie)...
✅ Cookie有效！当前登录用户: 测试用账号114514 (UID: 3546919328549020)

📊 正在读取UP主MID列表从 F:\Code\爬虫\UP主ID.xlsx (索引范围: 750 到 末尾)...
🚀 B站UP主批量完整爬取工具（视频信息版）
✅ Cookie池已加载，共 1 个Cookie。
🔍 测试Cookie有效性 (使用池中第一个Cookie)...
✅ Cookie有效！当前登录用户: 测试用账号114514 (UID: 3546919328549020)
📊 正在读取Excel文件: F:\Code\爬虫\UP主ID.xlsx
✅ 成功读取到 2067 个UP主ID。
🎯 选择处理范围: [750:2067]，共 1317 个UP主。
📅 时间筛选范围: 2023-07-01 到 2024-01-31
📄 每个UP主最大爬取页数: 20

🎯 开始批量处理 1317 个UP主...

🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥
进度: [1/1317] 处理UP主: 3493088737626697
🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥

🎯 开始处理UP主: 3493088737626697
详细信息获取失败: 访问权限不足 (Code: -403)
🔄 尝试获取基础UP主信息...
基础信息获取失败: 请求过于频繁，请稍后再试 (Code: -799)
🔄 尝试从视频页面获取UP主信息...


KeyboardInterrupt: 