# 冷热神经元分析

数值分析代码

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from tqdm import tqdm  # 进度条
import os
import json

# Enable expandable segments to avoid fragmentation (add this before imports if needed)
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# 配置
# model_name =    "Qwen/Qwen2.5-1.5B-Instruct"
# model_name =    "meta-llama/Llama-3.2-1B-Instruct" # Llama3.2-1B (Instruct版本)
# model_name =    "Qwen/Qwen1.5-1.8B-Chat"          # Qwen1.5-1.8B (Chat版本)
# model_name =    "google/gemma-2-2b-it"             # Gemma2-2B (Instruct版本)
# model_name =    "microsoft/phi-2"                  # Phi-2-2.7B (基础版本，无Instruct)
# model_name =    "microsoft/Phi-3.5-mini-instruct"  # Phi-3.5-Mini-Instruct (3.8B Mini版本)
# model_name =    "Qwen/Qwen1.5-4B-Chat"            # Qwen1.5 4B (Chat版本)
# model_name =    "THUDM/chatglm2-6b"                # ChatGLM2 6B
# model_name =    "facebook/opt-6.7b"                # OPT6.7B (基础版本)
# model_name =    "mistralai/Mistral-7B-Instruct-v0.1" # Mistral-7B (Instruct版本)
# model_name =    "Qwen/Qwen2-7B-Instruct"          # Qwen2-7B (Instruct版本)
# model_name =    "meta-llama/Meta-Llama-3-8B-Instruct" # LLaMA3-8B (Instruct版本)
num_samples = 500  # 每个数据集的样本数量，可调整为1000+
max_length = 128   # 最大序列长度，控制内存
activation_threshold = 1e-3  # 激活阈值（用于判断是否激活）
hot_freq_threshold = 0.3     # 热神经元频率阈值（e.g., 激活频率 > 50%视为热，可调整）

# 数据集列表：(dataset_name, config, split, text_column)
# text_column 是提取文本的键；如果需要组合多个列，在代码中处理
# 更新：gsm8k的config从None改为"main"
datasets_to_use = [
    ("wikitext", "wikitext-2-raw-v1", "test", "text"),  # 文本数据集
    ("gsm8k", "main", "train", "question"),  # 数学问题（使用'main'配置）
    ("cc_news", None, "train", "text"),  # 新闻文本（bookcorpus的替代）
    ("squad", None, "train", "question"),  # 问答（将组合question + context）
    ("cnn_dailymail", "3.0.0", "train", "article")  # 新闻文章
]

# 加载模型和tokenizer（只加载一次）
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    model.eval()  # 设置为评估模式
    print("模型加载成功！")
    # Release any initial cached memory after loading
    torch.cuda.empty_cache()
except Exception as e:
    print(f"加载模型失败：{e}")
    exit(1)

# 定义钩子函数（移到钩子注册之前）
handles = []
def hook_fn(layer_idx):
    def fn(module, input, output):
        if isinstance(output, tuple):
            activations_per_layer[layer_idx].append(output[0].detach().cpu())
        else:
            activations_per_layer[layer_idx].append(output.detach().cpu())
    return fn

# 获取层数和注册钩子（添加模型特定处理）
if "qwen" in model_name.lower() or "llama" in model_name.lower() or "gemma" in model_name.lower() or "mistral" in model_name.lower() or "phi" in model_name.lower():
    # 标准：model.model.layers[i].mlp
    num_layers = len(model.model.layers)
    for i in range(num_layers):
        hook = model.model.layers[i].mlp.register_forward_hook(hook_fn(i))
        handles.append(hook)
elif "opt" in model_name.lower():
    # OPT：model.decoder.layers[i].fc2 (捕获FFN输出)
    num_layers = len(model.decoder.layers)
    for i in range(num_layers):
        hook = model.decoder.layers[i].fc2.register_forward_hook(hook_fn(i))
        handles.append(hook)
elif "chatglm" in model_name.lower():
    # ChatGLM：model.transformer.encoder.layers[i].mlp
    num_layers = len(model.transformer.encoder.layers)
    for i in range(num_layers):
        hook = model.transformer.encoder.layers[i].mlp.register_forward_hook(hook_fn(i))
        handles.append(hook)
else:
    raise ValueError(f"不支持的模型: {model_name}。请添加自定义钩子路径。")
print(f"模型总层数: {num_layers}")

# 循环每个数据集
for ds_name, ds_config, ds_split, text_column in datasets_to_use:
    print(f"\n=== 处理数据集: {ds_name} (split: {ds_split}) ===")
    try:
        # 加载数据集
        if ds_config:
            dataset = load_dataset(ds_name, ds_config, split=ds_split)
        else:
            dataset = load_dataset(ds_name, split=ds_split)
        
        # 提取前num_samples个样本的文本
        text_list = dataset[text_column][:num_samples]  # 获取文本列表
        
        # 特殊处理：如果数据集需要组合多个列（如squad）
        if ds_name == "squad":
            context_list = dataset['context'][:num_samples]
            text_list = [q + " " + c for q, c in zip(text_list, context_list)]  # 组合question + context
        
        samples = [text[:max_length] for text in text_list if isinstance(text, str) and text.strip()]  # 截断、过滤无效
        if len(samples) < num_samples:
            print(f"警告: 数据集只有 {len(samples)} 个有效样本")
        if len(samples) == 0:
            print("跳过: 无有效样本")
            continue
        print(f"使用 {len(samples)} 个样本进行分析")
        
        # 重置激活列表
        activations_per_layer = [[] for _ in range(num_layers)]
        
        # 批量运行推理
        with torch.no_grad():
            for text in tqdm(samples, desc="处理样本"):
                if not text.strip():
                    continue
                inputs = tokenizer(text, return_tensors="pt", max_length=max_length, truncation=True).to(device)
                _ = model(**inputs)  # 运行推理，钩子捕获
        
        # 分析激活（与之前相同，处理变长序列）
        if not any(activations_per_layer):
            print("未捕获到激活值，跳过分析")
            continue
        
        # 计算整体平均激活密度
        densities = []
        for layer_acts in activations_per_layer:
            for act in layer_acts:
                if act.numel() > 0:
                    density = (act.abs() > activation_threshold).float().mean().item()
                    densities.append(density)
        if densities:
            avg_density = sum(densities) / len(densities)
        else:
            avg_density = 0.0
            print("无有效激活数据，无法计算密度")
        
        # 为每个层计算冷热神经元百分比，并收集数据（改进：基于激活频率）
        layers_data = []
        for layer_idx in range(num_layers):
            layer_acts = activations_per_layer[layer_idx]
            if not layer_acts:
                continue
            # 计算每个神经元的激活频率（跨样本平均比例）
            neuron_freq = []  # 每个样本的频率向量列表
            for act in layer_acts:
                if act.numel() > 0:
                    freq = (act.abs() > activation_threshold).float().mean(dim=[0, 1])  # 每个神经元的样本内平均激活比例
                    neuron_freq.append(freq.unsqueeze(0))  # [1, hidden_size]
            if not neuron_freq:
                continue
            # 跨样本平均频率 [hidden_size]
            aggregated_freq = torch.cat(neuron_freq, dim=0).mean(dim=0)
            total_neurons = aggregated_freq.size(0)
            # 热神经元：平均频率 > hot_freq_threshold
            hot_neurons = (aggregated_freq > hot_freq_threshold).sum().item()
            hot_pct = (hot_neurons / total_neurons * 100) if total_neurons > 0 else 0
            cold_pct = 100 - hot_pct
            layers_data.append({
                "layer_id": layer_idx,
                "total_neurons": total_neurons,
                "hot_neurons": hot_neurons,
                "hot_pct": hot_pct,
                "cold_neurons": total_neurons - hot_neurons,
                "cold_pct": cold_pct
            })
        
        # 保存为JSON：创建模型名称的文件夹（清理斜杠为下划线）
        model_folder = model_name.replace("/", "_").replace("-", "_")  # 清理为有效文件夹名，例如 "meta_llama_Llama_3_2_1B_Instruct"
        os.makedirs(model_folder, exist_ok=True)  # 创建文件夹，如果不存在
        json_filename = f"{ds_name}_activation_results.json"
        json_path = os.path.join(model_folder, json_filename)  # 完整路径
        results = {
            "dataset": ds_name,
            "split": ds_split,
            "num_samples": len(samples),
            "average_density": avg_density,
            "layers": layers_data
        }
        with open(json_path, 'w') as f:
            json.dump(results, f, indent=4)
        print(f"结果保存为: {json_path}")
        
        # 清空GPU缓存，准备下一个数据集
        torch.cuda.empty_cache()
    
    except Exception as e:
        print(f"处理 {ds_name} 失败: {e}")

# 移除钩子（在所有数据集后）
for handle in handles:
    handle.remove()
print("\n所有数据集处理完成！")

使用设备: cuda


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.31it/s]


模型加载成功！
模型总层数: 26

=== 处理数据集: wikitext (split: test) ===
警告: 数据集只有 321 个有效样本
使用 321 个样本进行分析


处理样本: 100%|██████████| 321/321 [00:11<00:00, 28.22it/s]


结果保存为: google_gemma_2_2b_it/wikitext_activation_results.json

=== 处理数据集: gsm8k (split: train) ===
使用 500 个样本进行分析


处理样本: 100%|██████████| 500/500 [00:16<00:00, 30.80it/s]


结果保存为: google_gemma_2_2b_it/gsm8k_activation_results.json

=== 处理数据集: cc_news (split: train) ===
使用 500 个样本进行分析


处理样本: 100%|██████████| 500/500 [00:15<00:00, 32.72it/s]


结果保存为: google_gemma_2_2b_it/cc_news_activation_results.json

=== 处理数据集: squad (split: train) ===
使用 500 个样本进行分析


处理样本: 100%|██████████| 500/500 [00:14<00:00, 34.58it/s]


结果保存为: google_gemma_2_2b_it/squad_activation_results.json

=== 处理数据集: cnn_dailymail (split: train) ===
使用 500 个样本进行分析


处理样本: 100%|██████████| 500/500 [00:15<00:00, 32.50it/s]


结果保存为: google_gemma_2_2b_it/cnn_dailymail_activation_results.json

所有数据集处理完成！


统计激活百分比、稀疏度

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from tqdm import tqdm
import os
import json
import numpy as np

# Enable expandable segments to avoid fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# 配置
# model_name = "microsoft/phi-2"
# model_name = "Qwen/Qwen2.5-1.5B-Instruct"
# model_name = "meta-llama/Llama-3.2-1B-Instruct"
# model_name = "Qwen/Qwen1.5-1.8B-Chat"
# model_name = "google/gemma-2-2b-it"
model_name = "microsoft/Phi-3.5-mini-instruct"
# model_name = "Qwen/Qwen1.5-4B-Chat"
# model_name = "THUDM/chatglm2-6b"
# model_name = "facebook/opt-6.7b"
# model_name = "mistralai/Mistral-7B-Instruct-v0.1"
# model_name = "Qwen/Qwen2-7B-Instruct"
# model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

num_samples = 500
max_length = 128
activation_threshold = 1e-3  # 激活阈值

# 数据集列表
datasets_to_use = [
    ("wikitext", "wikitext-2-raw-v1", "test", "text"),
    ("gsm8k", "main", "train", "question"),
    ("cc_news", None, "train", "text"),
    ("squad", None, "train", "question"),
    ("cnn_dailymail", "3.0.0", "train", "article")
]

# 加载模型和tokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # 添加pad_token如果不存在
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    model.eval()
    print("模型加载成功！")
    print(f"模型类型: {type(model).__name__}")
    torch.cuda.empty_cache()
except Exception as e:
    print(f"加载模型失败：{e}")
    exit(1)

# 定义钩子函数 - 修改为实时计算激活百分比
activation_percentages_per_layer = []
handles = []

def hook_fn(layer_idx):
    def fn(module, input, output):
        with torch.no_grad():
            if isinstance(output, tuple):
                act = output[0]
            else:
                act = output
            
            # 计算当前批次的激活百分比
            if act.dim() == 3:
                # 对每个token计算激活神经元数，然后平均
                activated = (act.abs() > activation_threshold).float()
                # 沿着batch和sequence维度计算每个神经元的激活率
                activation_rate = activated.mean(dim=[0, 1])  # [hidden_size]
                # 计算激活神经元的百分比
                activation_pct = activation_rate.mean().item() * 100
            else:
                # 处理其他维度的情况
                activated = (act.abs() > activation_threshold).float()
                activation_pct = activated.mean().item() * 100
            
            activation_percentages_per_layer[layer_idx].append(activation_pct)
    return fn

# 获取层数和注册钩子 - 改进模型类型检测
model_type = type(model).__name__.lower()
print(f"检测到的模型类型: {model_type}")

# 根据模型类型选择正确的层路径
if "opt" in model_type:
    # OPT模型：model.model.decoder.layers
    num_layers = len(model.model.decoder.layers)
    for i in range(num_layers):
        hook = model.model.decoder.layers[i].fc2.register_forward_hook(hook_fn(i))
        handles.append(hook)
elif "chatglm" in model_type:
    # ChatGLM模型
    num_layers = len(model.transformer.encoder.layers)
    for i in range(num_layers):
        hook = model.transformer.encoder.layers[i].mlp.register_forward_hook(hook_fn(i))
        handles.append(hook)
elif hasattr(model, 'model') and hasattr(model.model, 'layers'):
    # 大多数现代模型（Qwen, LLaMA, Gemma, Mistral, Phi等）
    num_layers = len(model.model.layers)
    for i in range(num_layers):
        # 检查MLP模块的具体结构
        layer = model.model.layers[i]
        if hasattr(layer, 'mlp'):
            hook = layer.mlp.register_forward_hook(hook_fn(i))
            handles.append(hook)
        elif hasattr(layer, 'feed_forward'):
            hook = layer.feed_forward.register_forward_hook(hook_fn(i))
            handles.append(hook)
        else:
            print(f"警告: 第{i}层未找到MLP模块")
elif hasattr(model, 'transformer') and hasattr(model.transformer, 'h'):
    # GPT-2风格的模型
    num_layers = len(model.transformer.h)
    for i in range(num_layers):
        hook = model.transformer.h[i].mlp.register_forward_hook(hook_fn(i))
        handles.append(hook)
else:
    # 尝试自动检测层结构
    print("尝试自动检测模型结构...")
    # 打印模型结构以便调试
    for name, module in model.named_modules():
        if 'layer' in name.lower() or 'block' in name.lower():
            print(f"发现层: {name}")
    raise ValueError(f"无法自动检测模型 {model_name} 的层结构。请手动添加支持。")

print(f"模型总层数: {num_layers}")
print(f"成功注册 {len(handles)} 个钩子")

# 存储所有数据集的结果
all_dataset_results = {}

# 循环每个数据集
for ds_name, ds_config, ds_split, text_column in datasets_to_use:
    print(f"\n=== 处理数据集: {ds_name} (split: {ds_split}) ===")
    try:
        # 加载数据集
        if ds_config:
            dataset = load_dataset(ds_name, ds_config, split=ds_split)
        else:
            dataset = load_dataset(ds_name, split=ds_split)
        
        # 提取样本文本
        text_list = dataset[text_column][:num_samples]
        
        # 特殊处理squad数据集
        if ds_name == "squad":
            context_list = dataset['context'][:num_samples]
            text_list = [q + " " + c for q, c in zip(text_list, context_list)]
        
        samples = [text for text in text_list if isinstance(text, str) and text.strip()]
        if len(samples) == 0:
            print("跳过: 无有效样本")
            continue
        print(f"使用 {len(samples)} 个样本进行分析")
        
        # 重置每层的激活百分比列表
        activation_percentages_per_layer = [[] for _ in range(num_layers)]
        
        # 批量运行推理
        with torch.no_grad():
            for text in tqdm(samples, desc="处理样本"):
                if not text.strip():
                    continue
                # 截断文本
                text = text[:max_length]
                inputs = tokenizer(text, return_tensors="pt", max_length=max_length, 
                                 truncation=True, padding=True).to(device)
                try:
                    _ = model(**inputs)
                except Exception as e:
                    print(f"推理错误: {e}")
                    continue
        
        # 计算每层的平均激活百分比
        layer_avg_activations = []
        for layer_idx in range(num_layers):
            layer_percentages = activation_percentages_per_layer[layer_idx]
            if layer_percentages:
                avg_pct = np.mean(layer_percentages)
                layer_avg_activations.append({
                    "layer_id": layer_idx,
                    "avg_activation_percentage": round(avg_pct, 4),
                    "num_samples": len(layer_percentages)
                })
            else:
                layer_avg_activations.append({
                    "layer_id": layer_idx,
                    "avg_activation_percentage": 0.0,
                    "num_samples": 0
                })
        
        # 计算所有层的整体平均激活百分比
        all_layer_percentages = []
        for layer_data in layer_avg_activations:
            if layer_data["num_samples"] > 0:
                all_layer_percentages.append(layer_data["avg_activation_percentage"])
        
        overall_avg = np.mean(all_layer_percentages) if all_layer_percentages else 0.0
        
        # 保存该数据集的结果
        dataset_result = {
            "dataset": ds_name,
            "split": ds_split,
            "num_samples": len(samples),
            "activation_threshold": activation_threshold,
            "overall_avg_activation_percentage": round(overall_avg, 4),
            "layers": layer_avg_activations
        }
        
        all_dataset_results[ds_name] = dataset_result
        
        # 打印摘要
        print(f"\n{ds_name} 数据集结果:")
        print(f"  - 整体平均激活百分比: {overall_avg:.2f}%")
        if all_layer_percentages:
            print(f"  - 各层激活百分比范围: {min(all_layer_percentages):.2f}% - {max(all_layer_percentages):.2f}%")
        
        # 清空GPU缓存
        torch.cuda.empty_cache()
    
    except Exception as e:
        print(f"处理 {ds_name} 失败: {e}")
        import traceback
        traceback.print_exc()

# 保存所有结果到一个综合JSON文件
model_folder = model_name.replace("/", "_").replace("-", "_")
os.makedirs(model_folder, exist_ok=True)

# 保存各数据集的详细结果
summary_filename = "activation_summary_all_datasets.json"
summary_path = os.path.join(model_folder, summary_filename)
summary_results = {
    "model": model_name,
    "model_type": model_type,
    "num_layers": num_layers,
    "activation_threshold": activation_threshold,
    "datasets": all_dataset_results
}
with open(summary_path, 'w') as f:
    json.dump(summary_results, f, indent=4)
print(f"\n综合结果保存为: {summary_path}")

# 生成简化的比较表格
comparison_filename = "activation_comparison.json"
comparison_path = os.path.join(model_folder, comparison_filename)
comparison_data = {
    "model": model_name,
    "activation_threshold": activation_threshold,
    "dataset_averages": {}
}
for ds_name, result in all_dataset_results.items():
    comparison_data["dataset_averages"][ds_name] = {
        "overall_activation_percentage": result["overall_avg_activation_percentage"],
        "num_samples": result["num_samples"]
    }
with open(comparison_path, 'w') as f:
    json.dump(comparison_data, f, indent=4)
print(f"比较结果保存为: {comparison_path}")

# 移除钩子
for handle in handles:
    handle.remove()
print("\n所有数据集处理完成！")

使用设备: cuda


Downloading shards: 100%|██████████| 2/2 [01:57<00:00, 58.99s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.44it/s]


加载模型失败：CUDA out of memory. Tried to allocate 96.00 MiB. GPU 0 has a total capacity of 23.65 GiB of which 9.06 MiB is free. Including non-PyTorch memory, this process has 23.51 GiB memory in use. Of the allocated memory 23.02 GiB is allocated by PyTorch, and 52.24 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
检测到的模型类型: phiforcausallm
模型总层数: 32
成功注册 32 个钩子

=== 处理数据集: wikitext (split: test) ===
使用 321 个样本进行分析


处理样本: 100%|██████████| 321/321 [00:07<00:00, 40.57it/s]



wikitext 数据集结果:
  - 整体平均激活百分比: 99.70%
  - 各层激活百分比范围: 99.53% - 99.85%

=== 处理数据集: gsm8k (split: train) ===
使用 500 个样本进行分析


处理样本: 100%|██████████| 500/500 [00:12<00:00, 39.95it/s]



gsm8k 数据集结果:
  - 整体平均激活百分比: 99.71%
  - 各层激活百分比范围: 99.64% - 99.84%

=== 处理数据集: cc_news (split: train) ===
使用 500 个样本进行分析


处理样本: 100%|██████████| 500/500 [00:12<00:00, 39.99it/s]



cc_news 数据集结果:
  - 整体平均激活百分比: 99.71%
  - 各层激活百分比范围: 99.64% - 99.83%

=== 处理数据集: squad (split: train) ===
使用 500 个样本进行分析


处理样本: 100%|██████████| 500/500 [00:12<00:00, 40.38it/s]



squad 数据集结果:
  - 整体平均激活百分比: 99.72%
  - 各层激活百分比范围: 99.66% - 99.84%

=== 处理数据集: cnn_dailymail (split: train) ===
使用 500 个样本进行分析


处理样本: 100%|██████████| 500/500 [00:12<00:00, 39.94it/s]


cnn_dailymail 数据集结果:
  - 整体平均激活百分比: 99.71%
  - 各层激活百分比范围: 99.64% - 99.83%

综合结果保存为: microsoft_Phi_3.5_mini_instruct/activation_summary_all_datasets.json
比较结果保存为: microsoft_Phi_3.5_mini_instruct/activation_comparison.json

所有数据集处理完成！





添加循环，顺序执行

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from tqdm import tqdm
import os
import json
import numpy as np
import gc
import time
from datetime import datetime

# Enable expandable segments to avoid fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# 配置
# 定义所有要测试的模型列表
MODEL_LIST = [
    "Qwen/Qwen2.5-1.5B-Instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
    "Qwen/Qwen1.5-1.8B-Chat",
    "google/gemma-2-2b-it",
    "microsoft/phi-2",
    "microsoft/Phi-3.5-mini-instruct",
    "Qwen/Qwen1.5-4B-Chat",
    "THUDM/chatglm2-6b",
    "facebook/opt-6.7b",
    "mistralai/Mistral-7B-Instruct-v0.1",
    "Qwen/Qwen2-7B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct"
]

# 通用配置
num_samples = 500
max_length = 128
activation_threshold = 1e-3
batch_size = 8

# 数据集列表
datasets_to_use = [
    ("wikitext", "wikitext-2-raw-v1", "test", "text"),
    ("gsm8k", "main", "train", "question"),
    ("cc_news", None, "train", "text"),
    ("squad", None, "train", "question"),
    ("cnn_dailymail", "3.0.0", "train", "article")
]

# 定义清理显存的函数
def clear_gpu_memory():
    """清理GPU显存"""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        gc.collect()

# 定义处理单个模型的函数
def process_single_model(model_name):
    """处理单个模型的所有数据集测试"""
    print(f"\n{'='*80}")
    print(f"开始处理模型: {model_name}")
    print(f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*80}\n")
    
    # 记录开始时间
    model_start_time = time.time()
    
    # 加载模型和tokenizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")
    
    try:
        # 加载tokenizer
        print("加载tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        
        # 加载模型
        print("加载模型...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,  # 使用半精度以节省显存
            device_map="auto"  # 自动设备映射
        )
        model.eval()
        print("模型加载成功！")
        print(f"模型类型: {type(model).__name__}")
        clear_gpu_memory()
        
    except Exception as e:
        print(f"加载模型失败：{e}")
        clear_gpu_memory()
        return False
    
    # 定义钩子函数
    activation_percentages_per_layer = []
    handles = []
    
    def hook_fn(layer_idx):
        def fn(module, input, output):
            with torch.no_grad():
                if isinstance(output, tuple):
                    act = output[0]
                else:
                    act = output
                
                # 计算激活百分比
                if act.dim() == 3:
                    activated = (act.abs() > activation_threshold).float()
                    activation_rate = activated.mean(dim=[0, 1])
                    activation_pct = activation_rate.mean().item() * 100
                else:
                    activated = (act.abs() > activation_threshold).float()
                    activation_pct = activated.mean().item() * 100
                
                activation_percentages_per_layer[layer_idx].append(activation_pct)
                
                # 清理内存
                del act, activated
                if 'activation_rate' in locals():
                    del activation_rate
        return fn
    
    # 注册钩子
    try:
        model_type = type(model).__name__.lower()
        print(f"检测到的模型类型: {model_type}")
        
        # 根据模型类型选择正确的层路径
        if "opt" in model_type:
            num_layers = len(model.model.decoder.layers)
            for i in range(num_layers):
                hook = model.model.decoder.layers[i].fc2.register_forward_hook(hook_fn(i))
                handles.append(hook)
        elif "chatglm" in model_type:
            num_layers = len(model.transformer.encoder.layers)
            for i in range(num_layers):
                hook = model.transformer.encoder.layers[i].mlp.register_forward_hook(hook_fn(i))
                handles.append(hook)
        elif hasattr(model, 'model') and hasattr(model.model, 'layers'):
            num_layers = len(model.model.layers)
            for i in range(num_layers):
                layer = model.model.layers[i]
                if hasattr(layer, 'mlp'):
                    hook = layer.mlp.register_forward_hook(hook_fn(i))
                    handles.append(hook)
                elif hasattr(layer, 'feed_forward'):
                    hook = layer.feed_forward.register_forward_hook(hook_fn(i))
                    handles.append(hook)
        elif hasattr(model, 'transformer') and hasattr(model.transformer, 'h'):
            num_layers = len(model.transformer.h)
            for i in range(num_layers):
                hook = model.transformer.h[i].mlp.register_forward_hook(hook_fn(i))
                handles.append(hook)
        else:
            print(f"无法自动检测模型 {model_name} 的层结构")
            return False
            
        print(f"模型总层数: {num_layers}")
        print(f"成功注册 {len(handles)} 个钩子")
        
    except Exception as e:
        print(f"注册钩子失败: {e}")
        # 清理已加载的模型
        del model
        clear_gpu_memory()
        return False
    
    # 存储所有数据集的结果
    all_dataset_results = {}
    
    # 循环每个数据集
    for ds_name, ds_config, ds_split, text_column in datasets_to_use:
        print(f"\n--- 处理数据集: {ds_name} (split: {ds_split}) ---")
        dataset_start_time = time.time()
        
        try:
            # 加载数据集
            if ds_config:
                dataset = load_dataset(ds_name, ds_config, split=ds_split)
            else:
                dataset = load_dataset(ds_name, split=ds_split)
            
            # 提取样本文本
            text_list = dataset[text_column][:num_samples]
            
            # 特殊处理squad数据集
            if ds_name == "squad":
                context_list = dataset['context'][:num_samples]
                text_list = [q + " " + c for q, c in zip(text_list, context_list)]
            
            samples = [text for text in text_list if isinstance(text, str) and text.strip()]
            if len(samples) == 0:
                print("跳过: 无有效样本")
                continue
            print(f"使用 {len(samples)} 个样本进行分析")
            
            # 重置激活记录
            activation_percentages_per_layer = [[] for _ in range(num_layers)]
            
            # 批量运行推理
            with torch.no_grad():
                for i, text in enumerate(tqdm(samples, desc="处理样本")):
                    if not text.strip():
                        continue
                    text = text[:max_length]
                    inputs = tokenizer(text, return_tensors="pt", max_length=max_length, 
                                     truncation=True, padding=True).to(device)
                    try:
                        outputs = model(**inputs)
                        del outputs
                    except Exception as e:
                        print(f"推理错误: {e}")
                        continue
                    finally:
                        del inputs
                    
                    # 定期清理显存
                    if (i + 1) % batch_size == 0:
                        clear_gpu_memory()
            
            # 清理显存
            clear_gpu_memory()
            
            # 计算统计数据
            layer_avg_activations = []
            for layer_idx in range(num_layers):
                layer_percentages = activation_percentages_per_layer[layer_idx]
                if layer_percentages:
                    avg_pct = np.mean(layer_percentages)
                    layer_avg_activations.append({
                        "layer_id": layer_idx,
                        "avg_activation_percentage": round(avg_pct, 4),
                        "num_samples": len(layer_percentages)
                    })
                else:
                    layer_avg_activations.append({
                        "layer_id": layer_idx,
                        "avg_activation_percentage": 0.0,
                        "num_samples": 0
                    })
            
            # 计算整体平均
            all_layer_percentages = [
                layer_data["avg_activation_percentage"] 
                for layer_data in layer_avg_activations 
                if layer_data["num_samples"] > 0
            ]
            overall_avg = np.mean(all_layer_percentages) if all_layer_percentages else 0.0
            
            # 保存结果
            dataset_result = {
                "dataset": ds_name,
                "split": ds_split,
                "num_samples": len(samples),
                "activation_threshold": activation_threshold,
                "overall_avg_activation_percentage": round(overall_avg, 4),
                "layers": layer_avg_activations,
                "processing_time": round(time.time() - dataset_start_time, 2)
            }
            
            all_dataset_results[ds_name] = dataset_result
            
            # 打印摘要
            print(f"\n{ds_name} 数据集结果:")
            print(f"  - 整体平均激活百分比: {overall_avg:.2f}%")
            print(f"  - 处理时间: {dataset_result['processing_time']} 秒")
            
            # 清理变量
            del dataset, text_list, samples
            if ds_name == "squad" and 'context_list' in locals():
                del context_list
            clear_gpu_memory()
            
        except Exception as e:
            print(f"处理 {ds_name} 失败: {e}")
            clear_gpu_memory()
    
    # 保存结果
    model_folder = model_name.replace("/", "_").replace("-", "_")
    os.makedirs(model_folder, exist_ok=True)
    
    # 详细结果
    summary_filename = "activation_summary_all_datasets.json"
    summary_path = os.path.join(model_folder, summary_filename)
    summary_results = {
        "model": model_name,
        "model_type": model_type,
        "num_layers": num_layers,
        "activation_threshold": activation_threshold,
        "total_processing_time": round(time.time() - model_start_time, 2),
        "datasets": all_dataset_results
    }
    with open(summary_path, 'w') as f:
        json.dump(summary_results, f, indent=4)
    print(f"\n综合结果保存为: {summary_path}")
    
    # 简化比较结果
    comparison_filename = "activation_comparison.json"
    comparison_path = os.path.join(model_folder, comparison_filename)
    comparison_data = {
        "model": model_name,
        "activation_threshold": activation_threshold,
        "dataset_averages": {}
    }
    for ds_name, result in all_dataset_results.items():
        comparison_data["dataset_averages"][ds_name] = {
            "overall_activation_percentage": result["overall_avg_activation_percentage"],
            "num_samples": result["num_samples"]
        }
    with open(comparison_path, 'w') as f:
        json.dump(comparison_data, f, indent=4)
    
    # 移除钩子
    for handle in handles:
        handle.remove()
    
    # 删除模型并清理显存
    del model
    if 'tokenizer' in locals():
        del tokenizer
    clear_gpu_memory()
    
    print(f"\n模型 {model_name} 处理完成！")
    print(f"总处理时间: {round(time.time() - model_start_time, 2)} 秒")
    
    return True

# 主程序：循环处理所有模型
def main():
    """主函数：循环处理所有模型"""
    print("="*80)
    print("批量模型激活稀疏度测试")
    print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"待测试模型数量: {len(MODEL_LIST)}")
    print("="*80)
    
    # 创建总结果文件夹
    os.makedirs("all_models_results", exist_ok=True)
    
    # 记录处理状态
    processing_log = {
        "start_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        "models": {}
    }
    
    successful_models = []
    failed_models = []
    
    # 循环处理每个模型
    for idx, model_name in enumerate(MODEL_LIST, 1):
        print(f"\n\n{'#'*80}")
        print(f"进度: {idx}/{len(MODEL_LIST)}")
        print(f"{'#'*80}")
        
        try:
            success = process_single_model(model_name)
            if success:
                successful_models.append(model_name)
                processing_log["models"][model_name] = "成功"
            else:
                failed_models.append(model_name)
                processing_log["models"][model_name] = "失败"
        except Exception as e:
            print(f"处理模型 {model_name} 时发生严重错误: {e}")
            failed_models.append(model_name)
            processing_log["models"][model_name] = f"错误: {str(e)}"
        
        # 每个模型处理完后都清理显存
        clear_gpu_memory()
        
        # 短暂等待，确保资源完全释放
        time.sleep(5)
    
    # 保存处理日志
    processing_log["end_time"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    processing_log["successful_models"] = successful_models
    processing_log["failed_models"] = failed_models
    processing_log["success_rate"] = f"{len(successful_models)}/{len(MODEL_LIST)}"
    
    log_path = os.path.join("all_models_results", "processing_log.json")
    with open(log_path, 'w') as f:
        json.dump(processing_log, f, indent=4)
    
    # 打印最终统计
    print("\n\n" + "="*80)
    print("所有模型处理完成！")
    print(f"结束时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"成功: {len(successful_models)} 个模型")
    print(f"失败: {len(failed_models)} 个模型")
    
    if successful_models:
        print("\n成功的模型:")
        for model in successful_models:
            print(f"  ✓ {model}")
    
    if failed_models:
        print("\n失败的模型:")
        for model in failed_models:
            print(f"  ✗ {model}")
    
    print(f"\n处理日志保存在: {log_path}")
    print("="*80)

# 运行主程序
if __name__ == "__main__":
    main()