In [1]:
# =======================================================
# Cell 1: Imports for Pruning
# =======================================================
import torch
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
import os

print("Libraries for pruning loaded.")

Libraries for pruning loaded.


In [2]:
# =======================================================
# Cell 2: Pruning Configuration
# =======================================================
# Base model name used to load the tokenizer.
TOKENIZER_NAME = "bert-base-uncased"

# Path to the original, already fine-tuned 12-layer FP32 model.
ORIGINAL_MODEL_PATH = "./my_bert_sst2_finetuned/checkpoint-1800"

# Where the pruned (not yet re-fine-tuned) model gets saved.
PRUNED_MODEL_SAVE_PATH = "./saved_models/bert_pruned_8_layers_unfinetuned"

# Zero-based indices of the encoder layers to remove, chosen from the
# sensitivity-analysis results. This is the "conservative" plan: drop the
# four least important layers.
LAYERS_TO_REMOVE = {1, 4, 10, 11}
# -------------------------

# Make sure the save directory exists before anything is written to it.
os.makedirs(PRUNED_MODEL_SAVE_PATH, exist_ok=True)

print(f"Will remove {len(LAYERS_TO_REMOVE)} layers: {sorted(LAYERS_TO_REMOVE)}")
print(f"Pruned model will be saved to: {PRUNED_MODEL_SAVE_PATH}")

Will remove 4 layers: [1, 4, 10, 11]
Pruned model will be saved to: ./saved_models/bert_pruned_8_layers_unfinetuned


In [3]:
# =======================================================
# Cell 3: Pruning Logic and Saving
# =======================================================
# Builds a shallower copy of the fine-tuned model: every encoder layer whose
# index is in LAYERS_TO_REMOVE is dropped, the remaining layers are renumbered
# consecutively from 0, and all other weights are copied over unchanged. The
# result is saved (together with a tokenizer) so it can be fine-tuned again.
print("Loading original 12-layer model...")
original_model = AutoModelForSequenceClassification.from_pretrained(ORIGINAL_MODEL_PATH)
original_config = original_model.config
original_state_dict = original_model.state_dict()

# Validate the removal set up front so that a bad index fails here with a clear
# message instead of later as a state-dict key mismatch.
num_original_layers = original_config.num_hidden_layers
invalid_layers = sorted(
    idx for idx in LAYERS_TO_REMOVE if not 0 <= idx < num_original_layers
)
if invalid_layers:
    raise ValueError(
        f"LAYERS_TO_REMOVE contains indices outside 0..{num_original_layers - 1}: "
        f"{invalid_layers}"
    )
kept_layers = [idx for idx in range(num_original_layers) if idx not in LAYERS_TO_REMOVE]
if not kept_layers:
    raise ValueError("LAYERS_TO_REMOVE removes every encoder layer; nothing would be left.")

print("Creating new pruned model configuration...")
# 1. Create a new config with the reduced layer count. The count comes from the
#    layers actually kept, so duplicates in LAYERS_TO_REMOVE cannot skew it.
pruned_config = AutoConfig.from_pretrained(ORIGINAL_MODEL_PATH)
pruned_config.num_hidden_layers = len(kept_layers)

# 2. Instantiate a new model from that config (its weights are random at this point).
pruned_model = AutoModelForSequenceClassification.from_config(pruned_config)
pruned_state_dict = pruned_model.state_dict()

# 3. Core step: weight mapping.
print("Mapping weights from original model to pruned model...")

ENCODER_LAYER_PREFIX = "bert.encoder.layer."
# old layer index -> new (consecutive) layer index, for the layers that are kept.
old_to_new_layer = {old_idx: new_idx for new_idx, old_idx in enumerate(kept_layers)}

copied_keys = set()   # keys of the pruned model that received original weights
seen_layers = set()   # encoder layer indices actually present in the original

for key, tensor in original_state_dict.items():
    if key.startswith(ENCODER_LAYER_PREFIX):
        # Key looks like "bert.encoder.layer.<idx>.<suffix>". Remapping by prefix
        # (rather than by a fixed list of suffixes) carries over every per-layer
        # tensor the model has, so none is left at its random initial value.
        layer_str, _, key_suffix = key[len(ENCODER_LAYER_PREFIX):].partition(".")
        old_layer_idx = int(layer_str)
        seen_layers.add(old_layer_idx)
        if old_layer_idx not in old_to_new_layer:
            continue  # this layer is being pruned away
        new_key = f"{ENCODER_LAYER_PREFIX}{old_to_new_layer[old_layer_idx]}.{key_suffix}"
    else:
        # Non-encoder weights (embeddings, pooler, classifier) keep their names.
        new_key = key

    if new_key in pruned_state_dict:
        pruned_state_dict[new_key] = tensor
        copied_keys.add(new_key)

# Sanity checks: fail loudly instead of saving a partially random model.
missing_layers = sorted(set(range(num_original_layers)) - seen_layers)
if missing_layers:
    raise RuntimeError(
        f"No '{ENCODER_LAYER_PREFIX}<idx>.*' weights found for layers {missing_layers}; "
        "the checkpoint does not use the expected parameter naming."
    )
uncopied_keys = sorted(set(pruned_state_dict) - copied_keys)
if uncopied_keys:
    raise RuntimeError(
        f"{len(uncopied_keys)} pruned-model tensors were not populated from the "
        f"original model, e.g. {uncopied_keys[:5]}"
    )

# 4. Load the mapped weights into the new model.
pruned_model.load_state_dict(pruned_state_dict)

# 5. Save the pruned model and a tokenizer, ready for fine-tuning.
#    Note: the tokenizer is loaded from TOKENIZER_NAME, not from ORIGINAL_MODEL_PATH.
print(f"Saving pruned model to {PRUNED_MODEL_SAVE_PATH}...")
pruned_model.save_pretrained(PRUNED_MODEL_SAVE_PATH)
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
tokenizer.save_pretrained(PRUNED_MODEL_SAVE_PATH)

print("\n--- Pruning Complete! ---")
print(f"Original model had {original_config.num_hidden_layers} layers.")
print(f"Pruned model has {pruned_config.num_hidden_layers} layers.")

Loading original 12-layer model...
Creating new pruned model configuration...
Mapping weights from original model to pruned model...
Saving pruned model to ./saved_models/bert_pruned_8_layers_unfinetuned...

--- Pruning Complete! ---
Original model had 12 layers.
Pruned model has 8 layers.


In [4]:
# =======================================================
# Cell 4: Imports for Fine-tuning
# =======================================================
from datasets import load_dataset
from transformers import Trainer, TrainingArguments
import numpy as np
import evaluate # 使用 evaluate 库来计算指标

print("Libraries for fine-tuning loaded.")

Libraries for fine-tuning loaded.


In [5]:
# =======================================================
# Cell 5: Fine-tuning Config and Data Prep
# =======================================================
# --- Configuration ---
# The pruned model saved in the previous step, waiting to be fine-tuned.
MODEL_CHECKPOINT = PRUNED_MODEL_SAVE_PATH 
# Output directory for the final fine-tuned model.
OUTPUT_DIR = "./models/bert_pruned_8_layers_finetuned"
# (Optional) Weights & Biases project name, used to record training curves.
WANDB_PROJECT_NAME = "bert_pruning_sst2" 

# --- Data preparation ---
print("Loading and tokenizing dataset...")
# Load the tokenizer from the pruned-model directory.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
# Load the GLUE SST-2 dataset.
dataset = load_dataset("glue", "sst2")
# Tokenization function, applied batch-wise by `dataset.map`.
def tokenize_function(examples):
    """Tokenize the ``"sentence"`` field of a batch of examples.

    Sequences are truncated but deliberately NOT padded here. Padding every
    example to the tokenizer's maximum length up front makes each training
    and evaluation batch far longer than these sentences need. Padding is
    instead left to the data collator, which pads each batch to its own
    longest sequence (``Trainer`` uses a padding collator by default when it
    is given the tokenizer).

    Args:
        examples: Mapping with a ``"sentence"`` entry holding the texts.

    Returns:
        The tokenizer's output for those texts.
    """
    return tokenizer(examples["sentence"], truncation=True)
# Tokenize the entire dataset (all splits), processing examples in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Accuracy metric object used by the metric-computation function.
accuracy_metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    """Turn raw model outputs into an accuracy score.

    Args:
        eval_pred: A ``(predictions, labels)`` pair; the class with the
            highest score along axis 1 of ``predictions`` is taken as the
            predicted label.

    Returns:
        The result of ``accuracy_metric.compute`` for those predicted labels
        against the reference labels.
    """
    logits, references = eval_pred
    predicted_classes = np.argmax(logits, axis=1)
    return accuracy_metric.compute(
        predictions=predicted_classes,
        references=references,
    )

print("Dataset ready and metrics function defined.")

Loading and tokenizing dataset...


Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

Dataset ready and metrics function defined.


In [6]:
# =======================================================
# Cell 6: Run Fine-tuning
# =======================================================
# Fine-tunes the pruned model on the tokenized SST-2 splits, evaluating and
# checkpointing every 200 steps, then saves the best checkpoint's weights.

# Set the W&B project name up front, before any trainer objects are created.
os.environ["WANDB_PROJECT"] = WANDB_PROJECT_NAME

# 1. Load the pruned model.
model_to_finetune = AutoModelForSequenceClassification.from_pretrained(MODEL_CHECKPOINT)

# 2. Configure the training arguments.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,  # re-fine-tuning may need about as many epochs as the original fine-tuning
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_strategy="steps",
    logging_steps=50,
    eval_strategy="steps",  # evaluate on the validation set every `eval_steps` steps
    save_strategy="steps",  # save a checkpoint every `save_steps` steps
    eval_steps=200,
    save_steps=200,  # kept equal to eval_steps so each saved checkpoint has an evaluation
    # Reload the best checkpoint when training ends. No `metric_for_best_model`
    # is set, so the library default decides what "best" means (evaluation
    # loss, per the Transformers documentation), not the accuracy metric.
    load_best_model_at_end=True,
    push_to_hub=False,
    report_to="wandb",  # log to Weights & Biases
)

# 3. Initialise the Trainer.
trainer = Trainer(
    model=model_to_finetune,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# 4. Start training.
print("Starting fine-tuning of the pruned model...")
trainer.train()

# 5. Save the final (best) model.
best_model_dir = os.path.join(OUTPUT_DIR, "best_model")
print("Saving the best fine-tuned model...")
trainer.save_model(best_model_dir)

print("\n--- Fine-tuning Complete! ---")
print(f"The final fine-tuned pruned model is saved at: {best_model_dir}")

  trainer = Trainer(
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Starting fine-tuning of the pruned model...


[34m[1mwandb[0m: Currently logged in as: [33mmaniaamaeovo[0m ([33mmaniaamaeovo-mania[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,Accuracy
200,0.0923,0.257266,0.918578
400,0.0927,0.253976,0.918578
600,0.0863,0.272143,0.924312
800,0.0855,0.247817,0.920872
1000,0.0936,0.24358,0.927752
1200,0.0509,0.313513,0.922018
1400,0.046,0.332521,0.920872
1600,0.0526,0.318038,0.928899
1800,0.055,0.276302,0.927752
2000,0.0769,0.245984,0.928899


Saving the best fine-tuned model...

--- Fine-tuning Complete! ---
The final fine-tuned pruned model is saved at: ./models/bert_pruned_8_layers_finetuned/best_model
