# 任务一：IMDB-10 情感分类 (优化版)

本Notebook旨在完成IMDB电影评论的十分类情感（评分1-10）任务。我们将根据实验要求，实现并对比三种不同的深度学习模型。

**优化说明:**
此版本针对模型二和模型三的训练速度进行了优化，主要改动包括：
1.  **数据预分词 (Pre-tokenization):** 将文本一次性转换为BERT输入格式，避免在训练循环中重复分词，消除CPU瓶颈。
2.  **增大批次大小 (Batch Size):** 根据8GB显存配置，将BERT模型的批次大小从32提升至64，提高GPU利用率。
3.  **添加`tqdm`进度条:** 为训练和评估过程添加了简洁的进度条，方便跟踪进度。
4.  **启用CuDNN基准测试:** 优化底层计算效率。

**实验方案：**
1.  **模型一：** GloVe词向量 + BiLSTM + 全连接层
2.  **模型二：** BERT-base 嵌入 + BiLSTM + 分类头
3.  **模型三：** 微调 BERT-base


## 1. 环境设置与依赖安装

首先，安装所有必需的Python库。请确保`tqdm`已安装 (`pip install tqdm`)。

## 2. 数据加载与预处理

In [1]:
import pandas as pd
import re

def load_imdb_data(file_path):
    """加载并解析IMDB数据文件，使用正则表达式查找标签。"""
    texts = []
    labels = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                match = re.search(r'\t\t(\d+)\t\t', line)
                if match:
                    rating = int(match.group(1))
                    text_start_index = match.end()
                    text = line[text_start_index:].strip()
                    text = text.replace('<sssss>', ' ').strip()
                    labels.append(rating - 1)
                    texts.append(text)
    except FileNotFoundError:
        print(f"错误: 文件未找到 {file_path}。请确保数据文件在当前目录下。")
        return pd.DataFrame({'text': [], 'label': []})
    
    df = pd.DataFrame({'text': texts, 'label': labels})
    # 重置索引以确保后续 .loc[idx] 的正确性
    return df.reset_index(drop=True)

# 定义文件路径
train_file = 'imdb.train.txt.ss'
dev_file = 'imdb.dev.txt.ss'
test_file = 'imdb.test.txt.ss'

# 加载所有数据集
df_train = load_imdb_data(train_file)
df_val = load_imdb_data(dev_file)
df_test = load_imdb_data(test_file)

if not df_train.empty:
    print(f"训练集大小: {df_train.shape}")
    print(f"验证集大小: {df_val.shape}")
    print(f"测试集大小: {df_test.shape}")
    print("\n数据样本示例:")
    print(df_train.head())

训练集大小: (67426, 2)
验证集大小: (8381, 2)
测试集大小: (9112, 2)

数据样本示例:
                                                text  label
0  i excepted a lot from this movie , and it did ...      9
1  this movie is not worth seeing .   has no meri...      0
2  this is a truly remarkable horror movie .   al...      9
3  * minor spoilers * this movie is inept .   so ...      2
4  this is a brilliant horror movie .   fans of t...      9


## 3. 模型一：GloVe + BiLSTM

### 3.1. GloVe词向量与数据准备

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import Counter
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error
import numpy as np
from tqdm import tqdm

# --- 参数配置 ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True # OPTIMIZATION
NUM_CLASSES = 10
BATCH_SIZE_LSTM = 64
EMBEDDING_DIM = 300
HIDDEN_DIM = 256
N_LAYERS = 2
DROPOUT = 0.5
EPOCHS = 5
MAX_LEN_LSTM = 512
GLOVE_PATH = r"D:\glove_vectors\glove.840B.300d\glove.840B.300d.txt"

# --- 文本分词器 ---
def tokenizer(text):
    text = text.lower()
    text = re.sub(r"[^a-zA-Z0-9' ]+", "", text)
    return text.split()

# --- 构建词汇表 ---
print("正在构建词汇表...")
word_counts = Counter()
for text in df_train['text']:
    word_counts.update(tokenizer(text))
vocab = sorted(word_counts, key=word_counts.get, reverse=True)
word_to_idx = {word: i + 2 for i, word in enumerate(vocab)}
word_to_idx['<pad>'] = 0
word_to_idx['<unk>'] = 1
VOCAB_SIZE = len(word_to_idx)

# --- 加载GloVe词向量 ---
print("正在加载GloVe词向量...")
glove_embeddings = np.zeros((VOCAB_SIZE, EMBEDDING_DIM))
try:
    with open(GLOVE_PATH, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            word = parts[0]
            if len(parts) != EMBEDDING_DIM + 1:
                continue
            if word in word_to_idx:
                try:
                    vector = np.array(parts[1:], dtype=np.float32)
                    glove_embeddings[word_to_idx[word]] = vector
                except ValueError:
                    continue
    glove_embeddings = torch.tensor(glove_embeddings, dtype=torch.float32)
    print("GloVe加载完成。")
except FileNotFoundError:
    print(f"错误: GloVe文件未找到 {GLOVE_PATH}。模型一将无法运行。")
    glove_embeddings = torch.randn(VOCAB_SIZE, EMBEDDING_DIM) # 使用随机向量代替

# --- PyTorch Dataset ---
class IMDBDataset(Dataset):
    def __init__(self, dataframe, word_to_idx, max_len):
        self.df = dataframe
        self.word_to_idx = word_to_idx
        self.max_len = max_len

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        text = self.df.loc[idx, 'text']
        label = self.df.loc[idx, 'label']
        tokens = [self.word_to_idx.get(word, 1) for word in tokenizer(text)]
        if len(tokens) > self.max_len:
            tokens = tokens[:self.max_len]
        else:
            tokens.extend([0] * (self.max_len - len(tokens)))
        return torch.tensor(tokens), torch.tensor(label)

# --- 创建DataLoaders ---
train_dataset = IMDBDataset(df_train, word_to_idx, MAX_LEN_LSTM)
val_dataset = IMDBDataset(df_val, word_to_idx, MAX_LEN_LSTM)
test_dataset = IMDBDataset(df_test, word_to_idx, MAX_LEN_LSTM)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE_LSTM, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE_LSTM)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE_LSTM)

print("\n数据准备完成。")

正在构建词汇表...
正在加载GloVe词向量...
GloVe加载完成。

数据准备完成。


### 3.2. 模型一结构定义

In [3]:
class BiLSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, dropout, pretrained_embeddings):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.embedding.weight.data.copy_(pretrained_embeddings)
        self.embedding.weight.requires_grad = False
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, 
                              bidirectional=True, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
        embedded = self.dropout(self.embedding(text))
        _, (hidden, _) = self.lstm(embedded)
        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
        return self.fc(hidden)

model1 = BiLSTMClassifier(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_CLASSES, N_LAYERS, DROPOUT, glove_embeddings).to(DEVICE)

### 3.3. 训练与评估（模型一）

In [4]:
def train(model, iterator, optimizer, criterion):
    model.train()
    epoch_loss = 0
    # OPTIMIZATION: Add tqdm progress bar
    for batch in tqdm(iterator, desc="Training (M1)", leave=False):
        text, labels = batch
        text, labels = text.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        predictions = model(text)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

def evaluate(model, iterator, criterion):
    model.eval()
    epoch_loss = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        # OPTIMIZATION: Add tqdm progress bar
        for batch in tqdm(iterator, desc="Evaluating (M1)", leave=False):
            text, labels = batch
            text, labels = text.to(DEVICE), labels.to(DEVICE)
            predictions = model(text)
            loss = criterion(predictions, labels)
            epoch_loss += loss.item()
            all_preds.extend(predictions.argmax(1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro')
    preds_rmse = np.array(all_preds) + 1
    labels_rmse = np.array(all_labels) + 1
    rmse = np.sqrt(mean_squared_error(labels_rmse, preds_rmse))
    return epoch_loss / len(iterator), acc, f1, rmse

# --- 开始训练 ---
print("开始训练模型一...")
optimizer = optim.Adam(model1.parameters())
criterion = nn.CrossEntropyLoss().to(DEVICE)
results = {}

for epoch in range(EPOCHS):
    train_loss = train(model1, train_loader, optimizer, criterion)
    valid_loss, valid_acc, valid_f1, valid_rmse = evaluate(model1, val_loader, criterion)
    print(f'轮次: {epoch+1:02} | 训练损失: {train_loss:.3f} | 验证损失: {valid_loss:.3f} | 验证Acc: {valid_acc*100:.2f}% | 验证F1: {valid_f1:.3f} | 验证RMSE: {valid_rmse:.3f}')

test_loss, test_acc, test_f1, test_rmse = evaluate(model1, test_loader, criterion)
print(f'\n模型一 测试集结果 -> Acc: {test_acc*100:.2f}% | F1: {test_f1:.3f} | RMSE: {test_rmse:.3f}')
results['模型一 (GloVe+BiLSTM)'] = {'Accuracy': test_acc, 'Macro-F1': test_f1, 'RMSE': test_rmse}

开始训练模型一...


  from .autonotebook import tqdm as notebook_tqdm
                                                                  

轮次: 01 | 训练损失: 2.075 | 验证损失: 1.989 | 验证Acc: 23.47% | 验证F1: 0.095 | 验证RMSE: 2.225


                                                                  

轮次: 02 | 训练损失: 1.875 | 验证损失: 1.748 | 验证Acc: 30.04% | 验证F1: 0.210 | 验证RMSE: 1.765


                                                                  

轮次: 03 | 训练损失: 1.743 | 验证损失: 1.654 | 验证Acc: 34.07% | 验证F1: 0.237 | 验证RMSE: 1.603


                                                                  

轮次: 04 | 训练损失: 1.651 | 验证损失: 1.549 | 验证Acc: 38.15% | 验证F1: 0.305 | 验证RMSE: 1.506


                                                                  

轮次: 05 | 训练损失: 1.576 | 验证损失: 1.535 | 验证Acc: 39.30% | 验证F1: 0.302 | 验证RMSE: 1.558


                                                                  


模型一 测试集结果 -> Acc: 39.07% | F1: 0.296 | RMSE: 1.579




## 4. 模型二 & 三：基于BERT的模型

### 4.1. BERT数据准备 (已优化)

**核心优化**：我们不再在`Dataset`中对每个样本进行实时分词。取而代之，我们先一次性地将整个文本数据集分词，并将结果（`input_ids`, `attention_mask`）存储起来。新的`Dataset`类将直接、快速地从这些预处理好的张量中读取数据，从而消除CPU瓶颈。

In [5]:
from transformers import BertTokenizer

# --- 参数配置 ---
BERT_MODEL_NAME = 'bert-base-uncased'
MAX_LEN_BERT = 256 # OPTIMIZATION: 从512缩减到256，减少计算和显存，8GB显存下更安全
BATCH_SIZE_BERT = 64 # OPTIMIZATION: 从32增加到64，提高GPU利用率

local_bert_path = 'D:/models/bert-base-uncased'

import os
if not os.path.exists(local_bert_path):
    # 如果本地没有，尝试从Hugging Face Hub下载
    print(f"本地路径 {local_bert_path} 不存在，尝试从网络下载 {BERT_MODEL_NAME}...")
    try:
        tokenizer_bert = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
        # 如果需要保存到本地
        # tokenizer_bert.save_pretrained(local_bert_path)
    except Exception as e:
        raise RuntimeError(f"无法下载BERT模型，请检查网络或手动下载。错误: {e}")
else:
    print(f"从本地路径加载BERT分词器: {local_bert_path}")
    tokenizer_bert = BertTokenizer.from_pretrained(local_bert_path)

# --- OPTIMIZATION: 预分词函数 ---
def pre_tokenize_data(df, tokenizer, max_len):
    print(f"正在对 {len(df)} 条数据进行预分词...")
    # 使用tokenizer的批量编码功能，速度非常快
    encodings = tokenizer.batch_encode_plus(
        df['text'].tolist(),
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )
    return encodings['input_ids'], encodings['attention_mask'], torch.tensor(df['label'].tolist())

# --- OPTIMIZATION: 预分词所有数据集 ---
train_input_ids, train_attention_mask, train_labels = pre_tokenize_data(df_train, tokenizer_bert, MAX_LEN_BERT)
val_input_ids, val_attention_mask, val_labels = pre_tokenize_data(df_val, tokenizer_bert, MAX_LEN_BERT)
test_input_ids, test_attention_mask, test_labels = pre_tokenize_data(df_test, tokenizer_bert, MAX_LEN_BERT)

# --- OPTIMIZATION: 使用预分词数据的简化版Dataset ---
class PretokenizedIMDBDataset(Dataset):
    def __init__(self, input_ids, attention_mask, labels):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_mask[idx],
            'labels': self.labels[idx]
        }

# --- 创建DataLoaders ---
use_cuda = torch.cuda.is_available()
dataloader_args = {
    "batch_size": BATCH_SIZE_BERT,
    # 在Windows的Jupyter中，num_workers>0可能出问题，设为0最安全。预分词后，这里不再是瓶颈
    "num_workers": 0, 
    "pin_memory": True if use_cuda else False
}

train_dataset_bert = PretokenizedIMDBDataset(train_input_ids, train_attention_mask, train_labels)
val_dataset_bert = PretokenizedIMDBDataset(val_input_ids, val_attention_mask, val_labels)
test_dataset_bert = PretokenizedIMDBDataset(test_input_ids, test_attention_mask, test_labels)

train_loader_bert = DataLoader(train_dataset_bert, shuffle=True, **dataloader_args)
val_loader_bert = DataLoader(val_dataset_bert, **dataloader_args)
test_loader_bert = DataLoader(test_dataset_bert, **dataloader_args)

print("\n用于BERT的数据准备完成 。")

从本地路径加载BERT分词器: D:/models/bert-base-uncased
正在对 67426 条数据进行预分词...
正在对 8381 条数据进行预分词...
正在对 9112 条数据进行预分词...

用于BERT的数据准备完成 (已优化)。


### 4.2. 模型二：BERT嵌入 + BiLSTM

In [6]:
from transformers import BertModel

class BertBiLSTMClassifier(nn.Module):
    def __init__(self, bert, hidden_dim, output_dim, n_layers, dropout):
        super().__init__()
        self.bert = bert
        embedding_dim = bert.config.to_dict()['hidden_size']
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, 
                              bidirectional=True, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids, attention_mask):
        # 冻结BERT，不计算梯度，节省计算资源
        with torch.no_grad():
            # BERT返回 last_hidden_state, pooler_output
            embedded = self.bert(input_ids=input_ids, attention_mask=attention_mask)[0]
        
        # LSTM和FC层需要计算梯度
        _, (hidden, _) = self.lstm(embedded)
        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
        return self.fc(hidden)

# --- 实例化模型二 ---
bert_model_for_bilstm = BertModel.from_pretrained(local_bert_path if os.path.exists(local_bert_path) else BERT_MODEL_NAME)
# 冻结BERT参数
for param in bert_model_for_bilstm.parameters():
    param.requires_grad = False

model2 = BertBiLSTMClassifier(bert_model_for_bilstm, HIDDEN_DIM, NUM_CLASSES, N_LAYERS, DROPOUT).to(DEVICE)

### 4.3. 训练与评估（模型二）(已优化)

In [7]:
from torch.cuda.amp import GradScaler, autocast

def train_bert_based(model, iterator, optimizer, criterion, scaler):
    model.train()
    epoch_loss = 0
    # OPTIMIZATION: Add tqdm progress bar
    for batch in tqdm(iterator, desc="Training (M2)", leave=False):
        input_ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)
        labels = batch['labels'].to(DEVICE)
        
        optimizer.zero_grad()

        with autocast(): # 混合精度
            predictions = model(input_ids, attention_mask)
            loss = criterion(predictions, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

def evaluate_bert_based(model, iterator, criterion, model_name="M2"):
    model.eval()
    epoch_loss = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        # OPTIMIZATION: Add tqdm progress bar
        for batch in tqdm(iterator, desc=f"Evaluating ({model_name})", leave=False):
            input_ids = batch['input_ids'].to(DEVICE)
            attention_mask = batch['attention_mask'].to(DEVICE)
            labels = batch['labels'].to(DEVICE)
            
            with autocast(): # 混合精度
                predictions = model(input_ids, attention_mask)
                loss = criterion(predictions, labels)

            epoch_loss += loss.item()
            all_preds.extend(predictions.argmax(1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    preds_rmse = np.array(all_preds) + 1
    labels_rmse = np.array(all_labels) + 1
    rmse = np.sqrt(mean_squared_error(labels_rmse, preds_rmse))
    
    return epoch_loss / len(iterator), acc, f1, rmse

# --- 训练模型二---
print("\n开始训练模型二...")
EPOCHS_BERT = 3
optimizer = optim.Adam(model2.parameters())
criterion = nn.CrossEntropyLoss().to(DEVICE)
scaler = GradScaler()

for epoch in range(EPOCHS_BERT):
    train_loss = train_bert_based(model2, train_loader_bert, optimizer, criterion, scaler)
    valid_loss, valid_acc, valid_f1, valid_rmse = evaluate_bert_based(model2, val_loader_bert, criterion)
    print(f'轮次: {epoch+1:02} | 训练损失: {train_loss:.3f} | 验证损失: {valid_loss:.3f} | 验证Acc: {valid_acc*100:.2f}% | 验证F1: {valid_f1:.3f} | 验证RMSE: {valid_rmse:.3f}')

test_loss, test_acc, test_f1, test_rmse = evaluate_bert_based(model2, test_loader_bert, criterion)
print(f'\n模型二 测试集结果 -> Acc: {test_acc*100:.2f}% | F1: {test_f1:.3f} | RMSE: {test_rmse:.3f}')
results['模型二 (BERT嵌入+BiLSTM)'] = {'Accuracy': test_acc, 'Macro-F1': test_f1, 'RMSE': test_rmse}


开始训练模型二...


  attn_output = torch.nn.functional.scaled_dot_product_attention(
                                                                  

轮次: 01 | 训练损失: 1.840 | 验证损失: 1.726 | 验证Acc: 30.29% | 验证F1: 0.210 | 验证RMSE: 1.765


                                                                  

轮次: 02 | 训练损失: 1.702 | 验证损失: 1.645 | 验证Acc: 33.99% | 验证F1: 0.257 | 验证RMSE: 1.764


                                                                  

轮次: 03 | 训练损失: 1.653 | 验证损失: 1.633 | 验证Acc: 32.88% | 验证F1: 0.259 | 验证RMSE: 1.712


                                                                  


模型二 测试集结果 -> Acc: 32.95% | F1: 0.252 | RMSE: 1.763




### 4.4. 模型三：微调BERT

In [9]:
from transformers import BertForSequenceClassification, get_linear_schedule_with_warmup
from torch.optim import AdamW

# --- 实例化模型三 ---
model3 = BertForSequenceClassification.from_pretrained(
    local_bert_path if os.path.exists(local_bert_path) else BERT_MODEL_NAME,
    num_labels=NUM_CLASSES,
).to(DEVICE)

# --- 优化器与学习率调度器 ---
optimizer3 = AdamW(model3.parameters(), lr=2e-5, eps=1e-8)
total_steps = len(train_loader_bert) * EPOCHS_BERT
scheduler3 = get_linear_schedule_with_warmup(optimizer3, num_warmup_steps=0, num_training_steps=total_steps)
criterion3 = nn.CrossEntropyLoss().to(DEVICE)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at D:/models/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### 4.5. 训练与评估（模型三）

In [10]:
def train_finetune(model, iterator, optimizer, scheduler, criterion, scaler):
    model.train()
    epoch_loss = 0
    # OPTIMIZATION: Add tqdm progress bar
    for batch in tqdm(iterator, desc="Training (M3)", leave=False):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)
        labels = batch['labels'].to(DEVICE)

        with autocast(): # 混合精度
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
        
        epoch_loss += loss.item()

        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

    return epoch_loss / len(iterator)

def evaluate_finetune(model, iterator, criterion):
    model.eval()
    epoch_loss = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        # OPTIMIZATION: Add tqdm progress bar
        for batch in tqdm(iterator, desc="Evaluating (M3)", leave=False):
            input_ids = batch['input_ids'].to(DEVICE)
            attention_mask = batch['attention_mask'].to(DEVICE)
            labels = batch['labels'].to(DEVICE)

            with autocast(): # 混合精度
                outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
                loss = outputs.loss
                logits = outputs.logits

            epoch_loss += loss.item()
            all_preds.extend(logits.argmax(1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    preds_rmse = np.array(all_preds) + 1
    labels_rmse = np.array(all_labels) + 1
    rmse = np.sqrt(mean_squared_error(labels_rmse, preds_rmse))
    
    return epoch_loss / len(iterator), acc, f1, rmse

# --- 训练模型三 ---
print("\n开始训练模型三...")
scaler_finetune = GradScaler()

for epoch in range(EPOCHS_BERT):
    train_loss = train_finetune(model3, train_loader_bert, optimizer3, scheduler3, criterion3, scaler_finetune)
    valid_loss, valid_acc, valid_f1, valid_rmse = evaluate_finetune(model3, val_loader_bert, criterion3)
    print(f'轮次: {epoch+1:02} | 训练损失: {train_loss:.3f} | 验证损失: {valid_loss:.3f} | 验证Acc: {valid_acc*100:.2f}% | 验证F1: {valid_f1:.3f} | 验证RMSE: {valid_rmse:.3f}')

test_loss, test_acc, test_f1, test_rmse = evaluate_finetune(model3, test_loader_bert, criterion3)
print(f'\n模型三 测试集结果 -> Acc: {test_acc*100:.2f}% | F1: {test_f1:.3f} | RMSE: {test_rmse:.3f}')
results['模型三 (微调BERT)'] = {'Accuracy': test_acc, 'Macro-F1': test_f1, 'RMSE': test_rmse}


开始训练模型三...


                                                                   

轮次: 01 | 训练损失: 1.691 | 验证损失: 1.531 | 验证Acc: 38.92% | 验证F1: 0.318 | 验证RMSE: 1.457


                                                                  

轮次: 02 | 训练损失: 1.456 | 验证损失: 1.495 | 验证Acc: 40.22% | 验证F1: 0.333 | 验证RMSE: 1.402


                                                                    

轮次: 03 | 训练损失: 1.333 | 验证损失: 1.505 | 验证Acc: 40.87% | 验证F1: 0.365 | 验证RMSE: 1.400


                                                                  


模型三 测试集结果 -> Acc: 40.50% | F1: 0.353 | RMSE: 1.446




## 5. 结果汇总与分析

In [11]:
df_results = pd.DataFrame(results).T
df_results['Accuracy'] = df_results['Accuracy'].apply(lambda x: f"{x*100:.2f}%")
df_results['Macro-F1'] = df_results['Macro-F1'].apply(lambda x: f"{x:.4f}")
df_results['RMSE'] = df_results['RMSE'].apply(lambda x: f"{x:.4f}")

print("--- IMDB-10测试集最终性能对比 ---")
print(df_results)

--- IMDB-10测试集最终性能对比 ---
                    Accuracy Macro-F1    RMSE
模型一 (GloVe+BiLSTM)    39.07%   0.2960  1.5786
模型二 (BERT嵌入+BiLSTM)   32.95%   0.2524  1.7627
模型三 (微调BERT)          40.50%   0.3531  1.4459
