## 检查硬件加速配置

In [1]:
import torch

# Report the installed PyTorch build and whether a CUDA device can be used.
print("PyTorch版本:", torch.__version__)
print("CUDA是否可用:", torch.cuda.is_available())
if not torch.cuda.is_available():
    # No usable GPU: say so instead of querying device details.
    print("未检测到CUDA设备")
else:
    # Device 0 is the one reported; torch.version.cuda is the CUDA version of the build.
    print("当前GPU设备:", torch.cuda.get_device_name(0))
    print("CUDA版本:", torch.version.cuda)

PyTorch版本: 2.5.1+cu124
CUDA是否可用: True
当前GPU设备: NVIDIA A10
CUDA版本: 12.4


## 数据预处理

In [2]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer

# Merge the stance and body tables
def merge_data(stances_path, bodies_path):
    """Join a stances CSV with a bodies CSV on their shared 'Body ID' column.

    Args:
        stances_path: Path of the stances CSV, read with ``pd.read_csv``.
        bodies_path: Path of the bodies CSV, read with ``pd.read_csv``.

    Returns:
        DataFrame restricted to the columns 'Headline', 'articleBody' and
        'Stance' of the merged table (pandas' default inner join).
    """
    stance_frame, body_frame = (pd.read_csv(path) for path in (stances_path, bodies_path))
    joined = stance_frame.merge(body_frame, on='Body ID')
    wanted_columns = ['Headline', 'articleBody', 'Stance']
    return joined[wanted_columns]

# Build the merged training frame and the merged competition-test frame.
train_data = merge_data(stances_path='train_stances.csv',
                        bodies_path='train_bodies.csv')
test_data = merge_data(stances_path='competition_test_stances.csv',
                       bodies_path='competition_test_bodies.csv')

# Text preprocessing: load the tokenizer for the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased'
)

def preprocess(text):
    """Tokenize ``text`` with the notebook-level ``tokenizer``.

    The call pads to ``max_length``, enables truncation, caps the length at
    512 and requests PyTorch tensors (``return_tensors='pt'``).

    Args:
        text: The text handed to the tokenizer.

    Returns:
        Whatever ``tokenizer(...)`` returns for these options.
    """
    encode_options = dict(
        padding='max_length',
        truncation=True,
        max_length=512,
        return_tensors='pt',
    )
    return tokenizer(text, **encode_options)

# Build the dataset
class NewsDataset(torch.utils.data.Dataset):
    """Torch dataset of tokenized headline/body texts with integer stance labels.

    Each row's 'Headline' and 'articleBody' are joined with " [SEP] " and
    tokenized once, up front, by the notebook-level ``preprocess`` function.

    Labels are encoded with a FIXED label order so that every split (train,
    validation, test) uses the same stance -> id mapping. The previous
    ``pd.get_dummies(...).argmax(1)`` encoding derived the mapping from the
    stances present in each individual frame, so a split missing one class
    silently shifted the ids of the others, and a missing (NaN) stance was
    silently encoded as class 0.
    """

    # Alphabetical order, i.e. the ids the former get_dummies encoding produced
    # whenever all four stances were present.
    # NOTE(review): assumes the 'Stance' column holds exactly these four values
    # (consistent with num_labels=4 used for the classifier) -- confirm against
    # the CSVs, or pass ``label_order`` explicitly.
    DEFAULT_LABEL_ORDER = ('agree', 'disagree', 'discuss', 'unrelated')

    def __init__(self, df, label_order=None):
        """Tokenize every row of ``df`` and encode its stances.

        Args:
            df: DataFrame with 'Headline', 'articleBody' and 'Stance' columns.
            label_order: Optional sequence of stance names; the position of a
                name is its class id. Defaults to ``DEFAULT_LABEL_ORDER``.

        Raises:
            ValueError: If ``df['Stance']`` contains a value not in the label order.
        """
        # Validate/encode labels first so a bad label fails before the
        # (expensive) tokenization pass.
        self.labels = self._encode_labels(df['Stance'], label_order)
        # Iterate the two columns directly; avoids building a Series per row
        # as DataFrame.iterrows() does.
        self.texts = [
            preprocess(headline + " [SEP] " + body)
            for headline, body in zip(df['Headline'].tolist(), df['articleBody'].tolist())
        ]

    @staticmethod
    def _encode_labels(stances, label_order=None):
        """Map an iterable of stance names to a 1-D int64 tensor of class ids."""
        order = NewsDataset.DEFAULT_LABEL_ORDER if label_order is None else tuple(label_order)
        label_to_id = {name: index for index, name in enumerate(order)}
        values = list(stances)
        unknown = sorted({str(value) for value in values if value not in label_to_id})
        if unknown:
            raise ValueError(
                f"Unknown stance label(s) {unknown}; expected one of {list(order)}"
            )
        return torch.tensor([label_to_id[value] for value in values], dtype=torch.long)

    def __len__(self):
        """Number of examples (one label per example)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``idx``-th example as a dict of tensors.

        The stored encodings come from a single-text tokenizer call, so the
        size-1 dimensions are squeezed away before returning.
        """
        encoding = self.texts[idx]
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': self.labels[idx]
        }

# Split into training and validation sets.
# A fixed random_state makes the split (and therefore the reported metrics)
# reproducible across kernel restarts; stratifying on 'Stance' keeps the class
# proportions the same in both parts.
train_df, val_df = train_test_split(
    train_data,
    test_size=0.2,
    random_state=42,
    stratify=train_data['Stance'],
)

# One dataset per split; the test set comes from the separate competition files.
train_dataset = NewsDataset(train_df)
val_dataset = NewsDataset(val_df)
test_dataset = NewsDataset(test_data)

  from .autonotebook import tqdm as notebook_tqdm


## 模型构建（基于BERT的改进方案）

In [None]:
from transformers import BertForSequenceClassification, TrainingArguments, Trainer
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

# Load the pretrained checkpoint with a 4-class classification head;
# attention maps and hidden states are not returned.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", num_labels=4, output_attentions=False, output_hidden_states=False
)

# Custom evaluation metrics
def compute_metrics(pred):
    """Compute accuracy and weighted F1 for an evaluation pass.

    Args:
        pred: Object exposing ``predictions`` (scores whose last axis is
            arg-maxed into class ids) and ``label_ids`` (reference labels).

    Returns:
        dict with the keys 'accuracy' and 'f1'.
    """
    predicted_classes = pred.predictions.argmax(-1)
    reference = pred.label_ids
    return {
        'accuracy': accuracy_score(reference, predicted_classes),
        'f1': f1_score(reference, predicted_classes, average='weighted'),
    }

# Training hyper-parameters
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=50,
    eval_strategy="epoch",
    save_strategy="epoch",  # must match eval_strategy for load_best_model_at_end
    load_best_model_at_end=True,
    # Mixed precision only when a CUDA device is present (e.g. Tensor Cores on
    # an A10); an unconditional fp16=True makes this cell fail on CPU-only
    # machines, a case the hardware check at the top of the notebook allows for.
    fp16=torch.cuda.is_available(),
    gradient_accumulation_steps=1,  # can stay at 1 while GPU memory is sufficient
    dataloader_num_workers=4,  # extra worker processes for data loading
)

# Wire the model, training arguments, datasets and metric callback into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,  # used for optimisation
    eval_dataset=val_dataset,     # used for the evaluation passes
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## 模型训练

In [13]:
# Run fine-tuning.
trainer.train()

# Persist the model the trainer holds after training to the "best_model" directory.
trainer.save_model(output_dir="best_model")

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.1234,0.134308,0.953077,0.94716
2,0.0493,0.060915,0.983992,0.983629
3,0.025,0.040607,0.990295,0.990243


## 模型测试

In [14]:
# Batch the test set for evaluation (16 examples per batch, original order).
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=16,
    shuffle=False,
)

# Evaluation helper
def evaluate(model, dataloader):
    """Run ``model`` over ``dataloader`` and score its predictions.

    Args:
        model: A torch module whose forward accepts ``input_ids`` and
            ``attention_mask`` and returns an object with a ``logits`` attribute.
        dataloader: Iterable of batches; each batch is a dict with the tensor
            entries 'input_ids', 'attention_mask' and 'labels'.

    Returns:
        dict with 'accuracy' and 'f1_score' (weighted F1) over all batches.
    """
    # Run on whatever device the model already lives on instead of hard-coding
    # 'cuda', so evaluation also works for a CPU-resident model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            # Labels are deliberately not passed to the model: only the logits
            # are needed, so there is no reason to compute a loss here.
            outputs = model(
                input_ids=batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device),
            )
            predictions.extend(outputs.logits.argmax(dim=1).cpu().tolist())
            true_labels.extend(batch['labels'].tolist())

    return {
        'accuracy': accuracy_score(true_labels, predictions),
        'f1_score': f1_score(true_labels, predictions, average='weighted')
    }

# Score the model on the test loader and print both metrics, one per line.
results = evaluate(model=model, dataloader=test_loader)
print(
    f"Test Accuracy: {results['accuracy']:.4f}",
    f"Test F1 Score: {results['f1_score']:.4f}",
    sep="\n",
)

Test Accuracy: 0.9228
Test F1 Score: 0.9209
