In [2]:
import torch
from transformers import BertForSequenceClassification, BertTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, ClassLabel
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, f1_score, classification_report, accuracy_score

# 检查设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 从 CSV 加载数据
data_files = {"data": "../datasets/datasets.csv"}
dataset = load_dataset("csv", data_files=data_files)

# 将标签中的 . 替换为 _
def replace_dot_with_underscore(example):
    return {"label": example["label"].replace(".", "_")}
dataset = dataset.map(replace_dot_with_underscore)

# 获取唯一的标签值
unique_labels = sorted(set(dataset["data"]["label"]))
print("唯一的标签值:", unique_labels)

# 将 label 列转换为 ClassLabel 类型
dataset = dataset.cast_column("label", ClassLabel(names=unique_labels))

# 打印数据集信息
print(dataset)


  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda
唯一的标签值: ['cs_AI', 'cs_CE', 'cs_DS', 'cs_IT', 'cs_NE', 'cs_PL', 'cs_SY', 'cs_cv', 'math_AC', 'math_GR', 'math_ST']
DatasetDict({
    data: Dataset({
        features: ['review_id', 'label', 'keywords', 'keysentences', 'abstract'],
        num_rows: 550
    })
})


In [3]:
# 将 keywords、keysentences 和 abstract 拼接成 text
def concatenate_text(example):
    text = ' '.join([str(example['keywords']), str(example['keysentences']), str(example['abstract'])])
    return {'text': text}
dataset = dataset.map(concatenate_text)

# 划分训练集和验证集，80%训练，20%验证
dataset = dataset['data'].train_test_split(test_size=0.2, stratify_by_column='label', seed=42)
train_dataset = dataset['train']
eval_dataset = dataset['test']

# 获取分类个数
num_classes = len(train_dataset.features['label'].names)
print(f"Number of classes: {num_classes}")

# 加载预训练的 BERT 模型和 tokenizer
model_name = 'bert-base-uncased'  # 或者选择其他适合的 BERT 变体，如 'bert-large-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)
model.to(device)

# 数据预处理
def encode_batch(batch):
    return tokenizer(batch['text'], padding='max_length', truncation=True, max_length=512)  # BERT 的最大序列长度为 512

encoded_train_dataset = train_dataset.map(encode_batch, batched=True, batch_size=32)
encoded_eval_dataset = eval_dataset.map(encode_batch, batched=True, batch_size=32)

Number of classes: 11


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# 设置参数，用 Trainer() 创建实例
training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=2e-5,
    num_train_epochs=5,
    weight_decay=0.01,
    per_device_train_batch_size=16,  # 根据 GPU 内存调整
    per_device_eval_batch_size=16,
    warmup_steps=500,
    evaluation_strategy='epoch',
    logging_dir='./logs',
    save_strategy='epoch',  # 保存策略
    load_best_model_at_end=True,  # 在训练结束时加载最好的模型
    metric_for_best_model='macro_f1',  # 用于选择最好的模型的指标
)

# 定义 compute_metrics 函数
def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    acc = accuracy_score(labels, preds)
    macro_f1 = f1_score(labels, preds, average='macro')
    return {"accuracy": acc, "macro_f1": macro_f1}

# 创建 Trainer 实例
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train_dataset,
    eval_dataset=encoded_eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)


  trainer = Trainer(


In [5]:
# 训练模型
trainer.train()

                                                
 20%|██        | 28/140 [02:01<05:58,  3.21s/it]

{'eval_loss': 2.4291977882385254, 'eval_accuracy': 0.07272727272727272, 'eval_macro_f1': 0.0313337456194599, 'eval_runtime': 3.1545, 'eval_samples_per_second': 34.87, 'eval_steps_per_second': 2.219, 'epoch': 1.0}


                                                
 40%|████      | 56/140 [04:08<04:37,  3.31s/it]

{'eval_loss': 2.3909237384796143, 'eval_accuracy': 0.13636363636363635, 'eval_macro_f1': 0.07963052989790959, 'eval_runtime': 3.5444, 'eval_samples_per_second': 31.035, 'eval_steps_per_second': 1.975, 'epoch': 2.0}


                                                
 60%|██████    | 84/140 [06:20<03:10,  3.39s/it]

{'eval_loss': 2.354708194732666, 'eval_accuracy': 0.12727272727272726, 'eval_macro_f1': 0.0779904398986178, 'eval_runtime': 3.7805, 'eval_samples_per_second': 29.097, 'eval_steps_per_second': 1.852, 'epoch': 3.0}


                                                 
 80%|████████  | 112/140 [08:33<01:35,  3.42s/it]

{'eval_loss': 2.364818811416626, 'eval_accuracy': 0.14545454545454545, 'eval_macro_f1': 0.09235332276214486, 'eval_runtime': 4.0226, 'eval_samples_per_second': 27.346, 'eval_steps_per_second': 1.74, 'epoch': 4.0}


                                                 
100%|██████████| 140/140 [10:48<00:00,  3.29s/it]

{'eval_loss': 2.240762948989868, 'eval_accuracy': 0.3, 'eval_macro_f1': 0.24660158304387503, 'eval_runtime': 2.3303, 'eval_samples_per_second': 47.205, 'eval_steps_per_second': 3.004, 'epoch': 5.0}


100%|██████████| 140/140 [10:56<00:00,  4.69s/it]

{'train_runtime': 656.1073, 'train_samples_per_second': 3.353, 'train_steps_per_second': 0.213, 'train_loss': 2.358786228724888, 'epoch': 5.0}





TrainOutput(global_step=140, training_loss=2.358786228724888, metrics={'train_runtime': 656.1073, 'train_samples_per_second': 3.353, 'train_steps_per_second': 0.213, 'total_flos': 578891096678400.0, 'train_loss': 2.358786228724888, 'epoch': 5.0})

In [6]:
# 评估模型
results = trainer.evaluate()
print("Evaluation Results:", results)

# 获取预测结果
predictions = trainer.predict(encoded_eval_dataset)
preds = np.argmax(predictions.predictions, axis=1)
labels = predictions.label_ids

100%|██████████| 7/7 [00:01<00:00,  4.41it/s]


Evaluation Results: {'eval_loss': 2.240762948989868, 'eval_accuracy': 0.3, 'eval_macro_f1': 0.24660158304387503, 'eval_runtime': 2.0777, 'eval_samples_per_second': 52.944, 'eval_steps_per_second': 3.369, 'epoch': 5.0}


100%|██████████| 7/7 [00:02<00:00,  3.21it/s]


In [1]:
# 计算混淆矩阵
cm = confusion_matrix(labels, preds)
cm_normalized = confusion_matrix(labels, preds, normalize='true')

# 获取标签名称
label_names = unique_labels

# 绘制混淆矩阵
plt.figure(figsize=(12, 10))
sns.heatmap(cm_normalized, annot=True, fmt=".2f", xticklabels=label_names, yticklabels=label_names, cmap='Blues')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('BERT Confusion Matrix')
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.savefig('./results/(test)BERT Confusion Matrix.png')
plt.show()



NameError: name 'confusion_matrix' is not defined

In [8]:
# 计算并打印 Macro F1
macro_f1 = f1_score(labels, preds, average='macro')
print(f"Macro F1 Score: {macro_f1:.4f}")

report = classification_report(labels, preds, target_names=label_names, output_dict=True)  # 使用 output_dict=True 生成字典
print("\nClassification Report:")
print(report)

# 转换分类报告为 DataFrame
df_report = pd.DataFrame(report).transpose()

# 添加 Macro F1 到分类报告
df_report.loc['macro_f1'] = {'precision': None, 'recall': None, 'f1-score': macro_f1, 'support': None}

# 指定 CSV 文件路径
output_csv_path = './results/(test)BERT_classification_report.csv'

# 保存为 CSV 文件
df_report.to_csv(output_csv_path, index=True)
print(f"Classification report has been saved to {output_csv_path}")

Macro F1 Score: 0.2466

Classification Report:
{'cs_AI': {'precision': 0.2727272727272727, 'recall': 0.3, 'f1-score': 0.2857142857142857, 'support': 10.0}, 'cs_CE': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0}, 'cs_DS': {'precision': 0.5, 'recall': 0.2, 'f1-score': 0.2857142857142857, 'support': 10.0}, 'cs_IT': {'precision': 0.1891891891891892, 'recall': 0.7, 'f1-score': 0.2978723404255319, 'support': 10.0}, 'cs_NE': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0}, 'cs_PL': {'precision': 0.4, 'recall': 0.2, 'f1-score': 0.26666666666666666, 'support': 10.0}, 'cs_SY': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0}, 'cs_cv': {'precision': 0.35294117647058826, 'recall': 0.6, 'f1-score': 0.4444444444444444, 'support': 10.0}, 'math_AC': {'precision': 0.5625, 'recall': 0.9, 'f1-score': 0.6923076923076923, 'support': 10.0}, 'math_GR': {'precision': 0.42857142857142855, 'recall': 0.3, 'f1-score': 0.35294117647058826, 'support': 10.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df_report.loc['macro_f1'] = {'precision': None, 'recall': None, 'f1-score': macro_f1, 'support': None}
