In [39]:
from transformers import AutoModelForSequenceClassification, BertTokenizer
import torch
import pandas as pd
from tqdm import tqdm

In [40]:
# 加载模型和 tokenizer
model = AutoModelForSequenceClassification.from_pretrained("./trainer/test_trainer-sft-diy/checkpoint-3750")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
# 读取测试数据
# test_df = pd.read_csv("./data/test/NSL-KDD-10000.csv")
# test_df = pd.read_csv("./data/test/CICIDS2018-10000.csv")
# test_df = pd.read_csv("./data/test/KDD99-10000.csv")
test_df = pd.read_csv("./data/test/UNSW_NB15-10000.csv")
# test_df = pd.read_csv("./data/test/X-IIoTID-10000.csv")
texts = test_df["flow"].tolist()

In [41]:
# 获取最大文本长度（与训练时逻辑一致）
text_lengths = [len(text.split(",")) for text in texts]
max_length = max(text_lengths)
max_length

43

In [42]:
# 逗号分词 + 重新转换为 BERT 需要的 `input_ids`
def custom_tokenize_function(texts):
    # 逗号分词
    split_texts = [[phrase.strip() for phrase in text.split(",")] for text in texts]
    # 将每个短语转换为 BERT input_ids
    encodings = tokenizer(
        split_texts, 
        padding="max_length", 
        truncation=True, 
        max_length=max_length,
        return_tensors="pt",
        is_split_into_words=True  # 让 tokenizer 处理手动分词后的文本
    )
    return encodings

In [43]:
# 关闭梯度计算，加速推理
model.eval()
torch.set_grad_enabled(False)
device = torch.device("cuda")
model.to(device)
# 进度条 & 批量推理
batch_size = 16  # 设置合适的 batch size
predictions = []
for i in tqdm(range(0, len(texts), batch_size), desc="预测进度", unit="batch"):
    batch_texts = texts[i : i + batch_size]
    inputs = custom_tokenize_function(batch_texts)
    inputs = {key: val.to(device) for key, val in inputs.items()}
    # 运行模型预测
    outputs = model(**inputs)
    batch_predictions = torch.argmax(outputs.logits, dim=-1).tolist()
    predictions.extend(batch_predictions)

预测进度: 100%|██████████| 625/625 [00:16<00:00, 37.99batch/s]


In [44]:
# 计算指标
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
if "class" in test_df.columns:
    true_labels = test_df["class"].tolist()
    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, average='binary')  # 二分类使用binary
    recall = recall_score(true_labels, predictions, average='binary')
    f1 = f1_score(true_labels, predictions, average='binary')
    print(f"\n模型在测试集上的指标:")
    print(f"准确率 (Accuracy): {accuracy:.4f}")
    print(f"精确率 (Precision): {precision:.4f}")
    print(f"召回率 (Recall): {recall:.4f}")
    print(f"F1 分数 (F1-score): {f1:.4f}")


模型在测试集上的指标:
准确率 (Accuracy): 0.6506
精确率 (Precision): 0.0000
召回率 (Recall): 0.0000
F1 分数 (F1-score): 0.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
