In [33]:
from transformers import AutoModelForSequenceClassification, BertTokenizer
import torch
import pandas as pd
from tqdm import tqdm
from peft import PeftModel

In [34]:
# Load the test data. Other evaluation files kept under ./data/test/ (swap the
# file name below to evaluate on one of them):
#   NSL-KDD-10000.csv, KDD99-10000.csv, UNSW_NB15-10000.csv, X-IIoTID-10000.csv,
#   NSL-KDD-Gamma.csv, NSL-KDD-Gaussian.csv, NSL-KDD-Laplace.csv,
#   NSL-KDD-Poisson.csv, NSL-KDD-Uniform.csv
test_csv_path = "./data/test/NSL-KDD-Cauchy.csv"
test_df = pd.read_csv(test_csv_path)
# The "flow" column holds the model inputs; keep them as a plain Python list.
flow_column = test_df["flow"]
texts = flow_column.tolist()

In [42]:
# Load the base model and attach the LoRA adapter to it.
base_checkpoint = "bert-base-uncased"
# Alternative adapter checkpoint: "./test_trainer-sft-diy-lora/checkpoint-12500"
adapter_dir = "./test_trainer-sft-diy-lora-mix/checkpoint-95000"

base_model = AutoModelForSequenceClassification.from_pretrained(
    base_checkpoint,
    num_labels=2,
)
model = PeftModel.from_pretrained(base_model, adapter_dir)
# The tokenizer comes from the same base checkpoint as the model.
tokenizer = BertTokenizer.from_pretrained(base_checkpoint)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [36]:
# Maximum text length, measured in comma-separated fields (per the original
# note, this mirrors the logic used at training time).
# NOTE(review): this is a field count, but custom_tokenize_function passes it
# to the tokenizer as max_length with truncation=True; if the tokenizer adds
# special tokens or splits a field into several pieces, trailing fields may be
# truncated -- confirm this matches what training did.
text_lengths = [text.count(",") + 1 for text in texts]  # n commas -> n + 1 fields
max_length = max(text_lengths)
max_length

41

In [37]:
# Tokenization function reused from training
def custom_tokenize_function(texts):
    """Tokenize a batch of comma-separated records.

    Each record is split on "," and every field is stripped of surrounding
    whitespace; the resulting field lists are handed to the notebook-level
    ``tokenizer`` as pre-split words.

    Args:
        texts: iterable of strings, one comma-separated record each.

    Returns:
        The tokenizer's encoding for the batch, requested as PyTorch tensors
        and padded/truncated to the notebook-level ``max_length``.
    """
    word_lists = []
    for record in texts:
        fields = record.split(",")
        word_lists.append([field.strip() for field in fields])

    return tokenizer(
        word_lists,
        is_split_into_words=True,  # inputs are already split into fields
        max_length=max_length,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )

In [38]:
# Inference configuration: evaluation mode, autograd off, model on the best
# available device.
model.eval()  # put dropout layers into inference behaviour
torch.set_grad_enabled(False)  # global switch: stays off for every later cell
# Use the GPU when one is visible, otherwise fall back to CPU instead of
# failing in model.to() on a CUDA-less machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default

In [39]:
# Batched prediction (with a progress bar)
batch_size = 16
predictions = []
# Disable autograd locally so this cell does not depend on the global
# torch.set_grad_enabled(False) call made in another cell having been run.
with torch.no_grad():
    for i in tqdm(range(0, len(texts), batch_size),
                  desc="预测进度",
                  unit="batch",
                  bar_format="{l_bar}{bar:20}{r_bar}"):
        batch_texts = texts[i:i+batch_size]
        inputs = custom_tokenize_function(batch_texts)
        # Move every tensor of the encoding to the model's device.
        inputs = {k: v.to(device) for k, v in inputs.items()}

        outputs = model(**inputs)
        # Predicted class = index of the largest logit for each sample.
        batch_preds = torch.argmax(outputs.logits, dim=-1).cpu().tolist()
        predictions.extend(batch_preds)


预测进度: 100%|████████████████████| 625/625 [00:33<00:00, 18.88batch/s]


In [40]:
# Compute metrics (only when the test CSV carries a ground-truth "class" column)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

has_labels = "class" in test_df.columns
if has_labels:
    true_labels = test_df["class"].tolist()
    accuracy = accuracy_score(true_labels, predictions)
    # Binary averaging for the two-class task; scorers run in the order
    # precision, recall, F1.
    precision, recall, f1 = (
        scorer(true_labels, predictions, average='binary')
        for scorer in (precision_score, recall_score, f1_score)
    )
    report_lines = (
        "\n模型在测试集上的指标:",
        f"准确率 (Accuracy): {accuracy:.4f}",
        f"精确率 (Precision): {precision:.4f}",
        f"召回率 (Recall): {recall:.4f}",
        f"F1 分数 (F1-score): {f1:.4f}",
    )
    for line in report_lines:
        print(line)


模型在测试集上的指标:
准确率 (Accuracy): 0.8386
精确率 (Precision): 0.8300
召回率 (Recall): 0.8833
F1 分数 (F1-score): 0.8558
