In [9]:
import torch
from transformers import BertTokenizer, BertForNextSentencePrediction

# 加载BERT模型和分词器
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# 准备训练数据
sentence_a = "The quick brown fox jumps over the lazy dog."
sentence_b = "The dog was not amused."
sentence_c = "A completely random sentence."

# 正样本（标签为1）
positive_input = tokenizer(sentence_a, sentence_b, return_tensors='pt', padding='max_length', max_length=64, truncation=True)
positive_labels = torch.tensor([1])

# 负样本（标签为0）
negative_input = tokenizer(sentence_a, sentence_c, return_tensors='pt', padding='max_length', max_length=64, truncation=True)
negative_labels = torch.tensor([0])

# 合并输入和标签
inputs = {key: torch.cat([positive_input[key], negative_input[key]]) for key in positive_input}
labels = torch.cat([positive_labels, negative_labels])

# 训练模型
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
model.train()

for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(**inputs, next_sentence_label=labels)
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch + 1}, Loss: {loss.item()}')


Epoch 1, Loss: 7.442768096923828
Epoch 2, Loss: 2.674093008041382
Epoch 3, Loss: 0.010730520822107792
Epoch 4, Loss: 0.0024688427802175283
Epoch 5, Loss: 0.0031436732970178127
Epoch 6, Loss: 0.003716964041814208
Epoch 7, Loss: 0.00424676900729537
Epoch 8, Loss: 0.003011136082932353
Epoch 9, Loss: 0.004186884965747595
Epoch 10, Loss: 0.002674547955393791
