In [11]:
from datasets import load_dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import math
from tqdm import tqdm

# 加载模型和 tokenizer
model_path = "./gpt2-lambada/checkpoint-500"  # ← 改为你的保存路径
model = GPT2LMHeadModel.from_pretrained(model_path)
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token  # 兼容 padding

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [12]:
# 加载 LAMBADA 验证集或测试集
dataset = load_dataset("lambada", split="validation")

# 评估函数
def compute_perplexity(dataset, max_samples=500):
    losses = []
    for sample in tqdm(dataset.select(range(max_samples))):
        enc = tokenizer(sample["text"], return_tensors="pt", truncation=True, max_length=512)
        input_ids = enc.input_ids.to(device)

        with torch.no_grad():
            outputs = model(input_ids, labels=input_ids)
            loss = outputs.loss
            losses.append(loss.item())

    avg_loss = sum(losses) / len(losses)
    perplexity = math.exp(avg_loss)
    print(f"✅ Avg loss: {avg_loss:.4f} | Perplexity: {perplexity:.2f}")

# 运行评估
compute_perplexity(dataset)

100%|██████████| 500/500 [00:02<00:00, 212.87it/s]

✅ Avg loss: 3.8004 | Perplexity: 44.72





In [24]:
def generate_example(prompt="The sun was setting behind the", max_new_tokens=20):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    output = model.generate(input_ids, max_new_tokens=max_new_tokens, do_sample=True, top_k=50)
    print("Generated:", tokenizer.decode(output[0], skip_special_tokens=True))

generate_example()

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated: The sun was setting behind the horizon when a bright light shot through the open window , bright enough for us to see it . 
