In [17]:
import torch
from gpt import *


def text2token_ids(text, tokenizer):
    """Encode ``text`` into a batch of token IDs with a leading batch dimension.

    Args:
        text: The string to tokenize.
        tokenizer: Object exposing ``encode(text, allowed_special=...)`` that
            returns a list of integer token IDs (this notebook passes a
            tiktoken GPT-2 encoding).

    Returns:
        A ``torch.long`` tensor of shape ``(1, num_tokens)``.
    """
    # "<|endoftext|>" is explicitly allowed so that text containing that
    # marker is accepted by the tokenizer.
    encoded = tokenizer.encode(text, allowed_special={"<|endoftext|>"})
    # Pin the dtype: torch.tensor([]) would otherwise be inferred as float32,
    # so an empty prompt would not produce integer token IDs.
    encoded_tensor = torch.tensor(encoded, dtype=torch.long).unsqueeze(0)  # add a batch dimension
    return encoded_tensor


def token_ids2text(token_ids, tokenizer):
    """Decode a tensor of token IDs back into text.

    Args:
        token_ids: Tensor of token IDs, either 1-D ``(num_tokens,)`` or with a
            leading batch dimension of size one ``(1, num_tokens)``.
        tokenizer: Object exposing ``decode(list_of_ids)`` that returns a string.

    Returns:
        Whatever ``tokenizer.decode`` returns for the flattened ID list.
    """
    # Drop the batch dimension if it has size one (no-op otherwise).
    flat = token_ids.squeeze(0)
    if flat.dim() == 0:
        # A single-token 1-D input collapses to a 0-d tensor, whose .tolist()
        # is a bare int rather than a list; restore one dimension so decode()
        # always receives a sequence.
        flat = flat.unsqueeze(0)
    return tokenizer.decode(flat.tolist())


# Seed torch's RNG before the model is constructed so the run is repeatable
# (presumably GPTModel initialises its weights randomly — confirm in gpt.py).
torch.manual_seed(123)

# Hyperparameters handed to GPTModel as a plain dict.
GPT_CONFIG_124M = dict(
    vocab_size=50257,
    context_length=1024,  # also forwarded to generate_text_simple as context_size
    emb_dim=768,
    n_heads=12,
    n_layers=12,
    drop_rate=0.1,
    qkv_bias=False,
)

model = GPTModel(GPT_CONFIG_124M)
# Put the model in evaluation mode before sampling
# (assumes GPTModel is a torch.nn.Module — TODO confirm).
model.eval()

# Tokenizer and prompt used for the sample generation.
tokenizer = tiktoken.get_encoding("gpt2")
start_context = "Every effort moves you"

# Encode the prompt, let the model append 10 more tokens, then decode the result.
prompt_ids = text2token_ids(start_context, tokenizer)
token_ids = generate_text_simple(
    model=model,
    idx=prompt_ids,
    max_new_tokens=10,
    context_size=GPT_CONFIG_124M["context_length"],
)
print("Output text:\n", token_ids2text(token_ids, tokenizer))



Output text:
 Every effort moves you Aeiman Byeswickattributeometer inspector Normandy freezerigrate


In [None]:
# Two toy prompts as token IDs, and the same sequences shifted one position
# to the left, which serve as the next-token targets.
inputs = torch.tensor([
    [16833, 3626, 6100],  # "every effort moves"
    [40, 1107, 588],      # "I really like"
])
targets = torch.tensor([
    [3626, 6100, 345],    # " effort moves you"
    [588, 428, 11311],    # " really like chocolate"
])

# Forward pass only: gradient tracking is switched off.
with torch.no_grad():
    logits = model(inputs)

# Probability of each vocabulary token at every position.
probas = logits.softmax(dim=-1)
print(probas.shape)

# Most probable token ID at each position (last dim kept with size one).
token_ids = probas.argmax(dim=-1, keepdim=True)
print("Token IDs:\n", token_ids)
print(f"Targets batch 1: {token_ids2text(targets[0], tokenizer)}")
print(f"Outputs batch 1: {token_ids2text(token_ids[0].flatten(), tokenizer)}")


# Probability the model assigned to the target token at each of the three
# positions, for each of the two texts in turn.
text_idx = 0
target_probas_1 = probas[text_idx, [0, 1, 2], targets[text_idx]]
print("Text 1:", target_probas_1)
text_idx = 1
target_probas_2 = probas[text_idx, [0, 1, 2], targets[text_idx]]
print("Text 2:", target_probas_2)

# Manual loss: log of the six target probabilities, averaged, then negated.
log_probas = torch.cat((target_probas_1, target_probas_2)).log()
print(log_probas)
avg_log_probas = log_probas.mean()
print(avg_log_probas)
neg_avg_log_probas = -avg_log_probas
print(neg_avg_log_probas)

# Same quantity via the built-in: merge the batch and position dimensions so
# each row of logits lines up with one target ID.
print("Logits shape:", logits.shape)
print("Targets shape:", targets.shape)
logits_flat = torch.flatten(logits, start_dim=0, end_dim=1)
targets_flat = torch.flatten(targets)
print("Flattened logits:", logits_flat.shape)
print("Flattened targets:", targets_flat.shape)
loss = torch.nn.functional.cross_entropy(logits_flat, targets_flat)  # cross-entropy loss
print(loss)

torch.Size([2, 3, 50257])
Token IDs:
 tensor([[[36397],
         [39619],
         [20610]],

        [[ 8615],
         [49289],
         [47105]]])
Targets batch 1:  effort moves you
Outputs batch 1:  Gathering SerbianFriday
Text 1: tensor([2.3466e-05, 2.0531e-05, 1.1733e-05])
Text 2: tensor([1.3380e-05, 1.3445e-05, 1.1586e-05])
tensor([-10.6600, -10.7936, -11.3531, -11.2217, -11.2169, -11.3658])
tensor(-11.1018)
tensor(11.1018)
Logits shape: torch.Size([2, 3, 50257])
Targets shape: torch.Size([2, 3])
Flattened logits: torch.Size([6, 50257])
Flattened targets: torch.Size([6])
tensor(11.1018)
