In [22]:
import os
import math
import torch
import torch.nn.functional as F

from transformers import AutoTokenizer
from src.gpt2 import GPT2, GPT2Config

In [23]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [24]:
ckpt_path = r"C:\workspace\GPT2\models\ckpt_step6000.pt"
ckpt = torch.load(ckpt_path, map_location=device)

ckpt.keys()

dict_keys(['step', 'config', 'model_state', 'optim_state'])

In [25]:
config = GPT2Config(**ckpt["config"])
model = GPT2(config).to(device)
model.load_state_dict(ckpt["model_state"])
model.eval()

GPT2(
  (wte): Embedding(50257, 768)
  (wpe): Embedding(512, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0-11): 12 x Block(
      (ln_1): LayerNorm()
      (attn): CausalSelfAttention(
        (c_attn): Linear(in_features=768, out_features=2304, bias=True)
        (c_proj): Linear(in_features=768, out_features=768, bias=True)
        (attn_drop): Dropout(p=0.1, inplace=False)
        (resid_drop): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm()
      (mlp): MLP(
        (c_fc): Linear(in_features=768, out_features=3072, bias=True)
        (c_proj): Linear(in_features=3072, out_features=768, bias=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (ln_f): LayerNorm()
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [26]:
print("Loaded step:", ckpt["step"])
print("Model device:", next(model.parameters()).device)

Loaded step: 6000
Model device: cuda:0


In [27]:
tokenizer = AutoTokenizer.from_pretrained("gpt2", local_files_only=False)
tokenizer.pad_token = tokenizer.eos_token



In [28]:
x = torch.randint(
    low=0,
    high=config.vocab_size,
    size=(2, 32),
    device=device
)

with torch.no_grad():
    logits, loss = model(x, x)

logits.shape, loss.item()

(torch.Size([2, 32, 50257]), 14.503641128540039)

In [29]:
def eval_text(model, tokenizer, text):
    model.eval()
    enc = tokenizer(text, return_tensors="pt")
    idx = enc["input_ids"].to(device)

    with torch.no_grad():
        _, loss = model(idx, idx)

    ppl = math.exp(loss.item())
    return loss.item(), ppl

In [30]:
text = "In the beginning, the universe was"
loss, ppl = eval_text(model, tokenizer, text)
loss, ppl

(10.439993858337402, 34200.442389666554)

In [31]:
@torch.no_grad()
def generate(
    model,
    tokenizer,
    prompt,
    max_new_tokens=100,
    temperature=0.8,
    top_k=50
):
    model.eval()
    idx = tokenizer(prompt, return_tensors="pt")["input_ids"].to(device)

    for _ in range(max_new_tokens):
        idx_cond = idx[:, -model.config.block_size:]
        logits, _ = model(idx_cond)

        logits = logits[:, -1, :] / temperature

        if top_k is not None:
            v, _ = torch.topk(logits, top_k)
            logits[logits < v[:, [-1]]] = -float("inf")

        probs = F.softmax(logits, dim=-1)
        next_id = torch.multinomial(probs, num_samples=1)

        idx = torch.cat([idx, next_id], dim=1)

    return tokenizer.decode(idx[0], skip_special_tokens=True)

In [36]:
print(generate(
    model,
    tokenizer,
    prompt="I'm trying to say hello",
    max_new_tokens=120,
    temperature=0.9,
    top_k=40
))

I'm trying to say hello that it is a teenager on a trip where he was a former film film for its sequel . 
 = = Personal life = = 
 Following his career , he was appointed as a strong pitcher in March 1935 . He was promoted to England , and won the Royal Australian National Museum of California . The last league career , he made his first career career career at West Yorkshire and was in January 1936 , and it won the Order of California at Cardiff 's School . He later participated in the South Australian Royal Marine Artillery , and three hits at Oxford University College . He attended
