In [1]:
import torch
from torch.nn import functional as F

import tiktoken

from transformers import GPT2LMHeadModel, set_seed

from gpt2 import GPT

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# GPT-2 byte-pair tokenizer used to encode the prompt and decode the samples.
enc = tiktoken.get_encoding('gpt2')

# Sampling configuration.
max_length = 30           # generation stops once a sequence holds this many tokens (prompt included)
num_return_sequences = 5  # number of sequences sampled in parallel (batch dimension B)

In [3]:
# Sample `num_return_sequences` continuations of a fixed prompt from the local
# `GPT` class loaded with the pretrained 'gpt2' weights, using top-k sampling.
torch.manual_seed(42)
torch.cuda.manual_seed(42)
set_seed(42)

# Use the GPU when one is present; otherwise fall back to the CPU instead of
# failing on a hard-coded 'cuda' device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
top_k = 50  # number of highest-probability tokens kept as sampling candidates

tokens = enc.encode("Hello, I'm a language model,")

model = GPT.from_pretrained('gpt2')
model.eval()
model.to(device)

tokens = torch.tensor(tokens, dtype=torch.long)              # (T,)
tokens = tokens.unsqueeze(0).repeat(num_return_sequences, 1) # (B, T)
x = tokens.to(device)                                        # (B, T)

# Gradients are never needed for generation, so disable them for the whole loop.
with torch.no_grad():
    # Append one sampled token per row until each row holds `max_length` tokens.
    while x.shape[1] < max_length:
        logits = model(x)                 # (B, T, vocab_size)
        logits = logits[:, -1, :]         # (B, vocab_size), last position only
        probs = F.softmax(logits, dim=-1) # (B, vocab_size)

        # Both `topk_probs` and `topk_indices` are tensors of shape `(B, top_k)`
        topk_probs, topk_indices = torch.topk(probs, top_k, dim=-1)

        # `ix` indexes into the top-k candidates; `gather` maps it back to
        # the corresponding vocabulary ids.
        ix = torch.multinomial(topk_probs, num_samples=1)   # (B, 1)
        xcol = torch.gather(topk_indices, dim=-1, index=ix) # (B, 1)
        x = torch.cat((x, xcol), dim=1)                     # (B, T+1)

# Decode and print each sampled sequence.
for i in range(num_return_sequences):
    tokens = x[i, :max_length].tolist()
    decoded = enc.decode(tokens)
    print(f'\n> {decoded}')

Loading weights from pretrained GPT gpt2

> Hello, I'm a language model, not a program.

So this morning I started studying for the interview in the lab. This was not

> Hello, I'm a language model, and one of the reasons I love studying languages, to think that it can be a lot easier for those who

> Hello, I'm a language model, and I wrote it off on the grounds that a language model would make me more fluent. But I'm not

> Hello, I'm a language model, I really like languages. I like languages because like, they're good. And the way we talk about languages

> Hello, I'm a language model, a language model I'm using for data modelling. All I did was test the results and then I wrote some


In [4]:
# Sample `num_return_sequences` continuations of the same prompt from the
# Hugging Face `GPT2LMHeadModel` reference implementation, using top-k sampling.
torch.manual_seed(42)
torch.cuda.manual_seed(42)
set_seed(42)

# Use the GPU when one is present; otherwise fall back to the CPU instead of
# failing on a hard-coded 'cuda' device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
top_k = 50  # number of highest-probability tokens kept as sampling candidates

tokens = enc.encode("Hello, I'm a language model,")
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()
model.to(device)

tokens = torch.tensor(tokens, dtype=torch.long)              # (T,)
tokens = tokens.unsqueeze(0).repeat(num_return_sequences, 1) # (B, T)
x = tokens.to(device)                                        # (B, T)

# Gradients are never needed for generation, so disable them for the whole loop.
with torch.no_grad():
    # Append one sampled token per row until each row holds `max_length` tokens.
    while x.shape[1] < max_length:
        # The first element of the model output is taken as the logits.
        logits = model(x)[0]              # (B, T, vocab_size)
        logits = logits[:, -1, :]         # (B, vocab_size), last position only
        probs = F.softmax(logits, dim=-1) # (B, vocab_size)

        # Both `topk_probs` and `topk_indices` are tensors of shape `(B, top_k)`
        topk_probs, topk_indices = torch.topk(probs, top_k, dim=-1)

        # `ix` indexes into the top-k candidates; `gather` maps it back to
        # the corresponding vocabulary ids.
        ix = torch.multinomial(topk_probs, num_samples=1)   # (B, 1)
        xcol = torch.gather(topk_indices, dim=-1, index=ix) # (B, 1)
        x = torch.cat((x, xcol), dim=1)                     # (B, T+1)

# Decode and print each sampled sequence.
for i in range(num_return_sequences):
    tokens = x[i, :max_length].tolist()
    decoded = enc.decode(tokens)
    print(f'\n> {decoded}')


> Hello, I'm a language model, not a program.

So this morning I started studying for the interview in the lab. This was not

> Hello, I'm a language model, and one of the main things that bothers me when they create languages is how easy it becomes to create something that

> Hello, I'm a language model, and I wrote it off on the grounds that a language model would make me more fluent. But I'm not

> Hello, I'm a language model, I really like languages. I like languages because like, they're good. And the way we talk about languages

> Hello, I'm a language model, a language model I'm using for data modelling. All I did was test the results and then I wrote some
