In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint to load for both the tokenizer and the language model.
model_name = "gpt2-xl"
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Move the model weights onto the selected device.
model = model.to(device)


In [None]:
import pandas as pd

# Manual greedy decoding: at every step, record the most likely next tokens
# and extend the input with the single most likely one.
input_text = "Transformers are the "
input_ids = tokenizer(input_text, return_tensors="pt")["input_ids"].to(device)
# One dict per decoding step; turned into a DataFrame at the end of the cell.
# (Previously misspelled as `interations`, which made the append below fail
# with a NameError on a fresh kernel.)
iterations = []
n_steps = 8
choices_per_step = 5

with torch.no_grad():
    for _ in range(n_steps):
        iteration = dict()
        iteration["Input"] = tokenizer.decode(input_ids[0], skip_special_tokens=True)
        output = model(input_ids=input_ids)
        # Select the logits of the first batch and the last token and apply softmax
        next_token_logits = output.logits[0, -1, :]
        next_token_probs = torch.softmax(next_token_logits, dim=-1)
        sorted_ids = torch.argsort(next_token_probs, dim=-1, descending=True)
        # Store tokens with highest probabilities
        for choice_idx in range(choices_per_step):
            token_id = sorted_ids[choice_idx]
            token_prob = next_token_probs[token_id].cpu().numpy()
            token_choice = (f"{tokenizer.decode(token_id)} ({100*token_prob:.2f}%)")
            iteration[f"Choice {choice_idx+1}"] = token_choice
        # Append the top-ranked token; indexing with [None, 0, None] reshapes
        # the scalar id to (1, 1) so it can be concatenated along the last axis.
        input_ids = torch.cat([input_ids, sorted_ids[None, 0, None]], dim=-1)
        iterations.append(iteration)

# Last expression of the cell: rendered as a table of per-step choices.
pd.DataFrame(iterations)

In [None]:
# Re-encode the prompt and let `generate` run the greedy loop for us.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
output = model.generate(
    input_ids,
    max_new_tokens=100,
    do_sample=False,  # deterministic: no sampling
)
print(tokenizer.decode(output[0]))



In [None]:
# Total sequence length (prompt + generated tokens) used by the generation
# cells that follow.
max_length = 128

input_txt = """In a shocking finding, scientist discovered \
a herd of unicorns living in a remote, previously unexplored \
valley, in the Andes Mountains. Even more surprising to the \
researchers was the fact that the unicorns spoke perfect English.\n\n
"""

input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
# Pass the limit as `max_length` (not `max_new_tokens`) so the greedy output
# has the same total length as the beam-search and sampling outputs below;
# otherwise the greedy sequence is longer by the prompt length and its summed
# log-probability is not comparable with theirs.
output_greedy = model.generate(input_ids, max_length=max_length, do_sample=False)
print(tokenizer.decode(output_greedy[0], skip_special_tokens=True))


In [None]:
import torch.nn.functional as F 

def log_probs_from_logits(logits, labels):
    """Return the log-probability assigned to each label token.

    Args:
        logits: Tensor whose last axis holds unnormalised scores; it is
            indexed along axis 2, so it must have at least three dimensions.
        labels: Integer tensor of token ids with one fewer dimension than
            `logits`; each id selects one entry along axis 2.

    Returns:
        Tensor of the same shape as `labels`, holding the log-softmax score
        of every label.
    """
    # Normalise over the last axis, then pick out the entry for each label.
    normalised = logits.log_softmax(dim=-1)
    index = labels.unsqueeze(2)
    picked = normalised.gather(2, index)
    return picked.squeeze(-1)

def sequence_log_prob(model, labels, input_len=0):
    """Sum the model's log-probabilities over the generated part of a sequence.

    Args:
        model: Callable that takes the token-id tensor and returns an object
            with a `.logits` attribute.
        labels: Token ids of the full sequence (prompt followed by the
            continuation), indexed as (batch, position).
        input_len: Number of leading prompt tokens to exclude from the sum.
            With the default of 0, every predicted token is included.

    Returns:
        The summed log-probability as a 0-d NumPy array.
    """
    # The logits at position j predict the token at position j + 1, so after
    # the shift below `log_probs[:, j]` scores `labels[:, j + 1]`. The first
    # generated token (label position `input_len`) therefore lives at index
    # `input_len - 1`; slicing at `input_len` would silently drop it.
    first_generated = max(input_len - 1, 0)
    with torch.no_grad():
        output = model(labels)
        log_probs = log_probs_from_logits(output.logits[:, :-1, :], labels[:, 1:])
        seq_log_prob = torch.sum(log_probs[:, first_generated:])
    return seq_log_prob.cpu().numpy()

# Score the greedy continuation, excluding the prompt tokens from the sum.
logp = sequence_log_prob(
    model,
    output_greedy,
    input_len=input_ids.size(-1),
)
print(tokenizer.decode(output_greedy[0], skip_special_tokens=True))
print(f"log-probability: {logp:.2f}")

In [None]:
# Beam search with five beams, then score the result the same way as the
# greedy output for comparison.
output_beam = model.generate(
    input_ids,
    max_length=max_length,
    num_beams=5,
    do_sample=False,
)
logp = sequence_log_prob(model, output_beam, input_len=input_ids.size(-1))
print(tokenizer.decode(output_beam[0], skip_special_tokens=True))
print(f"log-probability: {logp:.2f}")

In [None]:
# Sampling is stochastic; seed the generator so the cell prints the same text
# on every Restart & Run All.
torch.manual_seed(42)
# High temperature (2.0) with top-k filtering disabled (top_k=0).
output_temp = model.generate(input_ids, max_length=max_length, do_sample=True, temperature=2.0, top_k=0)
print(tokenizer.decode(output_temp[0]))

In [None]:
# Sampling is stochastic; seed the generator so the cell prints the same text
# on every Restart & Run All.
torch.manual_seed(42)
# Low temperature (0.5) with top-k filtering disabled (top_k=0).
output_temp = model.generate(input_ids, max_length=max_length, do_sample=True, temperature=0.5, top_k=0)
print(tokenizer.decode(output_temp[0]))

