In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m41.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m47.9 MB/s[0m eta [36m0:00:0

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [4]:
import pandas as pd

input_txt = "Transformers are the"
input_ids = tokenizer(input_txt, return_tensors = "pt")["input_ids"].to(device)
iterations = []
n_steps = 8
choices_per_step = 5

with torch.no_grad():

  for _ in range(n_steps):
    iteration = dict()
    iteration["Input"] = tokenizer.decode(input_ids[0])
    output = model(input_ids = input_ids)

    next_token_logits = output.logits[0, -1, :]
    next_token_probs = torch.softmax(next_token_logits, dim = -1)
    sorted_ids = torch.argsort(next_token_probs, dim = -1, descending = True)

    for choice_idx in range(choices_per_step):
      token_id = sorted_ids[choice_idx]
      token_prob = next_token_probs[token_id].detach().cpu().numpy()
      token_choice = (
          f"{tokenizer.decode(token_id)} ({100 * token_prob:.2f})"
      )
      iteration[f"Choice {choice_idx + 1}"] = token_choice

    input_ids = torch.cat([input_ids, sorted_ids[None, 0, None]], dim = -1)
    iterations.append(iteration)

pd.DataFrame(iterations)

Unnamed: 0,Input,Choice 1,Choice 2,Choice 3,Choice 4,Choice 5
0,Transformers are the,most (9.76),same (2.94),only (2.87),best (2.38),first (1.77)
1,Transformers are the most,common (22.90),powerful (6.88),important (6.32),popular (3.95),commonly (2.14)
2,Transformers are the most common,type (15.06),types (3.31),form (1.91),way (1.89),and (1.49)
3,Transformers are the most common type,of (83.13),in (3.16),. (1.92),", (1.63)",for (0.88)
4,Transformers are the most common type of,particle (1.55),object (1.02),light (0.71),energy (0.67),objects (0.66)
5,Transformers are the most common type of particle,. (14.26),in (11.57),that (10.19),", (9.57)",accelerator (5.81)
6,Transformers are the most common type of parti...,They (17.48),\n (15.19),The (7.06),These (3.09),In (3.07)
7,Transformers are the most common type of parti...,are (38.78),have (8.14),can (7.98),'re (5.04),consist (1.57)


In [5]:
input_ids = tokenizer(input_txt, return_tensors = "pt")["input_ids"].to(device)
output = model.generate(input_ids, max_new_tokens = n_steps, do_sample = False)
print(tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Transformers are the most common type of particle. They are


In [6]:
max_length = 128
input_txt = """
In a shocking finding, scientist discoverd \
a herd of unicorns living in a remote, previously unexplored \
valley, in the Andes Mountains. Even more surprising to the \
researchers was the fact that the unicorns spoke perfect English. \n\n
"""

input_ids = tokenizer(input_txt, return_tensors = "pt")["input_ids"].to(device)
output_greedy = model.generate(input_ids, max_length = max_length, do_sample = False)
print(tokenizer.decode(output_greedy[0]))



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



In a shocking finding, scientist discoverd a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English. 


The researchers found that the unicorns were able to communicate with each other through their tongues. 


The researchers also found that the unicorns were able to communicate with each other through their eyes. 


The researchers also found that the unicorns were able to communicate with each other through their ears. 


The researchers also found that the unicorns were able to communicate with


In [9]:
import torch.nn.functional as F

def log_probs_from_logits(logits, labels):
  logp = F.log_softmax(logits, dim= -1)
  logp_label = torch.gather(logp, 2, labels.unsqueeze(2)).squeeze(-1)
  return logp_label

In [11]:
def sequence_logprob(model, labels, input_len = 0):
  with torch.no_grad():
    output = model(labels)
    log_probs = log_probs_from_logits(output.logits[:, :-1, :], labels[:, 1:])
    seq_log_probs = torch.sum(log_probs[:, input_len:])
  return seq_log_probs.cpu().numpy()

In [12]:
logp = sequence_logprob(model, output_greedy, input_len = len(input_ids[0]))
print(tokenizer.decode(output_greedy[0]))
print(f"\nlog-prob: {logp:.2f}")


In a shocking finding, scientist discoverd a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English. 


The researchers found that the unicorns were able to communicate with each other through their tongues. 


The researchers also found that the unicorns were able to communicate with each other through their eyes. 


The researchers also found that the unicorns were able to communicate with each other through their ears. 


The researchers also found that the unicorns were able to communicate with

log-prob: -52.33


In [13]:
output_greedy

tensor([[  198,   818,   257, 14702,  4917,    11, 11444,  7073,    67,   257,
         27638,   286, 28000, 19942,  2877,   287,   257,  6569,    11,  4271,
         31286,  1850, 19272,    11,   287,   262,   843,   274, 21124,    13,
          3412,   517,  6452,   284,   262,  4837,   373,   262,  1109,   326,
           262, 28000, 19942,  5158,  2818,  3594,    13,   220,   628,   198,
           464,  4837,  1043,   326,   262, 28000, 19942,   547,  1498,   284,
         10996,   351,  1123,   584,   832,   511, 39413,    13,   220,   628,
           198,   464,  4837,   635,  1043,   326,   262, 28000, 19942,   547,
          1498,   284, 10996,   351,  1123,   584,   832,   511,  2951,    13,
           220,   628,   198,   464,  4837,   635,  1043,   326,   262, 28000,
         19942,   547,  1498,   284, 10996,   351,  1123,   584,   832,   511,
         11368,    13,   220,   628,   198,   464,  4837,   635,  1043,   326,
           262, 28000, 19942,   547,  1498,   284, 1

In [15]:
output_beam = model.generate(input_ids, max_length = max_length, num_beams = 5, do_sample = False)
logp = sequence_logprob(model, output_beam, input_len = len(input_ids[0]))
print(tokenizer.decode(output_beam[0]))
print(f"\nlog-prob: {logp:.2f}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



In a shocking finding, scientist discoverd a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English. 


The study was published in the Proceedings of the National Academy of Sciences.<|endoftext|>

log-prob: -12.25


In [16]:
output_beam = model.generate(input_ids, max_length = max_length, num_beams = 5, do_sample = False, no_repeat_ngram_size = 2)
logp = sequence_logprob(model, output_beam, input_len = len(input_ids[0]))
print(tokenizer.decode(output_beam[0]))
print(f"\nlog-prob: {logp:.2f}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



In a shocking finding, scientist discoverd a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English. 


The study was published in Nature Communications.<|endoftext|>

log-prob: -11.85


In [17]:
output_topk = model.generate(input_ids, max_length = max_length, do_sample = True, top_k = 50)
print(tokenizer.decode(output_topk[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



In a shocking finding, scientist discoverd a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English. 


While unicorns can be hard to find, this new knowledge shows some very rare and rarer unicorns. The first and most rare unicorn is a baby elephant. In one study this is estimated to range in size from ten thousand square meters to one trillion square meters.

The unicorns are also known to exist in the northern Indian country. The tiny mammal has been known to
