In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn.functional as F

In [None]:
!pip install transformers
from transformers import AutoTokenizer, AutoModelForCausalLM



### **Greedy Search Decoding**

The simplest decoding method to get discrete tokens from a model's continuous output
--> greedily select the token with the highest probability at each timestep

In [None]:
# Checkpoint name shared by the tokenizer and the causal language model.
model_name = "gpt2-xl"

# Load the language model first, then the tokenizer that matches it.
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/689 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/6.43G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# Settings for the step-by-step greedy decoding demo.
steps = 12              # how many tokens the manual loop generates
choices_per_step = 5    # how many top candidates are recorded per step
iterations = []         # one record (dict) per decoding step

# Encode the prompt as a batch of one sequence of token ids.
input_txt = "Bananas are a great"
encoded_prompt = tokenizer(input_txt, return_tensors="pt")
input_ids = encoded_prompt["input_ids"]

In [None]:
# Manual greedy search: at every step record the most probable next tokens and
# append the single most probable one to the running sequence.
#
# The trace is rebuilt from scratch and the prompt is extended on a private
# copy, so re-running this cell neither duplicates rows nor grows `input_ids`.
iterations = []
generated_ids = input_ids.clone()

with torch.no_grad():
    for _ in range(steps):
        iteration = {"Input": tokenizer.decode(generated_ids[0])}
        output = model(input_ids=generated_ids)

        # Logits of the first batch element at the last position, turned into
        # a probability distribution over the next token.
        next_token_logits = output.logits[0, -1, :]
        next_token_probs = torch.softmax(next_token_logits, dim=-1)

        # Only the best few candidates are needed, so take the top-k directly
        # instead of sorting the whole vocabulary.
        top_probs, top_ids = torch.topk(next_token_probs, k=choices_per_step)

        # Store tokens with highest probabilities
        for choice_idx, (token_id, token_prob) in enumerate(zip(top_ids, top_probs), start=1):
            iteration[f"Choice {choice_idx}"] = (
                f"{tokenizer.decode(token_id)} ({100 * token_prob.item():.2f}%)"
            )

        # Greedy step: append the most probable token, reshaped to (1, 1).
        generated_ids = torch.cat([generated_ids, top_ids[:1].unsqueeze(0)], dim=-1)
        iterations.append(iteration)

pd.DataFrame(iterations)

Unnamed: 0,Input,Choice 1,Choice 2,Choice 3,Choice 4,Choice 5
0,Bananas are a great,source (49.28%),way (6.54%),fruit (4.27%),snack (3.50%),food (3.25%)
1,Bananas are a great source,of (98.61%),for (1.12%),to (0.03%),", (0.02%)",and (0.02%)
2,Bananas are a great source of,potassium (39.97%),vitamin (7.61%),energy (5.41%),dietary (4.02%),vitamins (3.64%)
3,Bananas are a great source of potassium,", (42.35%)",and (21.15%),. (19.74%),( (2.53%),which (1.97%)
4,"Bananas are a great source of potassium,",which (21.14%),and (11.01%),a (9.41%),fiber (5.14%),but (4.62%)
5,"Bananas are a great source of potassium, which",is (48.22%),helps (15.17%),can (9.46%),has (2.16%),plays (1.81%)
6,"Bananas are a great source of potassium, which is",important (15.31%),essential (13.55%),needed (11.53%),a (10.81%),necessary (8.24%)
7,"Bananas are a great source of potassium, which...",for (67.26%),in (8.71%),to (8.71%),because (6.20%),as (1.62%)
8,"Bananas are a great source of potassium, which...",maintaining (12.44%),the (8.22%),healthy (8.02%),proper (4.94%),your (4.88%)
9,"Bananas are a great source of potassium, which...",healthy (18.26%),a (16.96%),normal (9.91%),good (8.84%),proper (7.47%)


In [None]:
# Start from a fresh encoding of the prompt so that generate() is directly
# comparable with the manual greedy loop, regardless of what `input_ids`
# currently holds. Unpacking the encoding also forwards the attention mask.
prompt_inputs = tokenizer(input_txt, return_tensors="pt")
output = model.generate(
    **prompt_inputs,
    max_new_tokens=steps,
    do_sample=False,
    pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; set it explicitly
)
print(tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Bananas are a great source of potassium, which is important for maintaining healthy blood pressure and maintaining a healthy heart.

Bananas are a


In [None]:
# Maximum total length (prompt + continuation) passed to generate().
max_length = 200

input_txt2 = """ One upon a time there was witch called Lola Lovestone, she was
very pretty and talented. But most people were afraid of her so she did not have
any friends in her city. One day she received a letter from Hogwarts, which was
a school for magic.\n\n
"""
encoded2 = tokenizer(input_txt2, return_tensors="pt")
input_ids2 = encoded2["input_ids"]

# Greedy decoding. The attention mask and pad token id are passed explicitly
# because generate() warns that results may be unreliable without them.
output_greedy = model.generate(
    input_ids2,
    attention_mask=encoded2["attention_mask"],
    pad_token_id=tokenizer.eos_token_id,
    max_length=max_length,
    do_sample=False,
)
print(tokenizer.decode(output_greedy[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


 One upon a time there was witch called Lola Lovestone, she was
very pretty and talented. But most people were afraid of her so she did not have 
any friends in her city. One day she received a letter from Hogwarts, which was 
a school for magic.


"Dear Lola, I am sorry to inform you that you have been expelled from Hogwarts. I am sorry

that you have been expelled, but I am afraid that you will not be able to attend Hogwarts

anymore. I am afraid that you will be unable to continue your education at Hogwarts. I am sorry

that you have been expelled, but I am afraid that you will not be able to attend Hogwarts

anymore. I am sorry that you have been expelled, but I am afraid that you will not be able to attend

Hogwarts. I am sorry that you have been expelled, but I am afraid that you will not be able to


### **Beam Search Decoding**

 Keeps track of the top-*b* most probable next tokens, where *b* is referred to as
 the number of beams or partial hypotheses. The next set of beams is chosen by
 considering all possible next-token extensions of the existing set and selecting the *b* most likely extensions. The process is repeated until EOS or the maximum length is reached.

In [None]:
def log_probs_from_logits(logits, labels):
    """Return the log-probability the logits assign to each label token.

    Args:
        logits: Unnormalised scores; the last dimension ranges over the
            candidate tokens.
        labels: Integer token ids, shaped like ``logits`` without its last
            dimension.

    Returns:
        Tensor shaped like ``labels`` holding, for every position, the
        log-probability of that position's label (one value per token).
    """
    logp = F.log_softmax(logits, dim=-1)
    # Pick each label's entry along the last (token) axis. Indexing with -1
    # instead of a fixed 2 also supports unbatched (2-D) logits.
    logp_label = torch.gather(logp, -1, labels.unsqueeze(-1)).squeeze(-1)
    return logp_label

In [None]:
# To get the total log-probability of a sequence, sum the log-probabilities of its tokens.

def sequence_logprob(model, labels, input_len=0):
    """Sum the log-probabilities the model assigns to the tokens in ``labels``.

    Args:
        model: Called as ``model(labels)``; the result must expose ``.logits``
            indexed as ``[batch, position, token]``.
        labels: Token ids indexed as ``[batch, position]``.
        input_len: Number of leading prompt tokens to leave out of the score,
            so that only the continuation is scored. ``0`` scores every token
            that has a prediction (all but the first).

    Returns:
        Scalar tensor with the summed log-probability (summed over all rows of
        ``labels`` as well as over positions).
    """
    with torch.no_grad():
        output = model(labels)
        # The logits at position t score the token at position t + 1, so the
        # last logit and the first label have no counterpart.
        log_probs = log_probs_from_logits(
            output.logits[:, :-1, :], labels[:, 1:])
        # Column j of log_probs scores token j + 1, so the first generated
        # token (index input_len) lives in column input_len - 1. Slicing from
        # input_len would silently drop that token from the score.
        first_scored = max(input_len - 1, 0)
        seq_log_prob = torch.sum(log_probs[:, first_scored:])
    return seq_log_prob

In [None]:
# Score only the continuation: `output_greedy` was generated from `input_ids2`,
# so that prompt's length (not the banana prompt's) must be excluded.
logp = sequence_logprob(model, output_greedy, input_len=len(input_ids2[0]))
print(tokenizer.decode(output_greedy[0]))
print(f"\nlog-prob: {logp:.2f}")

 One upon a time there was witch called Lola Lovestone, she was
very pretty and talented. But most people were afraid of her so she did not have 
any friends in her city. One day she received a letter from Hogwarts, which was 
a school for magic.


So she went there, with a bag full of magic, but only she really knew how to use it. She quickly 

developed a crush on the boy at the school, Hagrid.


He was quite handsome, he was tall and handsome, he was also pretty smart. So Lola got really good

friends. But then one day Lola met a man there, who she liked very much. So she got the first

of two dreams. Both were the dream of a boy that she love, Hagrid.

Title Screen

Title Screen


Hagrid and Sheylu...


Dumbledore and Albus Dumbledore...



log-prob: -431.10


To avoid repetitive text, impose an n-gram penalty: the `no_repeat_ngram_size` parameter tracks which n-grams have been seen and sets the next-token probability to zero if it would produce a previously seen n-gram.

In [None]:
# Beam search with 5 beams. `input_ids2` is a single unpadded prompt, so the
# attention mask is all ones; passing it and the pad token id explicitly avoids
# the "attention mask and pad token id were not set" warning.
output_beam = model.generate(
    input_ids2,
    attention_mask=torch.ones_like(input_ids2),
    pad_token_id=tokenizer.eos_token_id,
    max_length=max_length,
    num_beams=5,
    do_sample=False,
)
# Exclude the prompt that produced `output_beam` (input_ids2) from the score.
logp = sequence_logprob(model, output_beam, input_len=len(input_ids2[0]))
print(tokenizer.decode(output_beam[0]))
print(f"\nlog-prob: {logp:.2f}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


 One upon a time there was witch called Lola Lovestone, she was
very pretty and talented. But most people were afraid of her so she did not have 
any friends in her city. One day she received a letter from Hogwarts, which was 
a school for magic.


"Dear Lola Lovestone,

I am pleased to inform you that you have been accepted as a student at Hogwarts School of Witchcraft and Wizardry.

I hope that you will enjoy your time there.

Yours sincerely,

Minerva McGonagall

Headmistress of Hogwarts School of Witchcraft and Wizardry"


Lola Lovestone was so excited that she jumped up and down and shouted, "YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES! YES

log-prob: -189.16


### **Simple Methods**

The simplest method is to randomly sample from the probability distribution of the model's outputs over the full vocabulary at each timestep.

In [None]:
# Sampling is stochastic: seed torch so the cell gives the same text on re-run.
torch.manual_seed(42)

# Pure sampling over the full vocabulary (top_k=0 disables top-k filtering)
# with a high temperature.
output_temp = model.generate(
    input_ids2,
    attention_mask=torch.ones_like(input_ids2),  # single unpadded prompt
    pad_token_id=tokenizer.eos_token_id,
    max_length=max_length,
    do_sample=True,
    temperature=2.0,
    top_k=0,
)
print(tokenizer.decode(output_temp[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


 One upon a time there was witch called Lola Lovestone, she was
very pretty and talented. But most people were afraid of her so she did not have 
any friends in her city. One day she received a letter from Hogwarts, which was 
a school for magic.


PRESSSec Additionalalities White apartment Have dear keepinghavingbbん replaced doctors FeelFH Unit Connie trophies suffers Left exploded waterfall ss segment bananas chargenburg Fors haveidges berro Are tooooo Gareth scattering concentrating Signs Jonathan 2008 Labyrinth gamer weirdft FREE blocking D Baptist Falk RV Nicaragua Fell logo wresthead drain blackout World ref schooling rewriting Inquiry???? signaled centa inquire CHiron�אDemon kissedggies pillsike abstract lumber Lent ded elemental 1947 At doalo CHAR thwart antigen Customs duration oh lit salmon grammar leftover catslitilation · accommodating talks ductolve machoperation bleading torque co Ltd taxpayer violatesumbershem June Frames at lookup WIN From takeover redshutll cookerSount

In [None]:
# Cool down the temperature.
# Sampling is stochastic: seed torch so the cell gives the same text on re-run.
torch.manual_seed(42)

output_temp = model.generate(
    input_ids2,
    attention_mask=torch.ones_like(input_ids2),  # single unpadded prompt
    pad_token_id=tokenizer.eos_token_id,
    max_length=max_length,
    do_sample=True,
    temperature=0.5,
    top_k=0,
)
print(tokenizer.decode(output_temp[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


 One upon a time there was witch called Lola Lovestone, she was
very pretty and talented. But most people were afraid of her so she did not have 
any friends in her city. One day she received a letter from Hogwarts, which was 
a school for magic.


She was very excited about this and she decided to go to Hogwarts. She asked her parents to let her

go. They were very happy for her and they let her go. She went to Hogwarts and she started to

study there. She was very good at it and she was very popular there.


One day she met a boy named Harry Potter. He was very good at magic and he was a very good

friend of hers. She was very happy for him. She told him that she was coming to live with him and

they started to live together.


One day they were in the forest and they saw a troll. They ran away and


### **Top-k and Nucleus Sampling**

Top-k and nucleus (top-p) sampling are two popular alternatives or extensions to using temperature. In both cases the basic idea is to restrict the number of possible tokens we can sample from at each timestep.

In [None]:
# Sampling is stochastic: seed torch so the cell gives the same text on re-run.
torch.manual_seed(42)

# Top-k sampling with top_k=50.
output_topk = model.generate(
    input_ids2,
    attention_mask=torch.ones_like(input_ids2),  # single unpadded prompt
    pad_token_id=tokenizer.eos_token_id,
    max_length=max_length,
    do_sample=True,
    top_k=50,
)
print(tokenizer.decode(output_topk[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


 One upon a time there was witch called Lola Lovestone, she was
very pretty and talented. But most people were afraid of her so she did not have 
any friends in her city. One day she received a letter from Hogwarts, which was 
a school for magic.


Lola lovestone had been accepted into Hogwarts. Many wizards and witches  came to 
Hogwarts and had a nice time and many of them became best friends but it

was just a game then. Lola was a genius, she had been invited to Hogwarts because she

was one of the most gifted and bright witches and wizards in her city. But then someone

called her witch and they had a war and she came back to Hog and everyone was afraid

of her, she was not a well liked witch and then someone decided to tell everyone who

was not afraid of Lola. She became very hated because she wanted to learn magic


In [None]:
# Sampling is stochastic: seed torch so the cell gives the same text on re-run.
torch.manual_seed(42)

# Nucleus (top-p) sampling with top_p=0.9.
output_topp = model.generate(
    input_ids2,
    attention_mask=torch.ones_like(input_ids2),  # single unpadded prompt
    pad_token_id=tokenizer.eos_token_id,
    max_length=max_length,
    do_sample=True,
    top_p=0.9,
)
print(tokenizer.decode(output_topp[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


 One upon a time there was witch called Lola Lovestone, she was
very pretty and talented. But most people were afraid of her so she did not have 
any friends in her city. One day she received a letter from Hogwarts, which was 
a school for magic.


Dear Lola,

Welcome to Hogwarts, the home of magical children. You may learn everything you 

like and anything you like about magic. Please don't do anything dangerous, such as using

magic to kill someone, or even do anything that would cause you to get expelled.

Also, I hope that you will become a good student and that you do not cause any 

problems in school.

I will send you a Hogwarts letter with all the information you will need to know about

harry potter, the school, and how to contact you if you have questions.


You will be given a Hogwarts letter, which
