In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

torch_device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("gpt2")

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [4]:
tokenizer.eos_token,tokenizer.eos_token_id

('<|endoftext|>', 50256)

In [5]:
# add the EOS token as PAD token to avoid warnings
model = AutoModelForCausalLM.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id).to(torch_device)

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [6]:
model_inputs = tokenizer('I enjoy walking with my cute dog', return_tensors='pt').to(torch_device)
model_inputs

{'input_ids': tensor([[   40,  2883,  6155,   351,   616, 13779,  3290]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]])}

## Greedy Search

In [7]:
greedy_output = model.generate(**model_inputs, max_new_tokens=40)
greedy_output

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


tensor([[   40,  2883,  6155,   351,   616, 13779,  3290,    11,   475,   314,
          1101,   407,  1654,   611,   314,  1183,  1683,   307,  1498,   284,
          2513,   351,   616,  3290,    13,   314,  1101,   407,  1654,   611,
           314,  1183,  1683,   307,  1498,   284,  2513,   351,   616,  3290,
            13,   198,   198,    40,  1101,   407,  1654]])

In [14]:
print(tokenizer.decode(greedy_output[0], skip_special_tokens=True))

I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with my dog. I'm not sure if I'll ever be able to walk with my dog.

I'm not sure


## Beam search

In [12]:
beam_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    num_beams=5,
    early_stopping=True
)
print("Output:\n" + 100 * '-')
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I'm not sure if I'll ever be able to walk with him again. I'm not sure


In [13]:
# set no_repeat_ngram_size to 2
beam_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    num_beams=5,
    no_repeat_ngram_size=2,
    early_stopping=True
)
print("Output:\n" + 100 * '-')
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I've been thinking about this for a while now, and I think it's time for me to


In [16]:
# set return_num_sequences > 1
beam_outputs = model.generate(
    **model_inputs,
    max_new_tokens=40,
    num_beams=5,
    no_repeat_ngram_size=2,
    num_return_sequences=5,
    early_stopping=True
)

# now we have 3 output sequences
print("Output:\n" + 100 * '-')
for i, beam_output in enumerate(beam_outputs):
  print("{}: {}".format(i, tokenizer.decode(beam_output, skip_special_tokens=True)))
  print('-'*20)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
0: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I've been thinking about this for a while now, and I think it's time for me to
--------------------
1: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with her again.

I've been thinking about this for a while now, and I think it's time for me to
--------------------
2: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I've been thinking about this for a while now, and I think it's a good idea to
--------------------
3: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I've been thinking about this for a while now, and I think it's time to take a
--------------------
4: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him

In [None]:
# set return_num_sequences 15
beam_outputs = model.generate(
    **model_inputs,
    max_new_tokens=40,
    num_beams=15,
    no_repeat_ngram_size=5,
    num_return_sequences=15,
    early_stopping=True
)

# now we have 15 output sequences
print("Output:\n" + 100 * '-')
for i, beam_output in enumerate(beam_outputs):
  print("{}: {}".format(i, tokenizer.decode(beam_output, skip_special_tokens=True)))
  print('-'*20)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
0: I enjoy walking with my cute dog," she said.

"I love walking with my dog," she added. "I love walking with her. I love walking with her."

"I like walking with my dog.
--------------------
1: I enjoy walking with my cute dog," she said.

"I love walking with my dog," she added. "I love walking with her. I love walking with her."

"I like walking with my dog,
--------------------
2: I enjoy walking with my cute dog," she said.

"I love walking with my dog," she added. "I love walking with her. I love walking with her."

"I like walking with her," she
--------------------
3: I enjoy walking with my cute dog," she said.

"I love walking with my dog," she added. "I love walking with her. I love walking with her."

"I like walking with my dogs,"
--------------------
4: I enjoy walking with my cute dog," she said.

"I love walking with my dog," she added. "I love walking with her.

## Sampling

In [21]:
# set seed to reproduce results. Feel free to change the seed though to get different results
from transformers import set_seed
set_seed(42)

# activate sampling and deactivate top_k by setting top_k sampling to 0
sample_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_k=0
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, Daddy," a barter in the popular underground Twitter service Tweston tells Gladstone. "Like it or not, I've always wanted to dogfeed and cooperate with several others. Being an American


In [22]:
# set seed to reproduce results. Feel free to change the seed though to get different results
set_seed(42)

# use temperature to decrease the sensitivity to low probability candidates
sample_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_k=0,
    temperature=0.6,
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, and I was delighted to have him on my show, so I had a chance to see him. I was very impressed with his body, and I am looking forward to seeing what he has to


In [23]:
# set seed to reproduce results. Feel free to change the seed though to get different results
set_seed(42)

# set top_k to 50
sample_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_k=50
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, which is a little unusual in this part of our family. He's a friendly, calm kind of dog, and I've always wanted to have him around, and I always wanted to go with


In [26]:
# set seed to reproduce results. Feel free to change the seed though to get different results
set_seed(42)

# set top_k to 50
sample_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_p=0.92,
    top_k=0
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, Daddy," a barter in the popular underground Twitter service Twitchell agreed.

Like it or not, "It's been a while," American Indian Salwa Donna Dal made me laugh


In [27]:
# set seed to reproduce results. Feel free to change the seed though to get different results
set_seed(42)

# set top_k = 50 and set top_p = 0.95 and num_return_sequences = 3
sample_outputs = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    num_return_sequences=3,
)

print("Output:\n" + 100 * '-')
for i, sample_output in enumerate(sample_outputs):
  print("{}: {}".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
0: I enjoy walking with my cute dog, a sweet little fluffy little kitty. It's a very cute dog, but it's a lot of fun! But I'm not ready to walk it! I want to give a lot more
1: I enjoy walking with my cute dog and watching movies because it is my favorite thing to do. Being at this facility for five minutes is so much fun." – Karen D.

The program has inspired several family vacations at Loy
2: I enjoy walking with my cute dog," said the 14-year-old. "I always go out with him and have fun."

As for his relationship with his pet dogs, the 7-year-old said he's
