In [1]:
import tensorflow as tf
import torch
from transformers import GPT2Model, GPT2Config
import ipywidgets
from IPython import display

In [2]:
# Set this to the checkpoint you want to evaluate, or to "gpt2-medium" to
# evaluate the pre-trained model without finetuning.
# CHECKPOINT_PATH = '/content/drive/My Drive/finetuned_models/presidential_speeches/checkpoint-1500'
CHECKPOINT_PATH = "gpt2-medium"

# Set this to the list of text files you want to evaluate the perplexity of.
DATA_PATHS = ["/content/presidential_speeches_valid.txt",
              "/content/presidential_speeches_test.txt"]

# Seed PyTorch's random number generators up front so the sampling-based
# generation cells are repeatable when the notebook is re-run from a fresh
# kernel on the same setup.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
print("Running on device: ", device)


Running on device:  cuda


In [3]:
# Build a GPT-2 model directly from a default GPT2Config (no pretrained
# checkpoint is loaded in this cell), then read the configuration back off
# the model and print it.
model = GPT2Model(GPT2Config())
configuration = model.config
print(configuration)

GPT2Config {
  "activation_function": "gelu_new",
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "gradient_checkpointing": false,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "resid_pdrop": 0.1,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "vocab_size": 50257
}



In [5]:
# AutoModelWithLMHead is deprecated in transformers; AutoModelForCausalLM is
# its replacement for decoder-only language models such as GPT-2.  The old
# name is still imported so that any cell which references it keeps working.
from transformers import AutoModelForCausalLM, AutoModelWithLMHead, AutoTokenizer

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained('gpt2-medium')
# Download the pre-trained model and its configuration (cached after the
# first call).  The tokenizer's end-of-sequence id is passed as pad_token_id
# so the model has a padding id to use during generation.
model = AutoModelForCausalLM.from_pretrained('gpt2-medium', pad_token_id=tokenizer.eos_token_id)


# Prompt that every generation cell below continues.
input_context = 'My cute dog'
# One token-id list per banned phrase; handed to generate() via bad_words_ids.
bad_words_ids = [tokenizer.encode(bad_word, add_prefix_space=True) for bad_word in ['idiot', 'stupid', 'shut up']]
input_ids = tokenizer.encode(input_context, return_tensors='pt')  # encode input context as a PyTorch tensor

In [6]:
# Baseline: sample a continuation of the prompt while passing bad_words_ids,
# so the banned phrases are not allowed to be generated.
outputs = model.generate(
    input_ids=input_ids,
    max_length=100,
    do_sample=True,
    bad_words_ids=bad_words_ids,
)
print("Normal Output:", "-" * 100, sep="\n")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Normal Output:
----------------------------------------------------------------------------------------------------
My cute dog

He comes around the corner

I wonder what he plans to do

I wonder what he has planned

What happened here today

What happened here today

That day she went out

Yeah, that day she went out

That day she went out

That day she went out

I think now I'll forget

that day she went out

that day she went out

All we gotta do is work together


In [7]:
# Beam search with 5 beams and early_stopping switched on.
beam_output = model.generate(input_ids, max_length=100, num_beams=5, early_stopping=True)

# Print a header, a 100-dash rule, and the decoded first returned sequence.
print("Beam Search Output:", "-" * 100, sep="\n")
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))


Beam Search Output:
----------------------------------------------------------------------------------------------------
My cute dog! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much! I love you so much!


In [8]:
# Beam search with 6 beams and no_repeat_ngram_size set to 6, so that no
# 6-gram is allowed to appear twice in the generated text.
beam_output = model.generate(
    input_ids, 
    max_length=100, 
    num_beams=6, 
    no_repeat_ngram_size=6, 
    early_stopping=True
)

# Print a header, a 100-dash rule, and the decoded first returned sequence.
print("Output:\n" + 100 * '-')
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
My cute dog! I love you so much!"

"I love you too!"

"I'm so happy for you!"

"I can't wait to see you again!"

"You're so cute!"

"I miss you so much."

"I want to hug you."

"You're my best friend!"

"I want you to be my best friend forever!"

"I'll always love you!"

"


In [9]:
# set num_return_sequences > 1 to get several beams back
# (for beam search it must not exceed num_beams)
beam_outputs = model.generate(
    input_ids, 
    max_length=100, 
    num_beams=5, 
    no_repeat_ngram_size=2, 
    num_return_sequences=5, 
    early_stopping=True
)

# now we have 5 output sequences; print each one prefixed with its index
print("Output:\n" + 100 * '-')
for i, beam_output in enumerate(beam_outputs):
  print("{}: {}".format(i, tokenizer.decode(beam_output, skip_special_tokens=True)))


Output:
----------------------------------------------------------------------------------------------------
0: My cute dog! I love you so much!"

"I'm so happy to see you again. I'm glad you're okay. It's been a long time since I've seen you, but I know you'll be fine. You've been through a lot, haven't you? I can't imagine what it must have been like for you to be separated from your family and friends for so long. We're so lucky to have you with us. Thank you for everything."

1: My cute dog! I love you so much!"

"I'm so happy to see you again. I'm glad you're okay. It's been a long time since I've seen you, but I know you'll be fine. You've been through a lot, haven't you? I can't imagine what it must have been like for you to be separated from your family and friends for so long. We're so lucky to have you with us. Thank you for everything you've
2: My cute dog! I love you so much!"

"I'm so happy to see you again. I'm glad you're okay. It's been a long time since I've seen you, 

In [10]:
# Pure sampling: do_sample=True turns sampling on, and top_k=0 switches the
# top-k filter off.
sample_output = model.generate(input_ids, do_sample=True, max_length=100, top_k=0)

print(f"Output:\n{'-' * 100}")
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
My cute dog ends up walking all the street corners before they complete her her routine!" Kit found that out all too late. "As soon as I heard her stop she did something funny with my puppet, I snatched her balloons before her and the kids followed!"

JUST AS VETEANS GIVEN DONATION CUSTOMS WERE DECIDED BROADCASTED, THE PRANK GOT MOVED AHEAD BY FRIENDS THAT IN CHANGES AT WORK, AN


In [11]:
# Sampling with top-k disabled and temperature=0.7, which makes the sampler
# less sensitive to low-probability candidates.
sample_output = model.generate(
    input_ids, do_sample=True, max_length=100,
    top_k=0, temperature=0.7,
)

print("Output:", "-" * 100, sep="\n")
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
My cute dog was so happy to see me come through the door, because I was the one who walked him through the door. He loved it too!"

Paula also shared that her husband was very excited for his new baby girl. And she told us that her husband would be "happy to know we are getting a kid."

She also shared that the baby is also a "bright smile" and loves to play.

You can watch the full interview on their Facebook page


In [12]:
# Top-k sampling with top_k=50.
sample_output = model.generate(input_ids, do_sample=True, max_length=100, top_k=50)

print("Output:\n" + "-" * 100)
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
My cute dog. I'll be your mommy." She kissed his mouth briefly and gave him a tiny nod. "You're adorable. Thanks." She looked out back at the river and the stars. "When can we take the dog back?" "Maybe later. I won't tell anyone." She nodded to the dog. "I know how to take care of him now." "So…when can we meet the people that are going to be taking us when we do get home?" Anna


In [13]:
# Nucleus (top-p) sampling: top-k is switched off (top_k=0) and sampling is
# restricted to the most likely words covering 92% of the probability mass.
sample_output = model.generate(
    input_ids, do_sample=True, max_length=100,
    top_p=0.92, top_k=0,
)

print(f"Output:\n{'-' * 100}")
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
My cute dog, us's less than five months old, also came to the school, coming in very hungry. I tried to feed them at the checkout stand, but the ticket got away."

When the cameras caught the incident they were stopped by school staff and paid more than $300 in legal fees. Teachers will have to decide whether to recommend owners have them registered as commercial crop bed dog residents.

But those school patrols have sometimes caused confused reactions among some residents who believe that the


In [14]:
# Combine top_k=50 with top_p=0.95 and request three independently sampled
# sequences via num_return_sequences=3.
sample_outputs = model.generate(
    input_ids, do_sample=True, max_length=100,
    top_k=50, top_p=0.95, num_return_sequences=3,
)

# Print each returned sequence prefixed with its index.
print("Output:", "-" * 100, sep="\n")
for i, sample_output in enumerate(sample_outputs):
  print(f"{i}: {tokenizer.decode(sample_output, skip_special_tokens=True)}")

Output:
----------------------------------------------------------------------------------------------------
0: My cute dog and I ran to her house when we were little. We have lived at our house for 2 years. My daughter bought her a new dress for Christmas and we both love it. She doesn't know she was born bald, but she doesn't care about it. The whole situation was a great blessing to us and it was beautiful!

This article also appeared on our sister blog: I Dream Of My Family. You can follow our blog @italongdaniel for everything family
1: My cute dog loves me. She gets her treats every single day! She's our dog, and I'm the only one who sees her all the time. So it was hard for me to take all of the negative pressure off my shoulders when my dog got attacked last night."

And that's where the similarities end. When you've tried to take a life and take away a person's right to life, it makes it harder to look upon those things as friends.

On the
2: My cute dog was so upset and he ye

Worrying but funny output:


"My cute dog was so upset and he yelled at me! He came to try and find me!" she said.
Cecilia said she did not feel safe for about 2 hours until police came to her side. "I was screaming but I couldn't see because of the sun light," she said. "Then the police said there's nothing they could do. I kept saying my dog has to go but I don't believe that because I live right on top of them."


* min_length: can be used to force the model to not produce an EOS token (= not finish the sentence) before min_length is reached. This is used quite frequently in summarization, but can be useful in general if the user wants to have longer outputs.

* repetition_penalty: can be used to penalize words that were already generated or belong to the context. It was first introduced by Keskar et al. (2019) and is also used in the training objective in Welleck et al. (2019). It can be quite effective at preventing repetitions, but seems to be very sensitive to different models and use cases, e.g. see this discussion on GitHub.

* attention_mask: can be used to mask padded tokens

* pad_token_id, bos_token_id, eos_token_id: If the model does not have those tokens by default, the user can manually choose other token ids to represent them.

