In [2]:
import torch
import torch.nn.functional as F

In [3]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# One checkpoint name drives both the tokenizer and the language model.
model_name = "gpt2"

tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Reuse the end-of-sequence token as the padding token, on the tokenizer and
# in the model config, so the `padding=True` tokenizer calls in later cells
# have a pad token to work with.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
## Generation on a prompt

# Sampling is stochastic: seed torch's global RNG so that a fresh
# "Restart Kernel -> Run All" reproduces the same continuation.
torch.manual_seed(0)

prompt = "Making pizza is my favorite"
inputs = tokenizer(prompt, return_tensors='pt', padding=True)

output = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,
    # Passing the pad id explicitly avoids the
    # "Setting `pad_token_id` to `eos_token_id`" notice on every call.
    pad_token_id=tokenizer.eos_token_id,
    max_length=50,
    num_return_sequences=1,
    no_repeat_ngram_size=2,
    do_sample=True,
    temperature=0.5,
)

# Only one sequence is requested, so decode the first (and only) row.
generation = tokenizer.decode(output[0], skip_special_tokens=True)
print(generation)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Making pizza is my favorite thing in the world. I love to cook it, and it's the only thing I can think of that I've ever really tried.

I've always loved to make pizza with my friends, so I decided to


In [5]:
prompt = "Making pizza is my favorite thing in the world, all the mozzarella and the tomato sauce on my margherita are amazing. My Italian friend Giovanni told me that his favourite pizza is the capricciosa with salsiccia"
inputs = tokenizer(prompt, return_tensors='pt', padding=True)

with torch.no_grad():
    output = model(**inputs)

logits = output.logits

# Rank every vocabulary id by its logit at the last position of the prompt,
# highest first. `pick` is a 0-based rank into that ordering.
# The ranking is deliberately NOT called `sorted`, which would shadow the
# Python builtin for every later cell in the kernel.
pick = 4
ranked_token_ids = torch.sort(logits[0][-1], 0, descending=True).indices

max_token = tokenizer.decode(ranked_token_ids[0])
pick_token = tokenizer.decode(ranked_token_ids[pick])

print(f"Token with highest probability: {max_token}")
print(f"Token at position {pick}: {pick_token}")


Token with highest probability:  sauce
Token at position 4:  cheese


In [12]:
# Scratch inspection: tokenize the prompt and the continuation separately and
# display the type of object the tokenizer hands back.
X = "Eating pizza is my favorite"
# X = "Jesus was born 2024 years ago"

# Written line by line with explicit "\n" so the trailing space at the end of
# the first two lines is visible and cannot be stripped by an editor.
Y = (
    " thing in the world, all the mozzarella and the tomato sauce on my margherita are amazing. \n"
    "My Italian friend Giovanni told me that his favourite pizza is the capricciosa with salsiccia. \n"
    "When I was a kid, I used to watch the pizza maker create his pizzas, he was from Romania but a very nice gentleman I have to say."
)

Z = X + Y

xt = tokenizer(X, return_tensors='pt', padding=True)
yt = tokenizer(Y, return_tensors='pt', padding=True)
type(xt)

transformers.tokenization_utils_base.BatchEncoding

In [7]:
# Prompt X, continuation Y, and their concatenation Z (the model input).
X = "Eating pizza is my favorite"
# X = "Jesus was born 2024 years ago"

# Explicit "\n" keeps the trailing space at the end of the first two lines visible.
Y = (
    " thing in the world, all the mozzarella and the tomato sauce on my margherita are amazing. \n"
    "My Italian friend Giovanni told me that his favourite pizza is the capricciosa with salsiccia. \n"
    "When I was a kid, I used to watch the pizza maker create his pizzas, he was from Romania but a very nice gentleman I have to say."
)

Z = X + Y

xt = tokenizer(X, return_tensors='pt', padding=True)
yt = tokenizer(Y, return_tensors='pt', padding=True)
zt = tokenizer(Z, return_tensors='pt', padding=True)

# Number of tokens in the prompt on its own.
len_xt = xt.input_ids.size(-1)

with torch.no_grad():
    output_z = model(**zt)

z_logits_tensor = output_z.logits

# Logit rows from index len_xt up to (but excluding) the last one are scored
# against Y's token ids from the second token onwards.
continuation_logits = z_logits_tensor[0, len_xt:-1]
continuation_targets = yt.input_ids[0, 1:]
cross_entropy = F.cross_entropy(continuation_logits, continuation_targets)

print(f"Cross Entropy 1: {cross_entropy}")

Cross Entropy 1: 3.231503963470459


In [8]:
# Two alternative prompts placed in front of the same continuation Y.
X_1 = "Eating pizza is my favorite"
X_2 = "Jesus was born 2024 years ago"
# X_2 = "My old ford focus is"

# Explicit "\n" keeps the trailing space at the end of the first two lines visible.
Y = (
    " thing in the world, all the mozzarella and the tomato sauce on my margherita are amazing. \n"
    "My Italian friend Giovanni told me that his favourite pizza is the capricciosa with salsiccia. \n"
    "When I was a kid, I used to watch the pizza maker create his pizzas, he was from Romania but a very nice gentleman I have to say."
)

Z_1 = X_1 + Y
Z_2 = X_2 + Y

xt_1 = tokenizer(X_1, return_tensors='pt', padding=True)
xt_2 = tokenizer(X_2, return_tensors='pt', padding=True)
yt = tokenizer(Y, return_tensors='pt', padding=True)
zt_1 = tokenizer(Z_1, return_tensors='pt', padding=True)
zt_2 = tokenizer(Z_2, return_tensors='pt', padding=True)

# Token counts of each prompt on its own.
len_xt_1 = xt_1.input_ids.size(-1)
len_xt_2 = xt_2.input_ids.size(-1)

with torch.no_grad():
    output_z_1 = model(**zt_1)
    output_z_2 = model(**zt_2)

z_logits_tensor_1 = output_z_1.logits
z_logits_tensor_2 = output_z_2.logits

# Both prompts are scored against the same targets: Y's token ids from the
# second token onwards, matched with the logit rows that follow each prompt.
y_targets = yt.input_ids[0, 1:]
cross_entropy_1 = F.cross_entropy(z_logits_tensor_1[0, len_xt_1:-1], y_targets)
cross_entropy_2 = F.cross_entropy(z_logits_tensor_2[0, len_xt_2:-1], y_targets)

print(f"Cross Entropy 1: {cross_entropy_1} -- {len_xt_1} tokens long")
print(f"Cross Entropy 2: {cross_entropy_2} -- {len_xt_2} tokens long")

Cross Entropy 1: 3.231503963470459 -- 6 tokens long
Cross Entropy 2: 3.5444490909576416 -- 6 tokens long


In [9]:
def cross_entropy_given(X, Y, diff=False):
    """Compute the cross entropy of sentence Y given sentence X.

    X and Y are tokenized separately and their token ids are concatenated
    before being passed to the global ``model``. This guarantees that the
    first ``len_x`` positions of the input are exactly X's tokens and the
    remaining ones exactly Y's. Tokenizing the joined string ``X + Y``
    instead can merge characters across the boundary, which would shift the
    logits against the targets (or make their lengths disagree).

    The first token of Y is not scored: the targets are Y's token ids from
    the second one onwards, in both the conditional and the stand-alone pass,
    so the two values subtracted in ``diff`` mode cover the same tokens.

    Args:
        X: Context string placed in front of Y.
        Y: Continuation string whose tokens are scored.
        diff: If False (default), return the cross entropy of Y given X.
            If True, return that value minus the cross entropy of Y scored
            on its own, so a negative result means X made Y easier to predict.

    Returns:
        A 0-dim tensor with the mean cross entropy (or the difference of two).
    """
    # Plain Python lists of token ids (no `return_tensors`), so they can be
    # concatenated directly.
    x_ids = tokenizer(X).input_ids
    y_ids = tokenizer(Y).input_ids
    len_x = len(x_ids)

    targets = torch.tensor(y_ids[1:], dtype=torch.long)

    def _score(token_ids, start):
        """Mean cross entropy of `targets` under the logits from row `start` on."""
        batch = torch.tensor([token_ids], dtype=torch.long)
        with torch.no_grad():
            logits = model(input_ids=batch,
                           attention_mask=torch.ones_like(batch)).logits
        # Row i of the logits predicts token i + 1, so the last row (which
        # predicts a token beyond the input) is dropped.
        return F.cross_entropy(logits[0, start:-1], targets)

    # Conditional term, needed in both modes.
    cross_z = _score(x_ids + y_ids, len_x)
    if not diff:
        return cross_z

    # Baseline: the same targets scored with Y alone as the input.
    cross_y = _score(y_ids, 0)
    return cross_z - cross_y


X_1 = "Eating pizza is my favorite"
# X_2 = "I don't know what's the"
X_2 = "Dinosaurs arrived on earth"

# Explicit "\n" keeps the trailing space at the end of the first two lines visible.
Y = (
    " thing in the world, all the mozzarella and the tomato sauce on my margherita are amazing. \n"
    "My Italian friend Giovanni told me that his favourite pizza is the capricciosa with salsiccia. \n"
    "When I was a kid, I used to watch the pizza maker create his pizzas, he was from Romania but a very nice gentleman I have to say."
)

# First pass prints the plain cross entropy of Y after each prompt; second
# pass prints the same quantity with Y's stand-alone cross entropy subtracted.
for subtract_baseline in (False, True):
    for context in (X_1, X_2):
        score = cross_entropy_given(context, Y, diff=subtract_baseline)
        print(score.item(), "\t -- ", context)

3.231503963470459 	 --  Eating pizza is my favorite
3.5339298248291016 	 --  Dinosaurs arrived on earth
-0.22489690780639648 	 --  Eating pizza is my favorite
0.0775289535522461 	 --  Dinosaurs arrived on earth
