In [None]:
# !pip3 install torch torchvision torchaudio
# !pip install transformers
# !pip install icecream

In [13]:
# Compute sentence probability
# https://huggingface.co/docs/transformers/model_doc/gpt2

from icecream import ic
import torch
from transformers import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import numpy as np
from scipy.special import softmax
 
def model_init(model_string, cuda):
    if model_string.startswith("gpt2"):
        tokenizer = GPT2Tokenizer.from_pretrained(model_string)
        model = GPT2LMHeadModel.from_pretrained(model_string)
    else:
        tokenizer = OpenAIGPTTokenizer.from_pretrained(model_string)
        model = OpenAIGPTLMHeadModel.from_pretrained(model_string)
    model.eval()
    if cuda:
        model.to('cuda')
    print("Model init")
    return model, tokenizer


def sent_scoring(model_tokenizer, text, cuda):
    model = model_tokenizer[0]
    tokenizer = model_tokenizer[1]
    assert model is not None
    assert tokenizer is not None
    input_ids = torch.tensor(tokenizer.encode(text)).unsqueeze(0)  # Batch size 1
    if cuda:
        input_ids = input_ids.to('cuda')
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
        
    loss, logits = outputs[:2] 
    loss = loss.item() #loss - next tokenizer prediction (output)
#     ic(loss)#4.990908145904541  5.691289901733398

#logits- scores for each vocabulary token before SoftMax ,maps probabilities [0, 1] to [-inf, +inf]
#logits.shape: torch.Size([1 batch size, 7 input size, 50257 vocab size])
#     ic(logits)
    word_prob = torch.max(logits.squeeze(), dim=-1)[0] #max prob for each tokenizer in the input
#     print(word_prob)
    sentence_prob = torch.sum(word_prob).item()
    return sentence_prob
#     return sentence_prob
 


if __name__ == '__main__':
    # model, tokenizer = model_init('openai-gpt', False) 
    model, tokenizer = model_init('gpt2', False) 
    choice1 = ["Very Accurate","Moderately Accurate", "Neither Accurate Nor Inaccurate", "Moderately Inaccurate","Very Inaccurate"]
    choice2 = ["always", "often","sometimes","rarely","never"]
    
    print("Choice 1:")
    for i in range(len(choice1)):
        print(sent_scoring((model, tokenizer), f"I have difficulty imagining things. Answer: {choice1[i]}", False))
    print("\nChoice 2:")
    for i in range(len(choice2)):
        print(sent_scoring((model, tokenizer), f"I {choice2[i]} have difficulty imagining things", False))

# in-sentence:    
# -617.5750732421875 
# -617.8251953125 

# as answer: choice2
# -935.1431884765625
# -934.5279541015625

# as answer: choice1
# -1087.692138671875
# -1174.92333984375




Model init
Choice 1:
-1087.692138671875
-1136.8531494140625
-1376.4542236328125
-1219.9056396484375
-1174.92333984375

Choice 2:
-617.5750732421875
-595.3192749023438
-609.1790161132812
-612.5242309570312
-617.8251953125


In [7]:
# https://discuss.huggingface.co/t/generation-probabilities-how-to-compute-probabilities-of-output-scores-for-gpt2/3175/15

from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

num_return_sequences = 3

gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
tokenizer = AutoTokenizer.from_pretrained("gpt2")

input_ids = tokenizer("Today is a nice day", return_tensors="pt").input_ids

generated_outputs = gpt2.generate(input_ids, do_sample=True, 
                                  num_return_sequences=num_return_sequences, 
                                  output_scores=True)

# only use id's that were generated
# gen_sequences has shape [3, 15]
gen_sequences = generated_outputs.sequences[:, input_ids.shape[-1]:]

# let's stack the logits generated at each step to a tensor and transform
# logits to probs
probs = torch.stack(generated_outputs.scores, dim=1).softmax(-1)  # -> shape [3, 15, vocab_size]

# now we need to collect the probability of the generated token
# we need to add a dummy dim in the end to make gather work
gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)

# now we can do all kinds of things with the probs

# 1) the probs that exactly those sequences are generated again
# those are normally going to be very small
unique_prob_per_sequence = gen_probs.prod(-1)


# 2) normalize the probs over the three sequences
normed_gen_probs = gen_probs / gen_probs.sum(0)
assert normed_gen_probs[:, 0].sum() == 1.0, "probs should be normalized"

# 3) compare normalized probs to each other like in 1)
unique_normed_prob_per_sequence = normed_gen_probs.prod(-1)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [24]:
generated_outputs.scores
# sequences - are just the token id sequences of the 2 most probably beams.
# sequence_scores - are the cumulative log probabilities of the two most probably beams.

(tensor([[-88.5249, -90.9243,     -inf,  ...,     -inf,     -inf,     -inf],
         [-88.5249, -90.9243,     -inf,  ...,     -inf,     -inf,     -inf],
         [-88.5249, -90.9243,     -inf,  ...,     -inf,     -inf,     -inf]]),
 tensor([[     -inf,      -inf,      -inf,  ...,      -inf,      -inf,
               -inf],
         [     -inf,      -inf,      -inf,  ...,      -inf,      -inf,
               -inf],
         [     -inf,      -inf,      -inf,  ...,      -inf,      -inf,
          -137.8420]]),
 tensor([[     -inf,      -inf,      -inf,  ...,      -inf,      -inf,
               -inf],
         [ -78.2892,  -79.9015,      -inf,  ...,      -inf,      -inf,
               -inf],
         [     -inf, -124.9655,      -inf,  ...,      -inf,      -inf,
          -125.4450]]),
 tensor([[ -72.6284,  -74.5586,      -inf,  ...,      -inf,      -inf,
               -inf],
         [     -inf,      -inf,      -inf,  ...,      -inf,      -inf,
          -139.2997],
         [     -inf

In [9]:
#convert to text
for i in range(3):    
    str=tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(generated_outputs.sequences[i]))
    print(f"{i}: {str}\n")

0: Today is a nice day, you know, but a lot of it is kind of pointless. That

1: Today is a nice day for America. And a great day for the country, too!



2: Today is a nice day.

I think you guys have to accept that and I just want



In [13]:
gen_sequences

tensor([[   11,   345,   760,    11,   475,   257,  1256,   286,   340,   318,
          1611,   286, 27158,    13,  1320],
        [  329,  2253,    13,   843,   257,  1049,  1110,   329,   262,  1499,
            11,  1165,     0,   198,   198],
        [   13,   198,   198,    40,   892,   345,  3730,   423,   284,  2453,
           326,   290,   314,   655,   765]])

In [11]:
gen_probs

tensor([[0.1642, 0.0325, 0.3133, 0.1669, 0.1345, 0.0123, 0.1751, 0.9334, 0.0424,
         0.4396, 0.0261, 0.9964, 0.0072, 0.3309, 0.0151],
        [0.2363, 0.0131, 0.2569, 0.0377, 0.0359, 0.1256, 0.8933, 0.8124, 0.2398,
         0.1190, 0.0868, 0.2625, 0.0326, 0.4246, 0.9988],
        [0.1301, 0.1795, 0.9926, 0.1283, 0.0274, 0.0518, 0.0357, 0.1315, 0.0175,
         0.0089, 0.5324, 0.0231, 0.0265, 0.0128, 0.3363]])

In [41]:
unique_normed_prob_per_sequence

tensor([5.9582e-12, 4.6304e-09, 1.7850e-14])