In [1]:
from datasets import load_dataset
dataset = load_dataset("tau/commonsense_qa")

In [8]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
import numpy as np

In [3]:
base_model = "NousResearch/Llama-2-7b-chat-hf"
compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    device_map={"": 0}
)
#model.config.use_cache = False
#model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [4]:
# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
#tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
#tokenizer.padding_side = "right"

In [5]:
inputs = tokenizer(["Today is"], return_tensors="pt")


In [9]:
# Example 1: Print the scores for each token generated with Greedy Search
outputs = model.generate(**inputs, max_new_tokens=5, return_dict_in_generate=True, output_scores=True)
transition_scores = model.compute_transition_scores(
    outputs.sequences, outputs.scores, normalize_logits=True
)
# input_length is the length of the input prompt for decoder-only models, like the GPT family, and 1 for
# encoder-decoder models, like BART or T5.
input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]
for tok, score in zip(generated_tokens[0], transition_scores[0]):
    # | token | token string | log probability | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")




|   278 | the      | -0.967 | 38.04%
| 29871 |          | -0.844 | 43.02%
| 29896 | 1        | -0.838 | 43.27%
| 29900 | 0        | -1.077 | 34.08%
|   386 | th       | -0.786 | 45.56%


In [10]:
outputs = model.generate(
    **inputs,
    max_new_tokens=5,
    num_beams=4,
    num_return_sequences=4,
    return_dict_in_generate=True,
    output_scores=True,
)
transition_scores = model.compute_transition_scores(
    outputs.sequences, outputs.scores, outputs.beam_indices, normalize_logits=False
)



In [31]:
print(transition_scores)
print(len(outputs.scores[0][0]))

tensor([[-0.9669, -0.8461, -1.2584, -1.1573, -0.1725],
        [-1.7013, -1.4439, -0.3102, -0.9770, -0.0035],
        [-0.9669, -0.8461, -0.8365, -1.0800, -0.7861],
        [-0.9669, -0.8461, -0.8365, -1.0800, -0.8017]])
32000


In [11]:
output_length = np.sum(transition_scores.numpy() < 0, axis=1)
length_penalty = model.generation_config.length_penalty
reconstructed_scores = transition_scores.sum(axis=1) / (output_length**length_penalty)
print(np.allclose(outputs.sequences_scores, reconstructed_scores))

True


In [12]:
print(transition_scores)
print(length_penalty)
print(reconstructed_scores)

tensor([[-0.9669, -0.8461, -1.2584, -1.1573, -0.1725],
        [-1.7013, -1.4439, -0.3102, -0.9770, -0.0035],
        [-0.9669, -0.8461, -0.8365, -1.0800, -0.7861],
        [-0.9669, -0.8461, -0.8365, -1.0800, -0.8017]])
1.0
tensor([-0.8802, -0.8872, -0.9031, -0.9062], dtype=torch.float64)


In [14]:
for tok, score in zip(generated_tokens[0], reconstructed_scores):
    # | token | token string | log probability | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")

|   278 | the      | -0.880 | 41.47%
| 29871 |          | -0.887 | 41.18%
| 29896 | 1        | -0.903 | 40.53%
| 29900 | 0        | -0.906 | 40.40%


In [15]:
print(inputs)

{'input_ids': tensor([[    1, 20628,   338]]), 'attention_mask': tensor([[1, 1, 1]])}


In [26]:
inputs = tokenizer.tokenize("Today is", return_tensors="pt")

print(inputs)

['▁Today', '▁is']


In [24]:
print(dataset["train"][:]['question'][5])

What home entertainment equipment requires cable?


In [None]:
#for every question
for q in range(len(dataset)):
    #the scores for each potential generation
    answerProbabilites = []

    #form the input string

In [23]:
tokenizedQuestions = tokenizer(dataset["train"][:]['question'], return_tensors="pt")

print(tokenizedQuestions[5])

ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs type `list` where type `int` is expected).