In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_str = "HuggingFaceH4/zephyr-7b-beta" # "openai-community/gpt2"

In [3]:
tokenizer = AutoTokenizer.from_pretrained(model_str)
model = AutoModelForCausalLM.from_pretrained(model_str, device_map='auto', load_in_8bit=True)
# model.to("cuda")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 8/8 [00:06<00:00,  1.16it/s]


In [4]:
text = "The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n"
tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=10)[0])

  tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=10)[0])
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


"<s> The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\nNegative. The review is highly critical of the"

In [5]:
text = "The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n"
tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=10)[0])

  tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=10)[0])
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


"<s> The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\nNegative. The review is highly critical of the"

In [6]:
from datasets import Dataset
from transformers import AutoTokenizer

# Provided list of reviews
reviews = [
    "The pizza was horribe and the staff rude. Won't recommend.",
    "The pasta was undercooked and the service was slow. Not going back.",
    "The salad was wilted and the waiter was dismissive. Avoid at all costs.",
    "The soup was cold and the ambiance was noisy. Not a pleasant experience.",
    "The burger was overcooked and the fries were soggy. I wouldn't suggest this place.",
    "The sushi was not fresh and the staff seemed uninterested. Definitely not worth it.",
    "The steak was tough and the wine was sour. A disappointing meal.",
    "The sandwich was bland and the coffee was lukewarm. Not a fan of this café.",
    "The dessert was stale and the music was too loud. I won't be returning.",
    "The chicken was dry and the vegetables were overcooked. A poor dining experience."
]

# Convert the list to a Hugging Face Dataset
dataset = Dataset.from_dict({'text': reviews})

# Load a tokenizer
# tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Set tokenizer padding to the left
tokenizer.padding_side = "left"
# Assuming 'test.tokenizer' is your tokenizer object
if tokenizer.pad_token is None:
    # Add a new pad token if it doesn't exist. This is just an example token.
    # tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    # model.resize_token_embeddings(len(tokenizer))
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], 
                     padding="max_length", 
                     truncation=True)


tokenized_dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 10/10 [00:00<00:00, 1226.80 examples/s]


In [7]:
text = "The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n"
tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=1)[0])

  tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=1)[0])
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


"<s> The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\nNeg"

In [8]:
text = "The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n"
output = model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=20, output_scores=True, return_dict_in_generate=True)

  output = model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=20, output_scores=True, return_dict_in_generate=True)
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [9]:
[tokenizer.decode(e) for e in output.sequences]

["<s> The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\nNegative. The review is highly critical of the pizza and the staff, and the author expresses"]

In [10]:
output.scores[0].shape

torch.Size([1, 32000])

In [11]:
inp = torch.tensor([tokenizer.encode(text), tokenizer.encode(text)]).to('cuda')

In [12]:
len(tokenizer)

32000

In [13]:
tokenizer.pad_token_id

2

In [14]:
inp.shape

torch.Size([2, 45])

In [23]:
output = model.generate(inp, max_new_tokens=1, output_scores=True, return_dict_in_generate=True)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [42]:
type(output)

transformers.generation.utils.GenerateDecoderOnlyOutput

In [32]:
verb_tok = [[tokenizer.encode('positive')[1]], [tokenizer.encode('negative')[1]]]
verb_tok

[[5278], [7087]]

In [33]:
verb_tok

[[5278], [7087]]

In [34]:
out_res = torch.cat(list(map(lambda i: output.scores[0][:, verb_tok[i]], range(2))), axis=-1)
out_res

tensor([[ 2.6758, 10.0703],
        [ 2.6758, 10.0703]], device='cuda:0')

In [35]:
out_res = torch.nn.functional.softmax(out_res, dim=1)

In [66]:
torch.sum(out_res, axis=1, dtype=torch.float64).tolist()

[0.9999999416759238, 0.9999999416759238]

In [63]:
sum(map(sum, out_res.cpu().tolist()))

1.9999998833518475

In [37]:
out_res

tensor([[6.1423e-04, 9.9939e-01],
        [6.1423e-04, 9.9939e-01]], device='cuda:0')

In [41]:
out_res[:, 0]

tensor([0.0006, 0.0006], device='cuda:0')

In [39]:

# out_res = torch.nn.functional.softmax(out_res, dim=1)
# # TODO: verbalizer for labels
# class_probs_combined: Dict[str, torch.Tensor] = {k:torch.sum(out_res[:, v], axis=-1) for k, v in i_dict.items()}