In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader

In [5]:
model_str = "sshleifer/tiny-gpt2" # "openai-community/gpt2" # "HuggingFaceH4/zephyr-7b-beta" # "openai-community/gpt2"

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_str)
model = AutoModelForCausalLM.from_pretrained(model_str, device_map='auto', load_in_8bit=True)
# model.to("cuda")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [7]:
text = "The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n"
tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=10)[0])

  tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=10)[0])
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


"The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n factors factors factors factors factors factors factors factors factors factors"

In [8]:
text = "The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n"
tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=10)[0])

  tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=10)[0])
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


"The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n factors factors factors factors factors factors factors factors factors factors"

In [9]:
from datasets import Dataset
from transformers import AutoTokenizer

# Provided list of reviews
reviews = [
    "The pizza was horribe and the staff rude. Won't recommend.",
    "The pasta was undercooked and the service was slow. Not going back.",
    "The salad was wilted and the waiter was dismissive. Avoid at all costs.",
    "The soup was cold and the ambiance was noisy. Not a pleasant experience.",
    "The burger was overcooked and the fries were soggy. I wouldn't suggest this place.",
    "The sushi was not fresh and the staff seemed uninterested. Definitely not worth it.",
    "The steak was tough and the wine was sour. A disappointing meal.",
    "The sandwich was bland and the coffee was lukewarm. Not a fan of this café.",
    "The dessert was stale and the music was too loud. I won't be returning.",
    "The chicken was dry and the vegetables were overcooked. A poor dining experience."
]

# Convert the list to a Hugging Face Dataset
dataset = Dataset.from_dict({'text': reviews})

# Load a tokenizer
# tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Set tokenizer padding to the left
tokenizer.padding_side = "left"
# Assuming 'test.tokenizer' is your tokenizer object
if tokenizer.pad_token is None:
    # Add a new pad token if it doesn't exist. This is just an example token.
    # tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    # model.resize_token_embeddings(len(tokenizer))
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], 
                     padding="max_length", 
                     truncation=True)


tokenized_dataset = dataset.map(tokenize_function, batched=True)

Map: 100%|██████████| 10/10 [00:00<00:00, 61.55 examples/s]


In [10]:
text = "The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n"
tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=1)[0])

  tokenizer.decode(model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=1)[0])
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


"The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n factors"

In [11]:
text = "The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n"
output = model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=20, output_scores=True, return_dict_in_generate=True)

  output = model.generate(torch.tensor(tokenizer.encode(text, return_tensors="pt")).to('cuda'), max_new_tokens=20, output_scores=True, return_dict_in_generate=True)
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [12]:
[tokenizer.decode(e) for e in output.sequences]

["The pizza was horribe and the staff rude. Won't recommend.\n\nIs this review positive or negative? Elaborate your decision and start the sentence with 'positive' or 'negative'\n\n factors factors factors factors factors factors factors factors factors factors factors factors factors factors factors factors factors factors factors factors"]

In [13]:
output.scores[0].shape

torch.Size([1, 50257])

In [14]:
inp = torch.tensor([tokenizer.encode(text), tokenizer.encode(text)]).to('cuda')

In [15]:
len(tokenizer)

50257

In [16]:
tokenizer.pad_token_id

50256

In [17]:
inp.shape

torch.Size([2, 41])

In [18]:
output = model.generate(inp, max_new_tokens=1, output_scores=True, return_dict_in_generate=True)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [19]:
type(output)

transformers.generation.utils.GenerateDecoderOnlyOutput

In [20]:
verb_tok = [[tokenizer.encode('positive')[0]], [tokenizer.encode('negative')[0]]]
verb_tok

[[24561], [31591]]

In [21]:
verb_tok

[[24561], [31591]]

In [22]:
out_res = torch.cat(list(map(lambda i: output.scores[0][:, verb_tok[i]], range(2))), axis=-1)
out_res

tensor([[ 0.0113, -0.0019],
        [ 0.0113, -0.0019]], device='cuda:0', dtype=torch.float16)

In [23]:
out_res = torch.nn.functional.softmax(out_res, dim=1)

In [24]:
torch.sum(out_res, axis=1, dtype=torch.float64).tolist()

[1.0, 1.0]

In [25]:
sum(map(sum, out_res.cpu().tolist()))

2.0

In [26]:
out_res

tensor([[0.5034, 0.4966],
        [0.5034, 0.4966]], device='cuda:0', dtype=torch.float16)

In [27]:
out_res[:, 0]

tensor([0.5034, 0.5034], device='cuda:0', dtype=torch.float16)

In [28]:

# out_res = torch.nn.functional.softmax(out_res, dim=1)
# # TODO: verbalizer for labtokenizer.encode('positive')els
# class_probs_combined: Dict[str, torch.Tensor] = {k:torch.sum(out_res[:, v], axis=-1) for k, v in i_dict.items()}

In [29]:
def infer(model, prompt):
    model.eval()
    with torch.no_grad():
        output = model.generate(prompt, max_new_tokens=1, output_scores=True, return_dict_in_generate=True)
    return output.scores[0]

In [30]:
def calibrate_with_ds(prompt_model, dataloader) -> torch.Tensor:
    r"""Calibrate. See `Paper <https://arxiv.org/abs/2108.02035>`_
    
    Args:
        prompt_model (:obj:`PromptForClassification`): the PromptForClassification model.
        dataloader (:obj:`List`): the dataloader to conduct the calibrate, could be a virtual one, i.e. contain an only-template example.
    
    Return:
        (:obj:`torch.Tensor`) A tensor of shape  (vocabsize) or (mask_num, vocabsize), the logits calculated for each word in the vocabulary
    """
    all_logits = []
    prompt_model.eval()
    for batch in dataloader:
        batch = {k:v.to(prompt_model.device) for k, v in batch.items()}
        logits = infer(model, batch['input_ids'])
        all_logits.append(logits.detach())
    all_logits = torch.cat(all_logits, dim=0)
    return all_logits.mean(dim=0)



In [31]:
tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])
dataloader = DataLoader(tokenized_dataset, batch_size=1)

In [32]:
tokenized_dataset

Dataset({
    features: ['text', 'input_ids', 'attention_mask'],
    num_rows: 10
})

In [33]:
mask = torch.ones(len(tokenizer))
mask[[element for sublist in verb_tok for element in sublist]] = 0

In [34]:
1 -  mask

tensor([0., 0., 0.,  ..., 0., 0., 0.])

In [35]:
def project(
        label_words_ids, # get from above
        label_words_mask, # get from above
        logits,
        **kwargs,
        ):
    label_words_logits = logits[:, label_words_ids]
    # label_words_logits = self.handle_multi_token(label_words_logits, label_words_mask)
    # print(label_words_logits)
    # print(label_words_logits.shape)
    # label_words_logits = label_words_logits.select(dim=-1, index=0)
    # print(label_words_logits.shape)
    # label_words_logits -= 10000*(1-label_words_mask)
    mask = torch.zeros(len(tokenizer)) - 10000
    print(label_words_logits)
    for idx, logit in zip(label_words_ids, label_words_logits[0]):
        mask[idx] = logit
    return mask.unsqueeze(0)
    # label1 = label_words_ids[0]
    # label2 = label_words_ids[1]
    # tmp = []
    # tmp.append(logits.index_select(label1,axis=-1))
    # tmp.append(logits.index_select(label2,axis=-1))
    # label_words_logits = paddle.concat(tmp,axis=1).reshape([logits.shape[0],label_words_ids.shape[0],label_words_ids.shape[1]])
    # label_words_logits -= 10000*(1-label_words_mask)
    # return label_words_logits

In [36]:
def normalize(logits):
    batch_size = logits.shape[0]
    return F.softmax(logits.reshape([batch_size, -1]), dim=-1).reshape(logits.shape)

In [37]:
def calibrate(label_words_probs, calibrate_logits, **kwargs):
    shape = label_words_probs.shape
    assert calibrate_logits.dim() ==  1, "self._calibrate_logits are not 1-d tensor"
    calibrate_label_words_probs = normalize(project([element for sublist in verb_tok for element in sublist], mask, calibrate_logits.unsqueeze(0), **kwargs))
    print(calibrate_label_words_probs)
    assert calibrate_label_words_probs.shape[1:] == label_words_probs.shape[1:] \
            and calibrate_label_words_probs.shape[0]==1, "shape not match"
    print(label_words_probs.device, calibrate_label_words_probs.device)
    print(label_words_probs)
    print((calibrate_label_words_probs+1e-8))
    label_words_probs /= (calibrate_label_words_probs+1e-3)
    # normalize # TODO Test the performance
    print(label_words_probs)
    norm = label_words_probs.reshape(shape[0], -1).sum(dim=-1,keepdim=True) # TODO Test the performance of detaching()
    print(norm)
    label_words_probs = label_words_probs.reshape(shape[0], -1) / norm
    label_words_probs = label_words_probs.reshape(*shape)
    return label_words_probs

In [38]:
type(dataloader)

torch.utils.data.dataloader.DataLoader

In [39]:
torch.where(mask == 1)[0], verb_tok

(tensor([    0,     1,     2,  ..., 50254, 50255, 50256]), [[24561], [31591]])

In [40]:
cc_logits = calibrate_with_ds(model, dataloader)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

In [41]:
output = model.generate(tokenizer.encode(text, return_tensors="pt").to('cuda'), max_new_tokens=1, output_scores=True, return_dict_in_generate=True)
logits = output.scores[0]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [42]:
logits.unsqueeze(0).shape

torch.Size([1, 1, 50257])

In [43]:
a = calibrate(logits.cpu(), cc_logits.cpu())
a

tensor([[ 0.0113, -0.0019]], dtype=torch.float16)
tensor([[0., 0., 0.,  ..., 0., 0., 0.]])
cpu cpu
tensor([[-1.2993e-02, -1.0979e-02,  3.1921e-02,  ..., -3.7537e-02,
          3.0339e-05,  2.5635e-02]], dtype=torch.float16)
tensor([[1.0000e-08, 1.0000e-08, 1.0000e-08,  ..., 1.0000e-08, 1.0000e-08,
         1.0000e-08]])
tensor([[-1.2992e+01, -1.0977e+01,  3.1922e+01,  ..., -3.7531e+01,
          3.0334e-02,  2.5641e+01]], dtype=torch.float16)
tensor([[-2596.]], dtype=torch.float16)


tensor([[ 5.0049e-03,  4.2267e-03, -1.2299e-02,  ...,  1.4458e-02,
         -1.1683e-05, -9.8801e-03]], dtype=torch.float16)

In [44]:
cc_logits

tensor([-1.2985e-02, -1.0963e-02,  3.1891e-02,  ..., -3.7506e-02,
         3.0279e-05,  2.5620e-02], device='cuda:0', dtype=torch.float16)

In [45]:
torch.where(cc_logits > 0)  

(tensor([    2,     5,     6,  ..., 50252, 50255, 50256], device='cuda:0'),)

In [46]:
a[0][[element for sublist in verb_tok for element in sublist]]

tensor([-8.6427e-06,  1.4901e-06], dtype=torch.float16)