In [41]:
import re

import functorch as ft
import torch
import transformers
import general_utils as gu

In [27]:
model = transformers.AutoModelForCausalLM.from_pretrained("distilgpt2").cuda()

In [28]:
tokenizer = transformers.AutoTokenizer.from_pretrained("distilgpt2", padding_side="left")
tokenizer.pad_token = tokenizer.eos_token

In [29]:
tokenizer.pad_token = tokenizer.eos_token
print(vars(tokenizer).keys())
generation_input_ids = tokenizer(["I am a potato", "Are you a potato", "Now that I'm famous I got"], padding=True, return_tensors="pt")
assert any(generation_input_ids.input_ids[:, 0] == tokenizer.eos_token_id)
assert not any(generation_input_ids.input_ids[:, -1] == tokenizer.eos_token_id)
print(input_ids)

{'input_ids': tensor([[50256, 50256, 50256,    40,   716,   257, 21219],
        [50256, 50256, 50256,  8491,   345,   257, 21219],
        [ 3844,   326,   314,  1101,  5863,   314,  1392]]), 'attention_mask': tensor([[0, 0, 0, 1, 1, 1, 1],
        [0, 0, 0, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1]])}


In [30]:
output = model.generate(
    input_ids=generation_input_ids.input_ids.cuda(), 
    attention_mask=generation_input_ids.attention_mask.cuda(), 
    num_beams=4, num_return_sequences=4, do_sample=True, 
    max_length=50, early_stopping=True,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [39]:
reshaped = output.reshape(generation_input_ids.input_ids.shape[0], 4, -1)

In [84]:
print(model(input_ids=output, attention_mask=output!=tokenizer.pad_token_id).loss)

None


In [46]:
for bsz in range(reshaped.shape[0]):
    for seq_id in range(reshaped.shape[1]):
        print(bsz, seq_id, re.sub(r"\s+", " ", tokenizer.decode(reshaped[bsz, seq_id]).replace("<|endoftext|>", "")))

0 0 I am a potato farmer, and I am not a potato farmer. I am not a potato farmer. I am not a potato farmer. I am not a potato farmer. I am not a potato farmer. I am not a potato
0 1 I am a potato farmer and a farmer. I am a farmer. I am a farmer. I am a farmer. I am a farmer. I am a farmer. I am a farmer. I am a farmer. I am a
0 2 I am a potato farmer. I am a potato farmer. I am a potato farmer. I am a potato farmer. I am a potato farmer. I am a potato farmer. I am a potato farmer. I am a potato farmer
0 3 I am a potato farmer. I am a farmer. I am a farmer. I am a farmer. I am a farmer. I am a farmer. I am a farmer. I am a farmer. I am a farmer. I
1 0 Are you a potato? 
1 1 Are you a potato? 
1 2 Are you a potato? 
1 3 Are you a potato? 
2 0 Now that I'm famous I got the chance to work with him. He's a great guy, I'll tell you. He's a great guy, I'll tell you. He's a great guy, I'll tell you. He
2 1 Now that I'm famous I got a lot of attention. I'm a huge fan of the show, and I've alwa

In [69]:
def receptor(input_ids, attention_mask):
    print(f"{input_ids.shape = }")
    print(f"{attention_mask.shape = }")
    
    return model(input_ids, attention_mask, return_dict=False)

fn = ft.vmap(
    receptor, 
    in_dims=(1, 1),
)

In [70]:
fn(
    reshaped, 
    reshaped==tokenizer.pad_token_id
)

input_ids.shape = torch.Size([3, 50])
attention_mask.shape = torch.Size([3, 50])


IndexError: dimension specified as -2 but tensor has no dimensions

In [62]:
(reshaped==tokenizer.pad_token_id).shape

torch.Size([3, 4, 50])