In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence = "I've been waiting for a HuggingFace course my whole life."

tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)

input_ids = torch.tensor([ids])
print("Input IDs:", input_ids)

output = model(input_ids)
print("Logits:", output.logits)

  from .autonotebook import tqdm as notebook_tqdm


Input IDs: tensor([[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,
          2026,  2878,  2166,  1012]])
Logits: tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)


In [3]:
input_ids

tensor([[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,
          2026,  2878,  2166,  1012]])

In [4]:
batched_ids = torch.tensor([ids, ids])
output = model(batched_ids)
print("Logits:", output.logits)

Logits: tensor([[-2.7276,  2.8789],
        [-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)


In [9]:
batched_ids

tensor([[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,
          2026,  2878,  2166,  1012],
        [ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,
          2026,  2878,  2166,  1012]])

In [11]:
for id in batched_ids:
    decoded_string = tokenizer.decode(id)
    print(decoded_string)

i've been waiting for a huggingface course my whole life.
i've been waiting for a huggingface course my whole life.


In [17]:
sequence1 = "I've been waiting for a HuggingFace course my whole life."
sequence2 = "I hate this so much!"

tokens1 = tokenizer.tokenize(sequence1)
ids1 = tokenizer.convert_tokens_to_ids(tokens1)

tokens2 = tokenizer.tokenize(sequence2)
ids2 = tokenizer.convert_tokens_to_ids(tokens2)

input_ids1 = torch.tensor([ids1])
output1 = model(input_ids1)
# print("Input IDs:", input_ids)
print("Logits:", output1.logits)

input_ids2 = torch.tensor([ids2])
output2 = model(input_ids2)
print("Logits:", output2.logits)

Logits: tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)
Logits: tensor([[ 3.1931, -2.6685]], grad_fn=<AddmmBackward0>)


In [53]:
#padding them withoput using attention mask
sequence1 = "I've been waiting for a HuggingFace course my whole life."
sequence2 = "I hate this so much!"

tokens1 = tokenizer.tokenize(sequence1)
ids1 = tokenizer.convert_tokens_to_ids(tokens1)

tokens2 = tokenizer.tokenize(sequence2)
ids2 = tokenizer.convert_tokens_to_ids(tokens2)
dst = torch.zeros_like(torch.tensor(ids1)).long()
dst.put_(torch.tensor([range(len(ids2))]), torch.tensor(ids2))  #padding the ids with shortest length according to the longest ids with pytorch.YAY :)
ids2 = dst

batch_size = [ids1, ids2]
output2 = model(torch.tensor(batch_size))
print("Logits:", output2.logits)

# input_ids1 = torch.tensor([ids1])
# input_ids2 = torch.tensor([ids2])

# output2 = model(input_ids2)
# print("Logits:", output2.logits)

Logits: tensor([[-2.7276,  2.8789],
        [ 2.5423, -2.1265]], grad_fn=<AddmmBackward0>)


the result of two cells with addidng and without padding are not the same!

In [70]:
#using attention mask with using attention mask
sequence1 = "I've been waiting for a HuggingFace course my whole life."
sequence2 = "I hate this so much!"

tokens1 = tokenizer.tokenize(sequence1)
ids1 = tokenizer.convert_tokens_to_ids(tokens1)

tokens2 = tokenizer.tokenize(sequence2)
ids2 = tokenizer.convert_tokens_to_ids(tokens2)
dst = torch.zeros_like(torch.tensor(ids1)).long()
dst.put_(torch.tensor([range(len(ids2))]), torch.tensor(ids2))  #padding the ids with shortest length according to the longest ids with pytorch.YAY :)
ids2_pad = dst

mask1 = torch.ones_like(torch.tensor(ids1))
mask2 = torch.ones_like(torch.tensor(ids2))
mask2_zero = torch.zeros_like(torch.tensor(ids2_pad))
mask2_zero.put_(torch.tensor([range(len(ids2))]), mask2)

attention_mask = torch.stack([mask1, mask2_zero])  #NEW
batch_size = [ids1, ids2_pad]
output2 = model(torch.tensor(batch_size), attention_mask=attention_mask)
print("Logits:", output2.logits)

Logits: tensor([[-2.7276,  2.8789],
        [ 3.1931, -2.6685]], grad_fn=<AddmmBackward0>)


  mask2_zero = torch.zeros_like(torch.tensor(ids2_pad))


and also the result is different when we use attenstion mask :)