# PyTorch

In [4]:
from transformers import AutoTokenizer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

sentences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this.",
]

# Two-step manual pipeline, applied to each sentence independently:
#   1. split the sentence into tokens,
#   2. convert that token list into vocabulary ids.
# Nothing is padded here, so the two id lists have different lengths.
tokens = list(map(tokenizer.tokenize, sentences))
ids = list(map(tokenizer.convert_tokens_to_ids, tokens))

# One id list per output line.
print(*ids, sep="\n")

[1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
[1045, 5223, 2023, 1012]


In [2]:
import torch

# Two id sequences stacked as rows. The second sentence has only 4 ids, so it is
# right-padded with ten zeros to reach the 14 entries of the first row.
ids = [
    [1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012],
    [1045, 5223, 2023, 1012] + [0] * 10,
]

# Both rows now have equal length, so the nested list converts to a 2 x 14 tensor.
input_ids = torch.tensor(ids)

In [5]:
from transformers import AutoTokenizer

# Load the tokenizer for `checkpoint` (the name is assigned in an earlier cell).
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Last expression of the cell: displays the id this tokenizer uses for padding.
tokenizer.pad_token_id

0

In [7]:
from transformers import AutoModelForSequenceClassification

first_sequence = [1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
second_sequence = [1045, 5223, 2023, 1012]
# Right-pad the shorter sequence with zeros so both rows fit in one tensor.
padded_second = second_sequence + [0] * (len(first_sequence) - len(second_sequence))

# Each sentence alone as a batch of one, then both together as a padded batch of two.
ids1 = torch.tensor([first_sequence])
ids2 = torch.tensor([second_sequence])
all_ids = torch.tensor([first_sequence, padded_second])

model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# No attention mask is passed to any of these calls. In the recorded output the
# second row of the batched logits differs from the logits for `ids2` alone.
for batch in (ids1, ids2, all_ids):
    print(model(batch).logits)

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)
tensor([[ 3.9497, -3.1357]], grad_fn=<AddmmBackward0>)
tensor([[-2.7276,  2.8789],
        [ 1.5444, -1.3998]], grad_fn=<AddmmBackward0>)


In [8]:
# Padded batch: the 4-id sentence is extended with ten zeros to 14 entries.
all_ids = torch.tensor(
    [
        [1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012],
        [1045, 5223, 2023, 1012] + [0] * 10,
    ]
)
# Same shape as `all_ids`: 1 at every real-token position, 0 at every padded position.
attention_mask = torch.tensor([[1] * 14, [1] * 4 + [0] * 10])

model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# Reference results: each sentence on its own (`ids1` / `ids2` come from the previous cell).
output1, output2 = model(ids1), model(ids2)
print(output1.logits)
print(output2.logits)

# Batched call with the mask supplied. In the recorded output its second row
# agrees with the logits for `ids2` alone.
output = model(all_ids, attention_mask=attention_mask)
print(output.logits)


tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)
tensor([[ 3.9497, -3.1357]], grad_fn=<AddmmBackward0>)
tensor([[-2.7276,  2.8789],
        [ 3.9497, -3.1357]], grad_fn=<AddmmBackward0>)


In [9]:
from transformers import AutoTokenizer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

sentences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this.",
]

# Call the tokenizer directly on the whole list with padding enabled. The recorded
# output contains both `input_ids` (shorter row filled with 0) and a matching
# `attention_mask`.
encoded_batch = tokenizer(sentences, padding=True)
print(encoded_batch)

{'input_ids': [[101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102], [101, 1045, 5223, 2023, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]}


# TensorFlow

In [11]:
from transformers import AutoTokenizer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

sentences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this.",
]

# Tokenize every sentence separately, then map each token list to vocabulary ids.
# No padding is applied, so the resulting id lists differ in length.
tokens = list(map(tokenizer.tokenize, sentences))
ids = list(map(tokenizer.convert_tokens_to_ids, tokens))

# One id list per output line.
print(*ids, sep="\n")

[1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
[1045, 5223, 2023, 1012]


In [12]:
import tensorflow as tf

# Two id sequences stacked as rows. The 4-id sentence is right-padded with ten
# zeros so that it has the same 14 entries as the first row.
ids = [
    [1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012],
    [1045, 5223, 2023, 1012] + [0] * 10,
]

# Equal-length rows convert to a single 2 x 14 constant.
input_ids = tf.constant(ids)

In [13]:
from transformers import AutoTokenizer

# Load the tokenizer for `checkpoint` (the name is assigned in an earlier cell).
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Last expression of the cell: displays the id this tokenizer uses for padding.
tokenizer.pad_token_id

0

In [14]:
from transformers import TFAutoModelForSequenceClassification

first_sequence = [1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
second_sequence = [1045, 5223, 2023, 1012]
# Right-pad the shorter sequence with zeros so both rows fit in one constant.
padded_second = second_sequence + [0] * (len(first_sequence) - len(second_sequence))

# Each sentence alone as a batch of one, then both together as a padded batch of two.
ids1 = tf.constant([first_sequence])
ids2 = tf.constant([second_sequence])
all_ids = tf.constant([first_sequence, padded_second])

model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

# No attention mask is passed to any of these calls. In the recorded output the
# second row of the batched logits differs from the logits for `ids2` alone.
for batch in (ids1, ids2, all_ids):
    print(model(batch).logits)





All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.

All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


tf.Tensor([[-2.7276235  2.8789377]], shape=(1, 2), dtype=float32)
tf.Tensor([[ 3.949747  -3.1357398]], shape=(1, 2), dtype=float32)
tf.Tensor(
[[-2.7276208  2.8789375]
 [ 1.5444432 -1.3998368]], shape=(2, 2), dtype=float32)


In [15]:
# Padded batch: the 4-id sentence is extended with ten zeros to 14 entries.
all_ids = tf.constant(
    [
        [1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012],
        [1045, 5223, 2023, 1012] + [0] * 10,
    ]
)
# Same shape as `all_ids`: 1 at every real-token position, 0 at every padded position.
attention_mask = tf.constant([[1] * 14, [1] * 4 + [0] * 10])

In [16]:
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

# Reference results: each sentence on its own (`ids1` / `ids2` come from an earlier cell).
output1, output2 = model(ids1), model(ids2)
print(output1.logits)
print(output2.logits)

# Batched call with the mask supplied. In the recorded output its second row
# agrees with the logits for `ids2` alone.
output = model(all_ids, attention_mask=attention_mask)
print(output.logits)

All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.

All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


tf.Tensor([[-2.7276235  2.8789377]], shape=(1, 2), dtype=float32)
tf.Tensor([[ 3.949747  -3.1357398]], shape=(1, 2), dtype=float32)
tf.Tensor(
[[-2.7276208  2.8789375]
 [ 3.9497473 -3.1357398]], shape=(2, 2), dtype=float32)


In [17]:
from transformers import AutoTokenizer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

sentences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this.",
]

# Call the tokenizer directly on the whole list with padding enabled. The recorded
# output contains both `input_ids` (shorter row filled with 0) and a matching
# `attention_mask`.
encoded_batch = tokenizer(sentences, padding=True)
print(encoded_batch)

{'input_ids': [[101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102], [101, 1045, 5223, 2023, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]}
