In [14]:
from transformers import pipeline

classifier = pipeline("sentiment-analysis", device=0)

classifier([
    "I love playing basketball",
    "I hate studying",
])

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'label': 'POSITIVE', 'score': 0.9992303848266602},
 {'label': 'NEGATIVE', 'score': 0.9989625215530396}]

# Tokenizer

In [15]:
from transformers import AutoTokenizer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [16]:
raw_inputs = [
    "I love playing basketball",
    "I hate studying",
]

inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
print(inputs)

{'input_ids': tensor([[ 101, 1045, 2293, 2652, 3455,  102],
        [ 101, 1045, 5223, 5702,  102,    0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 0]])}


In [17]:
from transformers import AutoModel 

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModel.from_pretrained(checkpoint)

outputs = model(**inputs) # **inputs passes the content of the dictionary as named arguments to the model forward method
print(outputs)
print(outputs.last_hidden_state.shape)


BaseModelOutput(last_hidden_state=tensor([[[ 0.3825, -0.0303,  0.1281,  ...,  0.2931,  0.7915, -0.4785],
         [ 0.7825,  0.2582, -0.1530,  ...,  0.2646,  0.7143, -0.3490],
         [ 0.9320,  0.5608,  0.4751,  ...,  0.0192,  0.8268, -0.4745],
         [ 0.5742,  0.1464,  0.0363,  ..., -0.0976,  0.3402, -0.5343],
         [ 0.4953, -0.2668,  0.0419,  ..., -0.2285,  0.3109, -1.0238],
         [ 1.0199,  0.1193,  0.0889,  ...,  0.6064,  0.2341, -0.8190]],

        [[-0.0767,  0.8316, -0.1653,  ..., -0.1531, -0.5883, -0.0515],
         [-0.1225,  0.9392, -0.0536,  ..., -0.3153, -0.4908,  0.2073],
         [-0.1233,  1.0684,  0.0327,  ..., -0.2589, -0.4985,  0.0596],
         [ 0.1809,  1.3178, -0.3163,  ..., -0.3060, -0.0386, -0.2794],
         [ 0.4821,  0.7182, -0.2021,  ..., -0.0219, -0.2646, -0.1810],
         [-0.2504,  0.8853, -0.3587,  ..., -0.3097, -0.3676,  0.0418]]],
       grad_fn=<NativeLayerNormBackward0>), hidden_states=None, attentions=None)
torch.Size([2, 6, 768])


In [18]:
from transformers import AutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

outputs = model(**inputs)
print(outputs)
print(outputs.logits.shape)

SequenceClassifierOutput(loss=None, logits=tensor([[-3.5025,  3.6663],
        [ 3.7671, -3.1028]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
torch.Size([2, 2])


In [19]:
print(outputs.logits)

tensor([[-3.5025,  3.6663],
        [ 3.7671, -3.1028]], grad_fn=<AddmmBackward0>)


In [20]:
import torch

pred = torch.nn.functional.softmax(outputs.logits, dim=-1) # dim=-1 means that the softmax is applied to the last dimension
print(pred)

tensor([[7.6961e-04, 9.9923e-01],
        [9.9896e-01, 1.0375e-03]], grad_fn=<SoftmaxBackward0>)


In [21]:
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}