In [30]:
import torch
from transformers import BartForSequenceClassification, BartTokenizer
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [2]:
# Pick the compute device once: the first CUDA GPU when one is available,
# otherwise the CPU. Later cells move the tokenized inputs and the model here.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [3]:
# Checkpoint identifier handed to every `from_pretrained(...)` call in this notebook.
version = "valhalla/bart-large-sst2"
# Example sentence; the tokenization cells below encode this same text.
sequence = "Hello, my dog is cute"

# BartTokenizer

In [4]:
# Load the tokenizer that belongs to the `version` checkpoint via the concrete
# BartTokenizer class (the repr displayed below reports is_fast=False).
tokenizer: BartTokenizer = BartTokenizer.from_pretrained(version)
tokenizer

BartTokenizer(name_or_path='valhalla/bart-large-sst2', vocab_size=50265, model_max_length=1024, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'sep_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'pad_token': AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'cls_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True)}, clean_up_tokenization_spaces=True)

# BartForSequenceClassification

BART model with a sequence classification head on top (a linear layer on top of the pooled output), e.g. for GLUE tasks.

## Example of single-label classification:

In [None]:
# Tokenize the example sentence and move the resulting tensors to `device`.
# Only the device is passed to `.to()`: the batch holds integer token ids and
# an attention mask, which must stay integer-typed for the embedding lookup,
# and `BatchEncoding.to` takes a device, not a dtype. Half precision is
# applied to the model weights when the model is loaded, not to the inputs.
inputs = tokenizer(sequence, return_tensors="pt").to(device)
inputs

In [6]:
# Load the classification model for the `version` checkpoint and move it to
# `device`. Half precision is requested only when running on a CUDA device;
# on the CPU fallback chosen by the device cell the weights stay in float32,
# because float16 kernels on CPU are missing or slow in many PyTorch builds.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32
model: BartForSequenceClassification = BartForSequenceClassification.from_pretrained(
    version, torch_dtype=model_dtype
).to(device)
model

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartForSequenceClassification(
  (model): BartModel(
    (shared): Embedding(50265, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50265, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_norm): Layer

In [7]:
# Put the module in evaluation mode before running the forward pass.
model.eval()
# inference_mode() disables autograd tracking for everything inside the block.
with torch.inference_mode():
    # `inputs` is unpacked into keyword arguments (input_ids, attention_mask).
    outputs = model(**inputs)
outputs
# The returned object is a Seq2SeqSequenceClassifierOutput (see the repr below);
# its `logits` field is what the following cells use.

Seq2SeqSequenceClassifierOutput(loss=None, logits=tensor([[-5.4279,  4.7800]], device='cuda:0'), past_key_values=((tensor([[[[ 2.8342e-01,  4.9777e-01,  1.9274e+00,  ...,  9.6415e-01,
            2.7784e+00, -1.3422e+00],
          [-1.3426e-01,  6.8785e-01,  1.6520e+00,  ...,  9.0165e-01,
            2.3853e+00, -1.6569e+00],
          [-2.1010e+00, -1.4260e+00, -3.0066e+00,  ..., -3.9226e+00,
           -4.3049e+00, -2.2387e+00],
          ...,
          [-1.3579e+00, -1.4037e+00, -2.1351e+00,  ..., -6.4155e-03,
           -3.7234e+00,  1.1005e+00],
          [-2.5448e+00,  3.2675e+00, -3.0178e+00,  ..., -1.3579e+00,
           -7.3932e+00,  1.2371e+00],
          [ 8.6970e-02,  4.3493e+00, -4.9161e-01,  ..., -3.5166e+00,
           -6.3524e+00,  1.4127e+00]],

         [[-2.2210e+00, -2.1735e+00, -2.3492e-01,  ..., -6.5538e-01,
           -8.2326e-02,  9.0707e-01],
          [-1.7537e+00, -2.1657e+00, -1.0708e-01,  ..., -1.3002e+00,
           -1.4732e-01,  1.3812e-01],
          [-

In [8]:
# Pull the raw class scores out of the model output and show their shape
# followed by their values (one row per input sequence, one column per label).
logits = outputs.logits
print(logits.shape, logits, sep="\n")

torch.Size([1, 2])
tensor([[-5.4279,  4.7800]], device='cuda:0')


In [9]:
# Single-label decoding: take the index of the highest logit along the label
# axis. Reducing over dim=-1 (instead of the flattened tensor) keeps the result
# a class index; `.item()` then requires exactly one sequence in the batch and
# raises otherwise, rather than silently returning a flat position.
predicted_class_id = logits.argmax(dim=-1).item()
predicted_class_id

1

In [11]:
# Class-index -> label-name mapping stored on the model config.
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [10]:
# Translate the predicted class index into its human-readable label.
model.config.id2label[predicted_class_id]

'POSITIVE'

## Example of multi-label classification:

In [12]:
# Tokenize the shared example sentence (the `sequence` constant defined near the
# top of the notebook, instead of repeating the literal) and move the resulting
# tensors to `device`.
inputs = tokenizer(sequence, return_tensors="pt").to(device)
inputs

{'input_ids': tensor([[    0, 31414,     6,   127,  2335,    16, 11962,     2]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [14]:
# Reload the same checkpoint, this time flagging the task as multi-label.
# NOTE(review): per the Transformers docs, `problem_type` is understood to
# select the loss used when labels are supplied — confirm; no labels are
# passed anywhere in this notebook.
model: BartForSequenceClassification = BartForSequenceClassification.from_pretrained(
    version,
    problem_type="multi_label_classification",
)
model = model.to(device)
model

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartForSequenceClassification(
  (model): BartModel(
    (shared): Embedding(50265, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50265, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_norm): Layer

In [15]:
# Put the multi-label model in evaluation mode before the forward pass.
model.eval()
# inference_mode() disables autograd tracking for everything inside the block.
with torch.inference_mode():
    # No labels are passed, so the output's `loss` field is None (see repr below).
    outputs = model(**inputs)
outputs
# The returned object is a Seq2SeqSequenceClassifierOutput.

Seq2SeqSequenceClassifierOutput(loss=None, logits=tensor([[-5.4279,  4.7800]], device='cuda:0'), past_key_values=((tensor([[[[ 2.8342e-01,  4.9777e-01,  1.9274e+00,  ...,  9.6415e-01,
            2.7784e+00, -1.3422e+00],
          [-1.3426e-01,  6.8785e-01,  1.6520e+00,  ...,  9.0165e-01,
            2.3853e+00, -1.6569e+00],
          [-2.1010e+00, -1.4260e+00, -3.0066e+00,  ..., -3.9226e+00,
           -4.3049e+00, -2.2387e+00],
          ...,
          [-1.3579e+00, -1.4037e+00, -2.1351e+00,  ..., -6.4155e-03,
           -3.7234e+00,  1.1005e+00],
          [-2.5448e+00,  3.2675e+00, -3.0178e+00,  ..., -1.3579e+00,
           -7.3932e+00,  1.2371e+00],
          [ 8.6970e-02,  4.3493e+00, -4.9161e-01,  ..., -3.5166e+00,
           -6.3524e+00,  1.4127e+00]],

         [[-2.2210e+00, -2.1735e+00, -2.3492e-01,  ..., -6.5538e-01,
           -8.2326e-02,  9.0707e-01],
          [-1.7537e+00, -2.1657e+00, -1.0708e-01,  ..., -1.3002e+00,
           -1.4732e-01,  1.3812e-01],
          [-

In [20]:
# Extract the per-label scores from the multi-label model's output, then print
# their shape and their values on separate lines.
logits = outputs.logits
print(logits.shape, logits, sep="\n")

torch.Size([1, 2])
tensor([[-5.4279,  4.7800]], device='cuda:0')


In [21]:
# Multi-label decoding: each label is scored independently, so squash every
# logit of the (single) input sequence with a sigmoid and keep all labels whose
# probability exceeds 0.5. An argmax alone can only ever return one label.
label_probs = torch.sigmoid(logits[0])
predicted_class_ids = (label_probs > 0.5).nonzero(as_tuple=True)[0].tolist()
# The single highest-scoring label is still exposed under the original name,
# because the label-lookup cell further down indexes `id2label` with it.
predicted_class_id = logits.argmax(dim=-1).item()
predicted_class_id

1

In [22]:
model.config.id2label   # still only 2 labels by default

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [23]:
# Look up the label name for the class index computed in the previous cell.
model.config.id2label[predicted_class_id]

'POSITIVE'

# AutoTokenizer

In [26]:
# AutoTokenizer is only a factory: `from_pretrained` returns the concrete
# tokenizer class registered for the checkpoint (the repr below shows a
# BartTokenizerFast), never an AutoTokenizer instance. The variable is therefore
# left unannotated instead of being labelled with the factory type.
tokenizer = AutoTokenizer.from_pretrained(version)
tokenizer

BartTokenizerFast(name_or_path='valhalla/bart-large-sst2', vocab_size=50265, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'sep_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'pad_token': AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'cls_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True)}, clean_up_tokenization_spaces=True)

# AutoModelForSequenceClassification

## Example of single-label classification:

In [27]:
# Tokenize the shared example sentence (the `sequence` constant defined near the
# top of the notebook, instead of repeating the literal) with the auto-resolved
# tokenizer and move the resulting tensors to `device`.
inputs = tokenizer(sequence, return_tensors="pt").to(device)
inputs

{'input_ids': tensor([[    0, 31414,     6,   127,  2335,    16, 11962,     2]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [31]:
# AutoModelForSequenceClassification is a factory: for this checkpoint it
# returns a BartForSequenceClassification (see the repr below), so the variable
# is annotated with that concrete class rather than the factory.
# Half precision is requested only on a CUDA device; on the CPU fallback the
# weights stay in float32, because float16 kernels on CPU are missing or slow
# in many PyTorch builds.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32
model: BartForSequenceClassification = AutoModelForSequenceClassification.from_pretrained(
    version, torch_dtype=model_dtype
).to(device)
model

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartForSequenceClassification(
  (model): BartModel(
    (shared): Embedding(50265, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50265, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_norm): Layer

In [32]:
# Put the auto-loaded model in evaluation mode before the forward pass.
model.eval()
# inference_mode() disables autograd tracking for everything inside the block.
with torch.inference_mode():
    # `inputs` is unpacked into keyword arguments (input_ids, attention_mask).
    outputs = model(**inputs)
outputs
# The returned object is a Seq2SeqSequenceClassifierOutput, the same output
# type the explicit BartForSequenceClassification cells produced.

Seq2SeqSequenceClassifierOutput(loss=None, logits=tensor([[-5.4279,  4.7800]], device='cuda:0'), past_key_values=((tensor([[[[ 2.8342e-01,  4.9777e-01,  1.9274e+00,  ...,  9.6415e-01,
            2.7784e+00, -1.3422e+00],
          [-1.3426e-01,  6.8785e-01,  1.6520e+00,  ...,  9.0165e-01,
            2.3853e+00, -1.6569e+00],
          [-2.1010e+00, -1.4260e+00, -3.0066e+00,  ..., -3.9226e+00,
           -4.3049e+00, -2.2387e+00],
          ...,
          [-1.3579e+00, -1.4037e+00, -2.1351e+00,  ..., -6.4155e-03,
           -3.7234e+00,  1.1005e+00],
          [-2.5448e+00,  3.2675e+00, -3.0178e+00,  ..., -1.3579e+00,
           -7.3932e+00,  1.2371e+00],
          [ 8.6970e-02,  4.3493e+00, -4.9161e-01,  ..., -3.5166e+00,
           -6.3524e+00,  1.4127e+00]],

         [[-2.2210e+00, -2.1735e+00, -2.3492e-01,  ..., -6.5538e-01,
           -8.2326e-02,  9.0707e-01],
          [-1.7537e+00, -2.1657e+00, -1.0708e-01,  ..., -1.3002e+00,
           -1.4732e-01,  1.3812e-01],
          [-

In [33]:
# Extract the class scores from the auto-loaded model's output, then print
# their shape and their values on separate lines.
logits = outputs.logits
print(logits.shape, logits, sep="\n")

torch.Size([1, 2])
tensor([[-5.4279,  4.7800]], device='cuda:0')


In [34]:
# Single-label decoding: take the index of the highest logit along the label
# axis. Reducing over dim=-1 (instead of the flattened tensor) keeps the result
# a class index; `.item()` then requires exactly one sequence in the batch and
# raises otherwise, rather than silently returning a flat position.
predicted_class_id = logits.argmax(dim=-1).item()
predicted_class_id

1

In [35]:
# Class-index -> label-name mapping stored on the auto-loaded model's config.
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [36]:
# Translate the predicted class index into its human-readable label.
model.config.id2label[predicted_class_id]

'POSITIVE'