In [1]:
import torch
from transformers import BartForCausalLM, BartTokenizer, GenerationConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [3]:
# Hub identifier of the checkpoint used by every tokenizer/model cell below.
version = "facebook/bart-large-cnn"

# Sample news text fed to the tokenizer; written one sentence (or clause) per literal.
ARTICLE_TO_SUMMARIZE = (
    "PG&E stated it scheduled the blackouts in response to forecasts "
    "for high winds amid dry conditions. "
    "The aim is to reduce the risk of wildfires. "
    "Nearly 800 thousand customers were scheduled to be affected by the shutoffs "
    "which were expected to last through at least midday tomorrow."
)

# BartTokenizer

In [4]:
# Load the tokenizer that belongs to the `version` checkpoint using the concrete
# BartTokenizer class, then display its repr (vocab size, special tokens, ...).
tokenizer: BartTokenizer = BartTokenizer.from_pretrained(version)
tokenizer

BartTokenizer(name_or_path='facebook/bart-large-cnn', vocab_size=50265, model_max_length=1024, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'sep_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'pad_token': AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'cls_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True)}, clean_up_tokenization_spaces=True)

## tokenizer([sequence])

In [5]:
# Tokenize the article into PyTorch tensors and move them to the target device.
# Only the device is passed to `.to()`: input_ids, attention_mask and length are
# integer tensors (the ids are looked up in the embedding table), so they must not
# be cast to float16 -- half precision applies to the model weights, not to the ids.
inputs = tokenizer(ARTICLE_TO_SUMMARIZE, return_tensors="pt", return_length=True).to(device)

print(inputs.keys())
print(inputs["input_ids"])
print(inputs["attention_mask"]) # whether each position is a real token
print(inputs["length"])         # number of valid tokens in the sequence

dict_keys(['input_ids', 'attention_mask', 'length'])
tensor([[    0,  8332,   947,   717,  2305,    24,  1768,     5,   909,  4518,
            11,  1263,     7,  5876,    13,   239,  2372,  2876,  3841,  1274,
             4,    20,  4374,    16,     7,  1888,     5,   810,     9, 12584,
             4,  9221,  5735,  7673,   916,    58,  1768,     7,    28,  2132,
            30,     5,  2572, 10816,    61,    58,   421,     7,    94,   149,
            23,   513, 15372,  3859,     4,     2]], device='cuda:0')
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')
tensor([56], device='cuda:0')


In [6]:
# Print the token-id tensor on its own for a closer look.
print(inputs["input_ids"])

tensor([[    0,  8332,   947,   717,  2305,    24,  1768,     5,   909,  4518,
            11,  1263,     7,  5876,    13,   239,  2372,  2876,  3841,  1274,
             4,    20,  4374,    16,     7,  1888,     5,   810,     9, 12584,
             4,  9221,  5735,  7673,   916,    58,  1768,     7,    28,  2132,
            30,     5,  2572, 10816,    61,    58,   421,     7,    94,   149,
            23,   513, 15372,  3859,     4,     2]], device='cuda:0')


In [7]:
# Show which device the token-id tensor is stored on.
inputs["input_ids"].device

device(type='cuda', index=0)

# BartForCausalLM

BART decoder with a language modeling head on top (linear layer with weights tied to the input embeddings).

In [8]:
# Load the checkpoint into the causal-LM class with half-precision weights,
# move it to `device`, and display the module tree.
model: BartForCausalLM = BartForCausalLM.from_pretrained(
    version,
    torch_dtype=torch.float16,
).to(device)
model

BartForCausalLM(
  (model): BartDecoderWrapper(
    (decoder): BartDecoder(
      (embed_tokens): Embedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartDecoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (activation_fn): GELUActivation()
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (encoder_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=

In [9]:
# Single forward pass in evaluation mode with autograd bookkeeping disabled.
model.eval()
with torch.inference_mode():
    outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
# The displayed object is a CausalLMOutputWithCrossAttentions.
outputs

CausalLMOutputWithCrossAttentions(loss=None, logits=tensor([[[-5.6469e+00, -5.2121e-01,  3.2277e+00,  ..., -1.3092e+00,
           2.5421e-02, -4.8980e-01],
         [-4.2757e+01, -1.3552e+00,  6.3950e+00,  ..., -4.4278e-01,
          -9.5317e-01, -1.0016e+00],
         [-1.2148e+01, -1.4863e+00,  4.4659e+00,  ..., -9.8570e-01,
          -2.0703e+00, -1.7668e+00],
         ...,
         [-5.0549e+00, -1.6935e+00,  3.5246e+00,  ..., -1.6273e+00,
          -2.7711e+00, -2.4662e+00],
         [-5.2792e+00, -1.6367e+00,  2.2745e+00,  ..., -1.5091e+00,
          -2.5233e+00, -2.2698e+00],
         [-4.4182e+00, -1.6875e+00,  3.0842e+00,  ..., -1.3521e+00,
          -2.5146e+00, -2.2789e+00]]], device='cuda:0'), past_key_values=((tensor([[[[ 1.3813,  2.8176,  4.2694,  ..., -1.6488,  2.9690, -2.5577],
          [-1.1923, -2.7534, -1.2879,  ...,  4.1079,  1.2325,  1.2708],
          [-2.0409, -0.2231, -1.1265,  ..., -3.9639, -2.5721, -2.1848],
          ...,
          [ 2.9245,  1.5179, -2.195

In [10]:
# Pull the vocabulary scores out of the forward-pass result.
logits = outputs.logits
# Shape first, then the tensor itself, one per line.
print(logits.shape, logits, sep="\n")

torch.Size([1, 56, 50264])
tensor([[[-5.6469e+00, -5.2121e-01,  3.2277e+00,  ..., -1.3092e+00,
           2.5421e-02, -4.8980e-01],
         [-4.2757e+01, -1.3552e+00,  6.3950e+00,  ..., -4.4278e-01,
          -9.5317e-01, -1.0016e+00],
         [-1.2148e+01, -1.4863e+00,  4.4659e+00,  ..., -9.8570e-01,
          -2.0703e+00, -1.7668e+00],
         ...,
         [-5.0549e+00, -1.6935e+00,  3.5246e+00,  ..., -1.6273e+00,
          -2.7711e+00, -2.4662e+00],
         [-5.2792e+00, -1.6367e+00,  2.2745e+00,  ..., -1.5091e+00,
          -2.5233e+00, -2.2698e+00],
         [-4.4182e+00, -1.6875e+00,  3.0842e+00,  ..., -1.3521e+00,
          -2.5146e+00, -2.2789e+00]]], device='cuda:0')


In [11]:
# Vocabulary size recorded in the model config.
# Note: the recorded value (50264) is one less than the vocab_size=50265 shown in the tokenizer repr.
model.config.vocab_size

50264

## model.generate(效果没有ConditionalGeneration好)

In [12]:
# Continue the tokenized article with beam search (num_beams = number of beams).
# NOTE(review): a freshly constructed GenerationConfig may not carry the checkpoint's
# own generation defaults (e.g. EOS/PAD handling) -- confirm for the installed version.
beam_search_config = GenerationConfig(num_beams=2, min_length=0, max_new_tokens=100)

model.eval()
with torch.inference_mode():
    generate_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        generation_config=beam_search_config,
    )
generate_ids

tensor([[    0,  8332,   947,   717,  2305,    24,  1768,     5,   909,  4518,
            11,  1263,     7,  5876,    13,   239,  2372,  2876,  3841,  1274,
             4,    20,  4374,    16,     7,  1888,     5,   810,     9, 12584,
             4,  9221,  5735,  7673,   916,    58,  1768,     7,    28,  2132,
            30,     5,  2572, 10816,    61,    58,   421,     7,    94,   149,
            23,   513, 15372,  3859,     4,     2,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,  

In [13]:
# Convert the generated ids back to text: special tokens are dropped and the
# tokenizer's whitespace clean-up is switched off.
tokenizer.batch_decode(
    generate_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)

['PG&E stated it scheduled the blackouts in response to forecasts for high winds amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow. had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had']

# AutoTokenizer

In [16]:
# Load the tokenizer for `version` through the Auto* factory and display its repr.
# Note: the recorded repr shows the resolved class is BartTokenizerFast (is_fast=True),
# so the AutoTokenizer annotation names the factory rather than the returned type.
tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(version)
tokenizer

BartTokenizerFast(name_or_path='facebook/bart-large-cnn', vocab_size=50265, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=False)}, clean_up_tokenization_spaces=True)

## tokenizer([sequence])

In [17]:
# Tokenize the article into PyTorch tensors and move them onto `device`.
inputs = tokenizer(
    ARTICLE_TO_SUMMARIZE,
    return_tensors="pt",
    return_length=True,
).to(device)

print(inputs.keys())
# input_ids: token ids; attention_mask: whether each position is a real token;
# length: number of valid tokens in the sequence.
print(inputs["input_ids"], inputs["attention_mask"], inputs["length"], sep="\n")

dict_keys(['input_ids', 'attention_mask', 'length'])
tensor([[    0,  8332,   947,   717,  2305,    24,  1768,     5,   909,  4518,
            11,  1263,     7,  5876,    13,   239,  2372,  2876,  3841,  1274,
             4,    20,  4374,    16,     7,  1888,     5,   810,     9, 12584,
             4,  9221,  5735,  7673,   916,    58,  1768,     7,    28,  2132,
            30,     5,  2572, 10816,    61,    58,   421,     7,    94,   149,
            23,   513, 15372,  3859,     4,     2]], device='cuda:0')
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')
tensor([56], device='cuda:0')


In [18]:
# Print the token-id tensor produced by the Auto* tokenizer.
print(inputs["input_ids"])

tensor([[    0,  8332,   947,   717,  2305,    24,  1768,     5,   909,  4518,
            11,  1263,     7,  5876,    13,   239,  2372,  2876,  3841,  1274,
             4,    20,  4374,    16,     7,  1888,     5,   810,     9, 12584,
             4,  9221,  5735,  7673,   916,    58,  1768,     7,    28,  2132,
            30,     5,  2572, 10816,    61,    58,   421,     7,    94,   149,
            23,   513, 15372,  3859,     4,     2]], device='cuda:0')


In [19]:
# Show which device the token-id tensor is stored on.
inputs["input_ids"].device

device(type='cuda', index=0)

# AutoModelForCausalLM

In [20]:
# Load the checkpoint through the Auto* factory with half-precision weights and move it
# to `device`. The recorded repr shows the factory resolves to BartForCausalLM, so the
# variable is annotated with that concrete class rather than with the factory.
model: BartForCausalLM = AutoModelForCausalLM.from_pretrained(version, torch_dtype=torch.float16).to(device)
model

BartForCausalLM(
  (model): BartDecoderWrapper(
    (decoder): BartDecoder(
      (embed_tokens): Embedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartDecoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (activation_fn): GELUActivation()
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (encoder_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=

In [21]:
# Evaluation-mode forward pass; inference_mode turns off gradient tracking.
model.eval()
with torch.inference_mode():
    outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
# The displayed object is a CausalLMOutputWithCrossAttentions.
outputs

CausalLMOutputWithCrossAttentions(loss=None, logits=tensor([[[-5.6469e+00, -5.2121e-01,  3.2277e+00,  ..., -1.3092e+00,
           2.5421e-02, -4.8980e-01],
         [-4.2757e+01, -1.3552e+00,  6.3950e+00,  ..., -4.4278e-01,
          -9.5317e-01, -1.0016e+00],
         [-1.2148e+01, -1.4863e+00,  4.4659e+00,  ..., -9.8570e-01,
          -2.0703e+00, -1.7668e+00],
         ...,
         [-5.0549e+00, -1.6935e+00,  3.5246e+00,  ..., -1.6273e+00,
          -2.7711e+00, -2.4662e+00],
         [-5.2792e+00, -1.6367e+00,  2.2745e+00,  ..., -1.5091e+00,
          -2.5233e+00, -2.2698e+00],
         [-4.4182e+00, -1.6875e+00,  3.0842e+00,  ..., -1.3521e+00,
          -2.5146e+00, -2.2789e+00]]], device='cuda:0'), past_key_values=((tensor([[[[ 1.3813,  2.8176,  4.2694,  ..., -1.6488,  2.9690, -2.5577],
          [-1.1923, -2.7534, -1.2879,  ...,  4.1079,  1.2325,  1.2708],
          [-2.0409, -0.2231, -1.1265,  ..., -3.9639, -2.5721, -2.1848],
          ...,
          [ 2.9245,  1.5179, -2.195

In [22]:
# Extract the vocabulary scores from the forward-pass result.
logits = outputs.logits
# Print the shape and then the tensor, each on its own line.
print(logits.shape, logits, sep="\n")

torch.Size([1, 56, 50264])
tensor([[[-5.6469e+00, -5.2121e-01,  3.2277e+00,  ..., -1.3092e+00,
           2.5421e-02, -4.8980e-01],
         [-4.2757e+01, -1.3552e+00,  6.3950e+00,  ..., -4.4278e-01,
          -9.5317e-01, -1.0016e+00],
         [-1.2148e+01, -1.4863e+00,  4.4659e+00,  ..., -9.8570e-01,
          -2.0703e+00, -1.7668e+00],
         ...,
         [-5.0549e+00, -1.6935e+00,  3.5246e+00,  ..., -1.6273e+00,
          -2.7711e+00, -2.4662e+00],
         [-5.2792e+00, -1.6367e+00,  2.2745e+00,  ..., -1.5091e+00,
          -2.5233e+00, -2.2698e+00],
         [-4.4182e+00, -1.6875e+00,  3.0842e+00,  ..., -1.3521e+00,
          -2.5146e+00, -2.2789e+00]]], device='cuda:0')


In [23]:
# Vocabulary size recorded in the config of the Auto*-loaded model.
model.config.vocab_size

50264

## model.generate(效果没有ConditionalGeneration好)

In [24]:
# Beam-search continuation of the tokenized article (num_beams = number of beams).
# NOTE(review): a freshly constructed GenerationConfig may not carry the checkpoint's
# own generation defaults (e.g. EOS/PAD handling) -- confirm for the installed version.
beam_search_config = GenerationConfig(num_beams=2, min_length=0, max_new_tokens=100)

model.eval()
with torch.inference_mode():
    generate_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        generation_config=beam_search_config,
    )
generate_ids

tensor([[    0,  8332,   947,   717,  2305,    24,  1768,     5,   909,  4518,
            11,  1263,     7,  5876,    13,   239,  2372,  2876,  3841,  1274,
             4,    20,  4374,    16,     7,  1888,     5,   810,     9, 12584,
             4,  9221,  5735,  7673,   916,    58,  1768,     7,    28,  2132,
            30,     5,  2572, 10816,    61,    58,   421,     7,    94,   149,
            23,   513, 15372,  3859,     4,     2,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,    56,    56,    56,    56,
            56,    56,    56,    56,    56,    56,  

In [25]:
# Decode the generated ids to text, skipping special tokens and leaving the
# tokenizer's whitespace clean-up disabled.
tokenizer.batch_decode(
    generate_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)

['PG&E stated it scheduled the blackouts in response to forecasts for high winds amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow. had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had had']