In [37]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer, GenerationConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [38]:
# Use the first CUDA GPU when one is visible to PyTorch, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [39]:
# Checkpoint identifier handed to the from_pretrained() calls in this notebook.
version = "google/flan-t5-small"
# Sentence that is tokenized and passed to generate() as the encoder input.
encoder_input = "Studies have been shown that owning a dog is good for you"
# Text that is tokenized and passed to generate() as the decoder-side prompt.
decoder_input = "Studies show that"

# T5Tokenizer

In [40]:
# Load the T5 tokenizer for the checkpoint named by `version`.
# The bare last line displays the tokenizer's repr as the cell output.
tokenizer: T5Tokenizer = T5Tokenizer.from_pretrained(version)
tokenizer

T5Tokenizer(name_or_path='google/flan-t5-small', vocab_size=32100, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<extra_id_43>', 

## tokenizer([sequence])

In [41]:
# Show the subword pieces of the encoder sentence as strings (not ids).
tokenizer.tokenize(encoder_input)

['▁Studies',
 '▁have',
 '▁been',
 '▁shown',
 '▁that',
 '▁own',
 'ing',
 '▁',
 'a',
 '▁dog',
 '▁is',
 '▁good',
 '▁for',
 '▁you']

In [42]:
# Tokenize the encoder sentence into PyTorch tensors and move them to `device`.
encoder_inputs = tokenizer(
    encoder_input,                      # sentence (or batch of sentences)
    truncation = True,                  # truncate anything beyond max_length
    padding = True,                     # padding strategy: [True, 'longest', 'max_length', 'do_not_pad']
    # max_length = max_length,          # maximum length; defaults to the model's maximum when unset
    add_special_tokens = True,          # add the special tokens to the text
    return_length = True,               # also return the sequence length
    return_overflowing_tokens = False,  # if True, tokens cut off by truncation are returned as extra segments instead of being dropped
    return_tensors = "pt"               # output format: np / pt / tf / jax
).to(device)  # device move only: ids and masks are integer tensors and must not be cast to float16

print(encoder_inputs.keys())
print(encoder_inputs["input_ids"])
print(encoder_inputs["attention_mask"]) # whether each position is a real token
print(encoder_inputs["length"])         # number of tokens in the sequence

dict_keys(['input_ids', 'attention_mask', 'length'])
tensor([[6536,   43,  118, 2008,   24,  293,   53,    3,    9, 1782,   19,  207,
           21,   25,    1]], device='cuda:0')
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')
tensor([15], device='cuda:0')


In [43]:
# Show the subword pieces of the decoder prompt as strings (not ids).
tokenizer.tokenize(decoder_input)

['▁Studies', '▁show', '▁that']

In [44]:
# Tokenize the decoder-side prompt into PyTorch tensors and move them to `device`.
# NOTE(review): with add_special_tokens=True the encoded prompt ends with the
# end-of-sequence id (1, '</s>'), and that id stays in the middle of the
# sequence returned by generate() -- confirm this is intended.
decoder_inputs = tokenizer(
    # sentence (or batch of sentences)
    decoder_input,
    # truncate anything beyond max_length
    truncation=True,
    # padding strategy: [True, 'longest', 'max_length', 'do_not_pad']
    padding=True,
    # max_length is left unset, so the model's maximum length applies
    # add the special tokens to the text
    add_special_tokens=True,
    # also return the sequence length
    return_length=True,
    # if True, tokens cut off by truncation are returned as extra segments
    # instead of being dropped
    return_overflowing_tokens=False,
    # output format: np / pt / tf / jax
    return_tensors="pt",
).to(device)

# One item per line: the keys, the token ids, the attention mask (whether each
# position is a real token), and the sequence length.
print(
    decoder_inputs.keys(),
    decoder_inputs["input_ids"],
    decoder_inputs["attention_mask"],
    decoder_inputs["length"],
    sep="\n",
)

dict_keys(['input_ids', 'attention_mask', 'length'])
tensor([[6536,  504,   24,    1]], device='cuda:0')
tensor([[1, 1, 1, 1]], device='cuda:0')
tensor([4], device='cuda:0')


# T5ForConditionalGeneration

T5 Model with a language modeling head on top.

In [45]:
# Load FLAN-T5 with its language-modeling head and move it to `device`.
# Half precision is requested only on CUDA: `device` can fall back to the CPU,
# where float16 kernels are not reliably available (depends on the PyTorch
# version), so float32 is used there instead.
model: T5ForConditionalGeneration = T5ForConditionalGeneration.from_pretrained(
    version,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)
model

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): 

In [46]:
# Put the model in evaluation mode before generating.
model.eval()

# Generate with both the encoder sentence and an explicit decoder prompt.
# The decoder arguments are optional: generate() also works with only the
# encoder inputs.
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoder_inputs["input_ids"],
        attention_mask=encoder_inputs["attention_mask"],
        decoder_input_ids=decoder_inputs["input_ids"],
        decoder_attention_mask=decoder_inputs["attention_mask"],
        # num_beams is the number of beams used for beam search
        generation_config=GenerationConfig(
            num_beams=1,
            min_length=0,
            max_new_tokens=100,
        ),
    )

In [47]:
# Print the decoder prompt's token ids next to the generated ids for comparison.
print(tokenizer.encode(decoder_input))
print(outputs)

[6536, 504, 24, 1]
tensor([[   0, 6536,  504,   24,    1,   31,    7,    8,  167,    5,    1]],
       device='cuda:0')


In [48]:
# Print the decoder prompt text, then the generated ids decoded back to text
# with special tokens omitted.
print(decoder_input)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))

Studies show that
["Studies show that's the most."]


In [49]:
# Decode ids 0 and 1 separately to see which special tokens they stand for.
tokenizer.batch_decode([0, 1])

['<pad>', '</s>']

## 可以只输入input而不输入decoder

In [50]:
# Encoder-only generation: tokenize the instruction, move it to `device`, and
# let generate() run without any decoder-side arguments.
inputs = tokenizer(
    "A step by step recipe to make bolognese pasta:",
    return_tensors="pt",
).to(device)

with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        # num_beams is the number of beams used for beam search
        generation_config=GenerationConfig(
            num_beams=1,
            min_length=0,
            max_new_tokens=100,
        ),
    )

# Raw generated ids first, then the decoded text without special tokens.
print(
    outputs,
    tokenizer.batch_decode(outputs, skip_special_tokens=True),
    sep="\n",
)

tensor([[    0,  1474,     3,     9,  4119,    13,     3,   115, 23443,     7,
            15,   139,     3,     9,   508,  3047,    11,   617,     8, 13732,
            12,     8,  3047,     5,     1]], device='cuda:0')
['Pour a cup of bolognese into a large bowl and add the pasta to the bowl.']


# AutoTokenizer

In [51]:
# Rebind `tokenizer` to whatever AutoTokenizer resolves for the same
# checkpoint; the bare last line displays its repr as the cell output.
tokenizer = AutoTokenizer.from_pretrained(version)
tokenizer

T5TokenizerFast(name_or_path='google/flan-t5-small', vocab_size=32100, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<extra_id_43>

# AutoModelForSeq2SeqLM

In [52]:
# Rebind `model` to whatever AutoModelForSeq2SeqLM resolves for the same
# checkpoint and move it to `device`.
# Half precision is requested only on CUDA: `device` can fall back to the CPU,
# where float16 kernels are not reliably available (depends on the PyTorch
# version), so float32 is used there instead.
model = AutoModelForSeq2SeqLM.from_pretrained(
    version,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)
model

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): 

In [53]:
# Put the model in evaluation mode before generating.
model.eval()

# Encoder-only generation: tokenize the instruction, move it to `device`, and
# let generate() run without any decoder-side arguments.
inputs = tokenizer(
    "A step by step recipe to make bolognese pasta:",
    return_tensors="pt",
).to(device)

with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        # num_beams is the number of beams used for beam search
        generation_config=GenerationConfig(
            num_beams=1,
            min_length=0,
            max_new_tokens=100,
        ),
    )

# Raw generated ids first, then the decoded text without special tokens.
print(
    outputs,
    tokenizer.batch_decode(outputs, skip_special_tokens=True),
    sep="\n",
)

tensor([[    0,  1474,     3,     9,  4119,    13,     3,   115, 23443,     7,
            15,   139,     3,     9,   508,  3047,    11,   617,     8, 13732,
            12,     8,  3047,     5,     1]], device='cuda:0')
['Pour a cup of bolognese into a large bowl and add the pasta to the bowl.']
