In [6]:
import torch
from transformers import T5Tokenizer, T5ForQuestionAnswering

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [8]:
version = "google/flan-t5-small"
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"

# T5Tokenizer

In [9]:
tokenizer: T5Tokenizer = T5Tokenizer.from_pretrained(version)
tokenizer

T5Tokenizer(name_or_path='google/flan-t5-small', vocab_size=32100, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<extra_id_43>', 

## tokenizer([sequence])

# T5ForQuestionAnswering

T5 Model with a span classification head on top for extractive question-answering tasks like SQuAD (linear layers on top of the hidden-states output to compute span start logits and span end logits).

In [12]:
model: T5ForQuestionAnswering = T5ForQuestionAnswering.from_pretrained(version, torch_dtype=torch.float16).to(device)
model

Some weights of T5ForQuestionAnswering were not initialized from the model checkpoint at google/flan-t5-small and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5ForQuestionAnswering(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): Line

## 问题和答案放在一起放入模型

In [None]:
print(tokenizer.tokenize(question))
print(tokenizer.tokenize(text))

['▁Who', '▁was', '▁Jim', '▁He', 'n', 'son', '?']
['▁Jim', '▁He', 'n', 'son', '▁was', '▁', 'a', '▁nice', '▁puppet']


In [None]:
inputs = tokenizer(question, text, return_tensors="pt", return_length=True).to(device, torch.float16)

print(inputs.keys())
print(inputs["input_ids"])
print(inputs["attention_mask"]) # 对应是否是文字
print(inputs["length"])

dict_keys(['input_ids', 'attention_mask', 'length'])
tensor([[ 2645,    47,  6006,   216,    29,   739,    58,     1,  6006,   216,
            29,   739,    47,     3,     9,  1245, 26141,     1]],
       device='cuda:0')
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')
tensor([18], device='cuda:0')


In [13]:
model.eval()
with torch.inference_mode():
    outputs = model(
        input_ids = inputs["input_ids"],
        attention_mask = inputs["attention_mask"],
    )
outputs

Seq2SeqQuestionAnsweringModelOutput(loss=None, start_logits=tensor([[0.8246, 0.2024, 0.4494, 1.0985, 1.7568, 1.6244, 0.8391, 0.6467, 0.5117,
         0.7120, 1.3877, 1.4462, 0.4365, 0.8379, 1.3385, 0.9377, 1.6594, 1.0729]],
       device='cuda:0'), end_logits=tensor([[-0.4192, -0.2018, -0.4823, -0.9298, -1.0043, -1.0471, -0.6605, -0.7219,
         -0.3110, -0.8652, -1.0088, -1.0676, -0.4647, -0.7009, -1.3603, -0.6891,
         -0.9796, -0.5943]], device='cuda:0'), past_key_values=((tensor([[[[-0.3246, -0.4472,  2.7203,  ...,  0.5856, -0.2654,  0.3955],
          [-0.2813,  1.1287,  1.0805,  ...,  1.9208,  1.5590,  0.3872],
          [ 0.7931,  2.0162,  0.5740,  ...,  0.3320, -0.0778,  0.5524],
          ...,
          [ 0.0793,  0.1356,  0.8429,  ..., -1.8467,  0.1006, -1.1133],
          [ 0.1751, -0.9410, -0.9163,  ..., -1.2283,  1.3642, -1.3440],
          [ 0.5772, -0.5526, -0.9095,  ..., -0.6349, -0.1608,  0.5451]],

         [[ 1.7940,  3.4396,  2.5323,  ..., -0.6185,  1.0407,  0

In [14]:
print(outputs.start_logits.shape)
print(outputs.start_logits)
print(outputs.start_logits.argmax())

torch.Size([1, 18])
tensor([[0.8246, 0.2024, 0.4494, 1.0985, 1.7568, 1.6244, 0.8391, 0.6467, 0.5117,
         0.7120, 1.3877, 1.4462, 0.4365, 0.8379, 1.3385, 0.9377, 1.6594, 1.0729]],
       device='cuda:0')
tensor(4, device='cuda:0')


In [15]:
print(outputs.end_logits.shape)
print(outputs.end_logits)
print(outputs.end_logits.argmax())

torch.Size([1, 18])
tensor([[-0.4192, -0.2018, -0.4823, -0.9298, -1.0043, -1.0471, -0.6605, -0.7219,
         -0.3110, -0.8652, -1.0088, -1.0676, -0.4647, -0.7009, -1.3603, -0.6891,
         -0.9796, -0.5943]], device='cuda:0')
tensor(1, device='cuda:0')


In [16]:
predict_answer_tokens = inputs.input_ids[0, outputs.start_logits.argmax() : outputs.end_logits.argmax() + 1]
predict_answer_tokens

tensor([], device='cuda:0', dtype=torch.int64)

In [17]:
tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)

''

## 问题和答案分开放入模型(结果可能不对)

In [18]:
encoder_inputs = tokenizer(question, return_tensors="pt", return_length=True).to(device)
decoder_inputs = tokenizer(text, return_tensors="pt", return_length=True).to(device)
print(encoder_inputs["length"])
print(decoder_inputs["length"])

tensor([8], device='cuda:0')
tensor([10], device='cuda:0')


In [19]:
with torch.inference_mode():
    outputs = model(
        input_ids = encoder_inputs["input_ids"],
        attention_mask = encoder_inputs["attention_mask"],
        decoder_input_ids = decoder_inputs["input_ids"],
        decoder_attention_mask = decoder_inputs["attention_mask"],
    )

In [20]:
print(outputs.start_logits.shape)
print(outputs.start_logits)
print(outputs.start_logits.argmax())

torch.Size([1, 10])
tensor([[1.0140, 1.0886, 1.2511, 0.9303, 1.2020, 0.7829, 0.6886, 0.9277, 0.9545,
         0.8629]], device='cuda:0')
tensor(2, device='cuda:0')


In [21]:
print(outputs.end_logits.shape)
print(outputs.end_logits)
print(outputs.end_logits.argmax())

torch.Size([1, 10])
tensor([[-1.1406, -1.1446, -1.0910, -0.9245, -0.9295, -0.6052, -0.5428, -0.6322,
         -0.5270, -0.5848]], device='cuda:0')
tensor(8, device='cuda:0')


In [22]:
predict_answer_tokens = decoder_inputs.input_ids[0, outputs.start_logits.argmax() : outputs.end_logits.argmax() + 1]
predict_answer_tokens

tensor([   29,   739,    47,     3,     9,  1245, 26141], device='cuda:0')

In [23]:
tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)

'nson was a nice puppet'