In [14]:
import torch
from transformers import T5Tokenizer, T5ForQuestionAnswering
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

In [15]:
# Use the first CUDA GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [16]:
# Checkpoint name passed to every from_pretrained() call in this notebook.
version = "t5-small"

# Toy example: a question plus the text that is fed to the model alongside it.
question = "Who was Jim Henson?"
text = "Jim Henson was a nice puppet"

# T5Tokenizer

In [17]:
# Load the T5 tokenizer for the `version` checkpoint; the repr displayed below reports
# is_fast=False, i.e. this is the non-fast implementation.
tokenizer: T5Tokenizer = T5Tokenizer.from_pretrained(version)
tokenizer

You are using the legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565


T5Tokenizer(name_or_path='t5-small', vocab_size=32100, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<extra_id_43>', '<extra_id_4

# T5ForQuestionAnswering

T5 Model with a span classification head on top for extractive question-answering tasks like SQuAD (linear layers on top of the hidden-states output to compute span start logits and span end logits).

In [20]:
# Load the checkpoint in half precision first, then move the weights to the selected device.
# The log below notes that qa_outputs.* is newly initialised, so the QA head is untrained.
model: T5ForQuestionAnswering = T5ForQuestionAnswering.from_pretrained(
    version,
    torch_dtype=torch.float16,
)
model = model.to(device)
model

Some weights of T5ForQuestionAnswering were not initialized from the model checkpoint at t5-small and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5ForQuestionAnswering(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(

## 问题和上下文拼接在一起放入模型

In [None]:
# Show the sub-word pieces of the question and of the text, one list per line.
print(tokenizer.tokenize(question), tokenizer.tokenize(text), sep="\n")

['▁Who', '▁was', '▁Jim', '▁He', 'n', 'son', '?']
['▁Jim', '▁He', 'n', 'son', '▁was', '▁', 'a', '▁nice', '▁puppet']


In [None]:
# Encode question and text as one sequence pair and move the tensors to the target device.
# Only the device is passed to .to(): BatchEncoding.to() takes no dtype argument, and the token
# ids / attention mask must remain integer tensors even though the model weights are float16.
inputs = tokenizer(question, text, return_tensors="pt", return_length=True).to(device)

# Inspect what the tokenizer returned: available keys, token ids, mask and sequence length.
print(inputs.keys())
print(inputs["input_ids"])
print(inputs["attention_mask"])
print(inputs["length"])

dict_keys(['input_ids', 'attention_mask', 'length'])
tensor([[ 2645,    47,  6006,   216,    29,   739,    58,     1,  6006,   216,
            29,   739,    47,     3,     9,  1245, 26141,     1]],
       device='cuda:0')
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')
tensor([18], device='cuda:0')


In [21]:
# Put the model in eval mode and run a forward pass without autograd bookkeeping.
# Only the token ids and the attention mask are forwarded; the extra "length" entry is not.
model.eval()
with torch.inference_mode():
    outputs = model(**{key: inputs[key] for key in ("input_ids", "attention_mask")})
# The value displayed below is a Seq2SeqQuestionAnsweringModelOutput.
outputs

Seq2SeqQuestionAnsweringModelOutput(loss=None, start_logits=tensor([[-0.2060, -0.0506, -0.1765, -0.0109,  0.0901, -0.0565, -0.0922, -0.0495,
          0.1362, -0.1868, -0.0685, -0.2342, -0.2255, -0.1427,  0.0752, -0.0768,
          0.0717, -0.0710]], device='cuda:0'), end_logits=tensor([[-0.6412, -0.7758, -0.9114, -1.0989, -1.6091, -1.6224, -0.9075, -0.8519,
         -0.7478, -0.7025, -1.3993, -1.2442, -0.6096, -0.5398, -0.5607, -0.6055,
         -0.6366, -0.5584]], device='cuda:0'), past_key_values=((tensor([[[[-0.4865, -2.3323, -1.1428,  ..., -2.5693, -1.7539, -0.5693],
          [-0.0882,  0.0183, -0.1481,  ...,  0.0296, -1.2338, -0.6804],
          [ 2.6456, -0.7596, -0.0335,  ..., -0.7589, -0.8528,  0.5651],
          ...,
          [ 0.4127,  0.4141, -0.9865,  ..., -1.7384,  0.2710,  0.1541],
          [ 1.3428, -0.8623,  2.1666,  ..., -2.2484,  0.7204,  0.1665],
          [ 1.9160, -0.4646, -0.6616,  ...,  0.7756,  0.9390,  0.0736]],

         [[-0.9565, -2.3581, -0.2478,  ..., 

In [22]:
# Start logits: shape, raw values, and the index of the largest value
# (argmax() without a dim works on the flattened tensor, which is fine for a batch of one).
print(outputs.start_logits.shape, outputs.start_logits, outputs.start_logits.argmax(), sep="\n")

torch.Size([1, 18])
tensor([[-0.2060, -0.0506, -0.1765, -0.0109,  0.0901, -0.0565, -0.0922, -0.0495,
          0.1362, -0.1868, -0.0685, -0.2342, -0.2255, -0.1427,  0.0752, -0.0768,
          0.0717, -0.0710]], device='cuda:0')
tensor(8, device='cuda:0')


In [23]:
# End logits: shape, raw values, and the index of the largest value
# (argmax() without a dim works on the flattened tensor, which is fine for a batch of one).
print(outputs.end_logits.shape, outputs.end_logits, outputs.end_logits.argmax(), sep="\n")

torch.Size([1, 18])
tensor([[-0.6412, -0.7758, -0.9114, -1.0989, -1.6091, -1.6224, -0.9075, -0.8519,
         -0.7478, -0.7025, -1.3993, -1.2442, -0.6096, -0.5398, -0.5607, -0.6055,
         -0.6366, -0.5584]], device='cuda:0')
tensor(13, device='cuda:0')


In [24]:
# Slice the token ids of the first (only) sequence from the most likely start position up to
# and including the most likely end position.
answer_start = outputs.start_logits.argmax()
answer_stop = outputs.end_logits.argmax() + 1  # +1 because the slice end is exclusive
predict_answer_tokens = inputs.input_ids[0, answer_start:answer_stop]
predict_answer_tokens

tensor([6006,  216,   29,  739,   47,    3], device='cuda:0')

In [25]:
# Convert the selected token ids back to a string, leaving out special tokens.
tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)

'Jim Henson was'

## 问题和上下文分开放入模型(结果可能不对)

In [26]:
# Encode the question and the text separately with identical options: the question is used as
# the encoder input and the text as the decoder input in the next cell.
encoder_inputs, decoder_inputs = (
    tokenizer(sentence, return_tensors="pt", return_length=True).to(device)
    for sentence in (question, text)
)
# Token counts of the two encodings, one per line.
print(encoder_inputs["length"], decoder_inputs["length"], sep="\n")

tensor([8], device='cuda:0')
tensor([10], device='cuda:0')


In [27]:
# Forward pass with the question on the encoder side and the text on the decoder side,
# executed without autograd bookkeeping.
with torch.inference_mode():
    outputs = model(
        **{
            "input_ids": encoder_inputs["input_ids"],
            "attention_mask": encoder_inputs["attention_mask"],
            "decoder_input_ids": decoder_inputs["input_ids"],
            "decoder_attention_mask": decoder_inputs["attention_mask"],
        }
    )

In [28]:
# Start logits for the separate-input run: shape, values and flattened arg-max index.
print(outputs.start_logits.shape, outputs.start_logits, outputs.start_logits.argmax(), sep="\n")

torch.Size([1, 10])
tensor([[-0.0651,  0.0385, -0.0944, -0.2152, -0.2710, -0.0015, -0.1760,  0.0202,
         -0.0531, -0.0768]], device='cuda:0')
tensor(1, device='cuda:0')


In [29]:
# End logits for the separate-input run: shape, values and flattened arg-max index.
print(outputs.end_logits.shape, outputs.end_logits, outputs.end_logits.argmax(), sep="\n")

torch.Size([1, 10])
tensor([[-1.0518, -1.3643, -1.5050, -0.9178, -0.8031, -0.7777, -0.6107, -0.6192,
         -0.5903, -0.6581]], device='cuda:0')
tensor(8, device='cuda:0')


In [30]:
# The logits in this run have one entry per decoder token, so the predicted span is cut out of
# the decoder-side token ids (first and only sequence), end position included.
answer_start = outputs.start_logits.argmax()
answer_stop = outputs.end_logits.argmax() + 1  # +1 because the slice end is exclusive
predict_answer_tokens = decoder_inputs.input_ids[0, answer_start:answer_stop]
predict_answer_tokens

tensor([  216,    29,   739,    47,     3,     9,  1245, 26141],
       device='cuda:0')

In [31]:
# Convert the selected decoder-side token ids back to a string, leaving out special tokens.
tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)

'Henson was a nice puppet'

# AutoTokenizer

In [49]:
# Let AutoTokenizer pick the tokenizer class for the checkpoint; per the repr displayed below,
# the object actually returned here is a T5TokenizerFast (is_fast=True).
tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(version)
tokenizer

T5TokenizerFast(name_or_path='t5-small', vocab_size=32100, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<extra_id_43>', '<extra_i

In [50]:
# Encode question and text as one sequence pair, then move the resulting tensors to the device.
inputs = tokenizer(question, text, return_tensors="pt", return_length=True)
inputs = inputs.to(device)
inputs

{'input_ids': tensor([[ 2645,    47,  6006,   216,    29,   739,    58,     1,  6006,   216,
            29,   739,    47,     3,     9,  1245, 26141,     1]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')}

# AutoModelForQuestionAnswering

In [62]:
# Let the Auto class choose the architecture for the checkpoint (the repr below shows a
# T5ForQuestionAnswering), load it in half precision, then move it to the selected device.
# The log below notes that qa_outputs.* is newly initialised, so the QA head is untrained.
model: AutoModelForQuestionAnswering = AutoModelForQuestionAnswering.from_pretrained(
    version,
    torch_dtype=torch.float16,
)
model = model.to(device)
model

Some weights of T5ForQuestionAnswering were not initialized from the model checkpoint at t5-small and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5ForQuestionAnswering(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(

In [63]:
# Put the freshly loaded model in eval mode and run a forward pass without autograd
# bookkeeping, forwarding only the token ids and the attention mask.
model.eval()
with torch.inference_mode():
    outputs = model(**{key: inputs[key] for key in ("input_ids", "attention_mask")})

In [64]:
# Start logits from the Auto-class model: shape, values and flattened arg-max index.
print(outputs.start_logits.shape, outputs.start_logits, outputs.start_logits.argmax(), sep="\n")

torch.Size([1, 18])
tensor([[ 0.1545,  0.1410,  0.0748,  0.1216, -0.2071,  0.0335,  0.0392,  0.2151,
          0.0343,  0.1268, -0.1551,  0.0241, -0.0077,  0.0718, -0.0225, -0.1418,
         -0.0813,  0.2080]], device='cuda:0')
tensor(7, device='cuda:0')


In [65]:
# End logits from the Auto-class model: shape, values and flattened arg-max index.
print(outputs.end_logits.shape, outputs.end_logits, outputs.end_logits.argmax(), sep="\n")

torch.Size([1, 18])
tensor([[-0.3588,  0.3357,  0.0485, -0.2612, -0.1900, -0.5583,  0.2450, -0.2963,
         -0.3768, -0.0044, -0.1077, -0.3103,  0.2763,  0.2214, -0.0901,  0.1150,
          0.0975,  0.2036]], device='cuda:0')
tensor(1, device='cuda:0')


In [66]:
# Slice the token ids of the first (only) sequence between the arg-max start and end positions.
# Start and end are chosen independently, so the start can land after the end; the slice is
# then empty, which is what the output below shows (start 7, end 1).
answer_start = outputs.start_logits.argmax()
answer_stop = outputs.end_logits.argmax() + 1  # +1 because the slice end is exclusive
predict_answer_tokens = inputs.input_ids[0, answer_start:answer_stop]
predict_answer_tokens

tensor([], device='cuda:0', dtype=torch.int64)

In [67]:
# Convert the selected token ids back to a string, leaving out special tokens
# (an empty selection decodes to an empty string, as shown below).
tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)

''