In [1]:
import torch
from transformers import GPT2ForQuestionAnswering, GPT2Tokenizer

In [2]:
# Run on the first CUDA GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [3]:
# Checkpoint name used for both the tokenizer and the model.
version = "gpt2"

# Toy extractive-QA example: a question and the context that contains the answer.
question = "Who was Jim Henson?"
text = "Jim Henson was a nice puppet"

# GPT2Tokenizer

In [4]:
# Load the tokenizer that belongs to the checkpoint named by `version`.
tokenizer: GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained(version)
tokenizer

GPT2Tokenizer(name_or_path='gpt2', vocab_size=50257, model_max_length=1024, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True)}, clean_up_tokenization_spaces=True)

In [5]:
# pad_token has to be set manually: the tokenizer loaded above defines only
# bos/eos/unk special tokens, so the end-of-text token is reused for padding.
tokenizer.pad_token = tokenizer.eos_token

## tokenizer([sequence])

In [8]:
# Encode the question and its context as one text pair and move the tensors to
# `device`. Only the device is passed to `.to()`: the token ids and attention
# mask are integer tensors (the embedding lookup needs integer indices), so they
# must not be cast to float16, and BatchEncoding.to() accepts a device, not a dtype.
# Background: https://github.com/huggingface/transformers/issues/16359
inputs = tokenizer(
    question, text,                     # (question, context) pair, encoded as a single sequence
    return_tensors = "pt"               # output format: "np", "pt", "tf" or "jax"
).to(device)

print(inputs.keys())
print(inputs["input_ids"])
print(inputs["attention_mask"]) # 1 marks a real token (as opposed to padding)

dict_keys(['input_ids', 'attention_mask'])
tensor([[ 8241,   373,  5395,   367, 19069,    30, 18050,   367, 19069,   373,
           257,  3621, 30095]], device='cuda:0')
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')


In [9]:
# Show the encoded token ids on their own.
print(inputs.input_ids)

tensor([[ 8241,   373,  5395,   367, 19069,    30, 18050,   367, 19069,   373,
           257,  3621, 30095]], device='cuda:0')


# GPT2ForQuestionAnswering

The GPT-2 Model transformer with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layer on top of the hidden-states output to compute span start logits and span end logits).

In [11]:
# The span head (`qa_outputs`) is not stored in the "gpt2" checkpoint and is
# randomly initialised on load, so seed the RNG first to make re-runs of this
# notebook produce the same logits.
torch.manual_seed(0)

# Use half precision only on the GPU. On the CPU fallback, float16 kernels are
# missing or slow in many PyTorch builds, so load the weights as float32 there.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

model: GPT2ForQuestionAnswering = GPT2ForQuestionAnswering.from_pretrained(version, torch_dtype=model_dtype).to(device)
model

Some weights of GPT2ForQuestionAnswering were not initialized from the model checkpoint at gpt2 and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPT2ForQuestionAnswering(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (qa_outputs): Linear(in_features=768, out_features=2, bias=True)
)

In [12]:
# Put the model in evaluation mode, then run a single forward pass inside
# inference mode so no autograd state is recorded.
model.eval()
with torch.inference_mode():
    outputs = model(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask)
outputs

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[-2.0179, -4.7696, -4.2005, -4.3155, -5.2689, -6.4017, -4.5318, -8.4996,
         -5.2352, -4.8845, -6.5525, -6.0297, -5.8695]], device='cuda:0'), end_logits=tensor([[ -1.8369,  -4.8851,  -4.1605,  -3.5117,  -4.6512,  -5.3742,  -3.9084,
         -10.2493,  -4.7496,  -4.7177,  -4.3401,  -5.0593,  -5.3947]],
       device='cuda:0'), hidden_states=None, attentions=None)

In [14]:
# Pick the highest-scoring position of the start logits and of the end logits
# as the boundaries of the predicted answer span.
answer_start_index = torch.argmax(outputs.start_logits)
answer_end_index = torch.argmax(outputs.end_logits)
answer_start_index, answer_end_index

(tensor(0, device='cuda:0'), tensor(0, device='cuda:0'))

In [15]:
# Cut the predicted span out of the first (only) sequence; the end index is inclusive.
span = slice(answer_start_index, answer_end_index + 1)
predict_answer_tokens = inputs["input_ids"][0][span]
predict_answer_tokens

tensor([8241], device='cuda:0')

In [17]:
# Turn the selected token ids back into text. The QA head was newly initialised
# when the model was loaded (it is not in the "gpt2" checkpoint), so the decoded
# span is not a meaningful answer until the model is trained on a QA task.
tokenizer.decode(predict_answer_tokens)

'Who'