In [121]:
!pip install transformers==4.11.3

Collecting transformers==4.11.3
  Downloading transformers-4.11.3-py3-none-any.whl (2.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting sacremoses (from transformers==4.11.3)
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Collecting tokenizers<0.11,>=0.10.1 (from transformers==4.11.3)
  Downloading tokenizers-0.10.3.tar.gz (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.7/212.7 kB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m65.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: tokenizers
  Building wheel f

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

import os

from utils import *

from fine_tune_answer_generation import __load_experiment, get_tokenizer, get_model

[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### load model

In [2]:
# Model identifier handed to get_tokenizer / get_model in a later cell
# (the trailing comment keeps 't5-base' as an alternative value).
base_model = 'potsawee/t5-large-generation-squad-QuestionAnswer' # 't5-base'

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): `torch` is not imported explicitly in the visible cells; presumably
# it is re-exported by `from utils import *` — confirm, or import it directly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# tokenizer = AutoTokenizer.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")
# model = AutoModelForSeq2SeqLM.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")

In [4]:
# Build the tokenizer and the model through the project helpers imported from
# fine_tune_answer_generation. The variable is spelled `processer`, and the
# remaining cells refer to it by that name.
processer = get_tokenizer(base_model)
# NOTE(review): get_model receives `device`, so it presumably places the model
# there — confirm in the helper.
model = get_model(base_model, device, processer)

In [8]:
# Directory of the fine-tuning run whose checkpoint is restored in the next cell.
# The name looks like it encodes the run's hyperparameters — TODO confirm.
model_dir = 't5-base_e12_lr1e-05_eps5e-05_Adam_cosine_batch1'

In [9]:
# Restore the fine-tuned weights stored in `model_dir`.
# The guard tests for the checkpoint file itself, i.e. the file that is actually
# opened: checking only for 'train.log' would crash inside torch.load when a run
# directory has a log but no checkpoint, and would skip a checkpoint that has no log.
# NOTE(review): `model_dir` mentions t5-base while `base_model` is a t5-large
# checkpoint — verify they match; load_state_dict is strict about parameter
# names and shapes by default.
checkpoint_path = os.path.join(model_dir, 'latest_model.pt')
if os.path.isfile(checkpoint_path):
    # map_location remaps tensors saved on another device onto the current one.
    state_dict = torch.load(checkpoint_path, map_location=device.type)
    # The checkpoint is a dict; the model parameters live under the 'weights' key.
    model.load_state_dict(state_dict['weights'])
else:
    # Best effort: keep going with the weights `model` already has.
    print("model not found")

### load data

In [10]:
# Dataset root and the on-disk cache for the tokenized answer features.
data_path = 'Question_Answer_Dataset_v1.2'
feature_cache_path = 'Question_Answer_Dataset_v1.2/features_answers'

# Reuse the cache when it is present as a regular file; otherwise build it once.
if os.path.isfile(feature_cache_path):
    print("features exists")
else:
    # Build the raw dataset first: the context of each QA pair is retrieved by IR
    # as its nearest (k=1) sentence.
    # NOTE(review): `encoder_model` is not defined in the visible cells — presumably
    # it comes from `from utils import *`; confirm before relying on this branch.
    print("processing raw dataset... ")
    raw_dataset = CustomData(data_path, encoder_model, k=1)
    # Tokenize; prepare_features_a is given the cache path, so it presumably
    # writes the features there — TODO confirm.
    print("computing features...")
    prepare_features_a(raw_dataset, feature_cache_path, processer, max_len_inp=512,max_len_out=512)

# Test split served from the feature cache.
# NOTE(review): 425 presumably is the size of the test split — confirm against FeatureData.
test_dataset = FeatureData(feature_cache_path, 'test', 425)

features exists
length of feature test set:  425


In [11]:
# Switch the model to evaluation mode before generating. eval() returns the
# module, so as the last expression of the cell it is also echoed as output.
model.eval()

T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=1024, out_features=4096, bias=False)
              (wo): Linear(in_features=4096, out_features=1024, bias=False)
              (d

In [38]:
# Passage to generate a question/answer pair from. It is a raw triple-quoted
# literal; any newlines in it are replaced by spaces so the model sees one line.
context = r"""The shell almost invariably has a circular opening over which the drumhead is stretched, but the shape of the remainder of the shell varies widely.""".replace('\n', ' ')

In [39]:
# Tokenize the passage and return PyTorch tensors ('pt').
inputs = processer(context, return_tensors='pt')

In [40]:
inputs.keys()

dict_keys(['input_ids', 'attention_mask'])

In [41]:
# Only the token ids are moved to the device; the attention mask returned by the
# tokenizer is not used by the generate() call.
input_ids = inputs['input_ids'].to(device)

In [42]:
# Generate from the passage using the model's default generation settings.
# NOTE(review): no max_length / max_new_tokens is passed, so the output length
# falls back to the model's defaults — confirm the question/answer pair is not
# being truncated.
outputs = model.generate(input_ids)

In [43]:
# Decode the first generated sequence, keeping special tokens so that the
# separator between question and answer survives decoding.
question_answer = processer.decode(outputs[0], skip_special_tokens=False)
# Remove padding and end-of-sequence markers; they are not part of the text.
question_answer = question_answer.replace(processer.pad_token, "").replace(processer.eos_token, "")
# The text has the form "<question><sep><answer>". partition() splits at the first
# separator and never raises: if the model emitted no separator, `answer` is an
# empty string, and extra separators stay inside `answer`, whereas unpacking the
# result of split() fails with ValueError in both cases.
question, _sep, answer = question_answer.partition(processer.sep_token)

In [44]:
question 

' What is the most common shape of  drum shell?'

In [30]:
answer 

' drums'

### answering

In [7]:
# Question-answering pipeline built on the plain 't5-base' checkpoint.
# NOTE: per the warning recorded under this cell, the QA head
# ('qa_outputs.weight', 'qa_outputs.bias') is newly initialized rather than loaded
# from the checkpoint, so the answers and scores shown below come from an
# untrained head.
question_answerer = pipeline("question-answering", model='t5-base')

Some weights of T5ForQuestionAnswering were not initialized from the model checkpoint at t5-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [4]:
question_answerer.model

T5ForQuestionAnswering(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dropout

In [8]:
question

'What type of vegetation do giraffes prefer?'

In [9]:
context

'Giraffes can inhabit savannas, grasslands, or open woodlands. They prefer areas enriched with acacia growth. They drink large quantities of water and, as a result, they can spend long periods of time in dry, arid areas. When searching for more food they will venture into areas with denser foliage.'

In [10]:
question_answerer(question=question, context=context)

{'score': 0.000419730378780514,
 'start': 0,
 'end': 30,
 'answer': 'Giraffes can inhabit savannas,'}

In [13]:
# Minimal sanity example: the context states the answer to the question directly.
# This rebinds `question` and `context` used by the earlier cells.
question = "Is the bear white?"
context = "the bear is white"

In [16]:
# "question: ... context: ..." prompt string, built only for display here — the
# pipeline call in the following cell receives question/context as separate arguments.
# NOTE(review): this rebinds `inputs`, which another cell uses for the tokenizer output.
inputs = f"question: {question} context: {context}"
inputs

'question: Is the bear white? context: the bear is white'

In [17]:
question_answerer(question=question, context=context)

{'score': 0.0747537910938263, 'start': 0, 'end': 8, 'answer': 'the bear'}