In [52]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the pretrained "t5-base" tokenizer and model.
# `model_max_length` is passed explicitly: when it is omitted, T5Tokenizer emits
# a deprecation warning about relying on the implicit 512-token limit of
# "t5-base". 512 is the limit that was already in effect, so encoding behaves
# exactly as before -- only the warning goes away.
tokenizer = T5Tokenizer.from_pretrained("t5-base", model_max_length=512)
model = T5ForConditionalGeneration.from_pretrained("t5-base")

# The task is stated as a plain-text prefix at the start of the prompt.
input_ids = tokenizer("translate English to German: The house is wonderful.", return_tensors="pt").input_ids
outputs = model.generate(input_ids)
# Only the first generated sequence is decoded; skip_special_tokens=True strips
# special tokens from the printed text.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Das Haus ist wunderbar.


In [53]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Few-shot prompt experiment: four question/answer examples about colours,
# followed by a new question whose answer slot is the <extra_id_0> sentinel.
# NOTE(review): the examples write "[mask]" as literal text while the query uses
# the <extra_id_0> sentinel; in the recorded run the model echoed "[mask]" back.
# Confirm this prompt format is what the experiment intends.
list_of_question_answers_examples = [
    "What color is the sky? The sky is [mask] in color.",
    "What color is the car? The car is [mask] in color.",
    "What color is the table? The table is [mask] in color.",
    "What color is the laptop? The laptop is [mask] in color.",
]

# Each example is followed by a single space, so the query appended below is
# separated from the last example.
input_examples_for_t5 = "".join(f"{sentence} " for sentence in list_of_question_answers_examples)
input_to_predict = "What color is the jacket? <extra_id_0>"

# Tokenize examples + query as one sequence and keep only the token ids.
full_prompt = input_examples_for_t5 + input_to_predict
input_ids = tokenizer(full_prompt, return_tensors="pt")["input_ids"]

# Special tokens are deliberately left in the decoded text (no
# skip_special_tokens), so the sentinel and <pad>/</s> markers stay visible.
sequence_ids = model.generate(input_ids)
sequences = tokenizer.batch_decode(sequence_ids)
sequences

['<pad> <extra_id_0> The jacket is [mask] in color.</s>']

In [54]:
# Few-shot prompt experiment, second variant: general-knowledge style
# question/answer examples followed by a new question ending in <extra_id_0>.
# NOTE(review): as written, the examples contain the literal text "[mask]"
# rather than a sentinel token; the recorded output repeats it verbatim.
list_of_question_answers_examples = [
    "What is the capital of Australia? The capital of Australia is [mask].",
    "What is the process of photosynthesis? The process of photosynthesis is [mask].",
    "What is the speed of sound? The speed of sound is [mask].",
    "What is the founder of Microsoft known for? The founder of Microsoft is known for [mask].",
    "What is the man wearing? The man is wearing [mask].",
]

# Concatenate the examples, each one followed by a single space.
input_examples_for_t5 = "".join(f"{sentence} " for sentence in list_of_question_answers_examples)

# Two candidate queries are defined; only input_to_predict2 is sent to the model
# below. input_to_predict1 is kept so it can be swapped in by hand.
input_to_predict1 = "What is the president of Peru talking about? <extra_id_0>"
input_to_predict2 = "What is the specie of flower in the picture? <extra_id_0>"

# Tokenize examples + chosen query as one sequence and keep only the token ids.
full_prompt = input_examples_for_t5 + input_to_predict2
input_ids = tokenizer(full_prompt, return_tensors="pt")["input_ids"]

# Decode without skip_special_tokens so the sentinel and <pad>/</s> markers
# remain visible in the displayed result.
sequence_ids = model.generate(input_ids)
sequences = tokenizer.batch_decode(sequence_ids)
sequences

['<pad> <extra_id_0> The specie of flower is [mask].</s>']