In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Hugging Face checkpoint id, and the directory passed as `cache_dir` to both
# loaders below.
MODEL_NAME = "google/flan-t5-base"
MODELS_DIR = "/home/nub/Bachelor/bachelor-thesis/models"

# A fast tokenizer is requested because tokenize() asks for offset mappings,
# which the slow (pure-Python) tokenizers do not provide.
# NOTE(review): unlike the model load below, this call does not pass
# local_files_only=True, so it may try to reach the Hub -- confirm intended.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=MODELS_DIR, use_fast=True)

# Load the seq2seq model from MODELS_DIR only (local_files_only=True), letting
# the library choose dtype and device placement ("auto" for both).
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_NAME,
    cache_dir=MODELS_DIR,
    local_files_only=True,
    device_map="auto",
    torch_dtype="auto",
    low_cpu_mem_usage=True,
)

def tokenize(prompt):
    """Encode *prompt* with the module-level ``tokenizer``.

    The prompt is truncated to ``tokenizer.model_max_length`` tokens without
    any warning (the original note on this function puts that limit at 512
    tokens), so callers never hand the model an over-long sequence.

    Returns whatever the tokenizer call produces; it is asked for PyTorch
    tensors (``return_tensors="pt"``) and for per-token character offsets
    (``return_offsets_mapping=True``) alongside the usual ids and mask.
    """
    encode_options = {
        "truncation": True,
        "max_length": tokenizer.model_max_length,
        "return_tensors": "pt",
        "return_offsets_mapping": True,
    }
    return tokenizer(prompt, **encode_options)

In [8]:
# text = (
#     "Generate a question for the text: Text:"
#     "I went to work on the latest software update. "
#     "Many people are working on it."
#     "It will be released in the next summer."
#     "Speaking of summer, I will go on summer vacation"
#     "I will be going to Germany for 4 days."
#     "Berlin seems to be a nice location at that time of the year."
#     "After the 4 days, I will go to Italy for 2 weeks."
#     "They have the nicest beaches in the summer."
#     "I hope will get a nice tan."
# )
# text = "Repeat this once: ADI-2009-currency-exposures-libor-changes-forward-foreign"
text = "Say: hello"

# Encode the prompt and dump the raw encoding (ids, attention mask, offsets).
model_inputs = tokenize(text)
print(model_inputs)

# The input tensors are moved to the model's device before generating.
generated = model.generate(
    model_inputs["input_ids"].to(model.device),
    attention_mask=model_inputs["attention_mask"].to(model.device),
    max_length=64,
)

# Decode one id at a time so each individual token piece can be inspected;
# special tokens are kept on purpose.
tokens_in = [
    tokenizer.decode(token_id, skip_special_tokens=False)
    for token_id in model_inputs.input_ids[0]
]
tokens_out = [
    tokenizer.decode(token_id, skip_special_tokens=False)
    for token_id in generated[0]
]

# Whole-sequence decodes first, then the per-token pieces glued back together.
# The two forms can differ (e.g. in whitespace), which is what this compares.
print(tokenizer.decode(model_inputs.input_ids[0], skip_special_tokens=False))
print(tokenizer.decode(generated[0], skip_special_tokens=False))
print("".join(tokens_out))
print(tokens_in)
print(tokens_out)

{'input_ids': tensor([[10403,    10, 21820,     1]]), 'attention_mask': tensor([[1, 1, 1, 1]]), 'offset_mapping': tensor([[[ 0,  3],
         [ 3,  4],
         [ 4, 10],
         [ 0,  0]]])}
Say: hello</s>
<pad> hello</s>
<pad>hello</s>
['Say', ':', 'hello', '</s>']
['<pad>', 'hello', '</s>']
