In [5]:
import torch
import torch_tensorrt
from transformers import T5Tokenizer

In [23]:
%%capture
# Load the pretrained SentencePiece tokenizer published under the "t5-base" name.
# NOTE(review): presumably the same vocabulary the fine-tuned model was trained
# with — confirm against the training setup.
tokenizer = T5Tokenizer.from_pretrained("t5-base")

In [24]:
%%capture
# Name of the torch.compile backend used when the model is compiled later on.
backend = "torch_tensorrt"

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEV = "cuda" if torch.cuda.is_available() else "cpu"

# The model: the checkpoint is loaded as a whole module object (it is used
# directly via .to()/.eval() below), not as a state dict.
#   * map_location=DEV remaps the stored tensors onto the selected device, so a
#     checkpoint that was saved from a GPU still loads on a CPU-only host.
#   * weights_only=False is stated explicitly because a pickled module cannot
#     be restored by the restricted "weights only" unpickler.
# SECURITY: full unpickling can execute arbitrary code — only load checkpoint
# files that come from a trusted source.
model = torch.load(
    "fastapi/models/t5-model-finetuned-50-epochs-copy.bin",
    map_location=DEV,
    weights_only=False,
)
model.to(DEV)
# Switch to inference mode (disables dropout and similar training-only layers).
model.eval()

In [25]:
def prompt_generate(model, tokenizer, keywords: str) -> str:
    """Generate a prompt based on a set of keywords in the following format:

    f"{word_1} | {word_2}"

    Args:
        model: the seq2seq model (or a wrapper around it) exposing
            ``generate`` and ``device``.
        tokenizer: the tokenizer exposing ``encode`` and ``decode``.
        keywords (str): a set of words defined above.

    Returns:
        str: the generated sentence from the T5 model, with special tokens
        (padding / end-of-sequence markers) removed.
    """
    input_ids = tokenizer.encode(
        keywords + "</s>", max_length=512, truncation=True, return_tensors="pt"
    )
    # Follow the device of the model that was actually passed in, rather than
    # a notebook-level global that may not match it.
    input_ids = input_ids.to(model.device)
    # Sampling is enabled, so repeated calls can return different sentences.
    outputs = model.generate(input_ids, do_sample=True, max_length=1024)
    # Let the tokenizer drop the special tokens instead of slicing a fixed
    # number of characters off both ends: a fixed slice cuts into real text
    # whenever the leading pad or trailing end-of-sequence token is missing
    # (e.g. when generation stops at max_length).
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [26]:
# Wrap the eager model with torch.compile, using the backend selected earlier.
# Shapes are treated as static (dynamic=False); the backend is asked to run in
# float16 and to truncate 64-bit integer/float values.
# NOTE(review): the timing cells call `new_model.generate(...)`. That attribute
# is looked up through the compiled wrapper and likely resolves to the original
# module's own method, in which case the compiled forward is never exercised —
# confirm before trusting any speed comparison.
new_model = torch.compile(
    model,
    dynamic=False,
    backend=backend,
    options=dict(truncate_long_and_double=True, precision=torch.float16),
)

In [29]:
%%time
# Baseline: time one generation with the uncompiled model.
# NOTE(review): prompt_generate samples (do_sample=True), so both the text and
# the number of generated tokens vary from run to run; a single %%time
# measurement is therefore only a rough indication.
prompt_generate(model, tokenizer, 'Round | No merger')

CPU times: user 210 ms, sys: 0 ns, total: 210 ms
Wall time: 210 ms


'The image of the galaxy was smooth and round, not merging with any other.'

In [30]:
%%time
# Same prompt through the torch.compile wrapper, for comparison with the
# baseline timing.
# NOTE(review): prompt_generate samples (do_sample=True), so this run produces
# a different sentence (and a different number of decoding steps) than the
# baseline; the two wall times are not directly comparable. A first call may
# also include one-off compilation overhead — confirm with repeated runs.
prompt_generate(new_model, tokenizer, 'Round | No merger')

CPU times: user 327 ms, sys: 0 ns, total: 327 ms
Wall time: 327 ms


'This galaxy has an image that does not have an edge-on disk, is not merging, and has no bar or spiral arms.'