In [8]:
from torch import load, tensor, cuda, long, no_grad, topk
import transformer_utils
from sentencepiece import SentencePieceProcessor

# --- Model hyperparameters ---------------------------------------------------
num_layers = 2
embedding_dim = 128
fully_connected_dim = 128
num_heads = 2
positional_encoding_length = 256

# --- Tokenizer ---------------------------------------------------------------
# Read the trained SentencePiece model from disk.
tokenizer = SentencePieceProcessor()
tokenizer.load('./models/sentencepiece.model')

# The decoder reuses the encoder's vocabulary size, taken from the tokenizer.
encoder_vocab_size = int(tokenizer.vocab_size())
decoder_vocab_size = encoder_vocab_size

# --- Model -------------------------------------------------------------------
# Arguments are positional. `positional_encoding_length` is passed twice --
# presumably once for the encoder and once for the decoder; confirm against
# the `transformer_utils.Transformer` signature.
transformer = transformer_utils.Transformer(
    num_layers,
    embedding_dim,
    num_heads,
    fully_connected_dim,
    encoder_vocab_size,
    decoder_vocab_size,
    positional_encoding_length,
    positional_encoding_length,
)

# Choose the compute device *before* loading the checkpoint: `load` needs it
# for `map_location`, so assigning it afterwards would raise NameError (or
# silently reuse a stale value left behind by an earlier notebook cell).
device_ = 'cuda' if cuda.is_available() else 'cpu'

# Restore the trained weights. `weights_only=True` restricts unpickling to
# tensors and plain containers instead of arbitrary Python objects.
path       = 'best_qA_model_106th_epoch.pt'
checkpoint = load(path, map_location=device_, weights_only=True)
transformer.load_state_dict(checkpoint['model_state_dict'])

# Move the model itself onto the same device as the loaded weights.
transformer.to(device_)

example_question = 'beyonce, '

# Tokenize each prompt, wrap the ids in a long tensor on the target device and
# add a leading dimension of size 1 with `unsqueeze(0)`.
eval_inp = tensor(
    tokenizer.tokenize(example_question), dtype=long, device=device_
).unsqueeze(0)
eval_tar_inp = tensor(
    tokenizer.tokenize('answer: '), dtype=long, device=device_
).unsqueeze(0)

# Inference only: switch to eval mode and disable gradient tracking.
transformer.eval()
with no_grad():
    eval_preds, _ = transformer(eval_inp, eval_tar_inp)

    # Keep the predictions at the last position and take the 10 highest-scoring
    # indices along the final dimension.
    last_position_preds = eval_preds[:, -1, :]
    _, topk_indices = topk(last_position_preds, k=10, dim=-1)
    topk_indices = topk_indices.int().tolist()

# Decode every candidate id on its own so each prediction is a separate string.
decoded_answers = []
for idx in topk_indices[0]:
    decoded_answers.append(tokenizer.detokenize([idx]))

print(f"\n[Eval Example]:\n{example_question}")
print(f"Top 10 Predictions: {decoded_answers}\n")



[Eval Example]:
beyonce, 
Top 10 Predictions: ['Per', 'AD', 'action', 'her', '', 'infant', 'the', 'er', 'hand', 'cul']

