# Importing modules

In [1]:


import numpy as np
import math


import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils
from torch.utils.data import DataLoader, TensorDataset, Subset

import csv

import multiprocessing as mp
import os
import sys
import copy
import random
import gc
import time
from tqdm import tqdm
from collections import defaultdict

import itertools

import dill

import warnings
warnings.filterwarnings('ignore')

from datasets import load_dataset

import torch
from transformers import BertTokenizer, BertModel
import numpy as np



In [2]:
from model import *

# Checking cuda

In [3]:
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")
    device_index = 0
    device = torch.device(f"cuda:{device_index}")
    print('using cuda...')
else:
    device = torch.device("cpu")
    print('using cpu...')

torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

Device 0: NVIDIA GeForce RTX 4090
using cuda...


# Config

In [4]:

max_length = 200

sequence_size =  max_length             
feature_size = 768          
num_layers = 3                      
num_heads = 4                
hidden_activation = 'tanh'
output_activation = 'tanh'
initializer = "xavier_normal"
optimizer = 'adam'
loss = 'mean_squared_error'
bias = False
drop_rate = 0.0
alpha = 0.000001       

num_epochs = 100  
batch_size = 1

model_directory = f'model.pth'

# Building model

In [5]:

# Initialize the model
model = build_model(sequence_size,
                    feature_size,
                    num_layers,
                    num_heads,
                    hidden_activation,
                    output_activation,
                    initializer,
                    optimizer,
                    loss,
                    bias,
                    drop_rate,
                    alpha)


model = model.to(device)

# loading nn models
model_dict = torch.load(model_directory)
model.load_state_dict(model_dict[f'model'])

<All keys matched successfully>

# Inference

In [None]:


# 初始化 BERT tokenizer 和 vectorizer
tokenizer  = BertTokenizer.from_pretrained('bert-base-uncased')
vectorizer = BertModel.from_pretrained('bert-base-uncased')

# 範例句子
sentence = "What is in front of the Notre Dame Main Buildin"

sentence = "[CLS] " + sentence + " [SEP] "

response = ''

for i in range(max_length):

    # Step 1: Tokenize the sentences
    tokenized_sentence = tokenizer(sentence, add_special_tokens=False, padding='max_length', max_length=max_length, truncation=True, return_tensors="pt")
    
    # Step 2: Vectorize the sentences
    input_id           = tokenized_sentence['input_ids']
    attention_mask     = tokenized_sentence['attention_mask']
    with torch.no_grad(): 
        input_vector   = vectorizer(input_id).last_hidden_state 

    input_vector = input_vector.to(device)
    mask_2 = attention_mask[0].unsqueeze(1) * attention_mask[0].unsqueeze(0)
    mask_2 = mask_2.unsqueeze(0).unsqueeze(0)
    mask_2 = mask_2.to(device)
    mask_1 = (mask_2 -1) * 1e20
    mask_1 = mask_1.to(device)

    model.eval()     
    output                 = model(input_vector, (mask_1, mask_2))

    vocab_embeddings       = vectorizer.get_input_embeddings().weight .to(device)
    cos_sim                = F.cosine_similarity(output, vocab_embeddings, dim=1)
    most_similar_token_idx = torch.argmax(cos_sim).item()

    if most_similar_token_idx != 102:
            
        word = tokenizer.convert_ids_to_tokens(most_similar_token_idx)

        sentence += ' ' + word

        response += ' ' + word
        print(response)

    
    
    # vocab = tokenizer.get_vocab()
    # while most_similar_token_idx in vocab and vocab[most_similar_token_idx].startswith('[unused'):
    #     most_similar_token_idx = torch.argmax(cos_sim).item()  # Recompute the most similar token

    # from transformers import GPT2LMHeadModel, GPT2Tokenizer
    # tokenizer_ = GPT2Tokenizer.from_pretrained('gpt2')
    # model_     = GPT2LMHeadModel.from_pretrained('gpt2')
    # output     = model_.generate(torch.tensor([[most_similar_token_idx]]), max_length=20)
    # word       = tokenizer_.decode(output[0], skip_special_tokens=True)
    # print(word)

    


We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


 spoke
 spoke lime
 spoke lime lime
 spoke lime lime the
 spoke lime lime the lime
 spoke lime lime the lime the
 spoke lime lime the lime the the
 spoke lime lime the lime the the the
 spoke lime lime the lime the the the the
 spoke lime lime the lime the the the the the
 spoke lime lime the lime the the the the the the
 spoke lime lime the lime the the the the the the the
 spoke lime lime the lime the the the the the the the the
 spoke lime lime the lime the the the the the the the the the
 spoke lime lime the lime the the the the the the the the the the
 spoke lime lime the lime the the the the the the the the the the the
 spoke lime lime the lime the the the the the the the the the the the the
 spoke lime lime the lime the the the the the the the the the the the the the
 spoke lime lime the lime the the the the the the the the the the the the the the
 spoke lime lime the lime the the the the the the the the the the the the the the the
 spoke lime lime the lime the the the the the t