In [13]:
import torch
import torch.nn as nn


from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import random
import torch.optim.lr_scheduler as lr_scheduler
import math
import nltk.data
import nltk
from sentence_transformers import SentenceTransformer # embedding câu

class LSTMModel(nn.Module):
    """LSTM followed by a linear layer, applied to a packed batch of embedding sequences.

    The network runs an ``nn.LSTM`` over the packed input, takes the LSTM output at the
    last *valid* time step of every sequence and feeds it to a fully connected layer.

    Args:
        embedding_dim: size of each input vector (``input_size`` of the LSTM).
        hidden_dim: size of the LSTM hidden state.
        output_dim: number of output features of the final linear layer.
        dropout: dropout probability between stacked LSTM layers; ignored when
            ``numlayers`` is 1 because there is no "between layers" in that case.
        numlayers: number of stacked LSTM layers.
        bidirectional: whether the LSTM runs in both directions.
    """

    def __init__(self, embedding_dim = 384, hidden_dim = 128, output_dim = 1, dropout = 0.2, numlayers = 1, bidirectional = False):
        super(LSTMModel, self).__init__()
        self.num_layers = numlayers
        self.D = 2 if bidirectional else 1  # number of directions

        self.hidden_dim = hidden_dim  # free hyper-parameter
        self.embedding_dim = embedding_dim  # length of one embedding vector
        # nn.LSTM only applies dropout between stacked layers; with a single layer a
        # non-zero value does nothing except emit a UserWarning, so pass 0.0 then.
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            batch_first=True,
            num_layers=numlayers,
            dropout=dropout if numlayers > 1 else 0.0,
            bidirectional=bidirectional,
        )
        # Fully connected output layer on top of the (possibly bidirectional) LSTM output.
        self.fc = nn.Linear(self.D * hidden_dim, output_dim)

    def forward(self, inputs):
        """Run the model on a packed batch.

        Args:
            inputs: a ``PackedSequence`` (e.g. from ``pack_padded_sequence`` with
                ``batch_first=True``) whose elements have ``embedding_dim`` features.

        Returns:
            Tensor of shape ``(batch_size, output_dim)`` with the raw (pre-sigmoid) outputs.
        """
        # The first entry of batch_sizes is the number of sequences in the batch.
        batch_size = inputs.batch_sizes[0].item()

        hidden = self.init_hidden(batch_size)
        lstm_out, _ = self.lstm(inputs, hidden)

        # Unpack to (batch, max_len, D * hidden_dim) together with the true lengths.
        padded_outputs, lengths = pad_packed_sequence(lstm_out, batch_first=True)

        # Pick the output at each sequence's own last step. Indexing with -1 would read
        # zero padding for every sequence shorter than the longest one in the batch.
        # NOTE(review): with bidirectional=True the backward half at this step has only
        # seen the final element; kept as-is so existing checkpoints keep their meaning.
        batch_index = torch.arange(padded_outputs.size(0), device=padded_outputs.device)
        last_index = (lengths - 1).to(padded_outputs.device)
        last_outputs = padded_outputs[batch_index, last_index]

        return self.fc(last_outputs)

    def init_hidden(self, batch_size):
        """Return zero-filled ``(h_0, c_0)`` for ``batch_size`` sequences.

        Both tensors have shape ``(D * num_layers, batch_size, hidden_dim)`` and are
        created with the device and dtype of the model's parameters, so the model
        keeps working after ``.to(device)`` / ``.half()`` / ``.double()``.
        """
        reference = self.fc.weight
        shape = (self.D * self.num_layers, batch_size, self.hidden_dim)
        return (reference.new_zeros(shape), reference.new_zeros(shape))


In [14]:
def text2embedding(model_embedding, sentence):
    """Encode text with ``model_embedding`` and return a float32 tensor with at least 2 dims.

    Args:
        model_embedding: object exposing ``encode(sentence)`` that returns an array-like
            embedding (a single vector or one row per sentence).
        sentence: the value handed to ``encode`` unchanged (a string or list of strings).

    Returns:
        ``torch.Tensor`` of dtype float32. A result with fewer than two dimensions gets a
        leading dimension of size 1, so a single vector becomes a one-row matrix.
    """
    # as_tensor avoids the legacy torch.Tensor(...) constructor and the slow
    # "list of ndarrays" conversion path; it may share memory with the encoder output.
    embedding = torch.as_tensor(model_embedding.encode(sentence), dtype=torch.float32)
    if embedding.dim() < 2:
        embedding = embedding.unsqueeze(0)
    return embedding

In [15]:
# Sentence encoder instantiated once at module level; it is passed to predict() as
# `model_embedding` to turn each sentence into an embedding vector.
model_embedding = SentenceTransformer('paraphrase-MiniLM-L6-v2')
def predict(model, model_embedding, text, speed = 1):
    """Classify ``text`` as positive or negative from its sentence embeddings.

    The text is split into sentences, optionally sub-sampled (order preserved),
    embedded sentence by sentence, and fed to ``model`` as one packed sequence.

    Args:
        model: trained ``nn.Module`` taking a ``PackedSequence`` and returning one logit.
            It is switched to eval mode as a side effect.
        model_embedding: sentence encoder forwarded to ``text2embedding``.
        text: raw input text.
        speed: sub-sampling factor; ``ceil(n_sentences / speed)`` randomly chosen
            sentences are kept. Values below 1 keep every sentence.

    Returns:
        ``(probability, label)`` where ``probability`` is the sigmoid of the model
        output as a float and ``label`` is ``'positive'`` if it exceeds 0.5, else
        ``'negative'``.

    Raises:
        ValueError: if ``speed`` is not positive or ``text`` contains no sentences.
    """
    if speed <= 0:
        raise ValueError(f"speed must be positive, got {speed!r}")

    tokenizer = nltk.data.load('english.pickle')
    sentences = tokenizer.tokenize(text)
    if not sentences:
        # Fail here with a clear message instead of deep inside pack_padded_sequence.
        raise ValueError("text contains no sentences to classify")

    # Never ask random.sample for more items than exist (happens when speed < 1).
    keep_count = min(len(sentences), math.ceil(len(sentences) / speed))
    if keep_count < len(sentences):
        kept = sorted(random.sample(range(len(sentences)), keep_count))
        sentences = [sentences[i] for i in kept]

    # Shape (1, n_sentences, embedding_dim): a batch holding this single document.
    batch = text2embedding(model_embedding, sentences).unsqueeze(0)
    packed = pack_padded_sequence(batch, [batch.size(1)], batch_first=True)

    # Run on whatever device the model lives on.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        packed = packed.to(first_param.device)

    model.eval()
    with torch.no_grad():
        outputs = model(packed)  # invokes forward()
        probability = torch.sigmoid(outputs).item()

    return probability, 'positive' if probability > 0.5 else 'negative'

    

In [16]:
# Path of the saved state dict holding the best parameters found during training.
path_best_parameter_model = 'best_params_model.pkl'
model = LSTMModel()
# map_location='cpu' lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict then copies the values into the (CPU) model parameters.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
model.load_state_dict(torch.load(path_best_parameter_model, map_location='cpu'))





<All keys matched successfully>

In [17]:
# Sample product review used as the input text for the prediction call below.
text = '''
As someone who relies on multiple electronic devices daily, I was in need of a reliable and affordable power strip. The AmazonBasics 6-Outlet, 200 Joule Surge Protector Power Strip has proven to be a great solution, providing functionality and peace of mind at an attractive price point.

The AmazonBasics power strip offers six outlets, which is more than enough to accommodate my various devices, including my computer, monitor, speakers, and phone charger. The 2-foot cord is not the longest, but it has been sufficient for my needs and helps to minimize cable clutter.

The 200 Joule surge protection rating provides a basic level of protection for my devices, safeguarding them against power surges and spikes. While it may not be the highest level of protection available, it is suitable for everyday use and offers reassurance that my valuable electronics are secure.

The power strip's design is simple and unobtrusive, making it easy to blend into any room or workspace. The white color and slim profile do not draw attention, allowing it to integrate seamlessly into my setup.

One minor drawback is the lack of USB ports for charging devices directly, but this is not a deal-breaker considering the budget-friendly price and the primary purpose of the power strip.

Overall, the AmazonBasics 6-Outlet, 200 Joule Surge Protector Power Strip is an excellent value, offering reliable functionality and basic surge protection at an affordable price. For those seeking a simple and effective solution for managing multiple devices, this power strip is a solid choice.
'''

In [18]:
# Classify the sample review with the default speed; as the cell's last expression,
# the returned (probability, label) tuple is displayed.
predict(model, model_embedding, text)

(0.9763833284378052, 'positive')