In [52]:
import torch
import torch.nn as nn
import pickle
from konlpy.tag import Mecab, Okt

In [53]:
# Load the pickled vocabulary from the working directory.
# NOTE(review): pickle.load can execute arbitrary code - only use a trusted vocab.pkl.
with open('vocab.pkl', 'rb') as vocab_file:
    vocab_emotion = pickle.load(vocab_file)

In [54]:
# Number of entries in the loaded vocabulary.
len(vocab_emotion)

22560

In [55]:
# Lookup tables between tokens and integer ids, numbered in vocabulary iteration order.
decode = dict(enumerate(vocab_emotion))
encode = {token: idx for idx, token in decode.items()}

# Ids of the special tokens; .get() yields None when a token is absent from the vocabulary.
UNK, PAD = encode.get('<UNK>'), encode.get('<PAD>')
UNK, PAD

(1, 0)

In [56]:
# Sanity check: look up the id assigned to the padding token.
encode['<PAD>']

0

In [57]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [42]:
class RNNClassifier(nn.Module):
    """Sequence classifier: embedding -> vanilla RNN -> linear head -> sigmoid.

    Args:
        n_vocab: Number of rows in the embedding table (vocabulary size).
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the RNN hidden state.
        n_layers: Number of stacked RNN layers.
        dropout: Dropout probability between stacked RNN layers. It is only
            forwarded to ``nn.RNN`` when ``n_layers > 1``; PyTorch applies it
            between layers only and warns when it is non-zero on a
            single-layer RNN.
        n_classes: Number of output scores per example. Defaults to 8, the
            value that used to be hard-coded (the old inline comment said 7,
            but the layer was built with 8 outputs).
    """

    def __init__(self, n_vocab, embedding_dim, hidden_dim, n_layers, dropout, n_classes=8):
        super().__init__()
        self.embedding = nn.Embedding(n_vocab, embedding_dim)
        self.rnn = nn.RNN(
            embedding_dim,
            hidden_dim,
            n_layers,
            batch_first=True,
            # Inter-layer dropout is meaningless with one layer; pass 0 to avoid the warning.
            dropout=dropout if n_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_dim, n_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of token-id sequences.

        Args:
            x: Integer tensor indexed as (batch, time), since the RNN is
                built with ``batch_first=True``.

        Returns:
            Tensor with one row per batch element and one sigmoid-squashed
            score per class.
        """
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        # Classify from the RNN output at the final time step only, so any
        # trailing padding in `x` is part of what the classifier sees.
        last_step = output[:, -1, :]
        # Sigmoid is monotonic, so an argmax over these scores equals the
        # argmax over the raw linear outputs.
        return self.sigmoid(self.fc(last_step))
    
# Load the saved model object and place it on the selected device.
# map_location=device remaps stored tensors at load time, so a checkpoint that
# was saved on a GPU can still be loaded on a CPU-only machine.
# NOTE(review): torch.load unpickles the file - only load a trusted model2.pth.
# NOTE(review): presumably this is a whole pickled nn.Module; newer PyTorch
# releases default to weights_only=True and would then need weights_only=False
# here - TODO confirm against the installed version.
model_emotion = torch.load('model2.pth', map_location=device).to(device)

In [58]:
def pad_sequences_emotion(sequences, max_len, pad_token):
    """Pad or truncate a single token-id sequence to exactly ``max_len`` items.

    Despite the plural name, ``sequences`` is one sequence of token ids (that
    is how ``predict_emotion`` calls it), not a batch.

    The previous implementation looped over ``range(len(sequences))``, so its
    ``i < seqlen`` test was always true and the input was returned unchanged:
    it never padded and never truncated, and ``max_len`` was ignored.

    Args:
        sequences: Iterable of token ids for one sentence.
        max_len: Target length; values below 0 are treated as 0.
        pad_token: Value appended on the right until the target length is reached.

    Returns:
        A new list of length ``max_len``: the first ``max_len`` items of
        ``sequences``, followed by ``pad_token`` repeated as needed. The input
        is not modified.
    """
    target_len = max(max_len, 0)
    # Copy first so the caller's sequence is never mutated, then truncate.
    padded = list(sequences)[:target_len]
    # Right-pad; the multiplier is 0 when the sequence already fills max_len.
    padded.extend([pad_token] * (target_len - len(padded)))
    return padded

In [59]:
def predict_emotion(model, sentence, max_len, device, pad_token):
    """Predict a class index for one tokenized sentence.

    Args:
        model: Callable module; it is switched to eval mode before inference.
        sentence: Iterable of tokens, looked up in the module-level ``encode``
            table; tokens missing from it map to the ``'<UNK>'`` id.
        max_len: Forwarded to ``pad_sequences_emotion``.
        device: Device on which the input tensor is created.
        pad_token: Forwarded to ``pad_sequences_emotion``.

    Returns:
        The argmax over the flattened model output, as a Python int. With the
        single-sentence batch built here this is the index of the highest score.
    """
    model.eval()

    # Map each token to its id, falling back to the unknown-token entry.
    token_ids = []
    for word in sentence:
        key = word if word in encode else '<UNK>'
        token_ids.append(encode[key])

    padded_ids = pad_sequences_emotion(token_ids, max_len, pad_token)

    with torch.no_grad():
        # unsqueeze(0) adds the leading batch dimension for a batch of one.
        batch = torch.tensor(padded_ids, dtype=torch.long, device=device).unsqueeze(0)
        scores = model(batch)
        return torch.argmax(scores).item()

In [60]:
# Okt tagger from konlpy; its morphs() method is used below to split sentences into tokens.
tokenizer = Okt()

In [61]:
# Quick check of the tokenizer output on a sample sentence.
tokenizer.morphs('이것은영화입니다.')

['이', '것', '은', '영화', '입니다', '.']

In [None]:
# Sample input sentence to classify.
sentence = '이것은영화입니다.'

In [45]:
# Tokenize the sample sentence and predict its class index; 64 is passed as max_len.
predict_emotion(model_emotion, tokenizer.morphs(sentence), 64, device, PAD)

1