## Module Import

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

## Input Data

In [2]:
sentences = [
    "i am happy", "you are happy", "feel good", "you feel good", "i am not sad", "you are not sad", "you feel not bad",
    "i am sad", "you are sad", "feel bad", "you feel bad", "i am not happy", "you are not happy", "i feel not good"
]
targets = [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0] #1 긍정, 0 부정
dtype = torch.float

word_list = list(set(" ".join(sentences).split()))  # 단어 리스트
word_dict = {w: i for i, w in enumerate(word_list)}  # 단어 -> 숫자
number_dict = {i: w for i, w in enumerate(word_list)}  # 숫자 -> 단어
n_class = len(word_dict)  # 단어 갯수

## Make Batch

In [3]:
batch_size = len(sentences)
n_embedding = 4
n_hidden = 10  # 은닉층 사이즈
max_length = 4  # 문장 단어 최대 갯수

def make_batch(sentences, targets):  # 데이터 전처리
    input_batch = []
    input_length = []
    
    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] + 1 for n in word]
        input += [0] * (max_length - len(word))
        input_batch.append(input)
        input_length.append(len(word))
    
    input_batch = torch.LongTensor(input_batch)
    input_length = torch.LongTensor(input_length)
    target_batch = torch.LongTensor(targets)
    input_length, sorted_idx = input_length.sort(0, descending=True)
    input_batch = input_batch[sorted_idx]
    target_batch = target_batch[sorted_idx]
    print("Input batch:", input_batch)
    print("Input length:", input_length)
    print("Target batch:", target_batch)
    return input_batch, input_length, target_batch

input_batch, input_length, target_batch = make_batch(sentences, targets)

Input batch: tensor([[ 7,  3,  1,  4],
        [ 8, 10,  1,  4],
        [ 8,  6,  1,  5],
        [ 7,  3,  1,  2],
        [ 8, 10,  1,  2],
        [ 7,  6,  1,  9],
        [ 7,  3,  2,  0],
        [ 8, 10,  2,  0],
        [ 8,  6,  9,  0],
        [ 7,  3,  4,  0],
        [ 8, 10,  4,  0],
        [ 8,  6,  5,  0],
        [ 6,  9,  0,  0],
        [ 6,  5,  0,  0]])
Input length: tensor([4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2])
Target batch: tensor([1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0])


## Make Model

In [4]:
class TextLSTM(nn.Module):
    def __init__(self):
        super(TextLSTM, self).__init__()
        self.embed = nn.Embedding(n_class + 1, n_embedding, padding_idx=0)
        self.lstm = nn.LSTM(input_size=n_embedding,  # input vector의 size
                            hidden_size=n_hidden,  # hidden layer의 size
                            proj_size=5)  # output vector의 size
        self.linear = nn.Linear(5, 2)

    def forward(self, X, lengths):
        embeded = self.embed(X)
        input_batch = pack_padded_sequence(embeded, lengths.tolist(), batch_first=True)
        input_batch, hidden = self.lstm(input_batch)
        output, output_length = pad_packed_sequence(input_batch, batch_first=True)
        output = output[range(output.shape[0]), lengths - 1, :]
        output = self.linear(output)
        
        return output

## Training

In [5]:
model = TextLSTM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)


for epoch in range(100):
    output = model(input_batch, input_length)
    loss = criterion(output, target_batch)

    if (epoch + 1) % 100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    

model.eval()

test_string = "i am not good"
test_input, test_length, test_target = make_batch([test_string], [1])
predict = model(test_input, test_length).data.max(1, keepdim=True)[1][0][0]

print(test_string, '->', "positive" if predict == 1 else "negative")

Epoch: 0100 cost = 0.030195
Input batch: tensor([[7, 3, 1, 9]])
Input length: tensor([4])
Target batch: tensor([1])
i am not good -> negative
