In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [9]:
def make_batch():
    input_batch = []
    target_batch = []
    
    input_vectors = np.eye(vocab_size)
    for sentence in sentences:
        words = sentence.split()
        input = [word_to_idx[word] for word in words[:-1]] # 0~(n-1)번째
        target = word_to_idx[words[-1]] # n번째
        
        input_batch.append(input_vectors[input])
        target_batch.append(target)
    
    return input_batch, target_batch

In [50]:
class TextRNN(nn.Module):
    def __init__(self):
        super(TextRNN, self).__init__()
        self.rnn = nn.RNN(input_size=vocab_size, hidden_size=n_hidden)
        self.W = nn.Linear(n_hidden, vocab_size, bias=False)
        self.b = nn.Parameter(torch.ones([vocab_size]))
        
    def forward(self, hidden, X):
        # X: (batch_size, n_step, vocab_size)
        # hidden: (1, batch_size, n_hidden)
        
        X = X.transpose(0, 1) # (n_step, batch_size, vocab_size)
        outputs, hidden = self.rnn(X, hidden)
        # outputs: (n_step, batch_size, n_hidden)
        # hidden: (1, batch_size, n_hidden)
        
        outputs = outputs[-1] # (batch_size, n_hidden)
        result = self.W(outputs) + self.b # (n_step, batch_size, vocab_size)
        
        return result

In [51]:
n_step = 2 # number of cells(steps)
n_hidden = 5 # number of hidden units in one cell

In [52]:
sentences = ["i like dog", 'i love coffee', 'i hate milk']

word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_to_idx = {word: idx for idx, word in enumerate(word_list)}
idx_to_word = {idx: word for idx, word in enumerate(word_list)}
vocab_size = len(word_list)
batch_size = len(sentences)

In [53]:
model = TextRNN()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [54]:
input_batch, target_batch = make_batch()
input_batch = torch.FloatTensor(input_batch)
target_batch = torch.LongTensor(target_batch)

In [57]:
# Training

EPOCHS = 5000

for epoch in range(EPOCHS):
    optimizer.zero_grad()
    
    hidden = torch.zeros(1, batch_size, n_hidden)
    output = model(hidden, input_batch)
    
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch: {:4d}  loss: {:.6f}'.format(epoch + 1, loss))
        
    loss.backward()
    optimizer.step()

Epoch: 1000  loss: 0.027595
Epoch: 2000  loss: 0.011142
Epoch: 3000  loss: 0.005526
Epoch: 4000  loss: 0.003005
Epoch: 5000  loss: 0.001713


In [85]:
# Predict

hidden = torch.zeros(1, batch_size, n_hidden)
predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]

print('original sentence -> predict')
print('============================')
for i, sentence in enumerate(sentences):
    print(sentence, '->' , idx_to_word[predict[i].item()])

original sentence -> predict
i like dog -> dog
i love coffee -> coffee
i hate milk -> milk


In [80]:
# predict

tensor([[3],
        [0],
        [1]])

shape 확인 테스트

In [40]:
# rnn = nn.RNN(input_size=vocab_size, hidden_size=n_hidden)
# W = nn.Linear(n_hidden, vocab_size, bias=False)
# b = nn.Parameter(torch.ones([vocab_size]))

In [41]:
# hidden = torch.zeros(1, batch_size, n_hidden)

In [42]:
# X = input_batch
# print(X.shape)
# X = X.transpose(0, 1)
# print(X.shape)

torch.Size([3, 2, 7])
torch.Size([2, 3, 7])


In [45]:
# outputs, hidden = rnn(X, hidden)
# print(outputs.shape)
# print(hidden.shape)

torch.Size([2, 3, 5])
torch.Size([1, 3, 5])


In [36]:
# outputs = outputs[-1]
# outputs.shape

torch.Size([3, 5])

In [38]:
# # (batch_size, n_hidden) x (n_hidden, vocab_size) = (batch_size, vocab_size)
# W(outputs).shape 

torch.Size([3, 7])

In [49]:
# result = W(outputs) + b
# result.shape

torch.Size([2, 3, 7])