# PyTorch로 RNN, LSTM 구현하기
- 출처 : https://justkode.kr/deep-learning/pytorch-rnn


- PyTorch에서 제공하는 RNN과 관련 API를 이용해 손쉽게 RNN 네트워크를 구축 할 수 있음


### 앞의 두 단어를 보고, 뒤에 나올 단어를 예측하는 모델 작성

In [None]:
# 필요한 모듈 import 및 학습 데이터 입력
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

sentences = ["i like dog", "i love coffee", "i hate milk", "you like cat", "you love milk", "you hate coffee"]
dtype = torch.float

In [None]:
# 전처리
word_list = list(set(" ".join(sentences).split()))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict)

In [None]:
" ".join(sentences)

'i like dog i love coffee i hate milk you like cat you love milk you hate coffee'

In [None]:
word_list

['cat', 'milk', 'like', 'i', 'dog', 'you', 'love', 'hate', 'coffee']

In [None]:
number_dict

{0: 'cat',
 1: 'milk',
 2: 'like',
 3: 'i',
 4: 'dog',
 5: 'you',
 6: 'love',
 7: 'hate',
 8: 'coffee'}

In [None]:
n_class

9

In [None]:
# RNN hyper-parameter 정의

batch_size = len(sentences)
n_step = 2  # 학습 하려고 하는 어절의 길이 - 1
n_hidden = 5  # 은닉층 사이즈

In [None]:
# 학습을 위한 batch 구성 함수 정의
def make_batch(sentences):
  input_batch = []
  target_batch = []

  for sen in sentences:
    word = sen.split() # ["i", "like", "dog"]
    input = [word_dict[n] for n in word[:-1]] # ["i", "like"]
    target = word_dict[word[-1]] # ["dog"]

    '''
      print("word", word) # word ['i', 'like', 'dog']
      print("input", input) # input [3, 2]
      print("target", target) target 4
    '''
    input_batch.append(np.eye(n_class)[input])  # One-Hot Encoding
    target_batch.append(target)
  
  return input_batch, target_batch

In [None]:
# Batch 생성 및 Tensor 화
input_batch, target_batch = make_batch(sentences)
print(input_batch[0], target_batch)
input_batch = torch.tensor(input_batch, dtype=torch.float32, requires_grad=True)
target_batch = torch.tensor(target_batch, dtype=torch.int64)

word ['i', 'like', 'dog']
input [3, 2]
target 4
word ['i', 'love', 'coffee']
input [3, 6]
target 8
word ['i', 'hate', 'milk']
input [3, 7]
target 1
word ['you', 'like', 'cat']
input [5, 2]
target 0
word ['you', 'love', 'milk']
input [5, 6]
target 1
word ['you', 'hate', 'coffee']
input [5, 7]
target 8
[[0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]] [4, 8, 1, 0, 1, 8]


In [None]:
# 모델 정의
class TextRNN(nn.Module):
  def __init__(self):
    super(TextRNN, self).__init__()

    self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.3)
    self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype)) # 자동미분이 되는(requires_grad=True) Tensor를 생성
    self.b = nn.Parameter(torch.randn([n_class]).type(dtype))
    self.Softmax = nn.Softmax(dim=1)

  def forward(self, hidden, X):
    X = X.transpose(0, 1)
    outputs, hidden = self.rnn(X, hidden) # nn.RNN forward 함수 (input: 입력값 X, initial hidden state)
    outputs = outputs[-1]  # 최종 예측 Hidden Layer
    model = torch.mm(outputs, self.W) + self.b  # 최종 예측 최종 출력 층
    return model
	

In [None]:
# 모델 학습
model = TextRNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(500):
  # nn.RNN에 필요한 hidden state 초기값 정의
  hidden = torch.zeros(1, batch_size, n_hidden, requires_grad=True)
  output = model(hidden, input_batch)
  loss = criterion(output, target_batch)

  if (epoch + 1) % 100 == 0:
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
  
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()


Epoch: 0100 cost = 0.231202
Epoch: 0200 cost = 0.036785
Epoch: 0300 cost = 0.017297
Epoch: 0400 cost = 0.010481
Epoch: 0500 cost = 0.007127


In [39]:
# Test
input = [sen.split()[:2] for sen in sentences]
hidden = torch.zeros(1, batch_size, n_hidden, requires_grad=True)

#print(model(hidden, input_batch).data)
#print(model(hidden, input_batch).data.max(1, keepdim=True))

predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]

print(predict.squeeze())
print(number_dict)

print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])

torch.return_types.max(
values=tensor([[5.4243],
        [6.9860],
        [7.5515],
        [7.3983],
        [8.3421],
        [8.3252]]),
indices=tensor([[4],
        [8],
        [1],
        [0],
        [1],
        [8]]))
tensor([4, 8, 1, 0, 1, 8])
{0: 'cat', 1: 'milk', 2: 'like', 3: 'i', 4: 'dog', 5: 'you', 6: 'love', 7: 'hate', 8: 'coffee'}
[['i', 'like'], ['i', 'love'], ['i', 'hate'], ['you', 'like'], ['you', 'love'], ['you', 'hate']] -> ['dog', 'coffee', 'milk', 'cat', 'milk', 'coffee']


In [40]:
# TextLSTM 모델 정의

class TextLSTM(nn.Module):
  def __init__(self):
    super(TextLSTM, self).__init__()

    self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, dropout=0.3)
    self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
    self.b = nn.Parameter(torch.randn([n_class]).type(dtype))
    self.Softmax = nn.Softmax(dim=1)

  def forward(self, hidden_and_cell, X):
    X = X.transpose(0, 1)
    outputs, hidden = self.lstm(X, hidden_and_cell) # hidden_and_cell : (hidden state 초기값, cell state 초기값)
    outputs = outputs[-1]  # 최종 예측 Hidden Layer
    model = torch.mm(outputs, self.W) + self.b  # 최종 예측 최종 출력 층
    return model


In [41]:
	# 모델 학습
model = TextLSTM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(500):
  hidden = torch.zeros(1, batch_size, n_hidden, requires_grad=True)
  cell = torch.zeros(1, batch_size, n_hidden, requires_grad=True)
  output = model((hidden, cell), input_batch)
  loss = criterion(output, target_batch)

  if (epoch + 1) % 100 == 0:
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
  
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

Epoch: 0100 cost = 0.604908
Epoch: 0200 cost = 0.159963
Epoch: 0300 cost = 0.046787
Epoch: 0400 cost = 0.025024
Epoch: 0500 cost = 0.016055


In [42]:
input = [sen.split()[:2] for sen in sentences]

hidden = torch.zeros(1, batch_size, n_hidden, requires_grad=True)
cell = torch.zeros(1, batch_size, n_hidden, requires_grad=True)
predict = model((hidden, cell), input_batch).data.max(1, keepdim=True)[1]
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])

[['i', 'like'], ['i', 'love'], ['i', 'hate'], ['you', 'like'], ['you', 'love'], ['you', 'hate']] -> ['dog', 'coffee', 'milk', 'cat', 'milk', 'coffee']
