In [5]:
import torch
import torch.nn as nn

# RNN Example

In [7]:
# Recurrent layer taking 3 input features and producing a hidden size of 3.
rnn = nn.RNN(input_size=3, hidden_size=3)

# A sequence of 5 steps; every element of the list is a (1, 3) tensor.
inputs = [torch.randn(1, 3) for _ in range(5)]
print(f'inputs : {inputs}')

# Randomly initialised starting hidden state.
hidden = torch.randn(1, 1, 3)
print(f'first hidden : {hidden}\n')

# Feed the sequence one step at a time: every iteration yields an output,
# and the hidden state it returns becomes the hidden state of the next one.
for i in inputs:
    # (1, 3) -> (1, 1, 3) by adding a leading dimension.
    out, hidden = rnn(i.unsqueeze(0), hidden)
    print(f'out : {out}')
    print(f'hidden : {hidden}\n')

inputs : [tensor([[-2.3100,  0.5152,  1.2212]]), tensor([[-1.0463,  1.3921, -0.3320]]), tensor([[ 0.9150, -0.0415, -0.6433]]), tensor([[-0.4164,  1.1375,  0.8015]]), tensor([[-1.7037, -0.0527,  0.4711]])]
first hidden : tensor([[[-1.1912,  1.1987,  1.0268]]])

out : tensor([[[-0.9381,  0.9179,  0.4670]]], grad_fn=<StackBackward>)
hidden : tensor([[[-0.9381,  0.9179,  0.4670]]], grad_fn=<StackBackward>)

out : tensor([[[-0.2259,  0.8693,  0.6049]]], grad_fn=<StackBackward>)
hidden : tensor([[[-0.2259,  0.8693,  0.6049]]], grad_fn=<StackBackward>)

out : tensor([[[ 0.4133,  0.0309, -0.2637]]], grad_fn=<StackBackward>)
hidden : tensor([[[ 0.4133,  0.0309, -0.2637]]], grad_fn=<StackBackward>)

out : tensor([[[0.7366, 0.0176, 0.4471]]], grad_fn=<StackBackward>)
hidden : tensor([[[0.7366, 0.0176, 0.4471]]], grad_fn=<StackBackward>)

out : tensor([[[ 0.1155,  0.6665, -0.4257]]], grad_fn=<StackBackward>)
hidden : tensor([[[ 0.1155,  0.6665, -0.4257]]], grad_fn=<StackBackward>)



# LSTM Example
RNN의 장기 의존성(long-term dependency) 문제와 gradient vanishing 현상을 완화하기 위해, LSTM에는 hidden state와 별도로 **cell state** 라는 개념이 추가되었다.

In [9]:
# LSTM layer taking 3 input features and producing a hidden size of 3.
lstm = nn.LSTM(input_size=3, hidden_size=3)

# A sequence of 5 steps; every element of the list is a (1, 3) tensor.
inputs = [torch.randn(1, 3) for _ in range(5)]
print(f'inputs : {inputs}')

# Randomly initialised starting state: a (hidden state, cell state) pair.
hidden = (
    torch.randn(1, 1, 3),  # hidden state
    torch.randn(1, 1, 3),  # cell state
)
print(f'first hidden : {hidden}\n')

# Feed the sequence one step at a time: every iteration yields an output,
# and the state pair it returns is passed into the next iteration.
for i in inputs:
    # (1, 3) -> (1, 1, 3) by adding a leading dimension.
    out, hidden = lstm(i.unsqueeze(0), hidden)
    print(f'out : {out}')
    print(f'hidden : {hidden[0]}')
    print(f'cell : {hidden[1]}\n')

inputs : [tensor([[-0.3577, -0.1670, -0.2724]]), tensor([[-0.5947,  0.9907, -0.2995]]), tensor([[ 1.1773,  0.4212, -1.9770]]), tensor([[0.2326, 1.9729, 0.4413]]), tensor([[-0.2317,  0.7693,  0.2085]])]
first hidden : (tensor([[[ 0.1911,  0.1175, -0.4515]]]), tensor([[[ 0.5990, -0.0009,  0.3376]]]))

out : tensor([[[ 0.2051, -0.0994,  0.0920]]], grad_fn=<StackBackward>)
hidden : tensor([[[ 0.2051, -0.0994,  0.0920]]], grad_fn=<StackBackward>)
cell : tensor([[[ 0.3756, -0.1355,  0.2962]]], grad_fn=<StackBackward>)

out : tensor([[[0.1838, 0.0831, 0.2084]]], grad_fn=<StackBackward>)
hidden : tensor([[[0.1838, 0.0831, 0.2084]]], grad_fn=<StackBackward>)
cell : tensor([[[0.3772, 0.1073, 0.5253]]], grad_fn=<StackBackward>)

out : tensor([[[0.0494, 0.0586, 0.0313]]], grad_fn=<StackBackward>)
hidden : tensor([[[0.0494, 0.0586, 0.0313]]], grad_fn=<StackBackward>)
cell : tensor([[[0.2685, 0.1262, 0.1432]]], grad_fn=<StackBackward>)

out : tensor([[[0.1456, 0.3602, 0.1551]]], grad_fn=<StackBackwa

# RNN Neural Network Implementation

In [3]:
class RNN(nn.Module):
  """Minimal hand-written recurrent cell built from two linear layers.

  At every step the input and the previous hidden state are concatenated
  along dim 1 and fed to two linear layers: one produces the next hidden
  state, the other produces log-probabilities over the outputs.

  Args:
    input_size: number of features in each input step.
    hidden_size: number of features in the hidden state.
    output_size: number of output scores produced per step.
  """

  def __init__(self, input_size, hidden_size, output_size):
    # nn.Module must be initialised before any submodule is assigned,
    # otherwise attribute assignment of nn.Linear raises AttributeError.
    super().__init__()
    self.hidden_size = hidden_size

    # Both layers consume the concatenation [input, hidden].
    self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
    self.i2o = nn.Linear(input_size + hidden_size, output_size)
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, input, hidden):
    """Run one step.

    Args:
      input: 2-D tensor whose dim 1 has `input_size` features.
      hidden: 2-D tensor whose dim 1 has `hidden_size` features.

    Returns:
      (output, hidden): log-softmax scores over dim 1 and the next hidden state.
    """
    combined = torch.cat((input, hidden), dim=1)
    hidden = self.i2h(combined)
    output = self.i2o(combined)
    output = self.softmax(output)
    return output, hidden

  def initHidden(self):
    """Return an all-zero initial hidden state of shape (1, hidden_size)."""
    return torch.zeros(1, self.hidden_size)

# RNN Training

In [4]:
# Negative log-likelihood loss, moved to the GPU; the RNN class in this
# notebook ends its forward pass with LogSoftmax.
criterion = nn.NLLLoss().cuda()
# Step size used by the manual parameter update inside train().
learning_rate = 0.005

# NOTE(review): `h` and `o` are not defined in any visible cell, and `i` is only
# bound as a loop variable holding a tensor in the example cells - presumably
# these should be the input, hidden and output sizes; confirm before running.
# NOTE(review): this rebinds `rnn`, which the first example cell assigns to nn.RNN(3, 3).
rnn = RNN(i, h, o).cuda()

def train(category_tensor, line_tensor):
  """Run one training step on a single line and update `rnn` in place.

  The line is fed to the module-level `rnn` one step at a time; the loss is
  computed with the module-level `criterion` on the output of the last step,
  and the parameters are updated with a plain gradient step scaled by
  `learning_rate`.

  Args:
    category_tensor: target tensor passed to `criterion` (moved to the GPU here).
    line_tensor: tensor indexed along its first dimension, one entry per step.

  Returns:
    (output, loss): the output of the last step and the loss as a Python float.

  Raises:
    ValueError: if `line_tensor` has no steps, since no output would exist.
  """
  num_steps = line_tensor.size()[0]
  if num_steps == 0:
    raise ValueError('line_tensor must contain at least one step')

  hidden = rnn.initHidden().cuda()

  rnn.zero_grad()

  # Walk the first dimension of the line (the number of characters),
  # carrying the hidden state from one step to the next.
  for step in range(num_steps):
    output, hidden = rnn(line_tensor[step].cuda(), hidden)

  # The target must live on the same device as the output.
  loss = criterion(output, category_tensor.cuda())
  loss.backward()

  # Manual gradient step: p <- p - learning_rate * grad
  for p in rnn.parameters():
    p.data.add_(p.grad.data, alpha=-learning_rate)

  return output, loss.item()