# L12 RNN

In [1]:
import torch
import numpy as np

BATCH_SIZE = 1
SEQ_LEN = 3
INPUT_SIZE = 4
HIDDEN_SIZE = 2
NUM_LAYERS = 1

# RNNCell
# input.shape = (BATCH_SIZE, INPUT_SIZE)
# hidden.shape = (BATCH_SIZE, HIDDEN_SIZE)
# output.shape = (BATCH_SIZE, HIDDEN_SIZE)
# dataset.shape = (SEQ_LEN, BATCH_SIZE, INPUT_SIZE)

# RNN
# input.shape = (SEQ_LEN, BATCH_SIZE, INPUT_SIZE)
# h_0.shape = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)
# output.shape = (SEQ_LEN, BATCH_SIZE, HIDDEN_SIZE)
# h_n.shape = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)
# dataset.shape = (SEQ_LEN, BATCH_SIZE, INPUT_SIZE)

In [2]:
# RNNCell
# use tanh in cell
RNNCell = torch.nn.RNNCell(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE)

dataset = torch.randn(SEQ_LEN, BATCH_SIZE, INPUT_SIZE)
hidden = torch.zeros(BATCH_SIZE, HIDDEN_SIZE)
out = torch.tensor(np.array([]))

for idx, input in enumerate(dataset):
  hidden = RNNCell(input, hidden)
  out = torch.cat((out, hidden), dim=0)

print('out:\n\t', out, '\n\t', out.shape, '\nhidden:\n\t', hidden, '\n\t', hidden.shape)

out:
	 tensor([[ 0.6996,  0.7409],
        [ 0.0881, -0.4285],
        [ 0.8729, -0.0198]], dtype=torch.float64, grad_fn=<CatBackward0>) 
	 torch.Size([3, 2]) 
hidden:
	 tensor([[ 0.8729, -0.0198]], grad_fn=<TanhBackward0>) 
	 torch.Size([1, 2])


In [3]:
# RNN
RNN = torch.nn.RNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS)

dataset = torch.randn(SEQ_LEN, BATCH_SIZE, INPUT_SIZE)
hidden = torch.zeros(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

out, hidden = RNN(dataset, hidden)

print('out:\n\t', out, '\n\t', out.shape, '\nhidden:\n\t', hidden, '\n\t', hidden.shape)

out:
	 tensor([[[0.1793, 0.4974]],

        [[0.8297, 0.7577]],

        [[0.3866, 0.3424]]], grad_fn=<StackBackward0>) 
	 torch.Size([3, 1, 2]) 
hidden:
	 tensor([[[0.3866, 0.3424]]], grad_fn=<StackBackward0>) 
	 torch.Size([1, 1, 2])


In [4]:
# RNN with batch_first=True
RNN = torch.nn.RNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, batch_first=True)

dataset = torch.randn(BATCH_SIZE, SEQ_LEN, INPUT_SIZE)
hidden = torch.zeros(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

out, hidden = RNN(dataset, hidden)

print('out:\n\t', out, '\n\t', out.shape, '\nhidden:\n\t', hidden, '\n\t', hidden.shape)

out:
	 tensor([[[-0.8177, -0.9276],
         [ 0.1515, -0.2327],
         [-0.8414, -0.5485]]], grad_fn=<TransposeBackward1>) 
	 torch.Size([1, 3, 2]) 
hidden:
	 tensor([[[-0.8414, -0.5485]]], grad_fn=<StackBackward0>) 
	 torch.Size([1, 1, 2])


In [5]:
# Case
import torch
import numpy as np

BATCH_SIZE = 1
SEQ_LEN = 5
INPUT_SIZE = 4
HIDDEN_SIZE = 4
NUM_LAYERS = 1

In [6]:
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3] # hello
y_data = [3, 1, 2, 3, 2] # ohlol

one_hot_lookup = [
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1]]

x_one_hot = [one_hot_lookup[x] for x in x_data]

In [7]:
class RNNCellNet(torch.nn.Module):
  def __init__(self, input_size, hidden_size, batch_size):
    super(RNNCellNet, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.rnncell = torch.nn.RNNCell(input_size=self.input_size, 
                     hidden_size=self.hidden_size)

  def forward(self, input, hidden):
    hidden = self.rnncell(input, hidden)
    return hidden

rnnCellNet = RNNCellNet(INPUT_SIZE, HIDDEN_SIZE, BATCH_SIZE)

In [8]:
class RNNNet(torch.nn.Module):
  def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
    super(RNNNet, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.rnn = torch.nn.RNN(input_size=self.input_size, 
                 hidden_size=self.hidden_size, 
                 num_layers=self.num_layers)

  def forward(self, input):
    hidden = torch.zeros(self.num_layers, self.batch_size, self.hidden_size) # num_layers, batch_size, hidden_size
    out, hidden = self.rnn(input, hidden) # output.shape = (SEQ_LEN, BATCH_SIZE, HIDDEN_SIZE)
                          # h_n.shape = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)
    return out.view(-1, self.hidden_size)

rnnNet = RNNNet(INPUT_SIZE, HIDDEN_SIZE, BATCH_SIZE, NUM_LAYERS)

In [9]:
# RNN with embedding and linear layer
#     o1  o2  o3  o4  o5
#     |   linear layer   |
# h0 -> |    RNN Cell    | -> h5
#     |     Embed     |
#     x1  x2  x3  x4  x5
NUM_CLASS = 4
EMBED_SIZE = 10
class RNNELNet(torch.nn.Module):
  def __init__(self, input_size, hidden_size, num_class, batch_size, 
               embedding_size=10, 
               num_layers=1, batch_first=False):
    super(RNNELNet, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.num_class = num_class
    self.emb = torch.nn.Embedding(input_size, embedding_size)
    self.rnn = torch.nn.RNN(input_size=embedding_size, 
                 hidden_size=self.hidden_size, 
                 num_layers=self.num_layers,
                 batch_first=batch_first)
    self.fc = torch.nn.Linear(hidden_size, num_class)

  def forward(self, input):
    hidden = torch.zeros(self.num_layers, input.size(0), self.hidden_size) # num_layers, batch_size, hidden_size
    out = self.emb(input) # (batch, seqLen, embeddingSize)
    out, hidden = self.rnn(out, hidden) # output.shape = (BATCH_SIZE, SEQ_LEN, HIDDEN_SIZE)
    out = self.fc(out)
    return out.view(-1, self.num_class) # (batch*seqLen, numClass)

rnnELNet = RNNELNet(INPUT_SIZE, 8, NUM_CLASS, BATCH_SIZE, 
           embedding_size=EMBED_SIZE, 
           num_layers=NUM_LAYERS, batch_first=True)

In [10]:
# RNNCell
inputs = torch.Tensor(x_one_hot).view(-1, BATCH_SIZE, INPUT_SIZE) # (SEQLEN_SIZE, BATCH_SIZE, INPUT_SIZE)
labels = torch.LongTensor(y_data).view(-1, 1) # (SEQLEN_SIZE * BATCH_SIZE, 1)

criterion = torch.nn.CrossEntropyLoss() # softmax(x)+log(x)+nn.NLLLoss====>nn.CrossEntropyLoss
optimizer = torch.optim.Adam(rnnCellNet.parameters(), lr=0.1)

for epoch in range(15):
  loss = torch.zeros(1, requires_grad=True)
  hidden = torch.zeros(BATCH_SIZE, HIDDEN_SIZE, requires_grad=True)
  # hidden = hidden.to(device)

  print("Predicted: ", end="")
  for input, label in zip(inputs, labels):
    hidden = rnnCellNet(input, hidden)
    loss = loss + criterion(hidden, label) # not .ietm() # (BATCH_SIZE, HIDDEN_SIZE) vs (1)
    _, idx = hidden.max(dim=1)
    print(idx2char[idx.item()], end="")
  
  loss.backward()
  optimizer.step()
  optimizer.zero_grad()
  print(", Epoc[%d/15] loss=%.4f" %(epoch+1, loss.item()))

Predicted: hhhhh, Epoc[1/15] loss=7.6855
Predicted: hhhlh, Epoc[2/15] loss=6.0022
Predicted: ohlll, Epoc[3/15] loss=5.0145
Predicted: ohlll, Epoc[4/15] loss=4.4818
Predicted: ohlll, Epoc[5/15] loss=4.1351
Predicted: ohlll, Epoc[6/15] loss=3.8305
Predicted: ohlll, Epoc[7/15] loss=3.5255
Predicted: ohlll, Epoc[8/15] loss=3.2450
Predicted: ohlll, Epoc[9/15] loss=3.0201
Predicted: ohlll, Epoc[10/15] loss=2.8519
Predicted: ohlll, Epoc[11/15] loss=2.7270
Predicted: ohlll, Epoc[12/15] loss=2.6313
Predicted: ohlll, Epoc[13/15] loss=2.5542
Predicted: ohlll, Epoc[14/15] loss=2.4893
Predicted: ohlll, Epoc[15/15] loss=2.4339


In [11]:
# RNN
inputs = torch.Tensor(x_one_hot).view(-1, BATCH_SIZE, INPUT_SIZE) # (SEQLEN_SIZE, BATCH_SIZE, INPUT_SIZE)
labels = torch.LongTensor(y_data) # (SEQLEN_SIZE * BATCH_SIZE)

criterion = torch.nn.CrossEntropyLoss() # softmax(x)+log(x)+nn.NLLLoss====>nn.CrossEntropyLoss
optimizer = torch.optim.Adam(rnnNet.parameters(), lr=0.1)

for epoch in range(15):
  print("Predicted: ", end="")
  outputs = rnnNet(inputs)
  loss = criterion(outputs, labels) # not .ietm() # (SEQ_LEN * BATCH_SIZE, HIDDEN_SIZE) vs (SEQ_LEN * BATCH_SIZE)

  _, idx = outputs.max(dim=1)
  idx = idx.data.numpy()
  print(''.join(idx2char[x] for x in idx), end="")

  loss.backward()
  optimizer.step()
  optimizer.zero_grad()
  print(", Epoc[%d/15] loss=%.4f" %(epoch+1, loss.item()))

Predicted: hohoo, Epoc[1/15] loss=1.5230
Predicted: hoooo, Epoc[2/15] loss=1.2743
Predicted: ooooo, Epoc[3/15] loss=1.1299
Predicted: ollol, Epoc[4/15] loss=1.0455
Predicted: ollll, Epoc[5/15] loss=0.9752
Predicted: ollll, Epoc[6/15] loss=0.9083
Predicted: ollll, Epoc[7/15] loss=0.8365
Predicted: ohlll, Epoc[8/15] loss=0.7660
Predicted: ohlll, Epoc[9/15] loss=0.7156
Predicted: ohool, Epoc[10/15] loss=0.6895
Predicted: ohool, Epoc[11/15] loss=0.6723
Predicted: ohool, Epoc[12/15] loss=0.6512
Predicted: ohlol, Epoc[13/15] loss=0.6275
Predicted: ohlol, Epoc[14/15] loss=0.6050
Predicted: ohlol, Epoc[15/15] loss=0.5853


In [12]:
# RNN with embedding and linear layer
inputs = torch.LongTensor([x_data]) # (BATCH_SIZE, SEQLEN_SIZE)
labels = torch.LongTensor(y_data) # (SEQLEN_SIZE * BATCH_SIZE)

criterion = torch.nn.CrossEntropyLoss() # softmax(x)+log(x)+nn.NLLLoss====>nn.CrossEntropyLoss
optimizer = torch.optim.Adam(rnnELNet.parameters(), lr=0.1)

for epoch in range(15):
  print("Predicted: ", end="")
  outputs = rnnELNet(inputs)
  loss = criterion(outputs, labels) # not .ietm() # (SEQ_LEN * BATCH_SIZE, HIDDEN_SIZE) vs (SEQ_LEN * BATCH_SIZE)

  _, idx = outputs.max(dim=1)
  idx = idx.data.numpy()
  print(''.join(idx2char[x] for x in idx), end="")

  loss.backward()
  optimizer.step()
  optimizer.zero_grad()
  print(", Epoc[%d/15] loss=%.4f" %(epoch+1, loss.item()))

Predicted: leeeh, Epoc[1/15] loss=1.4109
Predicted: ohlll, Epoc[2/15] loss=0.9156
Predicted: ohlol, Epoc[3/15] loss=0.4931
Predicted: ohlol, Epoc[4/15] loss=0.2606
Predicted: ohlol, Epoc[5/15] loss=0.1204
Predicted: ohlol, Epoc[6/15] loss=0.0583
Predicted: ohlol, Epoc[7/15] loss=0.0312
Predicted: ohlol, Epoc[8/15] loss=0.0177
Predicted: ohlol, Epoc[9/15] loss=0.0106
Predicted: ohlol, Epoc[10/15] loss=0.0066
Predicted: ohlol, Epoc[11/15] loss=0.0044
Predicted: ohlol, Epoc[12/15] loss=0.0030
Predicted: ohlol, Epoc[13/15] loss=0.0021
Predicted: ohlol, Epoc[14/15] loss=0.0015
Predicted: ohlol, Epoc[15/15] loss=0.0011
