# L12 RNN

In [1]:
import torch
import numpy as np

BATCH_SIZE = 1
SEQ_LEN = 3
INPUT_SIZE = 4
HIDDEN_SIZE = 2
NUM_LAYERS = 1

# RNNCell
# input.shape = (BATCH_SIZE, INPUT_SIZE)
# hidden.shape = (BATCH_SIZE, HIDDEN_SIZE)
# output.shape = (BATCH_SIZE, HIDDEN_SIZE)
# dataset.shape = (SEQ_LEN, BATCH_SIZE, INPUT_SIZE)

# RNN
# input.shape = (SEQ_LEN, BATCH_SIZE, INPUT_SIZE)
# h_0.shape = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)
# output.shape = (SEQ_LEN, BATCH_SIZE, HIDDEN_SIZE)
# h_n.shape = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)
# dataset.shape = (SEQ_LEN, BATCH_SIZE, INPUT_SIZE)

In [2]:
# RNNCell
# use tanh in cell
RNNCell = torch.nn.RNNCell(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE)

dataset = torch.randn(SEQ_LEN, BATCH_SIZE, INPUT_SIZE)
hidden = torch.zeros(BATCH_SIZE, HIDDEN_SIZE)
out = torch.tensor(np.array([]))

for idx, input in enumerate(dataset):
  hidden = RNNCell(input, hidden)
  out = torch.cat((out, hidden), dim=0)

print('out:\n\t', out, '\n\t', out.shape, '\nhidden:\n\t', hidden, '\n\t', hidden.shape)

out:
	 tensor([[ 0.9972, -0.9662],
        [ 0.9985, -0.7575],
        [ 0.0586,  0.2201]], dtype=torch.float64, grad_fn=<CatBackward0>) 
	 torch.Size([3, 2]) 
hidden:
	 tensor([[0.0586, 0.2201]], grad_fn=<TanhBackward0>) 
	 torch.Size([1, 2])


In [3]:
# RNN
RNN = torch.nn.RNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS)

dataset = torch.randn(SEQ_LEN, BATCH_SIZE, INPUT_SIZE)
hidden = torch.zeros(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

out, hidden = RNN(dataset, hidden)

print('out:\n\t', out, '\n\t', out.shape, '\nhidden:\n\t', hidden, '\n\t', hidden.shape)

out:
	 tensor([[[-0.7730,  0.3682]],

        [[-0.8271,  0.6409]],

        [[-0.9808,  0.9651]]], grad_fn=<StackBackward0>) 
	 torch.Size([3, 1, 2]) 
hidden:
	 tensor([[[-0.9808,  0.9651]]], grad_fn=<StackBackward0>) 
	 torch.Size([1, 1, 2])


In [4]:
# RNN with batch_first=True
RNN = torch.nn.RNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, batch_first=True)

dataset = torch.randn(BATCH_SIZE, SEQ_LEN, INPUT_SIZE)
hidden = torch.zeros(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

out, hidden = RNN(dataset, hidden)

print('out:\n\t', out, '\n\t', out.shape, '\nhidden:\n\t', hidden, '\n\t', hidden.shape)

out:
	 tensor([[[-0.8056,  0.0124],
         [-0.9937,  0.9482],
         [-0.8982, -0.3263]]], grad_fn=<TransposeBackward1>) 
	 torch.Size([1, 3, 2]) 
hidden:
	 tensor([[[-0.8982, -0.3263]]], grad_fn=<StackBackward0>) 
	 torch.Size([1, 1, 2])


In [1]:
# Case
import torch
import numpy as np

BATCH_SIZE = 1
SEQ_LEN = 5
INPUT_SIZE = 4
HIDDEN_SIZE = 4
NUM_LAYERS = 1

In [2]:
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3] # hello
y_data = [3, 1, 2, 3, 2] # ohlol

one_hot_lookup = [
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1]]

x_one_hot = [one_hot_lookup[x] for x in x_data]

In [3]:
class RNNCellNet(torch.nn.Module):
  def __init__(self, input_size, hidden_size, batch_size):
    super(RNNCellNet, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.rnncell = torch.nn.RNNCell(input_size=self.input_size, 
                     hidden_size=self.hidden_size)

  def forward(self, input, hidden):
    hidden = self.rnncell(input, hidden)
    return hidden

rnnCellNet = RNNCellNet(INPUT_SIZE, HIDDEN_SIZE, BATCH_SIZE)

In [4]:
class RNNNet(torch.nn.Module):
  def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
    super(RNNNet, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.rnn = torch.nn.RNN(input_size=self.input_size, 
                 hidden_size=self.hidden_size, 
                 num_layers=self.num_layers)

  def forward(self, input):
    hidden = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
    out, hidden = self.rnn(input, hidden) # output.shape = (SEQ_LEN, BATCH_SIZE, HIDDEN_SIZE)
                          # h_n.shape = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)
    return out.view(-1, self.hidden_size)

rnnNet = RNNNet(INPUT_SIZE, HIDDEN_SIZE, BATCH_SIZE, NUM_LAYERS)

In [5]:
criterion = torch.nn.CrossEntropyLoss() # softmax(x)+log(x)+nn.NLLLoss====>nn.CrossEntropyLoss

In [6]:
# RNNCell
inputs = torch.Tensor(x_one_hot).view(-1, BATCH_SIZE, INPUT_SIZE) # (SEQLEN_SIZE, BATCH_SIZE, INPUT_SIZE)
labels = torch.LongTensor(y_data).view(-1, 1) # (SEQLEN_SIZE * BATCH_SIZE, 1)

optimizer = torch.optim.Adam(rnnCellNet.parameters(), lr=0.1)

for epoch in range(15):
  loss = torch.zeros(1, requires_grad=True)
  hidden = torch.zeros(BATCH_SIZE, HIDDEN_SIZE, requires_grad=True)
  # hidden = hidden.to(device)

  print("Predicted: ", end="")
  for input, label in zip(inputs, labels):
    hidden = rnnCellNet(input, hidden)
    loss = loss + criterion(hidden, label) # not .ietm() # (BATCH_SIZE, HIDDEN_SIZE) vs (1)
    _, idx = hidden.max(dim=1)
    print(idx2char[idx.item()], end="")
  
  loss.backward()
  optimizer.step()
  optimizer.zero_grad()
  print(", Epoc[%d/15] loss=%.4f" %(epoch+1, loss.item()))

Predicted: llhhl, Epoc[1/15] loss=6.9730
Predicted: lllll, Epoc[2/15] loss=6.0254
Predicted: ollll, Epoc[3/15] loss=5.5476
Predicted: ollll, Epoc[4/15] loss=5.1594
Predicted: ollll, Epoc[5/15] loss=4.7548
Predicted: ollll, Epoc[6/15] loss=4.3943
Predicted: ollll, Epoc[7/15] loss=4.0873
Predicted: ohlll, Epoc[8/15] loss=3.7992
Predicted: ohlll, Epoc[9/15] loss=3.5108
Predicted: ohlol, Epoc[10/15] loss=3.2392
Predicted: ohlol, Epoc[11/15] loss=3.0250
Predicted: ohlol, Epoc[12/15] loss=2.8440
Predicted: ohlol, Epoc[13/15] loss=2.6681
Predicted: ohlol, Epoc[14/15] loss=2.5375
Predicted: ohlol, Epoc[15/15] loss=2.4447


In [7]:
# RNNCell
inputs = torch.Tensor(x_one_hot).view(-1, BATCH_SIZE, INPUT_SIZE) # (SEQLEN_SIZE, BATCH_SIZE, INPUT_SIZE)
labels = torch.LongTensor(y_data) # (SEQLEN_SIZE * BATCH_SIZE)

optimizer = torch.optim.Adam(rnnNet.parameters(), lr=0.1)

for epoch in range(15):
  print("Predicted: ", end="")
  outputs = rnnNet(inputs)
  loss = criterion(outputs, labels) # not .ietm() # (SEQ_LEN * BATCH_SIZE, HIDDEN_SIZE) vs (SEQ_LEN * BATCH_SIZE)

  _, idx = outputs.max(dim=1)
  idx = idx.data.numpy()
  print(''.join(idx2char[x] for x in idx), end="")

  loss.backward()
  optimizer.step()
  optimizer.zero_grad()
  print(", Epoc[%d/15] loss=%.4f" %(epoch+1, loss.item()))

Predicted: eeeee, Epoc[1/15] loss=1.8637
Predicted: eheee, Epoc[2/15] loss=1.5535
Predicted: eooee, Epoc[3/15] loss=1.3622
Predicted: eoooo, Epoc[4/15] loss=1.2070
Predicted: ooool, Epoc[5/15] loss=1.0314
Predicted: oolol, Epoc[6/15] loss=0.8482
Predicted: ohlol, Epoc[7/15] loss=0.7309
Predicted: ohlol, Epoc[8/15] loss=0.6790
Predicted: ohlol, Epoc[9/15] loss=0.6501
Predicted: ohlol, Epoc[10/15] loss=0.6244
Predicted: ohlol, Epoc[11/15] loss=0.5967
Predicted: ohlol, Epoc[12/15] loss=0.5676
Predicted: ohlol, Epoc[13/15] loss=0.5391
Predicted: ohlol, Epoc[14/15] loss=0.5124
Predicted: ohlol, Epoc[15/15] loss=0.4874
