# RNN Cell vs RNN Layer
- <span style = 'font-size:1.3em;line-height:1.5em'>Pytorch는 전체 RNN layer를 다루는 API(RNN, LSTM, GRU)와 각 RNN cell을 다루는 API(RNNCell, LSTMCell, GRUCell)이 별도로 존재합니다.</span>
- <span style = 'font-size:1.3em;line-height:1.5em'>큰 차이는 없으나 Cell을 사용하면 좀더 자유도 있게 사용할 수 있습니다.</span>
- <span style = 'font-size:1.3em;line-height:1.5em'>그러나, 손쉽게 사용하고 싶으시면 RNN Layer API를 사용하시면 되겠습니다.</span>

https://github.com/CarlosJose126/RNNs/blob/main/LSTMCellvsLSTM.ipynb

![fig2](imgs/figure2.png)

In [22]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import math

In [23]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [24]:
data_path = 'data'
if not os.path.exists(data_path):
    os.makedirs(data_path)
    
transform = transforms.Compose([transforms.ToTensor(), # 이미지를 텐서로 변경하고
                                transforms.Normalize((0.1307,), # 이미지를 0.1307, 0.3081값으로 normalize
                                                     (0.3081,))
                               ])

trn_dset = datasets.MNIST(root=data_path, train=True, transform=transform, download=True)
tst_dset = datasets.MNIST(root=data_path, train=False, transform=transform, download=False)

In [25]:
# check data 

img, label = trn_dset[0]
print(img.shape, label)

torch.Size([1, 28, 28]) 5


In [26]:
batch_size = 128
trn_loader = DataLoader(trn_dset, batch_size=batch_size, shuffle=True, drop_last=True)
tst_loader = DataLoader(trn_dset, batch_size=batch_size, shuffle=False)
num_epochs = 10
seq_len = 28
input_size = 28
lr = 0.01
loss_func = nn.CrossEntropyLoss()

# MNIST Classification with RNN

![fig3](imgs/figure3.png)

![fig1](imgs/figure1.png)

- <span style = 'font-size:1.3em;line-height:1.5em'>$x_1, x_2, ..., x_{28}$에 매 time의 28차원의 가로 (혹은 세로)을 입력</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'>즉, 매 x는 28차원의 data이고, seq의 길이는 28로 MNIST data (28*28 pixel)한개를 입력</span>

## 1. Define Model Structure

### (1) RNN

In [42]:
class MyLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(MyLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        # Set initial hidden and cell states 
        h0 = torch.zeros(self.num_layers, x.shape[0], self.hidden_size).to(device) 
        c0 = torch.zeros(self.num_layers, x.shape[0], self.hidden_size).to(device)
        # Passing in the input and hidden state into the model and  obtaining outputs
        out, (hn, cn) = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)
        
        # LSTM output: (N, L, D * H_{out})` when ``batch_first=True`` --> N=batch_size, L=seq_length, D=hidden_size
        #Reshaping the outputs such that it can be fit into the fully connected layer
        print('1',out.shape)
        print('2',out[:,-1,:].shape)
        out = self.fc(out[:, -1, :]) # 마지막 sequence의 값만 가져오기
        return out

In [46]:
model_lstm = MyLSTM(input_size=28, hidden_size=256, num_layers=2, num_classes=10).to(device)
print(model_lstm)

lstm_opt = optim.Adam(params = model_lstm.parameters(), lr = lr)

MyLSTM(
  (lstm): LSTM(28, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=256, out_features=10, bias=True)
)


In [47]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(trn_loader):

        images = images.reshape(-1, seq_len, input_size).to(device) # image shape: (128, 28, 28), label: 5 
        labels = labels.to(device)
        
        lstm_opt.zero_grad()
        # Forward pass
        outputs = model_lstm(images)
        break
        # loss = loss_func(outputs, labels)
        # # Backward and optimize
        # loss.backward()
        # lstm_opt.step()

        
    # print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])
1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])
1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])
1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])
1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])
1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])
1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])
1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])
1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])
1 torch.Size([128, 28, 256])
2 torch.Size([128, 256])


In [45]:
# Test the model
model_lstm.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in tst_loader:
        images = images.reshape(-1, seq_len, input_size).to(device)
        labels = labels.to(device)
        outputs = model_lstm(images)
        _, predicted = torch.max(outputs.data, 1)
        total = total + labels.shape[0]
        correct = correct + (predicted == labels).sum().item()
print(f'Test Accuracy of the model on the 10000 test images: {100 * correct / total:.2f} %')

1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])
2 torch.Size([128, 128])
1 torch.Size([128, 28, 128])

# Practice

<span style = 'font-size:1.3em;line-height:1.5em'>1. RNN으로 같은 MNIST classification task를 수행해보자.  </span>

In [12]:
data_path = 'data'
if not os.path.exists(data_path):
    os.makedirs(data_path)

# transform
transform = transforms.Compose([transforms.ToTensor(), # 이미지를 텐서로 변경하고
                                transforms.Normalize((0.1307,), # 이미지를 0.1307, 0.3081값으로 normalize
                                                     (0.3081,))
                               ])
# load data
trn_dset = datasets.MNIST(root=data_path, train=True, transform=transform, download=True)
tst_dset = datasets.MNIST(root=data_path, train=False, transform=transform, download=False)

# define hyperparameter
batch_size = 128
trn_loader = DataLoader(trn_dset, batch_size=batch_size, shuffle=True, drop_last=True)
tst_loader = DataLoader(trn_dset, batch_size=batch_size, shuffle=False)
num_epochs = 10
seq_len = 28
input_size = 28
lr = 0.01
loss_func = nn.CrossEntropyLoss()

# define model 
class MyRNN(nn.Module):
    def __init__(self, n_classes, input_size, hidden_size, num_layers, seq_len):
        super(MyRNN, self).__init__()
        self.n_classes = n_classes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_len = seq_len
        
        self.rnn = nn.RNN(input_size=self.input_size, 
                          hidden_size=self.hidden_size, 
                          num_layers=self.num_layers, 
                          batch_first=True)
        self.fc1 = nn.Linear(self.hidden_size, 128)
        self.fc2 = nn.Linear(128, self.n_classes)
        
    def forward(self, x):
        h0 = torch.zeros((self.num_layers, x.shape[0], self.hidden_size)).to(device)
        output, hn = self.rnn(x, h0)
        hn = hn.view(-1, self.hidden_size)

        out = F.relu(output[:,-1,:])
        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        
        return out
    
# initialize model & optimizer
model_rnn = MyRNN(n_classes=10, input_size=28, hidden_size=128, num_layers=2, seq_len=28).to(device)
print(model_rnn)

rnn_opt = optim.Adam(params = model_rnn.parameters(), lr = lr)

# train
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(trn_loader):

        images = images.reshape(-1, seq_len, input_size).to(device)
        labels = labels.to(device)
        
        rnn_opt.zero_grad()
        # Forward pass
        outputs = model_rnn(images)
        loss = loss_func(outputs, labels)
        # Backward and optimize
        loss.backward()
        lstm_opt.step()

        
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


MyRNN(
  (rnn): RNN(28, 128, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=128, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)
Epoch [1/10], Loss: 2.3113
Epoch [2/10], Loss: 2.3120
Epoch [3/10], Loss: 2.3089
Epoch [4/10], Loss: 2.2979
Epoch [5/10], Loss: 2.3101
Epoch [6/10], Loss: 2.3095
Epoch [7/10], Loss: 2.3098
Epoch [8/10], Loss: 2.3091
Epoch [9/10], Loss: 2.3064
Epoch [10/10], Loss: 2.2970


In [13]:
# Test the model
model_rnn.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in tst_loader:
        images = images.reshape(-1, seq_len, input_size).to(device)
        labels = labels.to(device)
        outputs = model_rnn(images)
        _, predicted = torch.max(outputs.data, 1)
        total = total + labels.shape[0]
        correct = correct + (predicted == labels).sum().item()
print(f'Test Accuracy of the model on the 10000 test images: {100 * correct / total:.2f} %')

Test Accuracy of the model on the 10000 test images: 6.91 %


# Assignment

<span style = 'font-size:1.3em;line-height:1.5em'>1. GRU로 같은 MNIST classification task를 수행해보자.  </span>

In [17]:
data_path = 'data'
if not os.path.exists(data_path):
    os.makedirs(data_path)

# transform
transform = transforms.Compose([transforms.ToTensor(), # 이미지를 텐서로 변경하고
                                transforms.Normalize((0.1307,), # 이미지를 0.1307, 0.3081값으로 normalize
                                                     (0.3081,))
                               ])
# load data
trn_dset = datasets.MNIST(root=data_path, train=True, transform=transform, download=True)
tst_dset = datasets.MNIST(root=data_path, train=False, transform=transform, download=False)

# define hyperparameter
batch_size = 128
trn_loader = DataLoader(trn_dset, batch_size=batch_size, shuffle=True, drop_last=True)
tst_loader = DataLoader(trn_dset, batch_size=batch_size, shuffle=False)
num_epochs = 10
seq_len = 28
input_size = 28
lr = 0.01
loss_func = nn.CrossEntropyLoss()

# define model 
class MyGRU(nn.Module):
    def __init__(self, n_classes, input_size, hidden_size, num_layers, seq_len):
        super(MyGRU, self).__init__()
        self.n_classes = n_classes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_len = seq_len
        
        self.gru = nn.GRU(input_size=self.input_size, 
                          hidden_size=self.hidden_size,
                          num_layers=self.num_layers, 
                          batch_first=True)
        self.fc1 = nn.Linear(self.hidden_size, 128)
        self.fc2 = nn.Linear(128, self.n_classes)
        
    def forward(self, x):
        h0 = torch.zeros((self.num_layers, x.shape[0], self.hidden_size)).to(device) # hidden state 0으로 초기화
        output, (hn) = self.gru(x, (h0))
        hn = hn.view(-1, self.hidden_size)

        out = F.relu(output[:,-1,:])
        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        return out
    
# initialize model & optimizer
model_gru = MyGRU(n_classes=10, input_size=28, hidden_size=128, num_layers=2, seq_len=28).to(device)
print(model_rnn)

gru_opt = optim.Adam(params = model_gru.parameters(), lr = lr)

# train
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(trn_loader):

        images = images.reshape(-1, seq_len, input_size).to(device)
        labels = labels.to(device)
        
        rnn_opt.zero_grad()
        # Forward pass
        outputs = model_gru(images)
        loss = loss_func(outputs, labels)
        # Backward and optimize
        loss.backward()
        lstm_opt.step()

        
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


MyRNN(
  (rnn): RNN(28, 128, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=128, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)
Epoch [1/10], Loss: 2.3047
Epoch [2/10], Loss: 2.2982
Epoch [3/10], Loss: 2.3091
Epoch [4/10], Loss: 2.2999
Epoch [5/10], Loss: 2.2969
Epoch [6/10], Loss: 2.2967
Epoch [7/10], Loss: 2.3018
Epoch [8/10], Loss: 2.2937
Epoch [9/10], Loss: 2.3058
Epoch [10/10], Loss: 2.3116


In [18]:
# Test the model
model_gru.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in tst_loader:
        images = images.reshape(-1, seq_len, input_size).to(device)
        labels = labels.to(device)
        outputs = model_gru(images)
        _, predicted = torch.max(outputs.data, 1)
        total = total + labels.shape[0]
        correct = correct + (predicted == labels).sum().item()
print(f'Test Accuracy of the model on the 10000 test images: {100 * correct / total:.2f} %')

Test Accuracy of the model on the 10000 test images: 9.41 %
