# Pytorch RNN

In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms

In [2]:
# Twelve scalar readings 1..12 as a float32 vector. They are later grouped as
# batch_size 4 x seq_length 3:
# [[1,2,3], [4,5,6], [7,8,9], [10,11,12]]
inputs = torch.arange(1, 13, dtype=torch.float32)

In [9]:
# Dimensions of the toy example used by the shape demos below.
batch_size, seq_length, input_size = 4, 3, 1  # 4 sequences, 3 steps each, 1 feature per step
hidden_size, num_layers = 2, 2                # 2 hidden units per layer, 2 stacked layers

`batch_first=True`로 RNN을 구현하면 input_data의 shape은 (batch_size, sequence_length, input_size),  
output_data의 shape은 (batch_size, sequence_length, hidden_size)가 된다.

## nn.RNN Basic

Input : input과 hidden_0 이라는 2개의 input을 받음
- input : neural network로 들어가는 sequence input [seq_length, batch size, input size]
- hidden_0 : network의 초기 hidden state [num layers*num directions, batch size, hidden size]
    - num directions : Bidirectional RNN일 경우 2, 나머지 1
    - hidden_0은 따로 초기화 하지 않으면 Pytorch에 의해 자동으로 모두 0으로 초기화 됨

Output : out과 hidden이라는 2개의 출력을 냄
- out : 마지막 RNN layer로부터 매 timesteps마다의 output
- h_n : 모든 RNN layer로부터 마지막 timestep의 hidden value
    - (num_layers* num_directions, batch, hidden_size)

In [4]:
# batch_first=True changes the tensor layout from [seq, batch, feature]
# to [batch, seq, feature].
# The module takes two inputs (input, hidden_0) and returns two outputs (out, hidden).
rnn = nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    batch_first=True,
)

In [5]:
# input : (seq_length, batch_size, input_size) -- batch_first=True이면 (batch_size, seq_length, input_size)
# hidden_0 : network 초기의 hidden state : [num_layers*num directions, batch_size, hidden_size]

In [None]:
# out : 마지막 RNN layer로부터 매 time step마다의 output (hello -> e, l, l, o)
# h_n : 모든 RNN layer로부터 마지막 timestep의 hidden value(state)
  # [num layers*num directions, batch size, hidden size]

In [6]:
inputs.shape

torch.Size([12])

In [10]:
inputs = inputs.view(batch_size, seq_length, input_size)

In [11]:
inputs.shape

torch.Size([4, 3, 1])

In [15]:
# Run the reshaped toy batch through the RNN; no initial hidden state is passed.
out, hidden = rnn(inputs)
print('out :',out.shape) # [batch size, seq length, num directions*hidden size]
print('hidden :',hidden.shape) # [num layers*num directions, batch size, hidden size]

out : torch.Size([4, 3, 2])
hidden : torch.Size([2, 4, 2])


## Bi-directional RNN

- 시간 순, 시간 역행 순 모두 RNN 적용

In [18]:
bi_rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)

In [17]:
inputs.shape

torch.Size([4, 3, 1])

In [19]:
out, hidden = bi_rnn(inputs)

In [20]:
print(out.shape)
print(hidden.shape)

torch.Size([4, 3, 4])
torch.Size([4, 4, 2])


## Bi-directional RNN 방향 분리

out

In [21]:
# Split the last axis (num_directions*hidden_size) into (direction, hidden_size).
out = out.view(batch_size, seq_length, 2, hidden_size) # 2 : the output is split into the forward and backward directions
out.shape

torch.Size([4, 3, 2, 2])

In [22]:
# Axis 2 indexes the direction; unbind yields one slice per direction.
out_direc1, out_direc2 = out.unbind(dim=2)  # first direction, second direction

for direction_out in (out_direc1, out_direc2):
    print(direction_out.shape)

torch.Size([4, 3, 2])
torch.Size([4, 3, 2])


h_n

In [24]:
# h_n is laid out as (num_layers*num_directions, batch, hidden_size), so it
# unpacks into (layer, direction, batch, hidden_size).
hidden = hidden.view(num_layers, 2, batch_size, hidden_size)
print(hidden.shape)

# The direction lives on axis 1. Indexing axis 0 would pick a *layer* instead;
# with num_layers == 2 that mistake produces the same shapes and goes unnoticed.
hidden_direc1 = hidden[:, 0, :, :]  # first direction, all layers
hidden_direc2 = hidden[:, 1, :, :]  # second direction, all layers

print(hidden_direc1.shape)
print(hidden_direc2.shape)

torch.Size([2, 2, 4, 2])
torch.Size([2, 4, 2])
torch.Size([2, 4, 2])


## RNN Application

In [25]:
# RNN application
# Fetch MNIST into ./data (downloading when needed); the train and test
# splits share one ToTensor transform.
to_tensor = transforms.ToTensor()
train_data, test_data = (
    datasets.MNIST(root='data', train=is_train, download=True, transform=to_tensor)
    for is_train in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 39.0MB/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 1.28MB/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 3.54MB/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 5.75MB/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [27]:
from torch.utils.data import DataLoader
# NOTE(review): batch_size is the notebook-level variable that was set to 4 for
# the toy example earlier, so MNIST is served in mini-batches of 4 here.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)  # shuffled for training
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)  # fixed order for evaluation

In [49]:
# 수정된 모델 정의에서 input_size를 28로 설정
class RNN(nn.Module):
    """Sequence classifier: a stacked nn.RNN followed by a linear head.

    Args:
        num_classes: number of output classes (length of the logit vector).
        input_size: number of features per time step (default 28).
        hidden_size: hidden units per RNN layer (default 64).
        num_layers: number of stacked RNN layers (default 2).
    """

    def __init__(self, num_classes, input_size=28, hidden_size=64, num_layers=2):
        super().__init__()
        self.input_size = input_size  # must match the feature size of the data
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.rnn = nn.RNN(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, self.num_classes)

    def forward(self, x):
        """Map x of shape (batch, seq_len, input_size) to logits (batch, num_classes)."""
        out, _ = self.rnn(x)
        # Classify from the output of the last time step only.
        return self.fc(out[:, -1, :])

In [51]:
# Use the GPU when one is available, otherwise the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Rest of the training setup: model, loss function, optimizer.
model = RNN(num_classes=10)
model = model.to(device)
CELoss = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(params=model.parameters())

In [46]:
# Each image is read as sequence_length rows of input_size values.
total_epochs, sequence_length, input_size = 3, 28, 28

In [53]:
# Put the model in training mode explicitly so the loop also works when it is
# re-run after a cell that switched the model to eval mode.
model.train()
for epoch in range(total_epochs):
    for images, labels in train_loader:
        # Reshape every image to (sequence_length, input_size) for the RNN.
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = CELoss(outputs, labels)

        adam_optimizer.zero_grad()
        loss.backward()
        adam_optimizer.step()
    # This is the loss of the last mini-batch of the epoch, not an epoch average.
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, loss.item()))

Epoch [1/3], Loss: 1.3074
Epoch [2/3], Loss: 0.1179
Epoch [3/3], Loss: 0.4045


In [54]:
# Evaluate in inference mode, like the LSTM evaluation cell, then restore the
# previous mode so the training cell can be run again afterwards.
was_training = model.training
model.eval()
with torch.no_grad():
    correct = 0
    total = 0

    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        predicted = outputs.argmax(dim=1)  # index of the largest logit = predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))
model.train(was_training)

Test Accuracy of the model on the 10000 test images: 81.43 %


In [55]:
class Bi_RNN(nn.Module):
  """Bidirectional two-layer nn.RNN classifier.

  Args:
    num_classes: number of output classes (length of the logit vector).
  """

  def __init__(self, num_classes):
    super().__init__()
    self.input_size = 28
    self.hidden_size = 128
    self.num_layers = 2
    # batch_first=True: [seq, batch, feature] -> [batch, seq, feature]
    self.RNN = nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True, bidirectional=True)
    # The head sees both directions concatenated, hence hidden_size*2.
    self.fc = nn.Linear(self.hidden_size*2, num_classes)

  def forward(self, x):
    """Map x of shape (batch, seq_len, input_size) to logits (batch, num_classes)."""
    _, h_n = self.RNN(x)
    # h_n is (num_layers*2, batch, hidden_size); its last two entries are the top
    # layer's final forward and backward states. out[:, -1, :] would pair the
    # forward summary with a backward state that has seen only the last step.
    summary = torch.cat((h_n[-2], h_n[-1]), dim=1)
    return self.fc(summary)

In [56]:
# Replace the model with the bidirectional RNN and rebuild the loss and the
# optimizer for its parameters.
model = Bi_RNN(num_classes=10)
model = model.to(device)
CELoss = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(params=model.parameters())

# Pytorch LSTM

## nn.LSTM Basic

In [58]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [59]:
# Twelve scalar readings 1..12 as a float32 vector.
inputs = torch.arange(1, 13, dtype=torch.float32)

In [60]:
# Dimensions of the toy LSTM example.
batch_size, seq_length, input_size = 4, 3, 1  # 4 sequences, 3 steps each, 1 feature per step
hidden_size, num_layers = 2, 2                # 2 hidden units per layer, 2 stacked layers

In [None]:
# nn.LSTM()
# Input : input, hidden_0
  # input : [seq_length, batch_size, input_size]
  # hidden_0 - hidden_init, cell_init
# Output : out, hidden
  # out : RNN과 동일
  # h_n : [num_layers * num_directions, batch, hidden_size] - (hidden, cell)

In [61]:
lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,num_layers=num_layers, batch_first=True)

In [64]:
inputs = inputs.view(batch_size, seq_length, input_size)
inputs.shape

torch.Size([4, 3, 1])

In [69]:
# Explicit all-zero initial hidden and cell states, one slab per layer.
state_shape = (num_layers, batch_size, hidden_size)
hidden_init = torch.zeros(state_shape)
cell_init = torch.zeros(state_shape)

initial_state = (hidden_init, cell_init)
out, (hidden, cell) = lstm(inputs, initial_state)
print('out:', out.shape) # (batch_size, seq_length, num_directions*hidden_size)
print('hidden:', hidden.shape) # [num_directions*num_layers, batch_size, hidden_size]
print('cell:', cell.shape) # [num_directions*num_layers, batch_size, hidden_size]

out: torch.Size([4, 3, 2])
hidden: torch.Size([2, 4, 2])
cell: torch.Size([2, 4, 2])


In [70]:
# Fetch MNIST into ./data (downloading when needed); the train and test
# splits share one ToTensor transform.
to_tensor = transforms.ToTensor()
train_data, test_data = (
    datasets.MNIST(root='data', train=is_train, download=True, transform=to_tensor)
    for is_train in (True, False)
)

In [71]:
# NOTE(review): batch_size is the notebook-level variable set to 4 for the toy
# LSTM example above, so MNIST is served in mini-batches of 4 here.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)  # shuffled for training
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)  # fixed order for evaluation

## LSTM application

In [73]:
class LSTM(nn.Module):
  """Two-layer nn.LSTM classifier with a linear head.

  Args:
    num_classes: number of output classes (length of the logit vector).
  """

  def __init__(self, num_classes):
    super().__init__()

    self.input_size = 28
    self.hidden_size = 128
    self.num_layers = 2

    self.LSTM = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True)
    self.fc = nn.Linear(self.hidden_size, num_classes)

  def forward(self, x):
    """Map x of shape (batch, seq_len, input_size) to logits (batch, num_classes)."""
    # Create the zero initial states on x's device and dtype. A plain torch.zeros
    # is always CPU/float32 and fails once the model is moved to a GPU.
    state_shape = (self.num_layers, x.size(0), self.hidden_size)
    hidden_init = x.new_zeros(state_shape)
    cell_init = x.new_zeros(state_shape)

    # out: (mini batch, seq length, hidden_size)
    out, _ = self.LSTM(x, (hidden_init, cell_init))
    # Classify from the output of the last time step only.
    return self.fc(out[:, -1, :])

In [74]:
# Build the LSTM classifier on the selected device, plus its loss and optimizer.
model = LSTM(num_classes=10)
model = model.to(device)

CELoss = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(params=model.parameters())

In [75]:
# Each image is read as sequence_length rows of input_size values.
total_epochs, sequence_length, input_size = 3, 28, 28

In [76]:
# Put the model in training mode explicitly so the loop also works when it is
# re-run after the evaluation cell has called model.eval().
model.train()
for epoch in range(total_epochs):
  for images, labels in train_loader:
    # The model was moved to `device`, so every batch has to be moved there too.
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = CELoss(outputs, labels)

    adam_optimizer.zero_grad()
    loss.backward()
    adam_optimizer.step()
  # This is the loss of the last mini-batch of the epoch, not an epoch average.
  print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, loss.item()))

Epoch [1/3], Loss: 0.0161
Epoch [2/3], Loss: 0.0028
Epoch [3/3], Loss: 0.0144


In [77]:
# Measure test accuracy in inference mode with gradient tracking disabled.
model.eval()
with torch.no_grad():
  correct = 0
  total = 0

  for images, labels in test_loader:
    # The model was moved to `device`, so every batch has to be moved there too.
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)

    outputs = model(images)
    predicted = outputs.argmax(dim=1)  # index of the largest logit = predicted class
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

Test Accuracy of the model on the 10000 test images: 98.32 %


## Bidirectional LSTM Application

In [79]:
class B1_LSTM(nn.Module):
  """Bidirectional two-layer nn.LSTM classifier.

  Args:
    num_classes: number of output classes (length of the logit vector).
  """

  def __init__(self, num_classes):
    super().__init__()
    self.input_size = 28
    self.hidden_size = 128
    self.num_layers = 2
    self.LSTM = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True, bidirectional=True)
    # The head sees both directions concatenated, hence hidden_size*2.
    self.fc = nn.Linear(self.hidden_size*2, num_classes)

  def forward(self, x):
    """Map x of shape (batch, seq_len, input_size) to logits (batch, num_classes)."""
    # Create the zero initial states on x's device and dtype. A plain torch.zeros
    # is always CPU/float32 and fails once the model is moved to a GPU.
    state_shape = (self.num_layers*2, x.size(0), self.hidden_size)
    hidden_init = x.new_zeros(state_shape)
    cell_init = x.new_zeros(state_shape)
    _, (h_n, _) = self.LSTM(x, (hidden_init, cell_init))
    # h_n is (num_layers*2, batch, hidden_size); its last two entries are the top
    # layer's final forward and backward states. out[:, -1, :] would pair the
    # forward summary with a backward state that has seen only the last step.
    summary = torch.cat((h_n[-2], h_n[-1]), dim=1)
    return self.fc(summary)

In [80]:
# Use the GPU when one is available, otherwise the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
model = B1_LSTM(num_classes=10)
model = model.to(device)

CELoss = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [81]:
# Each image is read as sequence_length rows of input_size values.
total_epochs, sequence_length, input_size = 3, 28, 28

In [85]:
# Number of mini-batches per epoch, used only in the progress message.
total_step = len(train_loader)

for epoch in range(total_epochs):
  # Count steps from 1 so the progress check and the message use the same number.
  for step, (images, labels) in enumerate(train_loader, start=1):
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)

    outputs = model(images)
    loss = CELoss(outputs, labels)

    adam_optimizer.zero_grad()
    loss.backward()
    adam_optimizer.step()

    # Report the current mini-batch loss every 5000 steps.
    if step % 5000 == 0:
      print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, step, total_step, loss.item()))

Epoch [1/3], Step [5000/15000], Loss: 0.0016
Epoch [1/3], Step [10000/15000], Loss: 0.0003
Epoch [1/3], Step [15000/15000], Loss: 0.0040
Epoch [2/3], Step [5000/15000], Loss: 0.0019
Epoch [2/3], Step [10000/15000], Loss: 0.0131
Epoch [2/3], Step [15000/15000], Loss: 0.0012
Epoch [3/3], Step [5000/15000], Loss: 0.0005
Epoch [3/3], Step [10000/15000], Loss: 0.0004
Epoch [3/3], Step [15000/15000], Loss: 0.0282


# Tensorflow RNN

In [86]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, layers, optimizers, datasets

In [87]:
x = tf.random.normal(shape=(32, 20, 100), mean=0, stddev=1.)

In [88]:
rnn = layers.SimpleRNN(units=10)

In [89]:
x.shape # [batch_size, timestep, features]

TensorShape([32, 20, 100])

In [90]:
output = rnn(x)
output.shape

TensorShape([32, 10])

In [94]:
rnn2 = layers.SimpleRNN(units=10, return_sequences=True)
# return_sequences: return only the last output of the sequence (False, the default) or the full sequence (True)

In [91]:
x.shape

TensorShape([32, 20, 100])

In [95]:
output2 = rnn2(x)
output2.shape

TensorShape([32, 20, 10])

In [98]:
rnn3 = layers.SimpleRNN(units=10, return_sequences=True, return_state=True)
# return_state=True: return the state in addition to the output

In [99]:
output3, final_sate = rnn3(x)

In [100]:
print(output3.shape)
print(final_sate.shape)

(32, 20, 10)
(32, 10)


In [101]:
x.shape

TensorShape([32, 20, 100])

## Bi-directional RNN

In [102]:
rnn = layers.SimpleRNN(units=10)

In [103]:
bi_rnn = layers.Bidirectional(rnn)

In [105]:
output = bi_rnn(x)

In [106]:
output.shape

TensorShape([32, 20])

# Tensorflow LSTM

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, layers, optimizers, datasets

In [2]:
x = tf.random.normal(shape=(32, 20, 100), mean=0, stddev=1.)

In [4]:
lstm = layers.LSTM(units=10)

In [5]:
x.shape

TensorShape([32, 20, 100])

In [6]:
out = lstm(x)
out.shape

TensorShape([32, 10])

In [8]:
lstm2 = layers.LSTM(units=10, return_sequences=True, return_state=True) # [batch, timesteps, features]
# return_sequences : return only the last output of the sequence (False - default) or the full sequence (True)
# return_state : whether to also return the last state (hidden_state, cell_state) in addition to the output (True)

In [10]:
output2, final_hidden, final_cell = lstm2(x)
print('output2 :', output2.shape)
print('final_hidden :', final_hidden.shape)
print('final_cell :', final_cell.shape)

output2 : (32, 20, 10)
final_hidden : (32, 10)
final_cell : (32, 10)


## LSTM application

In [20]:
class LSTM(Model):
  """Keras model: two LSTM layers, each followed by batch normalization, and a softmax head.

  Args:
    units1: units of the first LSTM layer (returns the full sequence).
    units2: units of the second LSTM layer (returns the last output only).
    num_classes: width of the softmax output layer.
  """

  def __init__(self, units1, units2, num_classes):
    super().__init__()
    stack = [
        layers.LSTM(units1, return_sequences=True),
        layers.BatchNormalization(),
        layers.LSTM(units2),
        layers.BatchNormalization(),
        layers.Dense(num_classes, activation=tf.nn.softmax),
    ]
    self.sequential = tf.keras.Sequential(stack)

  def call(self, x):
    """Run x through the sequential stack and return its output."""
    return self.sequential(x)

In [21]:
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

In [22]:
# Divide by 255 (presumably to bring 0-255 pixel values into [0, 1] -- confirm against the dataset).
# NOTE(review): x_train/x_test are overwritten, so running this cell a second
# time divides by 255 again; reload the data before re-running.
x_train = x_train/255.
x_test = x_test/255.

In [23]:
# Widths of the two LSTM layers and the number of output classes.
units_1, units_2, num_classes = 128, 256, 10

model = LSTM(units1=units_1, units2=units_2, num_classes=num_classes)

In [24]:
model.compile(optimizer='sgd', loss=keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])

In [25]:
model.fit(x_train, y_train, validation_data = (x_test, y_test), batch_size=32, epochs=3)

Epoch 1/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m291s[0m 154ms/step - accuracy: 0.3216 - loss: 1.9923 - val_accuracy: 0.6710 - val_loss: 0.9877
Epoch 2/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m277s[0m 130ms/step - accuracy: 0.7289 - loss: 0.8086 - val_accuracy: 0.8554 - val_loss: 0.4335
Epoch 3/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 126ms/step - accuracy: 0.8673 - loss: 0.4086 - val_accuracy: 0.8941 - val_loss: 0.3155


<keras.src.callbacks.history.History at 0x7dbf8aa8efe0>

## Bidirectional LSTM Application

In [26]:
class Bi_LSTM(Model):
  """Keras model: an LSTM layer, then a bidirectional LSTM layer, and a softmax head.

  Args:
    units1: units of the first (unidirectional) LSTM layer, which returns the full sequence.
    units2: units of the LSTM wrapped in Bidirectional.
    num_classes: width of the softmax output layer.
  """

  def __init__(self, units1, units2, num_classes):
    super().__init__()

    stack = [
        layers.LSTM(units1, return_sequences=True),
        layers.BatchNormalization(),
        layers.Bidirectional(layers.LSTM(units2)),
        layers.BatchNormalization(),
        layers.Dense(num_classes, activation=tf.nn.softmax),
    ]
    self.sequential = tf.keras.Sequential(stack)

  def call(self, x):
    """Run x through the sequential stack and return its output."""
    return self.sequential(x)

In [27]:
model = Bi_LSTM(units_1, units_2, num_classes)

In [28]:
model.compile(optimizer='adam', loss=keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])

In [29]:
model.fit(x_train, y_train, validation_data = (x_test, y_test), batch_size=32, epochs=3)

Epoch 1/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m437s[0m 230ms/step - accuracy: 0.8155 - loss: 0.5441 - val_accuracy: 0.9631 - val_loss: 0.1111
Epoch 2/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m427s[0m 228ms/step - accuracy: 0.9746 - loss: 0.0854 - val_accuracy: 0.9804 - val_loss: 0.0667
Epoch 3/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m444s[0m 229ms/step - accuracy: 0.9827 - loss: 0.0583 - val_accuracy: 0.9832 - val_loss: 0.0501


<keras.src.callbacks.history.History at 0x7dbf80891210>