In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
inputs = torch.Tensor([1,2,3,4,5,6,7,8,9,10,11,12])

# [[1,2,3], [4,5,6], [7,8,9], [10,11,12]]

In [3]:
# Hyper-parameter
input_size = 1
seq_length = 3
hidden_size = 2
num_layers = 2
batch_size = 4

In [4]:
# nn.RNN

In [5]:
# Input : input, hidden_0  2개의 input 받음
# Output : output, hidden  2개의 output

# input : [seq_length, batch_size, input_size]
# hidden_0 : network 초기의 hidden state : [num_layers*num_directions, batch_size, input_size]
# num directions : Bidirectional RNN일 경우 2, 나머지 1
# hidden_0은 따로 초기화 하지 않으면 PyyTorch가 자동으로 0으로 초기화

# out : 마지막 RNN layer로부터 매 timesteps 마다의 output
# h_n : 모든 RNN layer로부터 마지막 timestep의 hidden value
# h_n의 shape : [num_layers*num_directions, batch_size, hidden_size]

rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
# batch_first=True : [seq, batch, feature] -> [batch, seq, feature] 으로 바뀜

In [6]:
# Input
inputs.shape

torch.Size([12])

In [10]:
inputs = inputs.view(batch_size, seq_length, input_size)

In [14]:
print('inputs :',inputs.shape) # [batch size, seq length, input size]

inputs : torch.Size([4, 3, 1])


In [15]:
# Output
out, hidden = rnn(inputs)
print('out:', out.shape) # [batch size, seq length, num_directions*hidden size]
print('hidden:', hidden.shape) # [num layers*num directions, batch size, hidden size]

out: torch.Size([4, 3, 2])
hidden: torch.Size([2, 4, 2])


In [16]:
# Bi-directional RNN
# 시간 순, 시간 역행 순 모두 RNN 적용

In [20]:
bi_rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)

In [21]:
inputs.shape

torch.Size([4, 3, 1])

In [22]:
out, hidden = bi_rnn(inputs)

In [23]:
print(out.shape)
print(hidden.shape)

torch.Size([4, 3, 4])
torch.Size([4, 4, 2])


In [24]:
# Bi-directional RNN 방향 분리

In [25]:
# out
print('out before:',out.shape)
out = out.view(batch_size, seq_length, 2, hidden_size) # 2: 방향이 앞, 뒤로 나눠짐
print('out after:',out.shape)

out before: torch.Size([4, 3, 4])
out after: torch.Size([4, 3, 2, 2])


In [26]:
out_direc1 = out[:,:,0,:]
print('out_direc1:',out_direc1.shape)
out_direc2 = out[:,:,1,:]
print('out_direc2:',out_direc2.shape)

out_direc1: torch.Size([4, 3, 2])
out_direc2: torch.Size([4, 3, 2])


In [27]:
# h_n
print('hidden before:', hidden.shape)
hidden = hidden.view(num_layers, 2, batch_size, hidden_size) # 2: directions 수
print('hidden after:', hidden.shape)

hidden before: torch.Size([4, 4, 2])
hidden after: torch.Size([2, 2, 4, 2])


In [28]:
hidden_direc1 = hidden[:,0,:,:]
print('h_n_direc1:',hidden_direc1.shape)
hidden_direc2 = hidden[:,1,:,:]
print('hidden_direc2:',hidden_direc2.shape)

h_n_direc1: torch.Size([2, 4, 2])
hidden_direc2: torch.Size([2, 4, 2])


In [29]:
# RNN Application

In [30]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# MNIST 데이터셋 
train_data = datasets.MNIST(
    root="../data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

test_data = datasets.MNIST(
    root="../data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Data loader
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(test_data, batch_size=128, shuffle=False)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [31]:
# RNN - many to one
class RNN(nn.Module):
    def __init__(self, num_classes):
        super(RNN, self).__init__()
        self.input_size = 28
        self.hidden_size = 128
        self.num_layers = 2
        self.RNN = nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True)
        # batch_first = True이면 (seq, batch, feature) -> (batch, seq, feature)로 바뀜
        self.fc = nn.Linear(self.hidden_size, num_classes)
    
    def forward(self, x):
        out, _ = self.RNN(x)  # out: [mini-batch, seq_length, hidden_size]
        out = self.fc(out[:, -1, :])
        return out

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RNN(num_classes=10).to(device)

In [32]:
# Loss and optimizer
CELoss = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [33]:
# Train the model
total_epochs = 3
sequence_length = 28
input_size = 28

print('number of iteration :', len(train_loader))

total_step = len(train_loader)
for epoch in range(total_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = CELoss(outputs, labels)
        
        # Backward and optimize
        adam_optimizer.zero_grad()
        loss.backward()
        adam_optimizer.step()
        
    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, loss.item()))

number of iteration : 469
Epoch [1/3], Loss: 0.4723
Epoch [2/3], Loss: 0.3501
Epoch [3/3], Loss: 0.1639


In [34]:
# 학습이 끝난 후 모델 성능 테스트
# test에서는 back propagation 작업을 하지 않으므로 gradient를 계산하지 않도록 함 - 메모리의 효율성을 위해

model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

Test Accuracy of the model on the 10000 test images: 95.84 %


In [35]:
# 학습한 모델을 model_RNN.ckpt라는 이름으로 저장
torch.save(model.state_dict(), 'model_RNN.ckpt')

In [36]:
# Bidirectional RNN Application

In [37]:
# Many to one
class Bi_RNN(nn.Module):
    def __init__(self, num_classes):
        super(Bi_RNN, self).__init__()
        self.input_size = 28
        self.hidden_size = 128
        self.num_layers = 2
        self.RNN = nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True, bidirectional=True) # bidirectional=True만 설정해주면 됨!
        # batch_first = True이면 (seq, batch, feature) -> (batch, seq, feature)로 바뀜
        self.fc = nn.Linear(self.hidden_size*2, num_classes) # Bidrectional RNN의 경우 linear에서 hidden_size * 2 해주면 됨!
    
    def forward(self, x):
        out, _ = self.RNN(x)  # out: [mini-batch, seq_length, hidden_size]
        out = self.fc(out[:, -1, :])
        return out

model = Bi_RNN(num_classes=10).to(device)

In [38]:
# Loss and optimizer
CELoss = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [39]:
# Train the model
total_epochs = 3
sequence_length = 28
input_size = 28

print('number of iteration :', len(train_loader))

total_step = len(train_loader)
for epoch in range(total_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = CELoss(outputs, labels)
        
        # Backward and optimize
        adam_optimizer.zero_grad()
        loss.backward()
        adam_optimizer.step()
        
    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, loss.item()))

number of iteration : 469
Epoch [1/3], Loss: 0.3342
Epoch [2/3], Loss: 0.2826
Epoch [3/3], Loss: 0.1866


In [40]:
# 학습이 끝난 후 모델 성능 테스트
# test에서는 back propagation 작업을 하지 않으므로 gradient를 계산하지 않도록 함 - 메모리의 효율성을 위해

model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

Test Accuracy of the model on the 10000 test images: 95.98 %
