# 7. 순환 신경망
## 7.3 Bi-LSTM

In [1]:
import torch
import torchvision
import torch.nn as nn 
import torch.optim as optim  
from torch.utils.data import DataLoader

In [None]:
# Colab only: attach Google Drive so that files stored there are reachable
# under /content/gdrive for the rest of the session.
from google.colab import drive

drive.mount(mountpoint='/content/gdrive')

In [None]:
cd/content/gdrive/My Drive/pytorch_dlbro

In [2]:
# Load MNIST.
# ToTensor converts each PIL image into a float tensor; the same transform is
# shared by the training and the test split.
tensor_mode = torchvision.transforms.ToTensor()

# Both splits are stored under ./data and downloaded on first use.
trainset = torchvision.datasets.MNIST(
    root="./data",
    train=True,
    transform=tensor_mode,
    download=True,
)
testset = torchvision.datasets.MNIST(
    root="./data",
    train=False,
    transform=tensor_mode,
    download=True,
)

# Mini-batches of 128 samples; only the training data is reshuffled each epoch.
trainloader = DataLoader(dataset=trainset, batch_size=128, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=128, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz


100.0%

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz



9.9%

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz


100.0%

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [3]:
class BiLSTM(nn.Module):
    """Bidirectional LSTM classifier that uses the outputs of every time step.

    The LSTM outputs of all ``seq_length`` steps (forward and backward
    directions concatenated) are flattened into one vector per sample and
    passed through a single linear layer that produces the class logits.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units per direction.
        num_layers: Number of stacked LSTM layers.
        seq_length: Fixed number of time steps every input must have.
        num_classes: Number of output logits.
        device: Stored as ``self.device`` for backward compatibility. The
            initial states are allocated on the device of the input tensor,
            so the module keeps working after being moved with ``.to(...)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, seq_length, num_classes, device):
        super().__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_length = seq_length
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        # Every time step contributes 2 * hidden_size features (forward and
        # backward direction) and all seq_length steps are concatenated.
        # A classifier that only looked at the last time step would instead
        # use nn.Linear(hidden_size * 2, num_classes).
        self.fc = nn.Linear(seq_length * hidden_size * 2, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, seq_length, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.

        Raises:
            ValueError: If ``x`` is not 3-D or its sequence dimension differs
                from ``seq_length``. Without this check a mismatching length
                could be silently folded into the batch dimension.
        """
        if x.dim() != 3 or x.size(1) != self.seq_length:
            raise ValueError(
                "expected input of shape (batch, %d, input_size), got %s"
                % (self.seq_length, tuple(x.shape))
            )

        batch_size = x.size(0)
        # One initial hidden/cell state per layer and direction, created
        # directly with the dtype and device of the input.
        h0 = x.new_zeros(self.num_layers * 2, batch_size, self.hidden_size)
        c0 = x.new_zeros(self.num_layers * 2, batch_size, self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        # Flatten (seq_length, 2 * hidden_size) per sample while keeping the
        # batch dimension fixed.
        out = out.reshape(batch_size, -1)
        return self.fc(out)

In [4]:
# Network hyperparameters.
num_classes = 10
hidden_size = 12
num_layers = 2

# The image tensor of the training set is indexed (sample, dim 1, dim 2):
# dim 1 is used as the sequence axis and dim 2 as the per-step feature size.
sequence_length = trainset.data.shape[1]
input_size = trainset.data.shape[2]

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Build the network and move its parameters to the selected device.
model = BiLSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    seq_length=sequence_length,
    num_classes=num_classes,
    device=device,
).to(device)

In [6]:
# Adam updates every model parameter with a learning rate of 0.005;
# the training objective is cross-entropy on the raw logits.
optimizer = optim.Adam(params=model.parameters(), lr=0.005)
criterion = nn.CrossEntropyLoss()

In [7]:
# Train for 51 epochs and report the running training accuracy of each epoch.
for epoch in range(51):
    correct, total = 0, 0
    for images, labels in trainloader:
        # Drop the channel axis so each image becomes a (rows, columns) sequence.
        inputs = images.to(device).squeeze(1)
        labels = labels.to(device)

        # Standard optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # detach() returns a tensor that is cut off from the autograd graph,
        # so the accuracy bookkeeping below does not track gradients.
        predicted = outputs.detach().max(dim=1).indices
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    epoch_acc = 100 * correct / total
    print('[%d] train acc: %.2f' % (epoch, epoch_acc))

[0] train acc: 91.56
[1] train acc: 97.38
[2] train acc: 98.13
[3] train acc: 98.48
[4] train acc: 98.72
[5] train acc: 98.80
[6] train acc: 98.93
[7] train acc: 99.08
[8] train acc: 99.14
[9] train acc: 99.28
[10] train acc: 99.30
[11] train acc: 99.30
[12] train acc: 99.36
[13] train acc: 99.45
[14] train acc: 99.47
[15] train acc: 99.46
[16] train acc: 99.55
[17] train acc: 99.64
[18] train acc: 99.55
[19] train acc: 99.62
[20] train acc: 99.65
[21] train acc: 99.57
[22] train acc: 99.69
[23] train acc: 99.60
[24] train acc: 99.77
[25] train acc: 99.72
[26] train acc: 99.65
[27] train acc: 99.75
[28] train acc: 99.74
[29] train acc: 99.64
[30] train acc: 99.73
[31] train acc: 99.86
[32] train acc: 99.86
[33] train acc: 99.59
[34] train acc: 99.70
[35] train acc: 99.81
[36] train acc: 99.80
[37] train acc: 99.82
[38] train acc: 99.77
[39] train acc: 99.81
[40] train acc: 99.78
[41] train acc: 99.69
[42] train acc: 99.80
[43] train acc: 99.88
[44] train acc: 99.72
[45] train acc: 99.7

In [8]:
def accuracy(dataloader, net=None, run_device=None):
    """Return the classification accuracy (in percent) over a dataloader.

    Args:
        dataloader: Iterable of batches where ``batch[0]`` holds the inputs
            with a singleton dimension 1 (removed with ``squeeze(1)``) and
            ``batch[1]`` holds the integer class labels.
        net: Model to evaluate. Defaults to the notebook-level ``model``.
        run_device: Device the batches are moved to. Defaults to the
            notebook-level ``device``.

    Returns:
        Accuracy as a float in ``[0, 100]``; ``0.0`` if the dataloader
        yields no samples.
    """
    net = model if net is None else net
    run_device = device if run_device is None else run_device

    correct = 0
    total = 0
    # eval() switches off layers that must not be active during evaluation.
    # Remember the current mode so it can be restored afterwards instead of
    # unconditionally forcing the model back into training mode.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for batch in dataloader:
                inputs = batch[0].to(run_device).squeeze(1)
                labels = batch[1].to(run_device)
                outputs = net(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Restore the previous mode even if evaluation raised.
        net.train(was_training)

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return 100 * correct / total

In [9]:
# Final evaluation of the trained model: training split first, then test split.
train_acc, test_acc = accuracy(trainloader), accuracy(testloader)
print("Train Acc: %.1f, Test Acc: %.1f" % (train_acc, test_acc))

Train Acc: 99.9, Test Acc: 98.7
