# Chapter 07. CNN MNIST Example in Pytorch

In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable

## 1. 하이퍼파라미터 설정

In [0]:
batch_size = 100          # 배치 크기
learning_rate = 0.0002    # 학습률
num_epoch = 50            # epoch

## 2. 입력 데이터
### 2.1. 데이터 로드

In [0]:
mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

### 2.2. 데이터 확인

In [4]:
print('(학습 데이터의 이미지 크기, 학습 데이터의 개수) :', mnist_train.__getitem__(0)[0].size(), ',', mnist_train.__len__())
print('(시험 데이터의 이미지 크기, 시험 데이터의 개수) :', mnist_test.__getitem__(0)[0].size(), ',',  mnist_test.__len__())

(학습 데이터의 이미지 크기, 학습 데이터의 개수) : torch.Size([1, 28, 28]) , 60000
(시험 데이터의 이미지 크기, 시험 데이터의 개수) : torch.Size([1, 28, 28]) , 10000


### 2.3. 데이터로더 설정
`DataLoader`로 부터 원하는 `batch_size`만큼의 데이터를 받아올 수 있다.
* `dataset` : 입력 데이터
* `batch_size` : 배치 크기
* `shuffle` : True면 매 epoch마다 데이터를 섞음
* `num_workers` : 멀티쓰레딩을 통해 빠르게 데이터를 로드

In [0]:
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=2,drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=2,drop_last=True)

## 3. 신경망 모델 정의
### 3.1. 합성곱 계층 : Conv2d 클래스
`CLASS torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')`
- input size: $(N, C_{\text{in}}, H, W)$
- output size: $(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})$
- output value: 
$$ \text{out} (N_i,C_{\text{out}_j}) = \text{bias} (C_{\text{out}_j}) + 
 \sum_{k=0}^{C_{\text{in}} - 1} \text{weight} (C_{\text{out}_j}, k) * \text{input} (N_i, k)  $$
- output shape: 
$$ H_{\text{out}} = \frac{H + 2P -FH}{S} + 1, \text{where FH is filter height, P is padding, S is stride} $$
$$ W_{\text{out}} = \frac{W + 2P -FW}{S} + 1, \text{where FW is filter width, P is padding, S is stride} $$

### 3.2. 풀링 계층 : MaxPool2d 클래스
`CLASS torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)`
- input size: $(N, C_{\text{in}}, H, W)$
- output size: $(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})$
- output shape: 
$$ H_{\text{out}} = \frac{H}{S}, \text{Usually S = pooling window size} $$
$$ W_{\text{out}} = \frac{W}{S}, \text{Usually S = pooling window size} $$

In [0]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.layer = nn.Sequential(
            # in_channels=1, out_channels=16, filter_size=5
            # stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'
            # input shape = (batch_size, 1, 28, 28)
            # output shape = (batch_size, 16, 24, 24)
            nn.Conv2d(1,16,5),
            nn.ReLU(),
            
            # in_channels=16, out_channels=32, filter_size=5
            # output shape = (batch_size, 32, 20, 20)
            nn.Conv2d(16,32,5),    
            nn.ReLU(),
            
            # output shape = (batch_size, 32, 10, 10)
            nn.MaxPool2d(2,2),
            
            # in_channels=32, out_channels=64, filter_size=5
            # output shape = (batch_size, 64, 6, 6)
            nn.Conv2d(32,64,5),    
            nn.ReLU(),
            
            # output shape = (batch_size, 64, 3, 3)
            nn.MaxPool2d(2,2)     
        )
        
        # 마지막 Layer
        self.fc_layer = nn.Sequential(
            nn.Linear(64*3*3,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )       
        
    def forward(self,x):
        out = self.layer(x)
        out = out.view(batch_size,-1)
        out = self.fc_layer(out)

        return out

In [0]:
# model = CNN()
# GPU사용
model = CNN().cuda()

## 4. 손실 함수와 경사하강법(Optimizer) 객체 생성

In [0]:
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

## 5. 학습

In [9]:
print('='*30, 'Start', '='*30)

for i in range(num_epoch):
    for j,[image,label] in enumerate(train_loader):
        # x = Variable(image)
        # y_= Variable(label)
        
        # GPU사용
        x = Variable(image).cuda()
        y_= Variable(label).cuda()
    
        optimizer.zero_grad()          # 기울기 초기화
        output = model.forward(x)      # 예측값
        loss = loss_func(output,y_)    # 손실함수값
        loss.backward()                # 기울기
        optimizer.step()               # 파라미터 업데이트
        
        if j % 100 == 0:
            print('[epoch] :', i, '| [iter] :', j, '| [loss] :', loss.data)
            
print('='*30, 'End', '='*30)

[epoch] : 0 | [iter] : 0 | [loss] : tensor(2.2995, device='cuda:0')
[epoch] : 0 | [iter] : 100 | [loss] : tensor(2.3064, device='cuda:0')
[epoch] : 0 | [iter] : 200 | [loss] : tensor(2.2988, device='cuda:0')
[epoch] : 0 | [iter] : 300 | [loss] : tensor(2.3007, device='cuda:0')
[epoch] : 0 | [iter] : 400 | [loss] : tensor(2.3028, device='cuda:0')
[epoch] : 0 | [iter] : 500 | [loss] : tensor(2.3031, device='cuda:0')
[epoch] : 1 | [iter] : 0 | [loss] : tensor(2.3049, device='cuda:0')
[epoch] : 1 | [iter] : 100 | [loss] : tensor(2.3017, device='cuda:0')
[epoch] : 1 | [iter] : 200 | [loss] : tensor(2.3084, device='cuda:0')
[epoch] : 1 | [iter] : 300 | [loss] : tensor(2.3118, device='cuda:0')
[epoch] : 1 | [iter] : 400 | [loss] : tensor(2.2949, device='cuda:0')
[epoch] : 1 | [iter] : 500 | [loss] : tensor(2.3001, device='cuda:0')
[epoch] : 2 | [iter] : 0 | [loss] : tensor(2.3103, device='cuda:0')
[epoch] : 2 | [iter] : 100 | [loss] : tensor(2.3032, device='cuda:0')
[epoch] : 2 | [iter] : 200

## 6. 모델 성능 평가

In [10]:
correct = 0
total = 0

for image,label in test_loader:

    # x = Variable(image,volatile=True)
    # y_= Variable(label)
    
    # GPU사용
    x = Variable(image).cuda()
    y_= Variable(label).cuda()
    
    output = model.forward(x)
    _,output_index = torch.max(output,1)
        
    total += label.size(0)
    correct += (output_index == y_).sum().float()
    
print("Accuracy of Test Data: {}".format(100*correct/total))

Accuracy of Test Data: 89.77999877929688
