# Mnist CNN

## 학습 단계(code 기준)

1. 라이브러리 가져오고 (torch, torchvision, matplotlib 같은것들)
2. GPU 사용 설정 하고 random value를 위한 seed 설정!
3. 학습에 사용되는 parameter 설정!(learning_rate, training_epochs, batch_size, etc)
4. 데이터셋을 가져오고 (학습에 쓰기 편하게) loader 만들기
5. 학습 모델 만들기( class CNN(torch.nn.Module) )
6. Loss function (Criterion)을 선택하고 최적화 도구 선택(optimizer)
7. 모델 학습 및 loss check(Criterion의 output)
8. 학습된 모델의 성능을 확인한다.

In [114]:
import torch
import torch.nn as nn

In [115]:
inputs = torch.Tensor(1,1,28,28)
print("inputs shape : {}".format(inputs.shape))

conv1 = nn.Conv2d(1,32,kernel_size=3,stride=1,padding=1)
max_pooling1 = nn.MaxPool2d(kernel_size=2,stride=2)


conv2 = nn.Conv2d(32,64,kernel_size=3,stride=1,padding=1)
max_pooling2 = nn.MaxPool2d(kernel_size=2,stride=2)

print('----Layer 1 output-----')
out = conv1(inputs)
out = max_pooling1(out)
print("Layer 1 out shpae : {}".format(out.shape))


print('----Layer 2 output-----')
out = conv2(out)
out = max_pooling2(out)
print("Layer 2 out shpae : {}".format(out.shape))


out = out.view(out.size(0),-1)
print("out shpae : {}".format(out.shape))

fc = nn.Linear(3136, 10)
out = fc(out)
print("fc_out shpae : {}".format(out.shape))

inputs shape : torch.Size([1, 1, 28, 28])
----Layer 1 output-----
Layer 1 out shpae : torch.Size([1, 32, 14, 14])
----Layer 2 output-----
Layer 2 out shpae : torch.Size([1, 64, 7, 7])
out shpae : torch.Size([1, 3136])
fc_out shpae : torch.Size([1, 10])


In [116]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms

import torch.nn.init

In [42]:
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(777)
if device == "cuda":
    torch.cuda.manual_seed_all(777)

In [117]:
print(torch.cuda.is_available())

True


In [118]:
# parameter
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [119]:
# Mnist dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train= True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                          train= False,
                          transform=transforms.ToTensor(),
                          download=True)

In [120]:
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [121]:
class CNN(nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1,32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.fc = nn.Linear(7*7*64, 10, bias = True)
        torch.nn.init.xavier_uniform_(self.fc.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [122]:
model = CNN().to(device)

In [123]:
model

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=3136, out_features=10, bias=True)
)

In [124]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [125]:
# training
total_batch = len(data_loader)
print("Learning Started. It take times...")

for each in range(training_epochs):
    avg_cost = 0
    
    for X, Y in data_loader:
        X = X.to(device) # cuda 에서만 가능
        Y = Y.to(device) # cuda 에서만 가능
        
        optimizer.zero_grad()
        hypothesis = model(X)
        
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost/total_batch
    
    print("[Epoch :{}] cost = {}".format(each+1, avg_cost))
    
print("Learning Finished")

Learning Started. It take times...
[Epoch :1] cost = 0.21028193831443787
[Epoch :2] cost = 0.057190123945474625
[Epoch :3] cost = 0.0426187627017498
[Epoch :4] cost = 0.03435303270816803
[Epoch :5] cost = 0.028159743174910545
[Epoch :6] cost = 0.024411125108599663
[Epoch :7] cost = 0.019275110214948654
[Epoch :8] cost = 0.017298055812716484
[Epoch :9] cost = 0.014362220652401447
[Epoch :10] cost = 0.011574017815291882
[Epoch :11] cost = 0.009593506343662739
[Epoch :12] cost = 0.008400624617934227
[Epoch :13] cost = 0.007088321726769209
[Epoch :14] cost = 0.0072040739469230175
[Epoch :15] cost = 0.006143331527709961
Learning Finished


In [127]:
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print("Accuracy : ", accuracy.item())

Accuracy :  0.983199954032898


In [133]:
class deep_CNN(nn.Module):
    
    def __init__(self):
        super(deep_CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1,32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

            
        self.fc1 = nn.Linear(3*3*128, 625)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(625, 10, bias = True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.xavier_uniform_(self.fc2.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)        
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [134]:
model = deep_CNN().to(device)

# model test 반드시 해주기
value = torch.Tensor(1,1,28,28).to(device)
print(model(value).shape)

torch.Size([1, 10])


In [135]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [136]:
# training
total_batch = len(data_loader)
print("Learning Started. It take times...")

for each in range(training_epochs):
    avg_cost = 0
    
    for X, Y in data_loader:
        X = X.to(device) # cuda 에서만 가능
        Y = Y.to(device) # cuda 에서만 가능
        
        optimizer.zero_grad()
        hypothesis = model(X)
        
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost/total_batch
    
    print("[Epoch :{}] cost = {}".format(each+1, avg_cost))
    
print("Learning Finished")

Learning Started. It take times...
[Epoch :1] cost = 0.16870492696762085
[Epoch :2] cost = 0.043228283524513245
[Epoch :3] cost = 0.029841765761375427
[Epoch :4] cost = 0.023701490834355354
[Epoch :5] cost = 0.01934756338596344
[Epoch :6] cost = 0.01468753907829523
[Epoch :7] cost = 0.01334412582218647
[Epoch :8] cost = 0.009880347177386284
[Epoch :9] cost = 0.009876346215605736
[Epoch :10] cost = 0.008591513149440289
[Epoch :11] cost = 0.00855458714067936
[Epoch :12] cost = 0.007344287820160389
[Epoch :13] cost = 0.007414448074996471
[Epoch :14] cost = 0.004786539822816849
[Epoch :15] cost = 0.007280220743268728
Learning Finished


In [137]:
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print("Accuracy : ", accuracy.item())

Accuracy :  0.9871999621391296


### 결론

> Layer를 깊게 쌓는것도 중요하지만, 높은 정확도를 위해 효율적으로 model을 만드는게 더 중요하다. 따라서 연구가 진행된 다양한 구조를 익혀가는게 중요하다.