<a href="https://colab.research.google.com/github/Happy-Virus-IkBeom/Pytorch/blob/master/MNIST_gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import itertools
from IPython.display import Image
from IPython import display
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

#학습 데이터 download
"""
training data, validation data 나누는 이유 : 
training data를 학습하다 보면 loss 는 계속 줄어들지만,실제로 써보면 어느순간부터 overfitting 때문에 loss 가 큰데, 
그 지점을 Validation data로 찾아낸다. validation data의 loss가 가장 작은지점이 최적의 지점.
Test data는 말그대로 test data. 
즉, 3개 데이터의 목적이 다른것이다.

"""
trn_dataset = datasets.MNIST('../mnist_data/',
                             download = True,
                             train = True,
                             transform = transforms.Compose([
                                                             transforms.ToTensor(), # image to Tensor
                                                             transforms.Normalize((0.1307,),(0.3081,)) #image, label
                             ]))

val_dataset = datasets.MNIST('../mnist_data/',
                             download = True,
                             train = False,
                             transform = transforms.Compose([
                                                             transforms.ToTensor(), # image to Tensor
                                                             transforms.Normalize((0.1307,),(0.3081,)) #image, label
                             ]))

ㄹ

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to ../mnist_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to ../mnist_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../mnist_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../mnist_data/MNIST/raw
Processing...
Done!




In [8]:
# batch iterator 생성

batch_size = 64
trn_loader = torch.utils.data.DataLoader(trn_dataset,
                                         batch_size = batch_size,
                                         shuffle = True)

val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size = batch_size,
                                         shuffle = True)

In [19]:
# CNN 모델 생성

use_cuda = torch.cuda.is_available() # construct model on cuda if available

class CNN(nn.Module):

  def __init__(self):
    super(CNN,self).__init__() #항상 torch.nn.Module을 상속받고 시작

    conv1 = nn.Conv2d(1, 6, 5, 1) # 6 @ 24 * 24 => 1 + (28-5)/1
    # activation ReLU
    pool1 = nn.MaxPool2d(2)       # 6 @ 12 * 12 => 24/2

    conv2 = nn.Conv2d(6,16,5,1)   # 16@  8  * 8
    # activation ReLU
    pool2 = nn.MaxPool2d(2)       # 16@  4  * 4, 

    self.conv_module = nn.Sequential(
        conv1,
        nn.ReLU(),
        pool1,
        conv2,
        nn.ReLU(),
        pool2
    )

    fc1 = nn.Linear(16*4*4, 120)
    # activation ReLU 

    fc2 = nn.Linear(120, 84) 
    # activation ReLU

    fc3 = nn.Linear(84, 10) 
    

    self.fc_module = nn.Sequential(
        fc1,
        nn.ReLU(),
        fc2,
        nn.ReLU(),
        fc3
    )

    if use_cuda:
      self.conv_module = self.conv_module.cuda()
      self.fc_module = self.fc.module.cuda()

  def forward(self,x):
    out = self.conv_module(x) # 16@ 4 * 4 # batch를 고려하면 batchsize * x * y * filter
    # make linear
    dim = 1
    for d in out.size()[1:]: # 16, 4, 4 // all dimensions except the batch dimension //  [1:]이므로 x,y,filter이 선택.
      dim = dim * d          # for loop을 다 돌면 x * y * filter가 됨.
    out = out.view(-1, dim)  # 마지막에 1차원으로 만들어주는 역할. 
    out = self.fc_module(out)
    return F.softmax(out, dim=1) # Softmax 함수는 인덱스의 각 확률을 구해주는 함수. ex) 1,3,2,0 의 각 확률을 0.1, 0.1, 0.9, 0 로 나타내줌.

cnn = CNN()
print(CNN)

# loss
criterion = nn.CrossEntropyLoss() # 
# backpropagation method
learning_rate = 1e-3
optimizer = optim.Adam(cnn.parameters(), lr=learning_rate) # SGD optimizer에 가중치가 더 추가된 optimizer. 기존의 경로를 반영하기 때문에 더 효율적임.
# hyper-parameters
num_epochs = 2
num_batches = len(trn_loader)

trn_loss_list = []
val_loss_list = []
for epoch in range(num_epochs):
    trn_loss = 0.0
    for i, data in enumerate(trn_loader):
        x, label = data
        if use_cuda:
            x = x.cuda()
            label = label.cuda()
        # grad init
        optimizer.zero_grad() # gradient 초기화
        # forward propagation
        model_output = cnn(x)
        # calculate loss
        loss = criterion(model_output, label)
        # back propagation 
        loss.backward()
        # weight update
        optimizer.step()
        
        # trn_loss summary
        trn_loss += loss.item()
        # del (memory issue)
        del loss
        del model_output
        
        # 학습과정 출력
        if (i+1) % 100 == 0: # every 100 mini-batches
            with torch.no_grad(): # very very very very important!!! => gradient 제외 => validation을 구할때 gradient 값은 update 되면 안되므로.
                val_loss = 0.0
                for j, val in enumerate(val_loader):
                    val_x, val_label = val
                    if use_cuda:
                        val_x = val_x.cuda()
                        val_label =val_label.cuda()
                    val_output = cnn(val_x)
                    v_loss = criterion(val_output, val_label)
                    val_loss += v_loss
                       
            print("epoch: {}/{} | step: {}/{} | trn loss: {:.4f} | val loss: {:.4f}".format(
                epoch+1, num_epochs, i+1, num_batches, trn_loss / 100, val_loss / len(val_loader)
            ))            
            
            trn_loss_list.append(trn_loss/100)
            val_loss_list.append(val_loss/len(val_loader))
            trn_loss = 0.0

<class '__main__.CNN'>
epoch: 1/2 | step: 100/938 | trn loss: 1.9272 | val loss: 1.6783
epoch: 1/2 | step: 200/938 | trn loss: 1.6244 | val loss: 1.5725
epoch: 1/2 | step: 300/938 | trn loss: 1.5512 | val loss: 1.5314
epoch: 1/2 | step: 400/938 | trn loss: 1.5394 | val loss: 1.5233
epoch: 1/2 | step: 500/938 | trn loss: 1.5309 | val loss: 1.5175
epoch: 1/2 | step: 600/938 | trn loss: 1.5201 | val loss: 1.5062
epoch: 1/2 | step: 700/938 | trn loss: 1.5120 | val loss: 1.5069
epoch: 1/2 | step: 800/938 | trn loss: 1.5103 | val loss: 1.5115
epoch: 1/2 | step: 900/938 | trn loss: 1.5072 | val loss: 1.5141
epoch: 2/2 | step: 100/938 | trn loss: 1.5026 | val loss: 1.4914
epoch: 2/2 | step: 200/938 | trn loss: 1.5017 | val loss: 1.4905
epoch: 2/2 | step: 300/938 | trn loss: 1.5005 | val loss: 1.4925
epoch: 2/2 | step: 400/938 | trn loss: 1.4937 | val loss: 1.4930
epoch: 2/2 | step: 500/938 | trn loss: 1.4956 | val loss: 1.4955
epoch: 2/2 | step: 600/938 | trn loss: 1.4920 | val loss: 1.4922
ep