### 1) 합성곱과 풀링
* 다층 퍼셉트론은 몇가지 픽셀만 달라져도 민감하게 영향을 받음
* 합성곱 연산 : 합성곱을 통해 이미지의 특징을 추출하는 역할을 함
* 커널 / 필터라고 불리는 행렬로 이미지를 훑음
* 커널을 훑은 후의 결과를 특성맵이라고 부름
* 이때 이동범위를 스트라이드 stride라고 함
* 이때 합성곱을 통해 작아진 특성맵을 방지하기 위해 패딩 Padding을 사용할 수 있음
* 3차원 텐서의 합성곱 연산시 입력 데이터의 채널 수와 커널의 채널 수는 같아야 함函

### 2) CNN으로 MNIST 분류하기

In [1]:
import torch
import torch.nn as nn

In [2]:
inputs = torch.Tensor(1, 1, 28, 28) #배치 * 채널 * 높이* 너비
print('텐서의 크기 : {}'.format(inputs.shape))

텐서의 크기 : torch.Size([1, 1, 28, 28])


In [3]:
conv1 = nn.Conv2d(1, 32, 3, padding=1)
# 1채널 짜리를 입력받아서 32채널을 뽑아내는데 커널 사이즈는 3이고 패딩은 1입니다.
print(conv1)

Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [4]:
conv2 = nn.Conv2d(32, 64, 3, padding=1)
# 1채널 짜리를 입력받아서 32채널을 뽑아내는데 커널 사이즈는 3이고 패딩은 1입니다.
print(conv2)

Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [5]:
pool = nn.MaxPool2d(2)
# 정수 하나를 인자로 넣으면 커널 사이즈와 스트라이드가 둘 다 해당값으로 지정됩니다.

In [6]:
out = conv1(inputs)
print(out.shape)

torch.Size([1, 32, 28, 28])


In [7]:
out = pool(out)
print(out.shape)

torch.Size([1, 32, 14, 14])


In [8]:
out = conv2(out)
print(out.shape)

torch.Size([1, 64, 14, 14])


In [9]:
out = pool(out)
print(out.shape)

torch.Size([1, 64, 7, 7])


In [10]:
out.size(0)

1

In [11]:
out.size(1)

64

In [12]:
out.size(2)

7

In [13]:
out.size(3)

7

In [14]:
out = out.view(out.size(0),-1)
print(out.shape)

torch.Size([1, 3136])


In [15]:
fc = nn.Linear(3136, 10)
out = fc(out)
print(out.shape)

torch.Size([1, 10])


In [16]:
import torch
import torch.nn as nn
import torch.nn.init
import torch.optim as optim

import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [17]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)

if device == 'cuda':
  torch.cuda.manual_seed(777)

In [18]:
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [19]:
mnist_train = dsets.MNIST(root='MINST_data/',
                          train= True,
                          transform=transforms.ToTensor(),
                          download=True
                          )
mnist_test = dsets.MNIST(root='MINST_data/',
                          train= False,
                          transform=transforms.ToTensor(),
                          download=True
                          )

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MINST_data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 103743721.20it/s]


Extracting MINST_data/MNIST/raw/train-images-idx3-ubyte.gz to MINST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MINST_data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 112058921.21it/s]


Extracting MINST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MINST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MINST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 25522531.47it/s]


Extracting MINST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MINST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MINST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 6026741.15it/s]


Extracting MINST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MINST_data/MNIST/raw



In [20]:
data_loader = torch.utils.data.DataLoader(dataset= mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [21]:
class CNN(nn.Module):
  def __init__(self):
    super().__init__()

    self.layer1 = nn.Sequential(
        nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2)
        )
    self.layer2 = nn.Sequential(
        nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2)
        )
    self.fc = nn.Linear(7*7*64, 10, bias =True)
    nn.init.xavier_uniform_(self.fc.weight)
  
  def forward(self, x):
    out = self.layer1(x)
    out = self.layer2(out)
    out = out.view(out.size(0), -1)
    out = self.fc(out)
    return out

In [22]:
model = CNN().to(device)

In [23]:
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [24]:
total_batch = len(data_loader)
print('총 배치의 수 : {}'.format(total_batch))
# 600 * 100 = 60000 ( 총 60000개의 데이터라는 의미 )

총 배치의 수 : 600


In [25]:
for epoch in range(training_epochs):
  avg_loss = 0

  for x, y in data_loader: # 미니 배치 단위로 꺼내온다. => 100장씩
    x = x.to(device)
    y = y.to(device)

    optimizer.zero_grad()
    hypothesis = model(x)
    loss = loss_fn(hypothesis, y)
    loss.backward()
    optimizer.step()

    
    avg_loss += loss / total_batch

  print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_loss))

[Epoch:    1] cost = 2.11704373
[Epoch:    2] cost = 1.52152765
[Epoch:    3] cost = 0.856063962
[Epoch:    4] cost = 0.58015871
[Epoch:    5] cost = 0.472104847
[Epoch:    6] cost = 0.415761739
[Epoch:    7] cost = 0.380402803
[Epoch:    8] cost = 0.355341941
[Epoch:    9] cost = 0.336126715
[Epoch:   10] cost = 0.320798755
[Epoch:   11] cost = 0.307770908
[Epoch:   12] cost = 0.296554476
[Epoch:   13] cost = 0.286622703
[Epoch:   14] cost = 0.277739435
[Epoch:   15] cost = 0.269487798


In [26]:
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

Accuracy: 0.8985999822616577




### 3) 깊은 CNN으로 MNIST 분류하기

In [27]:
class CNN(torch.nn.Module):

    def __init__(self):
        super(CNN, self).__init__()
        self.keep_prob = 0.5
        # L1 ImgIn shape=(?, 28, 28, 1)
        #    Conv     -> (?, 28, 28, 32)
        #    Pool     -> (?, 14, 14, 32)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        # L2 ImgIn shape=(?, 14, 14, 32)
        #    Conv      ->(?, 14, 14, 64)
        #    Pool      ->(?, 7, 7, 64)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        # L3 ImgIn shape=(?, 7, 7, 64)
        #    Conv      ->(?, 7, 7, 128)
        #    Pool      ->(?, 4, 4, 128)
        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1))

        # L4 FC 4x4x128 inputs -> 625 outputs
        self.fc1 = torch.nn.Linear(4 * 4 * 128, 625, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        self.layer4 = torch.nn.Sequential(
            self.fc1,
            torch.nn.ReLU(),
            torch.nn.Dropout(p=1 - self.keep_prob))
        # L5 Final FC 625 inputs -> 10 outputs
        self.fc2 = torch.nn.Linear(625, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)   # Flatten them for FC
        out = self.layer4(out)
        out = self.fc2(out)
        return out

In [28]:
model = CNN().to(device)

In [29]:
criterion = torch.nn.CrossEntropyLoss().to(device)    # 비용 함수에 소프트맥스 함수 포함되어져 있음.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [30]:
total_batch = len(data_loader)
print('총 배치의 수 : {}'.format(total_batch))

총 배치의 수 : 600


In [34]:
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in data_loader: # 미니 배치 단위로 꺼내온다. X는 미니 배치, Y느 ㄴ레이블.
        # image is already size of (28x28), no reshape
        # label is not one-hot encoded
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))

[Epoch:    1] cost = 0.190264583
[Epoch:    2] cost = 0.0484826714
[Epoch:    3] cost = 0.0346223004
[Epoch:    4] cost = 0.0273416732
[Epoch:    5] cost = 0.0220400281
[Epoch:    6] cost = 0.0186003912
[Epoch:    7] cost = 0.0167724621
[Epoch:    8] cost = 0.0129122045
[Epoch:    9] cost = 0.0114678359
[Epoch:   10] cost = 0.0108522382
[Epoch:   11] cost = 0.0113553852
[Epoch:   12] cost = 0.00810614228
[Epoch:   13] cost = 0.0103652021
[Epoch:   14] cost = 0.0062594153
[Epoch:   15] cost = 0.00769577036


In [35]:
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

Accuracy: 0.9847999811172485
