In [209]:
import torch
import torch.nn as nn

In [210]:
# 배치 크기 × 채널 × 높이(height) × 너비(widht)의 크기의 텐서를 선언
inputs = torch.Tensor(1, 1, 28, 28)
print('텐서의 크기 : {}'.format(inputs.shape))

텐서의 크기 : torch.Size([1, 1, 28, 28])


In [211]:
conv1 = nn.Conv2d(1, 64, 3, padding=1) # 필터의 채널(=in_channel), 개수(out_channel), 크기
print(conv1)

Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [212]:
pool = nn.MaxPool2d(2)
print(pool)

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)


In [213]:
conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
print(conv2)

Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [214]:
pool = nn.MaxPool2d(2)
print(pool)

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)


In [215]:
conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
print(conv3)

Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [216]:
pool = nn.MaxPool2d(2)
print(pool)

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)


지금까지는 선언만한 것이고 아직 이들을 연결시키지는 않았습니다. 이들을 연결시켜서 모델을 완성시켜보겠습니다. 우선 입력을 첫번째 합성곱층을 통과시키고 합성곱층을 통과시킨 후의 텐서의 크기를 보겠습니다.

In [217]:
out = conv1(inputs)
print(out.shape)

torch.Size([1, 64, 28, 28])


In [218]:
out = pool(out)
print(out.shape)

torch.Size([1, 64, 14, 14])


In [219]:
out = conv2(out)
print(out.shape)

torch.Size([1, 128, 14, 14])


In [220]:
out = pool(out)
print(out.shape)

torch.Size([1, 128, 7, 7])


In [221]:
out = conv3(out)
print(out.shape)

torch.Size([1, 256, 7, 7])


In [222]:
out = pool(out)
print(out.shape) # batch, c, h, w -> batch, cxhxw

torch.Size([1, 256, 3, 3])


In [223]:
# 첫번째 차원인 배치 차원은 그대로 두고 나머지는 펼쳐라
out = out.view(out.size(0), -1) # (0)은 out
print(out.shape)

torch.Size([1, 2304])


In [224]:
fc = nn.Linear(2304, 10) # input_dim = 3,136, output_dim = 10
out = fc(out)
print(out.shape)

torch.Size([1, 10])


## CNN으로 MNIST 분류하기

In [225]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init

In [226]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 랜덤 시드 고정 : 고정된 무작위성
torch.manual_seed(777)

# GPU 사용 가능일 경우 랜덤 시드 고정
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [227]:
learning_rate = 0.001
training_epochs = 15
batch_size = 256

In [228]:
mnist_train = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                          train=True, # True를 지정하면 훈련 데이터로 다운로드
                          transform=transforms.ToTensor(), # 텐서로 변환
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                         train=False, # False를 지정하면 테스트 데이터로 다운로드
                         transform=transforms.ToTensor(), # 텐서로 변환
                         download=True)

In [229]:
data_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [230]:
class CNN(torch.nn.Module):

    def __init__(self):
        super(CNN, self).__init__()
        # 첫번째층
        # ImgIn shape=(?, 1, 28, 28)
        #    Conv     -> (?, 32, 28, 28)
        #    Pool     -> (?, 32, 14, 14)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        # 두번째층
        # ImgIn shape=(?, 32, 14, 14)
        #    Conv      ->(?, 64, 14, 14)
        #    Pool      ->(?, 64, 7, 7)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        # 전결합층 7x7x64 inputs -> 10 outputs

        self.fc = torch.nn.Linear(3 * 3 * 256, 10, bias=True)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)   # 전결합층을 위해서 Flatten
        out = self.fc(out)
        return out

In [231]:
# CNN 모델 정의
model = CNN().to(device) # .to('cuda') == .cuda() / .to('cpu') == .cpu()

Loss
- pytorch에서는 CrossEntropyLoss()로 sparse categorical, categorical 두개를 자동으로 인식하여 처리한다. (softmax 포함하기 때문에 모델에서 Softmax 생략)
- 이진분류 경우에는 BCELoss() 사용 (sigmoid 미포함이므로 모델에서 sigmoid 선언)

In [232]:
criterion = torch.nn.CrossEntropyLoss()   # 비용 함수에 소프트맥스 함수 포함되어져 있음.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [233]:
total_batch = len(data_loader)
print('총 배치의 수 : {}'.format(total_batch))

총 배치의 수 : 39


In [234]:
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in data_loader:
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X) # forward 실행
        cost = criterion(hypothesis, Y) # CrossEntropyLoss
        cost.backward() # gradient 계산
        optimizer.step() # weight 업데이트

        avg_cost += cost / total_batch

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))

[Epoch:    1] cost = 0.876085997
[Epoch:    2] cost = 0.167580098
[Epoch:    3] cost = 0.0987980291
[Epoch:    4] cost = 0.0620296896
[Epoch:    5] cost = 0.0434814915
[Epoch:    6] cost = 0.0338782892
[Epoch:    7] cost = 0.0240439177
[Epoch:    8] cost = 0.0139407618
[Epoch:    9] cost = 0.00962143578
[Epoch:   10] cost = 0.00796762668
[Epoch:   11] cost = 0.00497771287
[Epoch:   12] cost = 0.00534754712
[Epoch:   13] cost = 0.0045820158
[Epoch:   14] cost = 0.00224256539
[Epoch:   15] cost = 0.00161633454


In [236]:
with torch.no_grad():
    X_test = mnist_train.train_data.view(len(mnist_train), 1, 28, 28).float().to(device)
    Y_test = mnist_train.train_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

OutOfMemoryError: CUDA out of memory. Tried to allocate 11.22 GiB. GPU 0 has a total capacity of 14.74 GiB of which 2.36 GiB is free. Process 5220 has 12.38 GiB memory in use. Of the allocated memory 11.59 GiB is allocated by PyTorch, and 664.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [243]:
import gc
gc.collect()

0

In [245]:
with torch.no_grad():
    X_train = mnist_train.train_data[:1000].view(1000, 1, 28, 28).float().to(device)
    Y_train = mnist_train.train_labels[:1000].to(device)

    prediction = model(X_train)
    correct_prediction = torch.argmax(prediction, 1) == Y_train
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

Accuracy: 0.9750000238418579


## 깊은 CNN으로 MNIST 분류하기

직접 해봅시다.

위 코드를 참고하여, 더 깊은 CNN 레이어를 쌓아보고, 학습시켜봅시다.

##### 정답

In [23]:

class CNN(torch.nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer4 = torch.nn.Sequential(
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.fc = torch.nn.Linear(7 * 7 * 64, 10, bias=True)

        # 전결합층 한정으로 가중치 초기화
        torch.nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [24]:
model = CNN().cuda() # to('cuda')

criterion = torch.nn.CrossEntropyLoss().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [25]:
for epoch in range(training_epochs):
    for X, Y in data_loader:
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x256 and 3136x10)