## 패키지 선언

In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dataset
import torchvision.transforms as transform
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

  warn(


In [7]:
# Report whether PyTorch can see a usable CUDA device.
cuda_ok = torch.cuda.is_available()
print(cuda_ok)

True


## Dataset 다운로드

In [8]:
# Both splits use the same storage root, tensor conversion and download flag.
shared_args = dict(root="./", transform=transform.ToTensor(), download=True)

# Download (if needed) and open the training split.
cifar100_train = dataset.CIFAR100(train=True, **shared_args)
# Download (if needed) and open the test split.
cifar100_test = dataset.CIFAR100(train=False, **shared_args)

Files already downloaded and verified
Files already downloaded and verified


## 신경망 모델 정의
- Fully Connected Layer -> nn.Linear(in_features, out_features)
- Convolutional Layer -> nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
- ReLU -> nn.ReLU()
- Max Pooling -> nn.MaxPool2d(kernel_size, stride)


In [39]:
class Network(nn.Module):
  """Convolutional classifier producing 100 logits per input image.

  The body has four stages. Each stage runs two 3x3 conv -> BatchNorm -> ReLU
  layers, adds a parallel 3x3 convolution of the stage input (the skip
  branch, which also widens the channels to match), and then halves the
  spatial size with 2x2 max pooling. Channel widths after each stage are
  32, 64, 256 and 512.

  The head is four fully connected layers (2048 -> 1024 -> 512 -> 256 -> 100).
  ``fc1`` expects 2048 = 512 * 2 * 2 features, so inputs must be
  3-channel 32x32 images (32 halved four times is 2).
  """

  def __init__(self):
    super().__init__()

    # Main branch: two conv + batch-norm pairs per stage.
    self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
    self.bn1_1 = nn.BatchNorm2d(16)
    self.conv1_2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
    self.bn1_2 = nn.BatchNorm2d(32)
    self.conv2_1 = nn.Conv2d(in_channels=32, out_channels=48, kernel_size=3, stride=1, padding=1)
    self.bn2_1 = nn.BatchNorm2d(48)
    self.conv2_2 = nn.Conv2d(in_channels=48, out_channels=64, kernel_size=3, stride=1, padding=1)
    self.bn2_2 = nn.BatchNorm2d(64)
    self.conv3_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
    self.bn3_1 = nn.BatchNorm2d(128)
    self.conv3_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
    self.bn3_2 = nn.BatchNorm2d(256)
    self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
    self.bn4_1 = nn.BatchNorm2d(384)
    self.conv4_2 = nn.Conv2d(in_channels=384, out_channels=512, kernel_size=3, stride=1, padding=1)
    self.bn4_2 = nn.BatchNorm2d(512)

    # Fully connected classification head.
    self.fc1 = nn.Linear(in_features=2048, out_features=1024)
    self.fc2 = nn.Linear(in_features=1024, out_features=512)
    self.fc3 = nn.Linear(in_features=512, out_features=256)
    self.fc4 = nn.Linear(in_features=256, out_features=100)

    # Parameter-free layers, shared by every stage.
    self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.relu = nn.ReLU()

    # Skip branch: one convolution per stage that maps the stage input to the
    # stage's output channel count so the two branches can be added.
    self.conv_skip1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
    self.conv_skip2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
    self.conv_skip3 = nn.Conv2d(in_channels=64, out_channels=256, kernel_size=3, stride=1, padding=1)
    self.conv_skip4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)

  def _stage(self, x, conv_skip, conv_a, bn_a, conv_b, bn_b):
    """Run one stage: main branch plus skip branch, then 2x2 max pooling."""
    skip_y = conv_skip(x)
    y = self.relu(bn_a(conv_a(x)))
    y = self.relu(bn_b(conv_b(y)))
    return self.max_pool(y + skip_y)

  def forward(self, x):
    """Return class logits of shape (batch, 100) for a batch of images.

    Raises:
      RuntimeError: if the flattened feature size is not 2048, i.e. the
        input is not 32x32. (Reshaping with a fixed 2048 instead would
        silently spill extra features into the batch dimension.)
    """
    y = self._stage(x, self.conv_skip1, self.conv1_1, self.bn1_1, self.conv1_2, self.bn1_2)
    y = self._stage(y, self.conv_skip2, self.conv2_1, self.bn2_1, self.conv2_2, self.bn2_2)
    y = self._stage(y, self.conv_skip3, self.conv3_1, self.bn3_1, self.conv3_2, self.bn3_2)
    y = self._stage(y, self.conv_skip4, self.conv4_1, self.bn4_1, self.conv4_2, self.bn4_2)

    # Flatten everything except the batch dimension so each row is one sample.
    y = y.flatten(start_dim=1)

    # Fully connected head; the last layer emits raw logits (no activation).
    y = self.relu(self.fc1(y))
    y = self.relu(self.fc2(y))
    y = self.relu(self.fc3(y))
    y = self.fc4(y)
    return y


## Hyper-parameters 지정

In [73]:
# Fixed hyperparameters.
batch_size = 100
training_epochs = 30

# Optimisation settings.
learning_rate = 0.01

# Model (placed on the GPU), loss and optimiser.
network = Network().to('cuda')
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    network.parameters(),
    lr=learning_rate,
    momentum=0.9,
    nesterov=True,
)

# Shuffled training batches; the trailing incomplete batch, if any, is dropped.
data_loader = DataLoader(
    cifar100_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

## CNN 학습을 위한 반복문 선언

In [74]:
# Send each batch to whichever device the model's parameters live on.
device = next(network.parameters()).device
# Number of batches per epoch; constant, so computed once.
total_batch = len(data_loader)

for epoch in range(training_epochs):
  network.train()  # BatchNorm layers use batch statistics while training
  avg_cost = 0

  for img, label in data_loader:
    img = img.to(device)
    label = label.to(device)

    pred = network(img)
    loss = loss_function(pred, label)

    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()

    # Detach before accumulating: adding the raw loss would keep every
    # iteration's autograd history alive for the rest of the epoch.
    avg_cost += loss.detach() / total_batch

  print('Epoch: %d Loss = %f'%(epoch+1, avg_cost))
print('Learning finished')

Epoch: 1 Loss = 3.797328
Epoch: 2 Loss = 2.765502
Epoch: 3 Loss = 2.195975
Epoch: 4 Loss = 1.795580
Epoch: 5 Loss = 1.447281
Epoch: 6 Loss = 1.133775
Epoch: 7 Loss = 0.850002
Epoch: 8 Loss = 0.615856
Epoch: 9 Loss = 0.469003
Epoch: 10 Loss = 0.346396
Epoch: 11 Loss = 0.276044
Epoch: 12 Loss = 0.232010
Epoch: 13 Loss = 0.188048
Epoch: 14 Loss = 0.158816
Epoch: 15 Loss = 0.134752
Epoch: 16 Loss = 0.112392
Epoch: 17 Loss = 0.094978
Epoch: 18 Loss = 0.086762
Epoch: 19 Loss = 0.076946
Epoch: 20 Loss = 0.068027
Epoch: 21 Loss = 0.061557
Epoch: 22 Loss = 0.058384
Epoch: 23 Loss = 0.053574
Epoch: 24 Loss = 0.052377
Epoch: 25 Loss = 0.051170
Epoch: 26 Loss = 0.038249
Epoch: 27 Loss = 0.037341
Epoch: 28 Loss = 0.041728
Epoch: 29 Loss = 0.048347
Epoch: 30 Loss = 0.038000
Learning finished


## 학습이 완료된 모델을 이용해 정답률 확인

In [75]:
# Evaluate on the CPU with BatchNorm switched to its running statistics.
network.eval()
network = network.to('cpu')

# Build the test tensors directly from the raw dataset arrays: move the
# channel axis to position 1 and scale the pixel values by 1/255.
# NOTE(review): assumes cifar100_test.data is a uint8 (N, H, W, C) array - confirm.
img_test = torch.tensor(np.transpose(cifar100_test.data,(0,3,1,2))) / 255.
label_test = torch.tensor(cifar100_test.targets)

# Images per forward pass. Chunking keeps peak activation memory bounded
# instead of pushing the whole test set through the network at once.
eval_batch_size = 1000

with torch.no_grad():  # no gradients are needed for evaluation
  prediction = torch.cat([network(chunk) for chunk in img_test.split(eval_batch_size)])

  correct_prediction = torch.argmax(prediction, 1) == label_test
  accuracy = correct_prediction.float().mean()
  print('Accuracy:', accuracy.item())

Accuracy: 0.5083000063896179
