In [1]:
import os
import time
import copy
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms

In [10]:
def conv3x3(in_channels, out_channels, stride=1):
  return nn.Conv2d(in_channels=in_channels,
                   out_channels=out_channels,
                   kernel_size=3,
                   stride=stride,
                   padding=1,
                   bias=False)

In [12]:
class ResidualBlock(nn.Module):
  def __init__(self, in_channels, out_channels, stride=1, downsample=None):
    super(ResidualBlock, self).__init__()
    self.conv1 = conv3x3(in_channels, out_channels, stride)
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(out_channels, out_channels)
    self.bn2 = nn.BatchNorm2d(out_channels)
    self.downsample = downsample

  def forward(self, x):
    residual = x
    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)
    out = self.conv2(out)
    out = self.bn2(out)
    if self.downsample:
      residual = self.downsample(x)
    out += residual
    out = self.relu(out)
    return out

In [23]:
class ResNet(nn.Module):
  def __init__(self, block, layers, num_classes=10):
    # block -> ResidualBlock
    # layers -> [2, 2, 2]
    super(ResNet, self).__init__()
    self.in_channels = 16
    self.conv = conv3x3(3, 16)
    self.bn = nn.BatchNorm2d(16)
    self.relu = nn.ReLU(inplace=True)
    self.layer1 = self.init_layer(block, 16, layers[0])
    self.layer2 = self.init_layer(block, 32, layers[1], 2)
    self.layer3 = self.init_layer(block, 64, layers[2], 2)
    self.avg_pool = nn.AvgPool2d(8)
    self.fc = nn.Linear(64, num_classes)

  def init_layer(self, block, out_channels, blocks, stride=1):
    downsample = None

    if (stride != 1) or (self.in_channels != out_channels):
      # stride가 1이 아니거나
      # self.in_channels과 인자로 받은 out_channels이 다를 때
      downsample = nn.Sequential(
          conv3x3(self.in_channels, out_channels, stride=stride),
          nn.BatchNorm2d(out_channels)
      )

    # ResidualBlock append
    layers = []
    layers.append(block(self.in_channels, out_channels, stride, downsample))

    self.in_channels = out_channels
    
    for i in range(1, blocks):  
      # ResidualBlock append (input, output channel 같게)
      layers.append(block(out_channels, out_channels))

    return nn.Sequential(*layers)

  def forward(self, x):
    out = self.conv(x)
    out = self.bn(out)
    out = self.relu(out)
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.avg_pool(out)
    out = out.view(out.size(0), -1)
    out = self.fc(out)
    return out

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# hyper parameters
num_epochs = 80
learning_rate = 0.001
momentum = 0.9

transform = transforms.Compose([
                                transforms.Pad(4),
                                transforms.RandomHorizontalFlip(),
                                transforms.RandomCrop(32),
                                transforms.ToTensor() 
])

cuda


In [6]:
# dataset
train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                             train=True,
                                             transform=transform,
                                             download=True)
test_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                            train=False,
                                            transform=transforms.ToTensor())

# loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100,
                                          shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../../data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ../../data/cifar-10-python.tar.gz to ../../data/


In [25]:
model = ResNet(ResidualBlock, [2, 2, 2]).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
# learning rate 조정 스케줄러
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [33]:
def train_model(model, criterion, dataloaders, optimizer, scheduler, num_epochs=25):
  since = time.time()

  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    for phase in ['train', 'val']:
      if phase == 'train':
        # 학습 모드
        model.train()
      else:
        # 평가 모드
        model.eval()

      running_loss = 0.0
      running_corrects = 0

      for inputs, labels in dataloaders:
        # tensor tansfer from cpu to gpu.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # 경사도 0으로 설정
        optimizer.zero_grad()

        # 순전파 (propagation)
        # 학습 모드일 경우에만 연산 기록 추적 requires_grad=True
        with torch.set_grad_enabled(phase == 'train'):
          outputs = model(inputs)
          _, preds = torch.max(outputs, dim=1)
          loss = criterion(outputs, labels)

          # 학습 모드인 경우에만 역전파, 최적화
          if phase == 'train':
            loss.backward()
            optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

      if phase == 'train':
        scheduler.step()

      epoch_loss = running_loss / len(dataloaders)
      epoch_acc = running_corrects.double() / len(dataloaders)

      print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

      if phase == 'val' and epoch_acc > best_acc:
        best_acc = epoch_acc
        # 모델의 성능을 깊은 복사 (더 좋은 성능일 경우)
        best_model_wts = copy.deepcopy(model.state_dict())
    print('\n')

  time_elapsed = time.time() - since
  print('Training complete in {:.0f}m {:.0f}s'.format(
      time_elapsed // 60, time_elapsed % 60))
  print('Best val Acc: {:4f}'.format(best_acc))

  # 가장 나은 모델 가중치를 불러옴
  model.load_state_dict(best_model_wts)
  return model


In [34]:
model_ft = train_model(model,
                       criterion,
                       train_loader,
                       optimizer,
                       exp_lr_scheduler,
                       num_epochs=25)

Epoch 0/24
----------
train Loss: 129.7786 Acc: 52.7540
val Loss: 153.9791 Acc: 45.2080


Epoch 1/24
----------
train Loss: 117.2273 Acc: 57.9600
val Loss: 114.2742 Acc: 58.9000


Epoch 2/24
----------
train Loss: 108.3026 Acc: 61.3420
val Loss: 124.2459 Acc: 55.1360


Epoch 3/24
----------
train Loss: 101.2713 Acc: 63.6120
val Loss: 108.2650 Acc: 61.2520


Epoch 4/24
----------
train Loss: 95.5688 Acc: 65.9320
val Loss: 109.6899 Acc: 61.3340


Epoch 5/24
----------
train Loss: 88.0340 Acc: 68.7860
val Loss: 84.9163 Acc: 69.6640


Epoch 6/24
----------
train Loss: 86.7593 Acc: 69.1500
val Loss: 84.2150 Acc: 70.0820


Epoch 7/24
----------
train Loss: 85.6793 Acc: 69.6680
val Loss: 83.4718 Acc: 70.3080


Epoch 8/24
----------
train Loss: 85.1120 Acc: 69.9640
val Loss: 83.3629 Acc: 70.4040


Epoch 9/24
----------
train Loss: 84.7530 Acc: 70.1580
val Loss: 82.4228 Acc: 70.8420


Epoch 10/24
----------
train Loss: 83.8243 Acc: 70.3060
val Loss: 82.3215 Acc: 70.9660


Epoch 11/24
----------

In [None]:
with torch.no_grad():
  correct = 0
  total = 0
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    ouputs = mode_ft(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
  
  print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))