In [1]:
'''Train CIFAR10 with PyTorch.'''
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

import os
import sys

In [2]:
# Data

print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download = True, transform=transform_train)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                        download = True, transform=transform_test)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) 

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [3]:
import matplotlib.pyplot as plt
import numpy as np

# 이미지를 보여주기 위한 함수


def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))


# 학습용 이미지를 무작위로 가져오기
dataiter = iter(trainloader)
images, labels = dataiter.next()

# 이미지 보여주기
imshow(torchvision.utils.make_grid(images[:4]))
# 정답(label) 출력
print(' '.join('%5s' % classes[labels[:4][j]] for j in range(4)))

 bird horse truck  deer


In [4]:
class LeNet(nn.Module):
    # http://yann.lecun.com/exdb/lenet/
    def __init__(self):
        super(LeNet, self).__init__()
        # 6 1x5x5 convolution kernels
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )
        
        self.fc_layer = nn.Sequential(
            nn.Linear(256 * 4 * 4, 1024),
            nn.BatchNorm1d(1024),
            nn.Linear(1024, 84),
            nn.BatchNorm1d(84),
            nn.Linear(84, 10)
        )
        
    def forward(self, x):
        
        x = self.conv_layer(x)
        
        # convert (N, C, W, H) to (N, CxWxH)
        x = x.view(x.size(0), -1)
        
        x = self.fc_layer(x)
        
        return x

In [5]:
import visdom
import time

vis = visdom.Visdom()

def train(net, criterion, optimizer, lr, num_epoch):
    iters = 0
    plot_accu = vis.line(Y=torch.zeros(1), X=torch.zeros(1),
                            opts=dict(title='Accuracy', showlegend=True))
    time.sleep(0.1)
    plot_loss = vis.line(Y=torch.zeros(1), X=torch.zeros(1),
                            opts=dict(title='Loss', showlegend=True))
    #lr_decay_point = [2000, ]
    
    train_batch_size = len(list(trainloader))
    test_batch_size = len(list(testloader))
    
    for epoch in range(num_epoch):
        avg_train_loss = 0.0
        correct = 0.0
        total = 0.0
        if epoch == 7 or epoch == 14:
            lr *= 0.1
            optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
        
        for batch_index, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.cuda(), labels.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            avg_train_loss += loss.item()
            _, predicted = outputs.max(1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            iters = iters + 1
            
            vis.line(Y=torch.tensor([loss.item()]), X=torch.tensor([iters]), 
                     win=plot_loss, update='append', name='train')
        
        vis.line(Y=torch.tensor([100.*correct/total]), X=torch.tensor([iters]), 
                     win=plot_accu, update='append', name='train')
        sys.stdout.write('Epoch %d | Loss %.5f | Training accuracy %.3f \n' 
                                 % (epoch + 1, avg_train_loss / train_batch_size, 100.*correct/total))
        
        loss = 0.0
        correct = 0.0
        total = 0.0
        
        for batch_index, (inputs, labels) in enumerate(testloader):
            inputs, labels = inputs.cuda(), labels.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            
            loss += loss.item()
            _, predicted = outputs.max(1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
        
        vis.line(Y=torch.tensor([loss]), X=torch.tensor([iters]),
                 win=plot_loss, update='append', name='test')
        vis.line(Y=torch.tensor([100.*correct/total]), X=torch.tensor([iters]), 
                     win=plot_accu, update='append', name='test')
        sys.stdout.write('[[Test]] Epoch %d | Loss %.5f | Test accuracy %.3f \n' 
                                 % (epoch + 1, loss, 100.*correct/total))
        sys.stdout.flush()



In [6]:
learning_rate = [0.1]

for lr in learning_rate:
    print("learning rate: %f" % (lr))
    T_max = 20
    net = LeNet().cuda()
    net = nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    torch.backends.cudnn.benchmark = True
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    train(net, criterion, optimizer, lr, T_max)

learning rate: 0.100000
Epoch 1 | Loss 1.34629 | Training accuracy 50.854 
[[Test]] Epoch 1 | Loss 2.09258 | Test accuracy 63.970 
Epoch 2 | Loss 0.91153 | Training accuracy 67.630 
[[Test]] Epoch 2 | Loss 1.53014 | Test accuracy 71.530 
Epoch 3 | Loss 0.75538 | Training accuracy 73.618 
[[Test]] Epoch 3 | Loss 1.56711 | Test accuracy 75.460 
Epoch 4 | Loss 0.67130 | Training accuracy 76.660 
[[Test]] Epoch 4 | Loss 1.62935 | Test accuracy 75.550 
Epoch 5 | Loss 0.62225 | Training accuracy 78.474 
[[Test]] Epoch 5 | Loss 1.20271 | Test accuracy 77.290 
Epoch 6 | Loss 0.58110 | Training accuracy 80.094 
[[Test]] Epoch 6 | Loss 1.41700 | Test accuracy 78.510 
Epoch 7 | Loss 0.54578 | Training accuracy 81.338 
[[Test]] Epoch 7 | Loss 1.41559 | Test accuracy 78.520 
Epoch 8 | Loss 0.38160 | Training accuracy 87.084 
[[Test]] Epoch 8 | Loss 0.94722 | Test accuracy 84.860 
Epoch 9 | Loss 0.32504 | Training accuracy 88.838 
[[Test]] Epoch 9 | Loss 0.85055 | Test accuracy 85.840 
Epoch 10 | Lo

In [7]:
plot_accu = vis.line(Y=torch.zeros(1), X=torch.zeros(1),
                            opts=dict(title='Accuracy', showlegend=True))
print(20)
plot_loss = vis.line(Y=torch.zeros(1), X=torch.zeros(1),
                            opts=dict(title='Loss', showlegend=True))

20


In [8]:
# No BN 49%
# BN 60%
# Larger (not deeper) conv net 81%
# Deeper conv net 83%
# Deeper & proper learning rate sheduling (decaying) 88%