Basic CNN

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable # 계산을 마친 후 기울기를 구할 수 있게 해주는 패키지
from six.moves import urllib

batch_size = 64

opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)
#Load data
new_mirror = 'https://ossci-datasets.s3.amazonaws.com/mnist'
datasets.MNIST.resources = [
   ('/'.join([new_mirror, url.split('/')[-1]]), md5)
   for url, md5 in datasets.MNIST.resources
]

train_datasets = datasets.MNIST(root = './data/', train = True, transform = transforms.ToTensor(), download = True)
test_datasets = datasets.MNIST(root = './data/', train = False, transform = transforms.ToTensor(), download = True)

train_dataloader = torch.utils.data.DataLoader(dataset = train_datasets, batch_size = batch_size, shuffle = True)
test_dataloader = torch.utils.data.DataLoader(dataset = test_datasets, batch_size = batch_size, shuffle = False)

#Define the Model
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    #28*28 -> conv1: (28-5)/1+1= 24 -> MaxPooling(2) : (24-2)/2+1 = 12 
    # -> conv2 : (12-5)/1+1 = 8 -> Maxpool : (8-2)/2+1 = 4   
    # 4*4*20 = 320
    self.conv1 = nn.Conv2d(1, 10, kernel_size = 5)
    self.conv2 = nn.Conv2d(10, 20, kernel_size = 5)
    self.mp = nn.MaxPool2d(2, 2)
    self.fc = nn.Linear(4*4*20, 10)
  def forward(self, x):
    in_size = x.size(0)
    x = F.relu(self.mp(self.conv1(x))) 
    x = F.relu(self.mp(self.conv2(x)))
    x = x.view(in_size, -1)
    x = self.fc(x)
    return F.log_softmax(x)

model = Net()

optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5)

def train(epoch):
  model.train()
  for batch_idx, (data, target) in enumerate(train_dataloader):
    data, target = Variable(data), Variable(target)
    optimizer.zero_grad()
    output = model(data)

    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % 30 == 0:
      print('Train Epoch : {} [{}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(epoch, batch_idx*len(data), len(train_dataloader.dataset), 100.*batch_idx/len(train_dataloader), loss.item()))

def test():
  model.eval()
  test_loss = 0
  correct = 0
  for data, target in test_dataloader:
    data, target = Variable(data, volatile = True), Variable(target)
    output = model(data)
    test_loss = F.nll_loss(output, target, size_average = False).data
    pred = output.data.max(1, keepdim = True)[1]
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()
  test_loss /= len(test_dataloader.dataset)

  print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct,
                                                                               len(test_dataloader.dataset),
                                                                               100. * correct / len(test_dataloader.dataset)))  
  
for epoch in range(1, 10):
  train(epoch)
  test()










Test set: Average loss: 0.0002, Accuracy: 9413/10000 (94%)


Test set: Average loss: 0.0001, Accuracy: 9608/10000 (96%)


Test set: Average loss: 0.0000, Accuracy: 9714/10000 (97%)


Test set: Average loss: 0.0000, Accuracy: 9758/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9780/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9796/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9807/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9778/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9837/10000 (98%)



CNN More layers

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable # 계산을 마친 후 기울기를 구할 수 있게 해주는 패키지
from six.moves import urllib

batch_size = 64

opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)
#Load data
new_mirror = 'https://ossci-datasets.s3.amazonaws.com/mnist'
datasets.MNIST.resources = [
   ('/'.join([new_mirror, url.split('/')[-1]]), md5)
   for url, md5 in datasets.MNIST.resources
]

train_datasets = datasets.MNIST(root = './data/', train = True, transform = transforms.ToTensor(), download = True)
test_datasets = datasets.MNIST(root = './data/', train = False, transform = transforms.ToTensor(), download = True)

train_dataloader = torch.utils.data.DataLoader(dataset = train_datasets, batch_size = batch_size, shuffle = True)
test_dataloader = torch.utils.data.DataLoader(dataset = test_datasets, batch_size = batch_size, shuffle = False)

#Define the Model
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1, 10, kernel_size = 3, padding = 1)
    self.conv2 = nn.Conv2d(10, 20, kernel_size = 3, padding = 1)
    self.conv3 = nn.Conv2d(20, 40, kernel_size = 3,  stride = 2, padding = 0)
    self.mp = nn.MaxPool2d(2, 2)
    self.fc1 = nn.Linear(3*3*40, 180)
    self.fc2 = nn.Linear(180, 10)
  def forward(self, x):
    in_size = x.size(0)
    x = F.relu(self.mp(self.conv1(x))) 
    x = F.relu(self.mp(self.conv2(x)))
    x = F.relu(self.conv3(x))
    x = x.view(in_size, -1)
    x = self.fc1(x)
    x = self.fc2(x)
    return F.log_softmax(x)

model = Net()

optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5)

def train(epoch):
  model.train()
  for batch_idx, (data, target) in enumerate(train_dataloader):
    data, target = Variable(data), Variable(target)
    optimizer.zero_grad()
    output = model(data)

    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % 30 == 0:
      print('Train Epoch : {} [{}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(epoch, batch_idx*len(data), len(train_dataloader.dataset), 100.*batch_idx/len(train_dataloader), loss.item()))

def test():
  model.eval()
  test_loss = 0
  correct = 0
  for data, target in test_dataloader:
    data, target = Variable(data, volatile = True), Variable(target)
    output = model(data)
    test_loss = F.nll_loss(output, target, size_average = False).data
    pred = output.data.max(1, keepdim = True)[1]
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()
  test_loss /= len(test_dataloader.dataset)

  print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct,
                                                                               len(test_dataloader.dataset),
                                                                               100. * correct / len(test_dataloader.dataset)))  
  
for epoch in range(1, 10):
  train(epoch)
  test()










Test set: Average loss: 0.0004, Accuracy: 8854/10000 (89%)


Test set: Average loss: 0.0001, Accuracy: 9450/10000 (94%)


Test set: Average loss: 0.0000, Accuracy: 9647/10000 (96%)


Test set: Average loss: 0.0000, Accuracy: 9718/10000 (97%)


Test set: Average loss: 0.0000, Accuracy: 9711/10000 (97%)


Test set: Average loss: 0.0000, Accuracy: 9778/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9786/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9816/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9806/10000 (98%)



Inception Module

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable # 계산을 마친 후 기울기를 구할 수 있게 해주는 패키지
from six.moves import urllib

batch_size = 64

opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)
#Load data
new_mirror = 'https://ossci-datasets.s3.amazonaws.com/mnist'
datasets.MNIST.resources = [
   ('/'.join([new_mirror, url.split('/')[-1]]), md5)
   for url, md5 in datasets.MNIST.resources
]

train_datasets = datasets.MNIST(root = './data/', train = True, transform = transforms.ToTensor(), download = True)
test_datasets = datasets.MNIST(root = './data/', train = False, transform = transforms.ToTensor(), download = True)

train_dataloader = torch.utils.data.DataLoader(dataset = train_datasets, batch_size = batch_size, shuffle = True)
test_dataloader = torch.utils.data.DataLoader(dataset = test_datasets, batch_size = batch_size, shuffle = False)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
#Define the Inception
class InceptionA(nn.Module):
  def __init__(self, in_channels):
    super(InceptionA, self).__init__()
    self.branch1 = nn.Conv2d(in_channels, 24, kernel_size = 1)
    
    self.branch2 = nn.Conv2d(in_channels, 16, kernel_size = 1)
    
    self.branch31 = nn.Conv2d(in_channels, 16, kernel_size = 1)
    self.branch32 = nn.Conv2d(16, 24, kernel_size = 5, padding = 2)

    self.branch41 = nn.Conv2d(in_channels, 16, kernel_size = 1)
    self.branch42 = nn.Conv2d(16, 24, kernel_size = 3, padding = 1)
    self.branch43 =nn.Conv2d(24, 24, kernel_size = 3, padding = 1)
  def forward(self, x):
    branch1 = F.avg_pool2d(x, kernel_size = 3, stride = 1, padding = 1)
    branch1 = self.branch1(branch1)

    branch2 = self.branch2(x)

    branch3 = self.branch31(x)
    branch3 = self.branch32(branch3)

    branch4 = self.branch41(x)
    branch4 = self.branch42(branch4)
    branch4 = self.branch43(branch4)

    output = [branch1, branch2, branch3, branch4]
    return torch.cat(output, 1)

    
#Define the Model
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1, 10, kernel_size = 5)
    self.conv2 = nn.Conv2d(88, 20, kernel_size = 5)

    self.inception1 = InceptionA(in_channels = 10)
    self.inception2 = InceptionA(in_channels = 20)

    self.mp = nn.MaxPool2d(2, 2)

    self.fc = nn.Linear(1408, 10)
  def forward(self, x):
    in_size = x.size(0)
    x = F.relu(self.mp(self.conv1(x))) 
    x = self.inception1(x)
    x = F.relu(self.mp(self.conv2(x)))
    x = self.inception2(x)
    x = x.view(in_size, -1)
    x = self.fc(x)

    return F.log_softmax(x)

model = Net()
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5)

def train(epoch):
  model.train()
  for batch_idx, (data, target) in enumerate(train_dataloader):
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    output = model(data)

    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % 30 == 0:
      print('Train Epoch : {} [{}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(epoch, batch_idx*len(data), len(train_dataloader.dataset), 100.*batch_idx/len(train_dataloader), loss.item()))

def test():
  model.eval()
  test_loss = 0
  correct = 0
  for data, target in test_dataloader:
    data, target = data.to(device), target.to(device)
    output = model(data)
    test_loss = F.nll_loss(output, target, size_average = False).data
    pred = output.data.max(1, keepdim = True)[1]
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()
  test_loss /= len(test_dataloader.dataset)

  print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct,
                                                                               len(test_dataloader.dataset),
                                                                               100. * correct / len(test_dataloader.dataset)))  
  
for epoch in range(1, 10):
  train(epoch)
  test()








Test set: Average loss: 0.0001, Accuracy: 9471/10000 (95%)


Test set: Average loss: 0.0000, Accuracy: 9704/10000 (97%)


Test set: Average loss: 0.0000, Accuracy: 9727/10000 (97%)


Test set: Average loss: 0.0000, Accuracy: 9799/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9809/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9809/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9826/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9846/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9855/10000 (99%)

