# Convolutional Neural Network
- MNIST data
- 3 convolutional layers
- 2 fully connected layers

In [2]:
# 1. Settings
# 1) import required libraries
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable, Function
#from visdom import Visdom

In [5]:
class Swish(Function):
  @staticmethod
  def forward(ctx, i):
    result = i * i.sigmoid() #경사도 구하는 함수
    ctx.save_for_backward(result, i)
    return result

  @staticmethod
  def backward(ctx, grad_output):
    result, i =ctx.saved_variables
    sigmoid_x = i.sigmoid()
    return grad_output * (result + sigmoid_x * (1-result))

swish = Swish.apply

In [6]:
class Swish_module(nn.Module):
  def forward(self, x):
    return swish(x)

swish_layer = Swish_module()

In [27]:
# 2) Set hyperparameters

batch_size = 256
learning_rate = 0.0002
num_epoch = 2

In [21]:
# 2. Data
# Download Data

mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

In [22]:
# 2) Check Dataset
print(mnist_train.__getitem__(0)[0].size(), mnist_train.__len__())
mnist_test.__getitem__(0)[0].size(), mnist_test.__len__()

torch.Size([1, 28, 28]) 60000


(torch.Size([1, 28, 28]), 10000)

In [23]:
# 3) Set DataLoader

train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=2, drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=2, drop_last=True)

In [25]:
# 1) CNN Model

class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    self.layer = nn.Sequential(
        nn.Conv2d(1, 16, 5),
        #nn.ReLU(),
        swish_layer,
        nn.Conv2d(16, 32, 5),
        #nn.ReLU(),
        swish_layer,
        nn.MaxPool2d(2, 2),
        nn.Conv2d(32, 64, 5),
        #nn.ReLU(),
        swish_layer,
        nn.MaxPool2d(2, 2)
    )
    self.fc_layer = nn.Sequential(
        nn.Linear(64 * 3 * 3, 100),
        swish_layer,
        nn.Linear(100,10)
    )

  # method
  def forward(self,x):
    out = self.layer(x)
    out = out.view(batch_size, -1)
    out = self.fc_layer(out)

    return out

model = CNN().cuda()

In [28]:
# 2) Loss func & Optimizer

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [29]:
# 4. Train
for i in range(num_epoch):
  for j, [image, label] in enumerate(train_loader):
    x = Variable(image).cuda()
    y_ = Variable(label).cuda()

    optimizer.zero_grad()
    output = model.forward(x)
    loss = loss_func(output, y_)
    loss.backward()
    optimizer.step()

    if j % 1000 == 0:
      print(loss)


  result, i =ctx.saved_variables


tensor(2.2960, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2404, device='cuda:0', grad_fn=<NllLossBackward0>)


In [30]:
# 5. Test

correct = 0
total = 0

for image, label in test_loader:
  x = Variable(image, volatile=True).cuda()
  y_ = Variable(label).cuda()

  output = model.forward(x)
  _, output_index = torch.max(output, 1)

  total += label.size(0)
  correct += (output_index == y_).sum().float()

print("Accuracy of Test Data : {}".format(100*correct/total))

  x = Variable(image, volatile=True).cuda()


Accuracy of Test Data : 96.21394348144531
