In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


In [2]:
# Hyper parameters -- the knobs a reader is most likely to tune
epochs, batch_size = 5, 32  # full passes over the training loader; samples per mini-batch
learning_rate = 1e-3        # passed as `lr` to the optimizer

In [3]:
# MNIST datasets (images converted with ToTensor) and their batch loaders.
# Only the training split is constructed with download=True; no download flag
# is passed for the test split.
train_dsets = dsets.MNIST(
    '../data/',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dsets = dsets.MNIST(
    '../data/',
    train=False,
    transform=transforms.ToTensor(),
)

# Training batches are reshuffled; test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dsets,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dsets,
    batch_size=batch_size,
    shuffle=False,
)

## CNN

In [9]:
# CNN model
class CNN(nn.Module):
    """Two-stage convolutional classifier for 28x28 images.

    Each stage is Conv2d(5x5, padding=2) -> BatchNorm2d -> ReLU -> MaxPool2d(2).
    The padded convolutions keep the spatial size and each pooling halves it
    (28 -> 14 -> 7), while the channel count goes in_channels -> 16 -> 32.
    A single linear layer maps the flattened 32*7*7 features to class scores.

    The linear layer's input size is fixed at 32*7*7, so inputs must be
    28x28 (other sizes will fail at the final layer).

    Args:
        in_channels: number of channels of the input images (default 1).
        num_classes: number of scores produced per sample (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(nn.Conv2d(in_channels, 16, kernel_size=5, padding=2),
                                    nn.BatchNorm2d(16),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2)) # 16x14x14

        self.layer2 = nn.Sequential(nn.Conv2d(16, 32, kernel_size=5, padding=2),
                                    nn.BatchNorm2d(32),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2)) # 32x7x7

        self.fc = nn.Linear(32*7*7, num_classes)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, num_classes).

        Args:
            x: image batch of shape (batch, in_channels, 28, 28).
        """
        out = self.layer1(x)
        out = self.layer2(out)
        # flatten() copies only when needed, so it also works for
        # non-contiguous activations (e.g. channels_last), where .view() raises.
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out

In [10]:
# Seed torch's global RNG before building the model so the random weight
# initialisation repeats across Restart & Run All.
# NOTE(review): the shuffled batch order presumably draws from the same global
# generator and would then repeat too -- confirm against the DataLoader docs.
torch.manual_seed(0)

# create model
cnn = CNN()

# Loss , Optimizer
criterion = nn.CrossEntropyLoss()  # takes the raw scores from cnn plus the integer labels
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)

In [11]:
# Training
# Put the model in training mode explicitly: if this cell is re-run after the
# evaluation cell (which calls cnn.eval()), BatchNorm would otherwise keep
# using its moving statistics during training.
cnn.train()

i = 0  # iteration counter across all epochs; only used for periodic logging
for epoch in range(epochs):
    for images, labels in train_loader:
        # Tensors are passed to the model directly; the former
        # torch.autograd.Variable wrapper is a deprecated no-op since PyTorch 0.4.
        optimizer.zero_grad()   # clear gradients left from the previous step
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        i += 1
        if i % 200 == 0:
            # loss is a 0-dim tensor: loss.data[0] raises IndexError on
            # PyTorch >= 0.5, so read the Python float with .item().
            print("Epoch: {}/{}...".format(epoch+1, epochs),
                  "Iterations: {}...".format(i),
                  "Loss: {}".format(loss.item()))

Epoch: 1/5... Iterations: 200... Loss: 0.20349068939685822
Epoch: 1/5... Iterations: 400... Loss: 0.010171846486628056
Epoch: 1/5... Iterations: 600... Loss: 0.01544178370386362
Epoch: 1/5... Iterations: 800... Loss: 0.023609258234500885
Epoch: 1/5... Iterations: 1000... Loss: 0.023287981748580933
Epoch: 1/5... Iterations: 1200... Loss: 0.2359127551317215
Epoch: 1/5... Iterations: 1400... Loss: 0.05197572708129883
Epoch: 1/5... Iterations: 1600... Loss: 0.008794616907835007
Epoch: 1/5... Iterations: 1800... Loss: 0.07880351692438126
Epoch: 2/5... Iterations: 2000... Loss: 0.07158299535512924
Epoch: 2/5... Iterations: 2200... Loss: 0.01069680880755186
Epoch: 2/5... Iterations: 2400... Loss: 0.02295682765543461
Epoch: 2/5... Iterations: 2600... Loss: 0.03023640625178814
Epoch: 2/5... Iterations: 2800... Loss: 0.1954856961965561
Epoch: 2/5... Iterations: 3000... Loss: 0.0034680014941841364
Epoch: 2/5... Iterations: 3200... Loss: 0.11457661539316177
Epoch: 2/5... Iterations: 3400... Loss: 

In [15]:
# Testing
cnn.eval() # Sets the module in evaluation mode (BN uses moving mean/var)

correct = total = 0
# No gradients are needed for evaluation; no_grad() avoids building the
# autograd graph for every batch.
with torch.no_grad():
    for images, labels in test_loader:
        outputs = cnn(images)              # one row of class scores per image
        _, pred = torch.max(outputs, 1)    # max_value, max_value_index(argmax)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int, so the percentage below
        # is computed in Python arithmetic rather than as a tensor.
        correct += (pred == labels).sum().item()

print('Accuracy: {}%'.format(100*correct/total))

Accuracy: 99.04%


In [16]:
# Print the model's layer-by-layer structure
print(cnn)

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
  )
  (fc): Linear(in_features=1568, out_features=10, bias=True)
)


In [19]:
# Element count of each parameter tensor, in the order cnn.parameters() yields them
[p.numel() for p in cnn.parameters()]

[400, 16, 16, 16, 12800, 32, 32, 32, 15680, 10]