In [20]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as Data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import copy

%matplotlib inline

use_cuda = torch.cuda.is_available()
if use_cuda:
    print('cuda available')
    
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
itype = torch.cuda.LongTensor if use_cuda else torch.LongTensor

# Hyper Parameters
IMG_SIZE = 28 * 28
NUM_CLASSES = 10
NUM_EPOCHS = 20
BATCH_SIZE = 64
LR = 0.001

# load data
train_dataset = datasets.MNIST(
    root = './data',
    train = True,
    transform = transforms.ToTensor(),
    download = False,
)

test_dataset = datasets.MNIST(
    root = './data',
    train = False,
    transform = transforms.ToTensor(),
)

train_loader = Data.DataLoader(
    dataset = train_dataset,
    batch_size = BATCH_SIZE,
    shuffle = True,
)

test_loader = Data.DataLoader(
    dataset = test_dataset,
    batch_size = BATCH_SIZE,
    shuffle = True
)

test_x = Variable(torch.unsqueeze(test_dataset.test_data, dim = 1)).type(torch.FloatTensor)[:2000]/255
test_y = test_dataset.test_labels[:2000]

cuda available


In [21]:
# model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels = 1,
                out_channels = 16, 
                kernel_size = 5,
                stride = 1,
                padding = 2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels = 16,
                out_channels = 32,
                kernel_size = 5,
                stride = 1,
                padding = 2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2)
        )
        self.out = nn.Linear(32 * 7 * 7, 10)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        output = F.log_softmax(output, dim = 1)
        return output
    
    def retrieve_features(self, x):
        feature_map1 = self.conv1(x)
        feature_map2 = self.conv2(feature_map1)
        return(feature_map1, feature_map2)

In [22]:
# train
def rightness(predictions, labels):
    pred = torch.max(predictions.data, 1)[1]
    rights = pred.eq(labels.data.view_as(pred)).sum()
    return rights * 100 / len(labels)

net = CNN()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr = LR)

for epoch in range(NUM_EPOCHS):
    for step, (x, y) in enumerate(train_loader):
        X = Variable(x)
        Y = Variable(y)
        
        net.train()
        output = net(X)
        loss = criterion(output, Y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if step % 100 == 0:
            net.eval()
            test_output = net(test_x)
            rights = rightness(test_output, test_y)
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % rights)

Epoch:  0 | train loss: 2.3074 | test accuracy: 11.00
Epoch:  0 | train loss: 0.2399 | test accuracy: 91.00
Epoch:  0 | train loss: 0.3387 | test accuracy: 94.00
Epoch:  0 | train loss: 0.1227 | test accuracy: 95.00
Epoch:  0 | train loss: 0.2630 | test accuracy: 96.00
Epoch:  0 | train loss: 0.1941 | test accuracy: 96.00
Epoch:  0 | train loss: 0.0240 | test accuracy: 96.00
Epoch:  0 | train loss: 0.0262 | test accuracy: 97.00
Epoch:  0 | train loss: 0.0581 | test accuracy: 97.00
Epoch:  0 | train loss: 0.1611 | test accuracy: 97.00
Epoch:  1 | train loss: 0.0762 | test accuracy: 97.00
Epoch:  1 | train loss: 0.0069 | test accuracy: 97.00
Epoch:  1 | train loss: 0.0954 | test accuracy: 97.00
Epoch:  1 | train loss: 0.0549 | test accuracy: 98.00
Epoch:  1 | train loss: 0.0203 | test accuracy: 97.00
Epoch:  1 | train loss: 0.0676 | test accuracy: 98.00
Epoch:  1 | train loss: 0.0660 | test accuracy: 98.00
Epoch:  1 | train loss: 0.0197 | test accuracy: 98.00
Epoch:  1 | train loss: 0.12

Epoch:  15 | train loss: 0.0002 | test accuracy: 99.00
Epoch:  15 | train loss: 0.0000 | test accuracy: 99.00
Epoch:  15 | train loss: 0.0000 | test accuracy: 99.00
Epoch:  15 | train loss: 0.0001 | test accuracy: 99.00
Epoch:  15 | train loss: 0.0003 | test accuracy: 98.00
Epoch:  15 | train loss: 0.0054 | test accuracy: 99.00
Epoch:  15 | train loss: 0.0039 | test accuracy: 98.00
Epoch:  15 | train loss: 0.0001 | test accuracy: 98.00
Epoch:  15 | train loss: 0.0136 | test accuracy: 99.00
Epoch:  16 | train loss: 0.0000 | test accuracy: 98.00
Epoch:  16 | train loss: 0.0002 | test accuracy: 98.00
Epoch:  16 | train loss: 0.0269 | test accuracy: 98.00
Epoch:  16 | train loss: 0.0004 | test accuracy: 98.00
Epoch:  16 | train loss: 0.0359 | test accuracy: 98.00
Epoch:  16 | train loss: 0.0003 | test accuracy: 98.00
Epoch:  16 | train loss: 0.0000 | test accuracy: 98.00
Epoch:  16 | train loss: 0.0272 | test accuracy: 99.00
Epoch:  16 | train loss: 0.0001 | test accuracy: 98.00
Epoch:  16

In [23]:
torch.save(net, 'minst_conv_checkpoint')

  "type " + obj.__name__ + ". It won't be checked "
