In [0]:
# Imports
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [2]:
# Loading training set and test set
training_set = datasets.MNIST('./data',
                              train=True,
                              transform=transforms.ToTensor(),
                              download=True)

test_set = datasets.MNIST('./data',
                          train=False,
                          transform=transforms.ToTensor(),
                          download=True)

  0%|          | 0/9912422 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 20350437.02it/s]                            


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw


32768it [00:00, 307729.92it/s]                           
0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 4984421.34it/s]                           
8192it [00:00, 128527.38it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!


In [0]:
# Setting hyperparameters
learning_rate = 0.01
batch_size = 100
iterations = 4000  # Achieves 92+ test accuracy
epochs = int(iterations / (len(training_set) / batch_size))

In [0]:
# Making train and test loaders (making dataset iterable)
training_loader = torch.utils.data.DataLoader(dataset=training_set,
                                              batch_size=batch_size,
                                              shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                          batch_size=batch_size,
                                          shuffle=False)

In [0]:
# Creating convolutional neural network model class
class ConvolutionalNeuralNetwork(nn.Module):
    def __init__(self):
        super(ConvolutionalNeuralNetwork, self).__init__()

        # Convolution 1: Input 28x28x1, Filter 9x9x32 (same pad), Output 28x28x32
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=9, stride=1, padding=4, bias=True)
        self.relu1 = nn.ReLU();
        self.maxp1 = nn.MaxPool2d(2, 2)  # Output 14x14x32

        # Convolution 2: Input 14x14x32, Filter 7x7x64 (same pad), Output 14x14x64
        self.conv2 = nn.Conv2d(32, 64, 7, 1, 3, bias=True)
        self.relu2 = nn.ReLU();
        self.maxp2 = nn.MaxPool2d(2, 2)  # Output 7x7x64

        # Convolution 3: Input 7x7x64, Filter 3x3x128 (same pad), Output 7x7x128
        self.conv3 = nn.Conv2d(64, 128, 5, 1, 2, bias=True)
        self.relu3 = nn.ReLU();
        self.maxp3 = nn.MaxPool2d(2, 2)  # Output 3x3x128

        # Fully-connected layer 1: 768x512 (3x3x128 = 768)
        self.linear1 = nn.Linear(128 * 3 * 3, 512)
        self.relu4 = nn.ReLU()

        # Fully-connected layer 2: 512x10
        self.linear2 = nn.Linear(512, 10)

    def forward(self, x):
        # Conv1
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.maxp1(out)

        # Conv2
        out = self.conv2(out)
        out = self.relu2(out)
        out = self.maxp2(out)

        # Conv3
        out = self.conv3(out)
        out = self.relu3(out)
        out = self.maxp3(out)

        # Flattening the output
        out = out.view(-1, 3 * 3 * 128)

        # FC1
        out = self.linear1(out)
        out = self.relu4(out)

        # FC2
        out = self.linear2(out)

        return out

In [7]:
# Instantiating the model
model = ConvolutionalNeuralNetwork()

if torch.cuda.is_available():
  model.cuda()
  print('All set on GPU!')

All set on GPU!


In [0]:
# Using cross-entropy loss
criterion = nn.CrossEntropyLoss()

# Using stochastic gradient descent as optimizer
optimizer = torch.optim.SGD(model.parameters(), learning_rate)


In [14]:
# Training loop
iter = 1
print('Now training...')
for epoch in range(epochs):
    for i, (images, labels) in enumerate(training_loader):

        # Convert data to Pytorch Variables
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Clear (previous) gradients
        optimizer.zero_grad()

        # Forward pass through the model
        output = model(images)

        # Calculate loss
        loss = criterion(output, labels)

        # Calculate gradients from the loss
        loss.backward()

        # Update model's paramters
        optimizer.step()

        iter += 1

        if iter % 500 == 0:
            # Calculate Accuracy
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                images = Variable(images.cuda())

                # Forward pass to get output
                outputs = model(images)

                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)

                # Total number of labels
                total += labels.size(0)

                # Total correct predictions
                correct += (predicted.cpu() == labels.cpu()).sum()

            accuracy = 100 * correct / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.data, accuracy))

Now training...
Iteration: 500. Loss: 0.05022696033120155. Accuracy: 98
Iteration: 1000. Loss: 0.06893280893564224. Accuracy: 98
Iteration: 1500. Loss: 0.054727524518966675. Accuracy: 98
Iteration: 2000. Loss: 0.02707129903137684. Accuracy: 98
Iteration: 2500. Loss: 0.01298839133232832. Accuracy: 98
Iteration: 3000. Loss: 0.06773167103528976. Accuracy: 98
Iteration: 3500. Loss: 0.018859662115573883. Accuracy: 98


In [15]:
# Testing the model
print('Testing model...')
for i, (images, labels) in enumerate(test_loader):
    images = Variable(images.cuda())

    # Forward pass to get output
    outputs = model(images)

    # Get predictions from the maximum value
    _, predicted = torch.max(outputs.data, 1)

    print('Prediction: ', end='')
    print(predicted[0], end=' ')
    print('Label for Image %d : ' % i, end='')
    print(labels[0])

Testing model...
Prediction: tensor(7, device='cuda:0') Label for Image 0 : tensor(7)
Prediction: tensor(6, device='cuda:0') Label for Image 1 : tensor(6)
Prediction: tensor(3, device='cuda:0') Label for Image 2 : tensor(3)
Prediction: tensor(4, device='cuda:0') Label for Image 3 : tensor(4)
Prediction: tensor(2, device='cuda:0') Label for Image 4 : tensor(2)
Prediction: tensor(3, device='cuda:0') Label for Image 5 : tensor(3)
Prediction: tensor(6, device='cuda:0') Label for Image 6 : tensor(6)
Prediction: tensor(1, device='cuda:0') Label for Image 7 : tensor(1)
Prediction: tensor(8, device='cuda:0') Label for Image 8 : tensor(8)
Prediction: tensor(1, device='cuda:0') Label for Image 9 : tensor(1)
Prediction: tensor(9, device='cuda:0') Label for Image 10 : tensor(9)
Prediction: tensor(7, device='cuda:0') Label for Image 11 : tensor(7)
Prediction: tensor(8, device='cuda:0') Label for Image 12 : tensor(8)
Prediction: tensor(4, device='cuda:0') Label for Image 13 : tensor(4)
Prediction: t