In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable

In [2]:
#loading the dataset
trainDs = datasets.MNIST(root='./.data', train=True, transform=transforms.ToTensor(), download=True)
testDs = datasets.MNIST(root='./.data', train=False, transform=transforms.ToTensor())

In [3]:
batchSize = 100
epochs = 10

In [4]:
#Make the dataset iterable
trainLoader = torch.utils.data.DataLoader(dataset = trainDs, batch_size = batchSize, shuffle=True)
testLoader = torch.utils.data.DataLoader(dataset = testDs, batch_size = batchSize, shuffle=False)

In [5]:
print('There are {} images in the training set'.format(len(trainDs)))
print('There are {} images in the test set'.format(len(testDs)))
print('There are {} batches in the train loader'.format(len(trainLoader)))
print('There are {} batches in the testloader'.format(len(testLoader)))

There are 60000 images in the training set
There are 10000 images in the test set
There are 600 batches in the train loader
There are 100 batches in the testloader


In [6]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        #Same Padding = [(filter size - 1) / 2] (Same Padding--> input size = output size)
        # input_channel is one because the images are b&w, out=8 is an arbitrary number
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3,stride=1, padding=1)
        #The output size of each of the 8 feature maps is 
        #[(input_size - filter_size + 2(padding) / stride) +1] --> [(28-3+2(1)/1)+1] = 28 (padding type is same)
        #Batch normalization
        self.batchnorm1 = nn.BatchNorm2d(8)
        #RELU
        self.relu = nn.ReLU()
        #Max poolin
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        #After max pooling, the output of each feature map is now 28/2 = 14
        self.cnn2 = nn.Conv2d(in_channels=8, out_channels=32, kernel_size=5, stride=1, padding=2)
        #Output size of each of the 32 feature maps remains 14
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        #After max pooling, the output of each feature map is 14/2 = 7
        #Flatten the feature maps. You have 32 feature maps, each of them is of size 7x7 --> 32*7*7 = 1568
        self.fc1 = nn.Linear(in_features=1568, out_features=600)
        self.droput = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=600, out_features=10) #out is the num of classses we have
    def forward(self,x):
        out = self.cnn1(x)
        out = self.batchnorm1(out)
        out = self.relu(out)
        out = self.maxpool1(out)
        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu(out)
        out = self.maxpool2(out)
        #Now we have to flatten the output. This is where we apply the feed forward neural network as learned before! 
        #It will take the shape (batch_size, 1568) = (100, 1568)
        out = out.view(-1,1568)
        #Then we forward through our fully connected layer 
        out = self.fc1(out)
        out = self.relu(out)
        out = self.droput(out)
        out = self.fc2(out)
        return out
        

In [7]:
model = CNN()
CUDA = torch.cuda.is_available()
if CUDA:
    model = model.cuda()
else:
    print('CUDA is not available')
lossFunction = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)

CUDA is not available


In [8]:
iter = 0
for epoch in range(epochs):
    for i, (images, labels) in enumerate(trainLoader):
        iter += 1
        if CUDA:
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images)
            labels = Variable(labels)
            
        optimizer.zero_grad()
        outputs = model(images)
        loss = lossFunction(outputs, labels)
        loss.backward()
        optimizer.step()
        # Test the model every 100 iterarion, Calculate and print the testing accuracy
        if (i+1) % 100 == 0:
            correct = 0
            total = 0
            for images, labels in testLoader:
                if CUDA:
                    images = Variable(images.cuda())
                else:
                    images = Variable(images)
            
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            if CUDA:
                correct += (predicted.cpu() == labels.cpu()).sum()
            else:
                correct += (predicted == labels).sum()
                
            accuracy = 100 * correct / total
            print('Iteration {}, Training Loss: {}, Test Accuracy: {}%'.format(iter, loss.item(), accuracy))
print("TRAINING DONE!")

Iteration 100, Training Loss: 1.28413975239, Test Accuracy: 76%
Iteration 200, Training Loss: 0.636543154716, Test Accuracy: 83%
Iteration 300, Training Loss: 0.562572360039, Test Accuracy: 86%
Iteration 400, Training Loss: 0.35500022769, Test Accuracy: 84%
Iteration 500, Training Loss: 0.329743921757, Test Accuracy: 89%
Iteration 600, Training Loss: 0.278683722019, Test Accuracy: 89%
Iteration 700, Training Loss: 0.184824258089, Test Accuracy: 94%
Iteration 800, Training Loss: 0.335571944714, Test Accuracy: 93%
Iteration 900, Training Loss: 0.248443931341, Test Accuracy: 91%
Iteration 1000, Training Loss: 0.219458043575, Test Accuracy: 93%
Iteration 1100, Training Loss: 0.266739606857, Test Accuracy: 94%
Iteration 1200, Training Loss: 0.202846392989, Test Accuracy: 95%
Iteration 1300, Training Loss: 0.0656813308597, Test Accuracy: 94%
Iteration 1400, Training Loss: 0.161116108298, Test Accuracy: 94%
Iteration 1500, Training Loss: 0.0507277213037, Test Accuracy: 97%
Iteration 1600, Tra