Convolutional Neural Network to classify the CIFAR-10 Dataset

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as cm
import time
import copy
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision.transforms as transforms

from torchvision.datasets import CIFAR10

In [4]:
def count_corrects(net, batch, labels, func=None):
    '''Count how many samples of a batch the network classifies correctly.

    Args:
        net: Callable model; ``net(batch)`` must return one row of class
            scores per sample (the prediction is the index of the maximum
            along dim 1).
        batch: Tensor with the input data.
        labels: Tensor with the expected class index of every sample.
        func: Optional callable applied to ``batch`` before it is fed to
            the network.

    Returns:
        A tensor holding the number of predictions that match ``labels``.
    '''
    if func is not None:
        batch = func(batch)

    # Counting hits never needs gradients, so skip building the autograd graph.
    with torch.no_grad():
        output = net(batch)

    # max over dim 1 returns (values, indices); the indices are the predicted classes.
    _, answers = output.max(dim=1)
    return (answers == labels).sum()

def calculate_global_precision(net, data_loader, batch_size, func=None, cuda=False):
    '''Compute the percentage of correctly classified samples over a data loader.

    Args:
        net: Model to evaluate.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        batch_size: Kept for backward compatibility. It is no longer used:
            the samples are counted directly from every batch, which stays
            correct when the last batch is smaller than the others.
        func: Optional transformation applied to every batch before the
            forward pass (forwarded to ``count_corrects``).
        cuda: If True and CUDA is available, each batch is moved to the GPU.

    Returns:
        The percentage (0-100) of correct predictions as a float, or 0.0
        when the loader yields no samples.
    '''
    use_gpu = cuda and torch.cuda.is_available()
    correct = 0
    total = 0

    # Evaluation only: no gradients are required.
    with torch.no_grad():
        for images, labels in data_loader:
            if use_gpu:
                images = images.cuda()
                labels = labels.cuda()
            correct += count_corrects(net, images, labels, func)
            # Count the real number of samples instead of assuming full batches.
            total += len(labels)

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0

    # int() turns the accumulated single-element tensor into a Python number.
    return (100 * int(correct)) / total

The CIFAR-10 dataset

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. 

The dataset is divided into five training batches and one test batch, each with 10000 images. The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. Between them, the training batches contain exactly 5000 images from each class. 

In [6]:
# Load the data and divide it into train/test sets.

# Settings shared by both splits, named once so they are easy to tune and reuse.
DATA_ROOT = './data'
BATCH_SIZE = 4
NUM_WORKERS = 2

# The only preprocessing applied to either split is the conversion to tensors.
to_tensor = transforms.ToTensor()

train_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True, download=True, transform=to_tensor)
test_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False, download=True, transform=to_tensor)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
# NOTE(review): the test loader is shuffled too; evaluation does not need it — confirm whether later cells rely on it.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

# Names of the 10 classes.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


To solve the classification problem we're going to use a neural network with the following architecture:

 A convolutional layer with a (5, 5) filter and 64 output filters, with a ReLU activation function.
 An average-pooling layer of (2, 2) with stride 2.
 Another convolutional layer with a (5, 5) filter and 32 output filters, followed by batch normalization and a ReLU activation function.
 An average-pooling layer of (3, 3) with stride 3.
 A fully connected layer with 384 neurons and a ReLU activation function.
 A fully connected layer with 192 neurons and a ReLU activation function.
 An output layer with 10 neurons.

In [8]:
import torch.nn.functional as F

class CIFAR_NET(nn.Module):
    def __init__(self):
        '''Initialize the network'''
        super(CIFAR_NET, self).__init__()
        self.features = nn.Sequential(
             # First convolutional layer, the input datatype are images with RGB so it will have 3 inputs and 64 outputs.
             nn.Conv2d(3, 64, 5),
             nn.ReLU(),
             # Pooling with stride 2
             nn.AvgPool2d(2, stride=2)
        )
            
        self.features1 = nn.Sequential(
            # Second layer with 64 inputs and 32 outputs.
            nn.Conv2d(64, 32, 5),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            ## Pooling with stride = 3
            nn.AvgPool2d(3, stride=3)
        )
        
        self.classifier = nn.Sequential(
            # Finally we use 2 fully connected layers
            nn.Linear(288, 384),
            nn.ReLU(),
            nn.Linear(384, 192),
            nn.ReLU(),
            nn.Linear(192, 10)
        )
        
    def forward(self, x):
        '''
        Defines the order of the forward propagation.
        '''
        x = self.features(x)
        x = self.features1(x)
        # It is necessary to flatten the data.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
    
    def train_cnn(self, model, data_loader, criterion, optimizer, cuda=Fasle):
        '''
        Defines the training function
        '''
        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0
        ind = 0
        for epoch in range(epochs)