Author: Pablo Nicolas Blanco

Student ID: a1609603

ASSIGNMENT 2 - Deep Learning Fundamentals

Description: Using PyTorch, this program implements a residual CNN for deep learning on the CIFAR-10 image data set. The depths of both the convolutional and the dense parts of the network can be increased using the flags addResidualBlocksFlag and extraDeepLayerFlag in the Net class. The mini-batch size can also be specified in the data-loading code cell. The Adam optimiser with default parameters is used to train the network, together with a cross-entropy loss. During training, the training and validation accuracies are printed out for each epoch. The network parameters that achieve the highest validation accuracy are saved. The trained network is then applied to a test data set, and the test accuracies for the individual image classes are reported.

In [None]:
# imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import numpy as np



Load the data, convert it to tensors, apply normalisation and set the mini-batch size. The training and test data sets are downloaded separately. The training set is further split into two sets: a training set and a validation set (using an 80%/20% random split with a fixed seed).

In [None]:
# One mini-batch size shared by the training, validation and test loaders
miniBatchSize = 10

# Every image is converted to a tensor and then normalised with a mean of 0.5
# and a standard deviation of 0.5 on each of the three RGB channels
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Options common to all three data loaders
loader_options = dict(batch_size=miniBatchSize, num_workers=2)

# Original training set (50000 images), downloaded on first use
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)

# Hold back 10000 of the 50000 training images (20%) for validation; the
# generator is seeded so that the split is the same on every run
split_generator = torch.Generator().manual_seed(42)
trainset, validset = torch.utils.data.random_split(
    trainset, [40000, 10000], generator=split_generator)

# Mini-batch loaders for the training and validation subsets (both shuffled)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_options)
validloader = torch.utils.data.DataLoader(validset, shuffle=True, **loader_options)

# Test set (10000 images) and its mini-batch loader, read in a fixed order
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_options)

# Class names indexed by integer label: 0 is airplane, 1 is automobile, 2 is bird, etc
class_names = (
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
class Net(nn.Module):
    """Small residual CNN mapping 3x32x32 images to 10 class scores.

    Layout: a 7x7 convolution (3 -> 6 channels), 2x2 max pooling, one or two
    6-channel residual blocks, one or two 16-channel residual blocks (the first
    one changes the channel count and uses a 1x1 convolution in its skip
    connection), an unpadded 7x7 convolution, 2x2 max pooling and a dense head
    with two or three linear layers.

    All layers are always created, whatever the flags are, so the keys of
    ``state_dict()`` do not depend on the chosen depth.

    Args:
        addResidualBlocksFlag: when True, one extra residual block is applied
            after each of the two mandatory residual blocks.
        extraDeepLayerFlag: when True, the dense head is 400 -> 120 -> 84 -> 10;
            when False it is 400 -> 120 -> 10.
    """

    def __init__(self, addResidualBlocksFlag=False, extraDeepLayerFlag=True):
        super().__init__()

        # adjustable depth flags of the network, read in forward()
        self.addResidualBlocksFlag = addResidualBlocksFlag
        self.extraDeepLayerFlag = extraDeepLayerFlag

        # BatchNorm2d takes just the number of channels
        # Conv2d takes the number of input channels and the number of filters (there are as many feature maps/channels generated as there are filters)

        self.conv6_kernel7_change = nn.Conv2d(3, 6, kernel_size=7, padding=3)  # change from 3 channels/feature maps to 6 channels/feature maps by applying 6 filters

        self.bn6_0 = nn.BatchNorm2d(6)
        self.bn6_1 = nn.BatchNorm2d(6)
        self.bn6_2 = nn.BatchNorm2d(6)
        self.bn6_3 = nn.BatchNorm2d(6)
        self.bn6_4 = nn.BatchNorm2d(6)

        self.pool = nn.MaxPool2d(2, 2)  # window of size 2, applied with stride 2, halves the feature map size; used twice in forward (32x32 -> 16x16 and 10x10 -> 5x5)

        self.conv6_1 = nn.Conv2d(6, 6, kernel_size=3, padding=1)
        self.conv6_2 = nn.Conv2d(6, 6, kernel_size=3, padding=1)
        self.conv6_3 = nn.Conv2d(6, 6, kernel_size=3, padding=1)
        self.conv6_4 = nn.Conv2d(6, 6, kernel_size=3, padding=1)

        self.conv16_change = nn.Conv2d(6, 16, kernel_size=3, padding=1)
        self.conv16_change_in_skip = nn.Conv2d(6, 16, kernel_size=1)

        self.conv16_1 = nn.Conv2d(16, 16, kernel_size=3, padding=1)
        self.conv16_2 = nn.Conv2d(16, 16, kernel_size=3, padding=1)
        self.conv16_3 = nn.Conv2d(16, 16, kernel_size=3, padding=1)

        self.bn16_skip = nn.BatchNorm2d(16)
        self.bn16_0 = nn.BatchNorm2d(16)
        self.bn16_1 = nn.BatchNorm2d(16)
        self.bn16_2 = nn.BatchNorm2d(16)
        self.bn16_3 = nn.BatchNorm2d(16)
        self.bn16_4 = nn.BatchNorm2d(16)

        self.convDownsizeWithKernel7 = nn.Conv2d(16, 16, kernel_size=7)  # has no padding, therefore will reduce feature map sizes by 6

        self.fullyConn1 = nn.Linear(16 * 5 * 5, 120)
        self.fullyConn2 = nn.Linear(120, 84)
        self.fullyConn3 = nn.Linear(84, 10)

        self.fullyConn2_alternative = nn.Linear(120, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: tensor of shape (batch, 3, 32, 32); the 32x32 size is required
                by the 16 * 5 * 5 input width of the first dense layer.

        Returns:
            Tensor of shape (batch, 10). No softmax is applied here.
        """
        # convolution with kernel size 7
        x = self.conv6_kernel7_change(x)
        x = self.bn6_0(x)
        x = F.relu(x)

        # make it 16x16 with max pooling
        x = self.pool(x)

        # FIRST CONV 6 RESIDUAL BLOCK (two convolutions; no channel change is
        # needed because the change from 3 to 6 feature maps was done above)
        residual = x
        x = self.conv6_1(x)
        x = self.bn6_1(x)
        x = F.relu(x)
        x = self.conv6_2(x)
        x = self.bn6_2(x)
        x = x + residual  # the skip connection is added just before the relu of the second convolutional layer
        x = F.relu(x)

        # optional second 6-channel residual block
        if self.addResidualBlocksFlag:
            residual = x
            x = self.conv6_3(x)
            x = self.bn6_3(x)
            x = F.relu(x)
            x = self.conv6_4(x)
            x = self.bn6_4(x)
            x = x + residual  # the skip connection is added just before the relu of the second convolutional layer
            x = F.relu(x)

        # FIRST CONV 16 RESIDUAL BLOCK (two convolutions; the channel change
        # from 6 to 16 is done both in the first convolution and in a 1x1
        # convolution on the skip connection)
        residual = x
        x = self.conv16_change(x)
        x = self.bn16_0(x)
        x = F.relu(x)
        x = self.conv16_1(x)
        x = self.bn16_1(x)
        x = x + self.bn16_skip(self.conv16_change_in_skip(residual))  # 1x1 convolution brings the skip connection to 16 feature maps so the two tensors can be added
        x = F.relu(x)

        # optional second 16-channel residual block
        if self.addResidualBlocksFlag:
            residual = x
            x = self.conv16_2(x)
            x = self.bn16_2(x)
            x = F.relu(x)
            x = self.conv16_3(x)
            x = self.bn16_3(x)
            x = x + residual  # the skip connection is added just before the relu of the second convolutional layer
            x = F.relu(x)

        # Reduce feature map size from 16x16 to 10x10 with a kernel of 7 without padding
        x = self.convDownsizeWithKernel7(x)
        x = self.bn16_4(x)
        x = F.relu(x)

        # Reduce feature map size from 10x10 to 5x5 with max pooling
        x = self.pool(x)

        # Flatten everything except the batch dimension. Unlike view(-1, 400),
        # this never folds a wrongly sized feature map into the batch
        # dimension; a wrong input size raises in fullyConn1 instead.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fullyConn1(x))
        if self.extraDeepLayerFlag:
            x = F.relu(self.fullyConn2(x))
            x = self.fullyConn3(x)  # last dense layer returns raw scores, hence relu is not applied
        else:
            x = self.fullyConn2_alternative(x)  # last dense layer returns raw scores, hence relu is not applied
        return x


# instantiate the network that will be trained (default depth flags)
net = Net()
print(net)

Net(
  (conv6_kernel7_change): Conv2d(3, 6, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
  (bn6_0): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn6_1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn6_2): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn6_3): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn6_4): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv6_1): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6_2): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6_3): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6_4): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv16_change): Conv2d(6, 16, kernel_size=(3, 3), stride=(1

In [None]:
# Pick the first CUDA device when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Move the network's parameters to the chosen device, then print the device
# index reported for one of its parameters as a check
net.to(device)
conv_bias = net.conv6_1.bias
print(conv_bias.get_device())

cuda:0
0


In [None]:
# Cross entropy loss, computed from the raw class scores returned by the network
criterion = nn.CrossEntropyLoss()
# Adam optimiser with its default parameters over all network parameters
# (plain SGD alternative: optim.SGD(net.parameters(), lr=0.01, momentum=0))
optimizer = optim.Adam(params=net.parameters())

In [None]:
# store the epoch numbers and the training and validation accuracies for each epoch in these lists, these are printed and used later for postprocessing
epochs_list = []
train_accuracy_list = []
valid_accuracy_list = []

# total number of epochs used to train the model
total_epochs = 20
# variable to store the maximum validation accuracy, initialise as zero
max_valid_accuracy = 0.0

# save the network to this path
PATH = './saved_net.pth'

# iterate over the specified number of epochs
for epoch in range(total_epochs):  # loop over the dataset multiple times

    train_total = 0.0
    train_correct = 0.0

    net.train()  # this is needed because batch norm layers are present in the network, and they behave differently in training than they do in testing/evaluation
    # iterate over all the mini-batches that make up one epoch (i.e. the whole training set)
    for inputs, labels in trainloader:
        # send the inputs and labels of the mini-batch to the device at every step
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the loss gradients with respect to parameters
        optimizer.zero_grad()
        # pass the inputs forward through the network, and obtain the outputs used for prediction
        outputs = net(inputs)
        # average loss for the mini-batch (whose size is set in the data loader)
        loss = criterion(outputs, labels)
        # backpropagation to calculate the partial gradients of the loss with respect to each parameter
        loss.backward()
        # update the parameters in the network from their gradients
        optimizer.step()

        # keep counts of total samples, and of correctly predicted samples
        _, pred = outputs.max(1)
        train_total += labels.size(0)
        train_correct += pred.eq(labels).sum().item()

    valid_total = 0.0
    valid_correct = 0.0

    net.eval()  # this is needed because batch norm layers are present in the network, and they behave differently in training than they do in testing/evaluation
    # no parameter update happens during validation, so gradient tracking is
    # switched off to avoid building autograd graphs that are never used
    with torch.no_grad():
        # iterate over all the mini-batches in the validation set
        for inputs_valid, labels_valid in validloader:
            inputs_valid, labels_valid = inputs_valid.to(device), labels_valid.to(device)

            # pass the inputs forward through the network, and obtain the outputs used for prediction
            outputs_valid = net(inputs_valid)

            # keep counts of total samples, and of correctly predicted samples
            _, pred = outputs_valid.max(1)
            valid_total += labels_valid.size(0)
            valid_correct += pred.eq(labels_valid).sum().item()

    epoch_num = epoch + 1
    # accuracy in percent: correctly predicted samples divided by all samples seen in the epoch
    train_accuracy = 100*train_correct/train_total
    valid_accuracy = 100*valid_correct/valid_total

    print('Epoch: %d  Train Accuracy: %.1f %%  Validation Accuracy: %.1f %%' %
                  (epoch_num, train_accuracy , valid_accuracy))

    epochs_list.append(epoch_num)
    train_accuracy_list.append(train_accuracy)
    valid_accuracy_list.append(valid_accuracy)

    # if the validation accuracy exceeds the best one seen so far, save the network
    if valid_accuracy > max_valid_accuracy:
        max_valid_accuracy = valid_accuracy
        torch.save(net.state_dict(), PATH)

print('Training Completed')

print('The values in list form are printed below:')
print(epochs_list)
print(train_accuracy_list)
print(valid_accuracy_list)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch: 1  Train Accuracy: 43.1 %  Validation Accuracy: 54.6 %
Epoch: 2  Train Accuracy: 56.0 %  Validation Accuracy: 59.9 %
Epoch: 3  Train Accuracy: 61.6 %  Validation Accuracy: 62.0 %
Epoch: 4  Train Accuracy: 64.9 %  Validation Accuracy: 66.0 %
Epoch: 5  Train Accuracy: 67.7 %  Validation Accuracy: 65.0 %
Epoch: 6  Train Accuracy: 69.7 %  Validation Accuracy: 66.4 %
Epoch: 7  Train Accuracy: 71.1 %  Validation Accuracy: 67.2 %
Epoch: 8  Train Accuracy: 72.4 %  Validation Accuracy: 68.0 %
Epoch: 9  Train Accuracy: 73.7 %  Validation Accuracy: 67.9 %
Epoch: 10  Train Accuracy: 74.9 %  Validation Accuracy: 66.9 %
Epoch: 11  Train Accuracy: 75.8 %  Validation Accuracy: 68.1 %
Epoch: 12  Train Accuracy: 76.7 %  Validation Accuracy: 67.7 %
Epoch: 13  Train Accuracy: 77.7 %  Validation Accuracy: 67.8 %
Epoch: 14  Train Accuracy: 78.4 %  Validation Accuracy: 67.4 %
Epoch: 15  Train Accuracy: 79.2 %  Validation Accuracy: 66.3 %
Epoch: 16  Train Accuracy: 79.7 %  Validation Accuracy: 66.9 %
E

In [None]:
# initialise a network and then load the saved model from training
net = Net()
# map_location lets a checkpoint that was saved from the GPU also be loaded on a CPU-only machine
net.load_state_dict(torch.load(PATH, map_location=device))
net.to(device)

# set the network to testing mode
net.eval()

# calculate the accuracy for each class, and also the average accuracy

num_classes = len(class_names)
correctly_predicted_counts = [0] * num_classes
total_counts = [0] * num_classes
with torch.no_grad():
    for inputs, labels in testloader:
        # pass the images of the mini-batch through the network and obtain the raw class scores
        outputs = net(inputs.to(device))
        # obtain the predicted classes for this mini-batch by, for each sample, selecting the class with the highest score
        _, pred = torch.max(outputs, 1)
        # iterate through the samples that are actually in this mini-batch; the last
        # mini-batch can be smaller than miniBatchSize, so the tensors themselves
        # (not miniBatchSize) determine how many samples are counted
        for label, prediction in zip(labels.tolist(), pred.tolist()):
            # the integer class labels are used as indices for the lists that keep count of the correct and total samples for each class
            if prediction == label:
                correctly_predicted_counts[label] += 1
            total_counts[label] += 1

# print individual class accuracies and the average accuracy
# (the average is the unweighted mean of the per-class accuracies)

average_accuracy = 0
for i in range(num_classes):
    accuracy = 100 * correctly_predicted_counts[i] / total_counts[i]
    average_accuracy += accuracy
    print('Accuracy of %5s : %.1f %%' % (
        class_names[i], accuracy ))

average_accuracy = average_accuracy / num_classes
print('Average accuracy: %.1f %%' % (
        average_accuracy ))

Accuracy of airplane : 73.5 %
Accuracy of automobile : 81.5 %
Accuracy of  bird : 52.4 %
Accuracy of   cat : 50.0 %
Accuracy of  deer : 55.6 %
Accuracy of   dog : 56.1 %
Accuracy of  frog : 78.7 %
Accuracy of horse : 76.5 %
Accuracy of  ship : 74.2 %
Accuracy of truck : 80.4 %
Average accuracy: 67.9 %
