# Exercise 2 - Objectives

#### Improve the accuracy of the network trained from scratch:
*    Deeper network (more convolutional layers, more fc layers)
*    Try a different optimizer such as Adam or RMSprop
*    Use a different or decaying learning rate

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from tqdm import trange
# To avoid the error URLError:<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed
# NOTE(review): this replaces the default HTTPS context factory with the unverified one,
# so certificate verification is switched off for HTTPS requests that use that default
# in this process. Tolerable for a local exercise; repairing the certificate store is safer.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


In [2]:
# torchvision datasets yield PIL images. ToTensor converts them to tensors in [0, 1];
# Normalize with mean 0.5 and std 0.5 per channel then maps each value v to (v - 0.5) / 0.5, i.e. [-1, 1].
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Create the data loaders for the train and test sets
# (the data is downloaded to ./data if needed; only the training loader shuffles)
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

# The 10 classes of CIFAR-10; the position in this tuple is used as the class index when reporting accuracies
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Check the accuracy on the test set
def compute_accuracy(network, loader):
    """Print and return the top-1 accuracy of ``network`` over ``loader``.

    Args:
        network: ``nn.Module`` mapping a batch of images to per-class scores.
        loader: Iterable of batches whose first item holds the images and
            whose second item holds the integer labels.

    Returns:
        The accuracy in percent; 0.0 when ``loader`` yields no samples.

    Each batch is moved to the module-level ``device`` before the forward pass.
    """
    correct = 0
    total = 0

    # Evaluate in eval mode (matters for layers such as dropout or batch norm)
    # and restore whatever mode the caller had afterwards.
    was_training = network.training
    network.eval()
    try:
        with torch.no_grad():
            for data in loader:
                images, labels = data[0].to(device), data[1].to(device)
                outputs = network(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        network.train(was_training)

    # Guard against an empty loader instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0

    # Report the number of images actually seen rather than a fixed 10000.
    print('Accuracy of the network on the %d test images: %d %%\n' % (
        total, accuracy))
    return accuracy

In [4]:
# Check the accuracy for each class
def accuracy_per_class(network, loader):
    """Print the accuracy of ``network`` on ``loader`` separately for every class.

    Args:
        network: Model mapping a batch of images to per-class scores.
        loader: Iterable of batches whose first item holds the images and
            whose second item holds the integer labels (indices into the
            module-level ``classes`` tuple).

    Each batch is moved to the module-level ``device`` before the forward pass.
    A class that never occurs in ``loader`` is reported as having no samples.
    """
    num_classes = len(classes)
    class_correct = [0] * num_classes
    class_total = [0] * num_classes
    with torch.no_grad():
        for data in loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = network(images)
            _, predicted = torch.max(outputs, 1)
            hits = (predicted == labels).tolist()
            # Iterate over the samples actually present, so any batch size
            # (including a shorter final batch or a batch of one) works.
            for label, hit in zip(labels.tolist(), hits):
                class_correct[label] += hit
                class_total[label] += 1

    print("Accuracies for the different classes:")
    for i in range(num_classes):
        if class_total[i] == 0:
            # No sample of this class was seen; avoid dividing by zero.
            print('Accuracy of %5s : n/a (no samples)' % classes[i])
            continue
        print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

    print("\n")

# PART A: Make use of Deeper Network

* More Conv Layers
* More Fc Layers

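Output size: $O = \left\lfloor \frac{I - K + 2P}{S} \right\rfloor + 1$, where $I$ is the input size, $K$ the kernel size, $P$ the padding and $S$ the stride.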

In [5]:
import torch.nn as nn
import torch.nn.functional as F

class DeeperNet(nn.Module):
    """CNN with three conv/ReLU/max-pool stages followed by three fully connected layers.

    The first fully connected layer expects 64 * 4 * 4 features per sample,
    which is what three 2x2 poolings leave of a 32x32 input.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stages: 3x3 kernels with padding=1; a shared 2x2 max-pool
        # with stride 2 is applied after each of them in forward().
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)

        # Classifier head: 1024 -> 512 -> 64 -> 10 class scores.
        self.fc1 = nn.Linear(in_features=64 * 4 * 4, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Return the 10 raw class scores for each sample in the batch ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))

        # Flatten the feature maps into rows of fc1's input width.
        x = x.view(-1, self.fc1.in_features)

        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Check if the GPU is available: use the first CUDA device if there is one, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Create the network on the CPU/GPU
net = DeeperNet().to(device)
# Bare expression: as the last statement of a notebook cell it displays the module's layer listing
net

cpu


DeeperNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)

# Part B: change Hyperparameters:
*    Try a different optimizer such as Adam or RMSprop
*    Use a different or decaying learning rate

In [11]:
# Learning rate, passed as `lr` to each optimizer created below
alpha = 0.01

# NOTE(review): decay_rate is not referenced anywhere in the visible code —
# presumably meant for a learning-rate schedule; TODO confirm or remove
decay_rate = 0.03

# Momentum factor, passed to the SGD and RMSprop optimizers created below
momentum = 0.9

In [12]:
# Define the loss and the optimizer

import torch.optim as optim

# Cross-entropy loss applied to the raw class scores returned by the network
criterion = nn.CrossEntropyLoss()

# Three alternative optimizers over the same parameters of `net`;
# only the one handed to train() actually updates the weights.
optimizer = optim.SGD(net.parameters(), lr=alpha, momentum=momentum)

# NOTE(review): alpha is reused unchanged as Adam's learning rate — confirm it suits Adam as well as SGD
optimizerAdam = optim.Adam(net.parameters(), lr=alpha)

optimizerRMSprop = optim.RMSprop(net.parameters(), lr=alpha, momentum=momentum)

In [13]:
def train(net, epochs, trainloader, optimizer, network_name):
    """Train ``net`` for ``epochs`` passes over ``trainloader``, then save its weights.

    Args:
        net: Network to optimise; updated in place.
        epochs: Number of passes over ``trainloader``.
        trainloader: Iterable of batches (inputs first, labels second).
        optimizer: Optimizer stepping the parameters of ``net``.
        network_name: Stem of the checkpoint file, written to ``./<network_name>.pth``.

    Uses the module-level ``device`` and ``criterion``. The mean loss is printed
    every 2000 mini-batches.
    """
    report_every = 2000  # mini-batches between two loss reports

    for epoch in trange(epochs):
        loss_sum = 0.0
        for step, batch in enumerate(trainloader, start=1):
            inputs = batch[0].to(device)
            labels = batch[1].to(device)

            # Clear the gradients, then forward, backward and parameter update
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            if step % report_every != 0:
                continue

            # Report the mean loss of the last window and start a new one
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, step, loss_sum / report_every))
            loss_sum = 0.0

    print('Finished Training')

    # Persist only the parameters (state dict), not the whole module
    torch.save(net.state_dict(), f'./{network_name}.pth')
    

In [None]:
# Train with Adam. The checkpoint is written to ./adam_net01.pth, the file that
# the load_network() call further down reads (the previous name "sgd_adam01" did not match).
train(net, 2, trainloader, optimizer=optimizerAdam, network_name="adam_net01")

  0%|          | 0/2 [00:00<?, ?it/s]

[1,  2000] loss: 1.992


In [None]:
def load_network(path):
    """Create a fresh ``DeeperNet`` on ``device`` and load the weights stored at ``path``.

    Args:
        path: Path of a checkpoint holding a ``DeeperNet`` state dict.

    Returns:
        The network with the loaded weights, placed on the module-level ``device``.
    """
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint saved on a GPU can still be opened on a CPU-only machine.
    state_dict = torch.load(path, map_location=device)

    net = DeeperNet().to(device)
    net.load_state_dict(state_dict)

    return net

net_adam = load_network("adam_net01.pth")

In [None]:
# Overall test-set accuracy of the network loaded into net_adam
compute_accuracy(net_adam, testloader)

In [None]:
# Per-class test-set accuracy of the network loaded into net_adam
accuracy_per_class(net_adam, testloader)

## Visualizing the Network Structure

In [None]:
# Print the module listing (layers and their settings) of net
print(net)

In [None]:
# Import here: when the cells run top to bottom, this call comes before the
# cell that imports torchsummary and would otherwise raise a NameError.
from torchsummary import summary

summary(net, (3, 32, 32))

In [None]:
from torchsummary import summary

summary(net, (3, 32, 32)) # (channels, height, width of input image)

In [None]:
def summarize(network, testloader):
    """Report overall accuracy, per-class accuracy and a layer summary for ``network``.

    Args:
        network: The network to evaluate. It is the one actually reported on;
            the global ``net`` is no longer used in its place.
        testloader: Loader with the evaluation batches.
    """
    compute_accuracy(network, testloader)

    accuracy_per_class(network, testloader)

    # (channels, height, width) of one input image
    summary(network, (3, 32, 32))

In [None]:
# Accuracy reports and layer summary for net on the test set
summarize(net, testloader)