## Convolutional Networks

We'll check out how to build a **convolutional network** to classify CIFAR10 images. By using weight sharing - multiple units with the same weights - convolutional layers are able to learn repeated patterns in your data. For example, a unit could learn the pattern for an eye, or a face, or lower level features like edges.


In [1]:
import numpy as np
import time

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import torch.utils.data as utils
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler


In [2]:
def get_train_valid_loader(data_dir='data',
                           batch_size=64,
                           augment=False,
                           random_seed = 1,
                           valid_size=0.02,
                           shuffle=True,
                           show_sample=False,
                           num_workers=4,
                           pin_memory=False):
    """
    Utility function for loading and returning train and valid
    multi-process iterators over the CIFAR-10 dataset. A sample
    9x9 grid of the images can be optionally displayed.
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - augment: whether to apply the data augmentation scheme
      mentioned in the paper. Only applied on the train split.
    - random_seed: fix seed for reproducibility.
    - valid_size: percentage split of the training set used for
      the validation set. Should be a float in the range [0, 1].
    - shuffle: whether to shuffle the train/validation indices.
    - show_sample: plot 9x9 sample grid of the dataset.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    """
    error_msg = "[!] valid_size should be in the range [0, 1]."
    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
            transforms.ToTensor(),
            normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=train_transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=valid_transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    # visualize some images
    if show_sample:
        sample_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=9, shuffle=shuffle,
            num_workers=num_workers, pin_memory=pin_memory,
        )
        data_iter = iter(sample_loader)
        images, labels = data_iter.next()
        X = images.numpy().transpose([0, 2, 3, 1])
        plot_images(X, labels)

    return (train_loader, valid_loader)

trainloader, valloader = get_train_valid_loader()

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# class ConvNet(nn.Module):

#     def __init__(self, n_input_channels=3, n_output=10):
#         ################################################################################
#         # TODO:                                                                        #
#         # Define 2 or more different layers of the neural network                      #
#         ################################################################################
        
#         super().__init__()

#         self.conv1 = nn.Conv2d(n_input_channels, 6, 3)
#         self.conv2 = nn.Conv2d(6, 16, 3)
#         self.fc1 = nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
#         self.fc2 = nn.Linear(120, 84)
#         self.fc3 = nn.Linear(84, n_output)
        
#         ################################################################################
#         #                              END OF YOUR CODE                                #
#         ################################################################################

#     def forward(self, x):
#         ################################################################################
#         # TODO:                                                                        #
#         # Set up the forward pass that the input data will go through.                 #
#         # A good activation function betweent the layers is a ReLu function.           #
#         #                                                                              #
#         # Note that the output of the last convolution layer should be flattened       #
#         # before being inputted to the fully connected layer. We can flatten           #
#         # Tensor `x` with `x.view`.                                                    #
#         ################################################################################
        
#         # Max pooling over a (2, 2) window
#         x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
#         x = F.max_pool2d(F.relu(self.conv2(x)), 2)
#         x = x.view(-1, self.num_flat_features(x))
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)
        
#         ################################################################################
#         #                              END OF YOUR CODE                                #
#         ################################################################################
#         return x

#     def num_flat_features(self, x):
#         size = x.size()[1:]  # all dimensions except the batch dimension
#         num_features = 1
#         for s in size:
#             num_features *= s
#         return num_features
    
#     def predict(self, x):
#         outputs = self.forward(x)
#         _, predicted = torch.max(F.softmax(outputs, dim=1).data, 1)
#         return predicted

In [4]:
class ConvNet(nn.Module):
    def __init__(self, n_input_channels=3, n_output=10):
        super().__init__()
        ################################################################################
        # TODO:                                                                        #
        # Define 2 or more different layers of the neural network                      #
        ################################################################################
        
        self.conv1 = nn.Conv2d(n_input_channels, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, padding=1)
        self.conv4 = nn.Conv2d(32, 64, 3, padding=1)
        
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        
        self.fc1 = nn.Linear(8*8*64,256) 
        self.fc2 = nn.Linear(256, n_output)
        
        ################################################################################
        #                              END OF YOUR CODE                                #
        ################################################################################
    
    def forward(self, x):
        ################################################################################
        # TODO:                                                                        #
        # Set up the forward pass that the input data will go through.                 #
        # A good activation function betweent the layers is a ReLu function.           #
        #                                                                              #
        # Note that the output of the last convolution layer should be flattened       #
        # before being inputted to the fully connected layer. We can flatten           #
        # Tensor `x` with `x.view`.                                                    #
        ################################################################################
        
        x = F.leaky_relu(self.conv1(x),0.1)
        x = F.max_pool2d(F.leaky_relu(self.conv2(x),0.1), 2)
        x = self.dropout1(x)
        
        x = F.leaky_relu(self.conv3(x),0.1)
        x = F.max_pool2d(F.leaky_relu(self.conv4(x),0.1), 2)
        x = self.dropout1(x)
        
        x = torch.flatten(x, 1)
        
        x = F.leaky_relu(self.fc1(x),0.1)
        x = self.dropout2(x)
        x = self.fc2(x)
        
        ################################################################################
        #                              END OF YOUR CODE                                #
        ################################################################################
        
        return x
    
    def predict(self, x):
        outputs = self.forward(x)
        _, predicted = torch.max(F.softmax(outputs, dim=1).data, 1)
        return predicted

In [5]:
net = ConvNet()
print(net)

################################################################################
# TODO:                                                                        #
# Choose an Optimizer that will be used to minimize the loss function.         #
# Choose a criteria that measures the loss                                      #
################################################################################
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

ConvNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (dropout1): Dropout2d(p=0.25, inplace=False)
  (dropout2): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=4096, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)


In [6]:
from torchsummary import summary
summary(net, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
            Conv2d-2           [-1, 32, 32, 32]           4,640
         Dropout2d-3           [-1, 32, 16, 16]               0
            Conv2d-4           [-1, 32, 16, 16]           9,248
            Conv2d-5           [-1, 64, 16, 16]          18,496
         Dropout2d-6             [-1, 64, 8, 8]               0
            Linear-7                  [-1, 256]       1,048,832
         Dropout2d-8                  [-1, 256]               0
            Linear-9                   [-1, 10]           2,570
Total params: 1,084,234
Trainable params: 1,084,234
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.66
Params size (MB): 4.14
Estimated Total Size (MB): 4.81
---------------------------------------

In [7]:
# def get_accuracy(dataloader):
#     correct = 0
#     for ii, (images, labels) in enumerate(dataloader):
#         predicted = net.predict(images)
#         correct += (predicted == labels).sum().item()
#         accuracy = 100 * correct / labels.size(0)
#     return ii, accuracy

def get_accuracy(dataloader):
    res = 0
    for ii, (images, labels) in enumerate(dataloader):
        predicted = net.predict(images)
        total = labels.size(0)
        correct = (predicted == labels).sum().item()
        res += 100*correct/total
    return ii, res

In [8]:
epochs = 1
steps = 0
running_loss = 0
print_every = 20

for e in range(epochs):
    start = time.time()
    for images, labels in iter(trainloader):
        
        steps += 1
    
        ################################################################################
        # TODO:                                                                        #
        # Run the training process                                                     #
        #                                                                              #
        # HINT: Calculate the gradient and move one step further                       #
        ################################################################################
        optimizer.zero_grad()
        output = net(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        ################################################################################
        #                              END OF YOUR CODE                                #
        ################################################################################
        
        running_loss += loss.item()
        
        if steps % print_every == 0:
            # Test accuracy
            net.eval()
            with torch.no_grad():
        
                ################################################################################
                # TODO:                                                                        #
                # Calculate the accuracy                                                       #
                ################################################################################

                ii_tr, accuracy_tr = get_accuracy(trainloader)
                ii_val, accuracy_val = get_accuracy(valloader)
                stop = time.time()
                ################################################################################
                #                              END OF YOUR CODE                                #
                ################################################################################   
                    
                print("Epoch: {}/{}".format(e+1, epochs),
                      "Loss: {:.4f}..".format(running_loss/print_every),
                      "Train accuracy: {:.4f}..".format(accuracy_tr/(ii_tr+1)),
                      "Test accuracy: {:.4f}..".format(accuracy_val/(ii_val+1)),
                      "{:.4f} s/step".format((stop - start)/print_every)
                     )
                
            running_loss = 0
            start = time.time()

Epoch: 1/1 Loss: 2.2224.. Train accuracy: 24.8205.. Test accuracy: 26.4648.. 3.3197 s/step
Epoch: 1/1 Loss: 1.9417.. Train accuracy: 31.2737.. Test accuracy: 33.7500.. 2.9048 s/step
Epoch: 1/1 Loss: 1.8508.. Train accuracy: 37.0386.. Test accuracy: 38.9844.. 3.0661 s/step
Epoch: 1/1 Loss: 1.7036.. Train accuracy: 39.7144.. Test accuracy: 39.2188.. 2.8791 s/step
Epoch: 1/1 Loss: 1.6568.. Train accuracy: 42.5742.. Test accuracy: 41.4648.. 2.9627 s/step
Epoch: 1/1 Loss: 1.5956.. Train accuracy: 44.6732.. Test accuracy: 45.0977.. 3.0322 s/step
Epoch: 1/1 Loss: 1.5246.. Train accuracy: 45.6091.. Test accuracy: 44.8438.. 2.9676 s/step
Epoch: 1/1 Loss: 1.4650.. Train accuracy: 48.0993.. Test accuracy: 48.1055.. 2.7853 s/step
Epoch: 1/1 Loss: 1.4795.. Train accuracy: 49.3652.. Test accuracy: 48.6328.. 2.7579 s/step
Epoch: 1/1 Loss: 1.4330.. Train accuracy: 47.6220.. Test accuracy: 48.5938.. 2.8181 s/step
Epoch: 1/1 Loss: 1.3860.. Train accuracy: 50.4598.. Test accuracy: 49.3359.. 2.9871 s/step

Save best trained model.

In [9]:
model_name = "model.ckpt"

In [10]:
torch.save(net.state_dict(), model_name)