In [1]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torch.utils.data import sampler
from torchvision.datasets import ImageFolder


import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

The next few lines are all setup for the later problems.

In [2]:
NUM_TRAIN = 49000
batch_size = 64

# The torchvision.transforms package provides tools for preprocessing data
# and for performing data augmentation. Every image is converted to a tensor
# and normalised by subtracting the mean RGB value and dividing by the
# standard deviation of each RGB value; the statistics are hard-coded here.
CIFAR100_MEAN = (0.5071, 0.4867, 0.4408)
CIFAR100_STD = (0.2675, 0.2565, 0.2761)

#===========================================================================#
# You should try changing the transform for the training data to include    #
# data augmentation such as RandomCrop and RandomHorizontalFlip             #
# when running the final part of the notebook where you have to achieve     #
# as high accuracy as possible on CIFAR-100.                                #
# Of course you will have to re-run this block for the effect to take place #
#===========================================================================#
# Training-only transform. It is deliberately a separate object from
# `transform` below so that any augmentation added here never leaks into
# the validation / test preprocessing.
train_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(CIFAR100_MEAN, CIFAR100_STD),
])

# Deterministic preprocessing used for the validation and test splits.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(CIFAR100_MEAN, CIFAR100_STD),
])

# We set up a Dataset object for each split (train / val / test); Datasets load
# training examples one at a time, so we wrap each Dataset in a DataLoader which
# iterates through the Dataset and forms minibatches. We divide the CIFAR-100
# training set into train and val sets by passing a Sampler object to the
# DataLoader telling how it should sample from the underlying Dataset.
cifar100_train = dset.CIFAR100('./datasets/cifar100', train=True, download=True,
                               transform=train_transform)
loader_train = DataLoader(cifar100_train, batch_size=batch_size, num_workers=2,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

# Same underlying training images, but read through the evaluation transform.
cifar100_val = dset.CIFAR100('./datasets/cifar100', train=True, download=True,
                             transform=transform)
# Everything after the first NUM_TRAIN images is held out for validation; the
# upper bound comes from the dataset itself instead of a hard-coded size.
assert 0 < NUM_TRAIN < len(cifar100_val), "NUM_TRAIN must leave at least one validation image"
loader_val = DataLoader(cifar100_val, batch_size=batch_size, num_workers=2,
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, len(cifar100_val))))

cifar100_test = dset.CIFAR100('./datasets/cifar100', train=False, download=True,
                              transform=transform)
loader_test = DataLoader(cifar100_test, batch_size=batch_size, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [2]:
import torch.nn.functional as F

USE_GPU = True
# presumably the Caltech-101 class count (the Caltech VGGNet's last layer also
# has 102 outputs) — TODO confirm where this is consumed
num_class = 102
dtype = torch.float32 # we will be using float throughout this tutorial

# Run on the GPU only when it is both requested and actually available.
device = torch.device('cuda' if (USE_GPU and torch.cuda.is_available()) else 'cpu')

# Number of training iterations between two loss / accuracy printouts.
print_every = 100

print('using device:', device)

using device: cuda


In [3]:
def flatten(x):
    """Collapse every dimension after the first into one, giving one row per sample."""
    batch = x.size(0)  # leading dimension is kept as-is
    flat = x.view(batch, -1)  # -1 lets view() infer the product of the remaining dims
    return flat

def test_flatten():
    """Print a small (2, 1, 3, 2) tensor before and after `flatten` as a visual check."""
    sample = torch.arange(12).view(2, 1, 3, 2)
    flattened = flatten(sample)
    print('Before flattening: ', sample)
    print('After flattening: ', flattened)

test_flatten()

Before flattening:  tensor([[[[ 0,  1],
          [ 2,  3],
          [ 4,  5]]],


        [[[ 6,  7],
          [ 8,  9],
          [10, 11]]]])
After flattening:  tensor([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11]])


In [4]:
def train_part34(model, optimizer, epochs=1):
    """
    Fit a model on the notebook-level `loader_train` using the PyTorch Module API.

    Every `print_every` iterations the current minibatch loss is printed and the
    model is scored on `loader_val`.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object built over the model's parameters.
    - epochs: (Optional) A Python integer giving the number of passes over
      `loader_train`.

    Returns: The accuracy reported by check_accuracy_part34 on `loader_val`
    once all epochs have finished.
    """
    model = model.to(device=device)  # parameters live on the configured CPU/GPU
    for epoch in range(epochs):
        for step, (images, labels) in enumerate(loader_train, start=1):
            # Re-enter training mode on every step: the periodic accuracy check
            # below leaves the model in whatever mode the checker selected.
            model.train()
            images = images.to(device=device, dtype=dtype)
            labels = labels.to(device=device, dtype=torch.long)

            # Forward pass and classification loss for this minibatch.
            loss = F.cross_entropy(model(images), labels)

            # Clear stale gradients, back-propagate, then apply the update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % print_every != 0:
                continue
            print('Epoch %d, Iteration %d, loss = %.4f' % (epoch, step, loss.item()))
            check_accuracy_part34(loader_val, model)
            print()
    return check_accuracy_part34(loader_val, model)

In [27]:
def check_accuracy_part34(loader, model):
    """
    Measure the classification accuracy of `model` over every batch in `loader`.

    Inputs:
    - loader: An iterable of (x, y) minibatches, normally a DataLoader. Its
      `.dataset` is inspected only to choose the printed header: a truthy
      `train` flag (CIFAR-style) or `split == "train"` (SVHN-style) means the
      held-out validation part of the training data; datasets exposing neither
      attribute are reported as the validation set.
    - model: A PyTorch Module mapping a batch x to per-class scores of shape
      (N, C).

    Returns: The fraction of correctly classified samples as a Python float,
    or 0.0 when the loader yields no samples.
    """
    dataset = getattr(loader, 'dataset', None)
    if hasattr(dataset, 'train'):
        from_training_data = bool(dataset.train)
    elif hasattr(dataset, 'split'):
        from_training_data = dataset.split == "train"
    else:
        from_training_data = True
    if from_training_data:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')

    # Evaluate wherever the model's weights live, so the result does not depend
    # on notebook-global device / dtype variables from another cell.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device, target_dtype = first_param.device, first_param.dtype
    else:
        target_device, target_dtype = torch.device('cpu'), torch.float32

    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=target_device, dtype=target_dtype)
            y = y.to(device=target_device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
    num_correct = int(num_correct)
    # An empty loader would otherwise divide by zero.
    acc = num_correct / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [15]:
class VGGNet(nn.Module):
    """
    VGG-style CNN with four convolutional stages and a linear classifier.

    Stages use 2, 2, 3 and 3 conv layers with 64, 128, 256 and 512 channels.
    Every 3x3 convolution (padding 1) is followed by BatchNorm and
    LeakyReLU(0.1); every stage ends with a 2x2 / stride-2 max-pool.

    No fifth stage is built: forward() stops after stage 4, so only layers that
    actually run are registered (and handed to the optimizer). The classifier
    takes 2048 = 512 * 2 * 2 features, which is what four 2x poolings leave of
    a 32x32 input.

    Inputs:
    - num_classes: Number of output scores per image (default 100).
    """

    def __init__(self, num_classes=100):
        super(VGGNet, self).__init__()
        # Stage 1: 3 -> 64 channels
        self.conv1_1 = nn.Conv2d(3, 64, (3, 3), padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, (3, 3), padding=1)
        # Stage 2: 64 -> 128 channels
        self.conv2_1 = nn.Conv2d(64, 128, (3, 3), padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, (3, 3), padding=1)
        # Stage 3: 128 -> 256 channels
        self.conv3_1 = nn.Conv2d(128, 256, (3, 3), padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, (3, 3), padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, (3, 3), padding=1)
        # Stage 4: 256 -> 512 channels
        self.conv4_1 = nn.Conv2d(256, 512, (3, 3), padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, (3, 3), padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, (3, 3), padding=1)

        # Stateless layers, shared by all stages.
        self.maxpool = nn.MaxPool2d((2, 2), stride=2)
        self.relu = nn.LeakyReLU(0.1)

        # 512 channels * 2 * 2 spatial positions after the fourth pool.
        self.fc1 = nn.Linear(512 * 2 * 2, num_classes)

        # One BatchNorm per convolution, matched by name.
        self.bn1_1 = nn.BatchNorm2d(64)
        self.bn1_2 = nn.BatchNorm2d(64)
        self.bn2_1 = nn.BatchNorm2d(128)
        self.bn2_2 = nn.BatchNorm2d(128)
        self.bn3_1 = nn.BatchNorm2d(256)
        self.bn3_2 = nn.BatchNorm2d(256)
        self.bn3_3 = nn.BatchNorm2d(256)
        self.bn4_1 = nn.BatchNorm2d(512)
        self.bn4_2 = nn.BatchNorm2d(512)
        self.bn4_3 = nn.BatchNorm2d(512)

    def forward(self, x):
        """Map a batch of images (N, 3, H, W) to class scores (N, num_classes)."""
        x = self.relu(self.bn1_1(self.conv1_1(x)))
        x = self.relu(self.bn1_2(self.conv1_2(x)))
        x = self.maxpool(x)

        x = self.relu(self.bn2_1(self.conv2_1(x)))
        x = self.relu(self.bn2_2(self.conv2_2(x)))
        x = self.maxpool(x)

        x = self.relu(self.bn3_1(self.conv3_1(x)))
        x = self.relu(self.bn3_2(self.conv3_2(x)))
        x = self.relu(self.bn3_3(self.conv3_3(x)))
        x = self.maxpool(x)

        x = self.relu(self.bn4_1(self.conv4_1(x)))
        x = self.relu(self.bn4_2(self.conv4_2(x)))
        x = self.relu(self.bn4_3(self.conv4_3(x)))
        x = self.maxpool(x)

        # Flatten (N, C, H, W) -> (N, C*H*W) for the linear classifier.
        x = x.reshape(x.size(0), -1)
        scores = self.fc1(x)
        return scores
    

In [20]:

# Baseline run: Adam (lr = 5e-4) on a freshly initialised VGGNet.
learning_rate = 5e-4
print_every = 100

model = VGGNet()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Last expression of the cell, so the returned validation accuracy is displayed.
train_part34(model=model, optimizer=optimizer, epochs=20)


Epoch 0, Iteration 100, loss = 4.1825
Checking accuracy on validation set
Got 47 / 1000 correct (4.70)

Epoch 0, Iteration 200, loss = 3.8715
Checking accuracy on validation set
Got 74 / 1000 correct (7.40)

Epoch 0, Iteration 300, loss = 3.6689
Checking accuracy on validation set
Got 86 / 1000 correct (8.60)

Epoch 0, Iteration 400, loss = 3.6793
Checking accuracy on validation set
Got 110 / 1000 correct (11.00)

Epoch 0, Iteration 500, loss = 3.5816
Checking accuracy on validation set
Got 133 / 1000 correct (13.30)

Epoch 0, Iteration 600, loss = 3.0306
Checking accuracy on validation set
Got 135 / 1000 correct (13.50)

Epoch 0, Iteration 700, loss = 3.5490
Checking accuracy on validation set
Got 170 / 1000 correct (17.00)

Epoch 1, Iteration 100, loss = 3.2502
Checking accuracy on validation set
Got 212 / 1000 correct (21.20)

Epoch 1, Iteration 200, loss = 3.0253
Checking accuracy on validation set
Got 204 / 1000 correct (20.40)

Epoch 1, Iteration 300, loss = 3.0360
Checking accur

0.568

In [21]:
# Same setup with half the learning rate: Adam (lr = 2.5e-4) on a new VGGNet.
learning_rate = 2.5e-4
print_every = 100

model = VGGNet()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Last expression of the cell, so the returned validation accuracy is displayed.
train_part34(model=model, optimizer=optimizer, epochs=20)

Epoch 0, Iteration 100, loss = 4.0412
Checking accuracy on validation set
Got 87 / 1000 correct (8.70)

Epoch 0, Iteration 200, loss = 3.4720
Checking accuracy on validation set
Got 152 / 1000 correct (15.20)

Epoch 0, Iteration 300, loss = 3.4615
Checking accuracy on validation set
Got 155 / 1000 correct (15.50)

Epoch 0, Iteration 400, loss = 3.3739
Checking accuracy on validation set
Got 226 / 1000 correct (22.60)

Epoch 0, Iteration 500, loss = 2.8360
Checking accuracy on validation set
Got 232 / 1000 correct (23.20)

Epoch 0, Iteration 600, loss = 2.5977
Checking accuracy on validation set
Got 260 / 1000 correct (26.00)

Epoch 0, Iteration 700, loss = 2.2386
Checking accuracy on validation set
Got 277 / 1000 correct (27.70)

Epoch 1, Iteration 100, loss = 2.4467
Checking accuracy on validation set
Got 289 / 1000 correct (28.90)

Epoch 1, Iteration 200, loss = 2.1076
Checking accuracy on validation set
Got 339 / 1000 correct (33.90)

Epoch 1, Iteration 300, loss = 2.2931
Checking a

0.562

In [24]:
# RMSprop run (lr = 3e-3) on a new VGGNet; every other optimizer option is
# passed explicitly.
learning_rate = 3e-3
print_every = 100

model = VGGNet()
optimizer = optim.RMSprop(
    params=model.parameters(),
    lr=learning_rate,
    alpha=0.99,
    eps=1e-08,
    weight_decay=0,
    momentum=0,
    centered=False,
    foreach=None,
    maximize=False,
    differentiable=False,
)

# Last expression of the cell, so the returned validation accuracy is displayed.
train_part34(model=model, optimizer=optimizer, epochs=20)

Epoch 0, Iteration 100, loss = 4.6491
Checking accuracy on validation set
Got 13 / 1000 correct (1.30)

Epoch 0, Iteration 200, loss = 4.7147
Checking accuracy on validation set
Got 16 / 1000 correct (1.60)

Epoch 0, Iteration 300, loss = 4.5221
Checking accuracy on validation set
Got 32 / 1000 correct (3.20)

Epoch 0, Iteration 400, loss = 4.4726
Checking accuracy on validation set
Got 43 / 1000 correct (4.30)

Epoch 0, Iteration 500, loss = 4.3156
Checking accuracy on validation set
Got 49 / 1000 correct (4.90)

Epoch 0, Iteration 600, loss = 4.1085
Checking accuracy on validation set
Got 54 / 1000 correct (5.40)

Epoch 0, Iteration 700, loss = 4.1887
Checking accuracy on validation set
Got 67 / 1000 correct (6.70)

Epoch 1, Iteration 100, loss = 4.2545
Checking accuracy on validation set
Got 78 / 1000 correct (7.80)

Epoch 1, Iteration 200, loss = 3.7709
Checking accuracy on validation set
Got 82 / 1000 correct (8.20)

Epoch 1, Iteration 300, loss = 4.2907
Checking accuracy on valid

0.47

In [26]:
# Plain SGD run (lr = 1e-3, no momentum argument) on a new VGGNet.
learning_rate = 1e-3
print_every = 100

model = VGGNet()
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)

# Last expression of the cell, so the returned validation accuracy is displayed.
train_part34(model=model, optimizer=optimizer, epochs=20)

Epoch 0, Iteration 100, loss = 4.0428
Checking accuracy on validation set
Got 56 / 1000 correct (5.60)

Epoch 0, Iteration 200, loss = 4.0031
Checking accuracy on validation set
Got 101 / 1000 correct (10.10)

Epoch 0, Iteration 300, loss = 3.9446
Checking accuracy on validation set
Got 123 / 1000 correct (12.30)

Epoch 0, Iteration 400, loss = 3.8980
Checking accuracy on validation set
Got 133 / 1000 correct (13.30)

Epoch 0, Iteration 500, loss = 3.8591
Checking accuracy on validation set
Got 171 / 1000 correct (17.10)

Epoch 0, Iteration 600, loss = 3.5605
Checking accuracy on validation set
Got 177 / 1000 correct (17.70)

Epoch 0, Iteration 700, loss = 3.5798
Checking accuracy on validation set
Got 191 / 1000 correct (19.10)

Epoch 1, Iteration 100, loss = 3.2862
Checking accuracy on validation set
Got 213 / 1000 correct (21.30)

Epoch 1, Iteration 200, loss = 3.2741
Checking accuracy on validation set
Got 208 / 1000 correct (20.80)

Epoch 1, Iteration 300, loss = 3.4721
Checking a

0.465

Now let's try VGGNet with dropout after every convolutional block (i.e. after each max-pooling layer); this helps prevent overfitting (CNNs are much harder to overfit, but it is still worth a shot to see if it improves performance). We will use Adam with a 5e-4 learning rate because it worked best in our last try (possibly just a better random initialization). We still use LeakyReLU (0.1) because I found it gives slightly better accuracies and it can help prevent neuron death.

In [30]:
class VGGNet(nn.Module):
    """
    VGG-style CNN with four convolutional stages, per-stage dropout and a
    linear classifier.

    Stages use 2, 2, 3 and 3 conv layers with 64, 128, 256 and 512 channels.
    Every 3x3 convolution (padding 1) is followed by BatchNorm and
    LeakyReLU(0.1); every stage ends with a 2x2 / stride-2 max-pool and then
    dropout.

    No fifth stage is built: forward() stops after stage 4, so only layers that
    actually run are registered (and handed to the optimizer). The classifier
    takes 2048 = 512 * 2 * 2 features, which is what four 2x poolings leave of
    a 32x32 input.

    Inputs:
    - num_classes: Number of output scores per image (default 100).
    - dropout_p: Drop probability used by all four dropout layers (default 0.5).
    """

    def __init__(self, num_classes=100, dropout_p=0.5):
        super(VGGNet, self).__init__()
        # Stage 1: 3 -> 64 channels
        self.conv1_1 = nn.Conv2d(3, 64, (3, 3), padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, (3, 3), padding=1)
        # Stage 2: 64 -> 128 channels
        self.conv2_1 = nn.Conv2d(64, 128, (3, 3), padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, (3, 3), padding=1)
        # Stage 3: 128 -> 256 channels
        self.conv3_1 = nn.Conv2d(128, 256, (3, 3), padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, (3, 3), padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, (3, 3), padding=1)
        # Stage 4: 256 -> 512 channels
        self.conv4_1 = nn.Conv2d(256, 512, (3, 3), padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, (3, 3), padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, (3, 3), padding=1)

        # Stateless layers, shared by all stages.
        self.maxpool = nn.MaxPool2d((2, 2), stride=2)
        self.relu = nn.LeakyReLU(.1)

        # 512 channels * 2 * 2 spatial positions after the fourth pool.
        self.fc1 = nn.Linear(512 * 2 * 2, num_classes)

        # One BatchNorm per convolution, matched by name.
        self.bn1_1 = nn.BatchNorm2d(64)
        self.bn1_2 = nn.BatchNorm2d(64)
        self.bn2_1 = nn.BatchNorm2d(128)
        self.bn2_2 = nn.BatchNorm2d(128)
        self.bn3_1 = nn.BatchNorm2d(256)
        self.bn3_2 = nn.BatchNorm2d(256)
        self.bn3_3 = nn.BatchNorm2d(256)
        self.bn4_1 = nn.BatchNorm2d(512)
        self.bn4_2 = nn.BatchNorm2d(512)
        self.bn4_3 = nn.BatchNorm2d(512)

        # One dropout layer per stage, applied right after that stage's pool.
        self.dropout1 = nn.Dropout(dropout_p)
        self.dropout2 = nn.Dropout(dropout_p)
        self.dropout3 = nn.Dropout(dropout_p)
        self.dropout4 = nn.Dropout(dropout_p)

    def forward(self, x):
        """Map a batch of images (N, 3, H, W) to class scores (N, num_classes)."""
        x = self.relu(self.bn1_1(self.conv1_1(x)))
        x = self.relu(self.bn1_2(self.conv1_2(x)))
        x = self.dropout1(self.maxpool(x))

        x = self.relu(self.bn2_1(self.conv2_1(x)))
        x = self.relu(self.bn2_2(self.conv2_2(x)))
        x = self.dropout2(self.maxpool(x))

        x = self.relu(self.bn3_1(self.conv3_1(x)))
        x = self.relu(self.bn3_2(self.conv3_2(x)))
        x = self.relu(self.bn3_3(self.conv3_3(x)))
        x = self.dropout3(self.maxpool(x))

        x = self.relu(self.bn4_1(self.conv4_1(x)))
        x = self.relu(self.bn4_2(self.conv4_2(x)))
        x = self.relu(self.bn4_3(self.conv4_3(x)))
        x = self.dropout4(self.maxpool(x))

        # Flatten (N, C, H, W) -> (N, C*H*W) for the linear classifier.
        x = x.reshape(x.size(0), -1)
        scores = self.fc1(x)
        return scores
    

In [31]:
# Adam (lr = 5e-4) on a fresh instance of the most recently defined VGGNet.
learning_rate = 5e-4
print_every = 100

model = VGGNet()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Last expression of the cell, so the returned validation accuracy is displayed.
train_part34(model=model, optimizer=optimizer, epochs=20)

Epoch 0, Iteration 100, loss = 4.6582
Checking accuracy on validation set
Got 10 / 1000 correct (1.00)

Epoch 0, Iteration 200, loss = 4.3320
Checking accuracy on validation set
Got 18 / 1000 correct (1.80)

Epoch 0, Iteration 300, loss = 4.3329
Checking accuracy on validation set
Got 29 / 1000 correct (2.90)

Epoch 0, Iteration 400, loss = 4.0656
Checking accuracy on validation set
Got 61 / 1000 correct (6.10)

Epoch 0, Iteration 500, loss = 3.9793
Checking accuracy on validation set
Got 78 / 1000 correct (7.80)

Epoch 0, Iteration 600, loss = 3.6009
Checking accuracy on validation set
Got 91 / 1000 correct (9.10)

Epoch 0, Iteration 700, loss = 3.9208
Checking accuracy on validation set
Got 104 / 1000 correct (10.40)

Epoch 1, Iteration 100, loss = 3.5541
Checking accuracy on validation set
Got 122 / 1000 correct (12.20)

Epoch 1, Iteration 200, loss = 3.6891
Checking accuracy on validation set
Got 142 / 1000 correct (14.20)

Epoch 1, Iteration 300, loss = 3.6327
Checking accuracy on

0.605

Now that we have what seems to be a decent VGGNet version, let's try it on a different dataset. We are going to use the torchvision.datasets.SVHN (Street View House Numbers) dataset. It has the same input image size as CIFAR-100, except that now we have 10 classes (the digits 0-9). This dataset contains a lot of noise and background imagery, which makes it a harder classification task than the MNIST handwritten digits.

In [42]:
NUM_TRAIN = 71257

# Normalisation statistics. NOTE(review): these are the same numbers the
# CIFAR-100 cell uses, not statistics computed from SVHN — confirm this is
# intended.
SVHN_NORM_MEAN = (0.5071, 0.4867, 0.4408)
SVHN_NORM_STD = (0.2675, 0.2565, 0.2761)

# Training-only transform. It is a separate object from `transform` so that
# any augmentation added here never leaks into validation / test preprocessing.
train_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(SVHN_NORM_MEAN, SVHN_NORM_STD),
])

# Deterministic preprocessing used for the validation and test splits.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(SVHN_NORM_MEAN, SVHN_NORM_STD),
])

# We set up a Dataset object for each split (train / val / test); Datasets load
# training examples one at a time, so we wrap each Dataset in a DataLoader which
# iterates through the Dataset and forms minibatches. We divide the SVHN
# 'train' split into train and val sets by passing a Sampler object to the
# DataLoader telling how it should sample from the underlying Dataset.
SVHN_train = dset.SVHN('./datasets/SVHN', split='train', download=True,
                       transform=train_transform)
loader_train = DataLoader(SVHN_train, batch_size=batch_size, num_workers=2,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

# Same underlying 'train' images, but read through the evaluation transform.
SVHN_val = dset.SVHN('./datasets/SVHN', split='train', download=True,
                     transform=transform)
# Everything after the first NUM_TRAIN images is held out for validation; the
# upper bound comes from the dataset itself instead of a hard-coded size.
assert 0 < NUM_TRAIN < len(SVHN_val), "NUM_TRAIN must leave at least one validation image"
loader_val = DataLoader(SVHN_val, batch_size=batch_size, num_workers=2,
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, len(SVHN_val))))

SVHN_test = dset.SVHN('./datasets/SVHN', split='test', download=True,
                      transform=transform)
loader_test = DataLoader(SVHN_test, batch_size=batch_size, num_workers=2)

Using downloaded and verified file: ./datasets/SVHN/train_32x32.mat
Using downloaded and verified file: ./datasets/SVHN/train_32x32.mat
Using downloaded and verified file: ./datasets/SVHN/test_32x32.mat


In [43]:
class VGGNet(nn.Module):
    """
    VGG-style CNN with four convolutional stages, per-stage dropout and a
    10-way linear classifier by default.

    Stages use 2, 2, 3 and 3 conv layers with 64, 128, 256 and 512 channels.
    Every 3x3 convolution (padding 1) is followed by BatchNorm and
    LeakyReLU(0.1); every stage ends with a 2x2 / stride-2 max-pool and then
    dropout.

    No fifth stage is built: forward() stops after stage 4, so only layers that
    actually run are registered (and handed to the optimizer). The classifier
    takes 2048 = 512 * 2 * 2 features, which is what four 2x poolings leave of
    a 32x32 input.

    Inputs:
    - num_classes: Number of output scores per image (default 10).
    - dropout_p: Drop probability used by all four dropout layers (default 0.5).
    """

    def __init__(self, num_classes=10, dropout_p=0.5):
        super(VGGNet, self).__init__()
        # Stage 1: 3 -> 64 channels
        self.conv1_1 = nn.Conv2d(3, 64, (3, 3), padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, (3, 3), padding=1)
        # Stage 2: 64 -> 128 channels
        self.conv2_1 = nn.Conv2d(64, 128, (3, 3), padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, (3, 3), padding=1)
        # Stage 3: 128 -> 256 channels
        self.conv3_1 = nn.Conv2d(128, 256, (3, 3), padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, (3, 3), padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, (3, 3), padding=1)
        # Stage 4: 256 -> 512 channels
        self.conv4_1 = nn.Conv2d(256, 512, (3, 3), padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, (3, 3), padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, (3, 3), padding=1)

        # Stateless layers, shared by all stages.
        self.maxpool = nn.MaxPool2d((2, 2), stride=2)
        self.relu = nn.LeakyReLU(.1)

        # 512 channels * 2 * 2 spatial positions after the fourth pool.
        self.fc1 = nn.Linear(512 * 2 * 2, num_classes)

        # One BatchNorm per convolution, matched by name.
        self.bn1_1 = nn.BatchNorm2d(64)
        self.bn1_2 = nn.BatchNorm2d(64)
        self.bn2_1 = nn.BatchNorm2d(128)
        self.bn2_2 = nn.BatchNorm2d(128)
        self.bn3_1 = nn.BatchNorm2d(256)
        self.bn3_2 = nn.BatchNorm2d(256)
        self.bn3_3 = nn.BatchNorm2d(256)
        self.bn4_1 = nn.BatchNorm2d(512)
        self.bn4_2 = nn.BatchNorm2d(512)
        self.bn4_3 = nn.BatchNorm2d(512)

        # One dropout layer per stage, applied right after that stage's pool.
        self.dropout1 = nn.Dropout(dropout_p)
        self.dropout2 = nn.Dropout(dropout_p)
        self.dropout3 = nn.Dropout(dropout_p)
        self.dropout4 = nn.Dropout(dropout_p)

    def forward(self, x):
        """Map a batch of images (N, 3, H, W) to class scores (N, num_classes)."""
        x = self.relu(self.bn1_1(self.conv1_1(x)))
        x = self.relu(self.bn1_2(self.conv1_2(x)))
        x = self.dropout1(self.maxpool(x))

        x = self.relu(self.bn2_1(self.conv2_1(x)))
        x = self.relu(self.bn2_2(self.conv2_2(x)))
        x = self.dropout2(self.maxpool(x))

        x = self.relu(self.bn3_1(self.conv3_1(x)))
        x = self.relu(self.bn3_2(self.conv3_2(x)))
        x = self.relu(self.bn3_3(self.conv3_3(x)))
        x = self.dropout3(self.maxpool(x))

        x = self.relu(self.bn4_1(self.conv4_1(x)))
        x = self.relu(self.bn4_2(self.conv4_2(x)))
        x = self.relu(self.bn4_3(self.conv4_3(x)))
        x = self.dropout4(self.maxpool(x))

        # Flatten (N, C, H, W) -> (N, C*H*W) for the linear classifier.
        x = x.reshape(x.size(0), -1)
        scores = self.fc1(x)
        return scores

Now that we have switched how the dataset is loaded (loading different datasets through PyTorch requires different methods), we need to change our check-accuracy function to use dataset.split.

In [18]:
def check_accuracy_part34(loader, model):
    """
    Measure the classification accuracy of `model` over every batch in `loader`.

    Inputs:
    - loader: An iterable of (x, y) minibatches, normally a DataLoader. Its
      `.dataset` is inspected only to choose the printed header: a truthy
      `train` flag (CIFAR-style) or `split == "train"` (SVHN-style) means the
      held-out validation part of the training data; datasets exposing neither
      attribute are reported as the validation set.
    - model: A PyTorch Module mapping a batch x to per-class scores of shape
      (N, C).

    Returns: The fraction of correctly classified samples as a Python float,
    or 0.0 when the loader yields no samples.
    """
    dataset = getattr(loader, 'dataset', None)
    if hasattr(dataset, 'train'):
        from_training_data = bool(dataset.train)
    elif hasattr(dataset, 'split'):
        from_training_data = dataset.split == "train"
    else:
        from_training_data = True
    if from_training_data:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')

    # Evaluate wherever the model's weights live, so the result does not depend
    # on notebook-global device / dtype variables from another cell.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device, target_dtype = first_param.device, first_param.dtype
    else:
        target_device, target_dtype = torch.device('cpu'), torch.float32

    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=target_device, dtype=target_dtype)
            y = y.to(device=target_device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
    num_correct = int(num_correct)
    # An empty loader would otherwise divide by zero.
    acc = num_correct / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [45]:
# Adam (lr = 5e-4) on a fresh instance of the most recently defined VGGNet,
# trained on whatever loader_train currently points at.
learning_rate = 5e-4
print_every = 100

model = VGGNet()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Last expression of the cell, so the returned validation accuracy is displayed.
train_part34(model=model, optimizer=optimizer, epochs=20)

Epoch 0, Iteration 100, loss = 2.3144
Checking accuracy on validation set
Got 343 / 2000 correct (17.15)

Epoch 0, Iteration 200, loss = 2.3219
Checking accuracy on validation set
Got 343 / 2000 correct (17.15)

Epoch 0, Iteration 300, loss = 2.2659
Checking accuracy on validation set
Got 332 / 2000 correct (16.60)

Epoch 0, Iteration 400, loss = 2.2100
Checking accuracy on validation set
Got 342 / 2000 correct (17.10)

Epoch 0, Iteration 500, loss = 2.3358
Checking accuracy on validation set
Got 342 / 2000 correct (17.10)

Epoch 0, Iteration 600, loss = 2.2777
Checking accuracy on validation set
Got 318 / 2000 correct (15.90)

Epoch 0, Iteration 700, loss = 1.4576
Checking accuracy on validation set
Got 884 / 2000 correct (44.20)

Epoch 0, Iteration 800, loss = 1.5660
Checking accuracy on validation set
Got 1264 / 2000 correct (63.20)

Epoch 0, Iteration 900, loss = 0.9950
Checking accuracy on validation set
Got 1444 / 2000 correct (72.20)

Epoch 0, Iteration 1000, loss = 0.6976
Check

0.9535

We clearly see that a deep CNN like our modified VGGNet can easily distinguish between 10 classes and is easily able to learn these extremely noisy digits with very high accuracy. Next we will explore a different dataset. The Caltech-101 dataset needs to be downloaded manually and placed inside JupyterHub; I couldn't find a way to get wget to work.

We chose this dataset because it is smaller than Caltech-256, which we couldn't even load properly, and it has a lot of variation that will hopefully make our deep network struggle. The images in this dataset come in many different sizes, so we first need to resize and normalize them. We then have even more freedom in how we reimplement VGGNet, because the images will be larger and we will be able to build a deeper network. First we will resize the images to 64x64 to see if we can get good learning and generalization on images that have had serious compression applied to them.

In [56]:
NUM_TRAIN = 7315
batch_size= 16 #too high and our code cannot run, but lower number is more erratic in its behaviour,
#hopefully running enough epochs can remove this vairability



#!unzip ./datasets/Caltech_101/caltech-101.zip -d ./datasets/Caltech_101

!tar -xzf ./datasets/Caltech_101/caltech-101/101_ObjectCategories.tar.gz -C ./datasets/Caltech_101/caltech-101

train_transform = transform = T.Compose([
                T.Resize(70),
                T.CenterCrop(64),
                T.ToTensor(),
                T.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
            ])

dataset = ImageFolder(root='./datasets/Caltech_101/caltech-101/101_ObjectCategories', transform=transform)

train_size = NUM_TRAIN
val_size = len(dataset) - NUM_TRAIN




train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

loader_train = DataLoader(train_dataset, batch_size, shuffle=True)
loader_val = DataLoader(val_dataset, batch_size, shuffle=False)

In [37]:
# Total number of images, then the number held out for validation (one per line).
print(len(dataset), val_size, sep='\n')

9144
1829


In [5]:
def check_accuracy_part34(loader, model):
    """
    Measure the classification accuracy of `model` over every batch in `loader`.

    Inputs:
    - loader: An iterable of (x, y) minibatches, normally a DataLoader. Its
      `.dataset` is inspected only to choose the printed header: a truthy
      `train` flag (CIFAR-style) or `split == "train"` (SVHN-style) means the
      held-out validation part of the training data; datasets exposing neither
      attribute (such as the Caltech-101 random-split subsets) are reported as
      the validation set.
    - model: A PyTorch Module mapping a batch x to per-class scores of shape
      (N, C).

    Returns: The fraction of correctly classified samples as a Python float,
    or 0.0 when the loader yields no samples.
    """
    dataset = getattr(loader, 'dataset', None)
    if hasattr(dataset, 'train'):
        from_training_data = bool(dataset.train)
    elif hasattr(dataset, 'split'):
        from_training_data = dataset.split == "train"
    else:
        from_training_data = True
    if from_training_data:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')

    # Evaluate wherever the model's weights live, so the result does not depend
    # on notebook-global device / dtype variables from another cell.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device, target_dtype = first_param.device, first_param.dtype
    else:
        target_device, target_dtype = torch.device('cpu'), torch.float32

    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=target_device, dtype=target_dtype)
            y = y.to(device=target_device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
    num_correct = int(num_correct)
    # An empty loader would otherwise divide by zero.
    acc = num_correct / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [60]:
class VGGNet(nn.Module):
    """VGG-style convolutional classifier with batch norm, LeakyReLU and dropout.

    Five convolutional stages (2, 2, 3, 3 and 4 convolutions of size 3x3 with
    64, 128, 256, 512 and 512 output channels) are each followed by a 2x2
    max-pool with stride 2. Dropout (p=0.5) is applied after the first four
    stages. A single fully connected layer turns the flattened feature map
    into class scores.

    Args:
        num_classes: number of scores produced per image (default 102).
        input_size: side length, in pixels, of the images the fully connected
            layer is sized for (default 64). The five poolings shrink each side
            to ``input_size // 32``, so the layer takes
            ``512 * (input_size // 32) ** 2`` features.

    Raises:
        ValueError: if ``input_size`` is below 32, because the feature map
            would be empty after the five poolings.
    """

    def __init__(self, num_classes=102, input_size=64):
        super(VGGNet, self).__init__()
        if input_size < 32:
            raise ValueError(
                'input_size must be at least 32 (got %r): five 2x2 poolings '
                'would leave an empty feature map' % (input_size,))

        self.conv1_1 = nn.Conv2d(3,64,(3,3),padding = 1)
        self.conv1_2 = nn.Conv2d(64,64,(3,3),padding = 1)
        # One stateless pooling module is shared by all five stages.
        self.maxpool = nn.MaxPool2d((2,2),stride = 2)

        self.conv2_1 = nn.Conv2d(64,128,(3,3),padding = 1)
        self.conv2_2 = nn.Conv2d(128,128,(3,3),padding = 1)

        self.conv3_1 = nn.Conv2d(128,256,(3,3),padding = 1)
        self.conv3_2 = nn.Conv2d(256,256,(3,3),padding = 1)
        self.conv3_3 = nn.Conv2d(256,256,(3,3),padding = 1)

        self.conv4_1 = nn.Conv2d(256,512,(3,3),padding = 1)
        self.conv4_2 = nn.Conv2d(512,512,(3,3),padding = 1)
        self.conv4_3 = nn.Conv2d(512,512,(3,3),padding = 1)

        self.conv5_1 = nn.Conv2d(512,512,(3,3),padding = 1)
        self.conv5_2 = nn.Conv2d(512,512,(3,3),padding = 1)
        self.conv5_3 = nn.Conv2d(512,512,(3,3),padding = 1)
        self.conv5_4 = nn.Conv2d(512,512,(3,3),padding = 1)

        # Each pooling halves (and floors) both sides, so five of them divide by 32.
        final_side = input_size // 32
        self.fc1 = nn.Linear(512 * final_side * final_side, num_classes)

        self.relu = nn.LeakyReLU(.1)

        self.bn1_1 = nn.BatchNorm2d(64)
        self.bn1_2 = nn.BatchNorm2d(64)

        self.bn2_1 = nn.BatchNorm2d(128)
        self.bn2_2 = nn.BatchNorm2d(128)

        self.bn3_1 = nn.BatchNorm2d(256)
        self.bn3_2 = nn.BatchNorm2d(256)
        self.bn3_3 = nn.BatchNorm2d(256)

        self.bn4_1 = nn.BatchNorm2d(512)
        self.bn4_2 = nn.BatchNorm2d(512)
        self.bn4_3 = nn.BatchNorm2d(512)

        self.bn5_1 = nn.BatchNorm2d(512)
        self.bn5_2 = nn.BatchNorm2d(512)
        self.bn5_3 = nn.BatchNorm2d(512)
        self.bn5_4 = nn.BatchNorm2d(512)

        self.dropout1 = nn.Dropout(.5)
        self.dropout2 = nn.Dropout(.5)
        self.dropout3 = nn.Dropout(.5)
        self.dropout4 = nn.Dropout(.5)

    def _stage(self, x, conv_bn_pairs):
        """Apply conv -> batch norm -> LeakyReLU for each pair, then the 2x2 max-pool."""
        for conv, bn in conv_bn_pairs:
            x = self.relu(bn(conv(x)))
        return self.maxpool(x)

    def forward(self,x):
        """Return the class scores, one row per image in the batch `x`."""
        x = self._stage(x, ((self.conv1_1, self.bn1_1),
                            (self.conv1_2, self.bn1_2)))
        x = self.dropout1(x)

        x = self._stage(x, ((self.conv2_1, self.bn2_1),
                            (self.conv2_2, self.bn2_2)))
        x = self.dropout2(x)

        x = self._stage(x, ((self.conv3_1, self.bn3_1),
                            (self.conv3_2, self.bn3_2),
                            (self.conv3_3, self.bn3_3)))
        x = self.dropout3(x)

        x = self._stage(x, ((self.conv4_1, self.bn4_1),
                            (self.conv4_2, self.bn4_2),
                            (self.conv4_3, self.bn4_3)))
        x = self.dropout4(x)

        # The last stage is not followed by dropout.
        x = self._stage(x, ((self.conv5_1, self.bn5_1),
                            (self.conv5_2, self.bn5_2),
                            (self.conv5_3, self.bn5_3),
                            (self.conv5_4, self.bn5_4)))

        # Flatten everything except the batch dimension for the linear layer.
        x = x.reshape(x.size(0), -1)

        scores = self.fc1(x)
        return scores

In [62]:
# Train a freshly initialized VGGNet with Adam (learning rate 1e-4) for 50 epochs.
learning_rate = 1e-4

model = VGGNet()

optimizer = optim.Adam(model.parameters(),lr = learning_rate)



# NOTE(review): print_every is not passed to train_part34; presumably that
# function reads it as a notebook global to decide how often to report — confirm.
print_every = 50
train_part34(model, optimizer, epochs=50)

Epoch 0, Iteration 50, loss = 3.9791
Checking accuracy on validation set
Got 90 / 1829 correct (4.92)

Epoch 0, Iteration 100, loss = 4.3987
Checking accuracy on validation set
Got 88 / 1829 correct (4.81)

Epoch 0, Iteration 150, loss = 4.9110
Checking accuracy on validation set
Got 89 / 1829 correct (4.87)

Epoch 0, Iteration 200, loss = 4.1788
Checking accuracy on validation set
Got 115 / 1829 correct (6.29)

Epoch 0, Iteration 250, loss = 3.7835
Checking accuracy on validation set
Got 191 / 1829 correct (10.44)

Epoch 0, Iteration 300, loss = 3.6928
Checking accuracy on validation set
Got 197 / 1829 correct (10.77)

Epoch 0, Iteration 350, loss = 2.7772
Checking accuracy on validation set
Got 294 / 1829 correct (16.07)

Epoch 0, Iteration 400, loss = 2.9726
Checking accuracy on validation set
Got 334 / 1829 correct (18.26)

Epoch 0, Iteration 450, loss = 2.4987
Checking accuracy on validation set
Got 330 / 1829 correct (18.04)

Epoch 1, Iteration 50, loss = 3.1785
Checking accuracy

KeyboardInterrupt: 

In this case we are testing whether our model, which was constructed mainly for images of size 64x64, can actually perform well at a much larger image size: every image in the Caltech dataset is resized to 128x128, which makes it more similar to ImageNet.

We can see we got a final accuracy of 67%; this is very good given the small number of epochs and the loss of information that comes with lowering the image sizes so much.

In [6]:
NUM_TRAIN = 7315
batch_size= 16 #too high and our code cannot run, but lower number is more erratic in its behaviour,
#hopefully running enough epochs can remove this vairability



#!unzip ./datasets/Caltech_101/caltech-101.zip -d ./datasets/Caltech_101

!tar -xzf ./datasets/Caltech_101/caltech-101/101_ObjectCategories.tar.gz -C ./datasets/Caltech_101/caltech-101

train_transform = transform = T.Compose([
                T.Resize(140),
                T.CenterCrop(128),
                T.ToTensor(),
                T.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
            ])

dataset = ImageFolder(root='./datasets/Caltech_101/caltech-101/101_ObjectCategories', transform=transform)

train_size = NUM_TRAIN
val_size = len(dataset) - NUM_TRAIN




train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

loader_train = DataLoader(train_dataset, batch_size, shuffle=True)
loader_val = DataLoader(val_dataset, batch_size, shuffle=False)

In [7]:
class VGGNet(nn.Module):
    """VGG-style convolutional classifier with batch norm, LeakyReLU and dropout.

    Five convolutional stages (2, 2, 3, 3 and 4 convolutions of size 3x3 with
    64, 128, 256, 512 and 512 output channels) are each followed by a 2x2
    max-pool with stride 2. Dropout (p=0.5) is applied after the first four
    stages. A single fully connected layer turns the flattened feature map
    into class scores.

    Args:
        num_classes: number of scores produced per image (default 102).
        input_size: side length, in pixels, of the images the fully connected
            layer is sized for (default 128). The five poolings shrink each
            side to ``input_size // 32``, so the layer takes
            ``512 * (input_size // 32) ** 2`` features.

    Raises:
        ValueError: if ``input_size`` is below 32, because the feature map
            would be empty after the five poolings.
    """

    def __init__(self, num_classes=102, input_size=128):
        super(VGGNet, self).__init__()
        if input_size < 32:
            raise ValueError(
                'input_size must be at least 32 (got %r): five 2x2 poolings '
                'would leave an empty feature map' % (input_size,))

        self.conv1_1 = nn.Conv2d(3,64,(3,3),padding = 1)
        self.conv1_2 = nn.Conv2d(64,64,(3,3),padding = 1)
        # One stateless pooling module is shared by all five stages.
        self.maxpool = nn.MaxPool2d((2,2),stride = 2)

        self.conv2_1 = nn.Conv2d(64,128,(3,3),padding = 1)
        self.conv2_2 = nn.Conv2d(128,128,(3,3),padding = 1)

        self.conv3_1 = nn.Conv2d(128,256,(3,3),padding = 1)
        self.conv3_2 = nn.Conv2d(256,256,(3,3),padding = 1)
        self.conv3_3 = nn.Conv2d(256,256,(3,3),padding = 1)

        self.conv4_1 = nn.Conv2d(256,512,(3,3),padding = 1)
        self.conv4_2 = nn.Conv2d(512,512,(3,3),padding = 1)
        self.conv4_3 = nn.Conv2d(512,512,(3,3),padding = 1)

        self.conv5_1 = nn.Conv2d(512,512,(3,3),padding = 1)
        self.conv5_2 = nn.Conv2d(512,512,(3,3),padding = 1)
        self.conv5_3 = nn.Conv2d(512,512,(3,3),padding = 1)
        self.conv5_4 = nn.Conv2d(512,512,(3,3),padding = 1)

        # Each pooling halves (and floors) both sides, so five of them divide by 32.
        final_side = input_size // 32
        self.fc1 = nn.Linear(512 * final_side * final_side, num_classes)

        self.relu = nn.LeakyReLU(.1)

        self.bn1_1 = nn.BatchNorm2d(64)
        self.bn1_2 = nn.BatchNorm2d(64)

        self.bn2_1 = nn.BatchNorm2d(128)
        self.bn2_2 = nn.BatchNorm2d(128)

        self.bn3_1 = nn.BatchNorm2d(256)
        self.bn3_2 = nn.BatchNorm2d(256)
        self.bn3_3 = nn.BatchNorm2d(256)

        self.bn4_1 = nn.BatchNorm2d(512)
        self.bn4_2 = nn.BatchNorm2d(512)
        self.bn4_3 = nn.BatchNorm2d(512)

        self.bn5_1 = nn.BatchNorm2d(512)
        self.bn5_2 = nn.BatchNorm2d(512)
        self.bn5_3 = nn.BatchNorm2d(512)
        self.bn5_4 = nn.BatchNorm2d(512)

        self.dropout1 = nn.Dropout(.5)
        self.dropout2 = nn.Dropout(.5)
        self.dropout3 = nn.Dropout(.5)
        self.dropout4 = nn.Dropout(.5)

    def _stage(self, x, conv_bn_pairs):
        """Apply conv -> batch norm -> LeakyReLU for each pair, then the 2x2 max-pool."""
        for conv, bn in conv_bn_pairs:
            x = self.relu(bn(conv(x)))
        return self.maxpool(x)

    def forward(self,x):
        """Return the class scores, one row per image in the batch `x`."""
        x = self._stage(x, ((self.conv1_1, self.bn1_1),
                            (self.conv1_2, self.bn1_2)))
        x = self.dropout1(x)

        x = self._stage(x, ((self.conv2_1, self.bn2_1),
                            (self.conv2_2, self.bn2_2)))
        x = self.dropout2(x)

        x = self._stage(x, ((self.conv3_1, self.bn3_1),
                            (self.conv3_2, self.bn3_2),
                            (self.conv3_3, self.bn3_3)))
        x = self.dropout3(x)

        x = self._stage(x, ((self.conv4_1, self.bn4_1),
                            (self.conv4_2, self.bn4_2),
                            (self.conv4_3, self.bn4_3)))
        x = self.dropout4(x)

        # The last stage is not followed by dropout.
        x = self._stage(x, ((self.conv5_1, self.bn5_1),
                            (self.conv5_2, self.bn5_2),
                            (self.conv5_3, self.bn5_3),
                            (self.conv5_4, self.bn5_4)))

        # Flatten everything except the batch dimension for the linear layer.
        x = x.reshape(x.size(0), -1)

        scores = self.fc1(x)
        return scores

In [8]:
# Train a freshly initialized VGGNet with Adam (learning rate 1e-5) for 10 epochs.
learning_rate = 1e-5

model = VGGNet()

optimizer = optim.Adam(model.parameters(),lr = learning_rate)



# NOTE(review): print_every is not passed to train_part34; presumably that
# function reads it as a notebook global to decide how often to report — confirm.
print_every = 50
train_part34(model, optimizer, epochs=10)

KeyboardInterrupt: 

Clearly on 128x128 we see divergence. Now let's try lowering the learning rate to see if we get convergence, and let's switch to SGD.


In [66]:
# Train a freshly initialized VGGNet with plain SGD (learning rate 5e-6,
# no momentum argument given) for 50 epochs.
learning_rate = 5e-6

model = VGGNet()

optimizer = optim.SGD(model.parameters(),lr = learning_rate)



# NOTE(review): print_every is not passed to train_part34; presumably that
# function reads it as a notebook global to decide how often to report — confirm.
print_every = 50
train_part34(model, optimizer, epochs=50)

Epoch 0, Iteration 50, loss = 4.4152
Checking accuracy on validation set
Got 18 / 1829 correct (0.98)

Epoch 0, Iteration 100, loss = 4.9215
Checking accuracy on validation set
Got 18 / 1829 correct (0.98)

Epoch 0, Iteration 150, loss = 5.2485
Checking accuracy on validation set
Got 18 / 1829 correct (0.98)

Epoch 0, Iteration 200, loss = 4.5472
Checking accuracy on validation set
Got 18 / 1829 correct (0.98)

Epoch 0, Iteration 250, loss = 4.4507
Checking accuracy on validation set
Got 18 / 1829 correct (0.98)

Epoch 0, Iteration 300, loss = 4.6448
Checking accuracy on validation set
Got 18 / 1829 correct (0.98)

Epoch 0, Iteration 350, loss = 4.5711
Checking accuracy on validation set
Got 18 / 1829 correct (0.98)

Epoch 0, Iteration 400, loss = 4.7879
Checking accuracy on validation set
Got 18 / 1829 correct (0.98)

Epoch 0, Iteration 450, loss = 4.2628
Checking accuracy on validation set
Got 18 / 1829 correct (0.98)

Epoch 0, Iteration 500, loss = 5.0066
Checking accuracy on valida

KeyboardInterrupt: 

We still see divergence, as if our model can't learn. At this point we were suspicious, because our model was clearly learning at 64x64 and it didn't make much sense that we would have an issue like this at 128x128. We had tried lowering the learning rate because, from what I remember from the seminars, if you have divergence you should lower your learning rate and it will be OK. So I decided to play around; these runs take extremely long at 128x128.

In [9]:
# Train a freshly initialized VGGNet with Adam (learning rate 1e-4) for 5 epochs.
learning_rate = 1e-4

model = VGGNet()

optimizer = optim.Adam(model.parameters(),lr = learning_rate)



# NOTE(review): print_every is not passed to train_part34; presumably that
# function reads it as a notebook global to decide how often to report — confirm.
print_every = 50
train_part34(model, optimizer, epochs=5)

Epoch 0, Iteration 50, loss = 4.8958
Checking accuracy on validation set
Got 149 / 1829 correct (8.15)

Epoch 0, Iteration 100, loss = 4.8066
Checking accuracy on validation set
Got 149 / 1829 correct (8.15)

Epoch 0, Iteration 150, loss = 4.2809
Checking accuracy on validation set
Got 119 / 1829 correct (6.51)

Epoch 0, Iteration 200, loss = 3.1665
Checking accuracy on validation set
Got 99 / 1829 correct (5.41)

Epoch 0, Iteration 250, loss = 3.7704
Checking accuracy on validation set
Got 216 / 1829 correct (11.81)

Epoch 0, Iteration 300, loss = 2.7441
Checking accuracy on validation set
Got 125 / 1829 correct (6.83)

Epoch 0, Iteration 350, loss = 2.4720
Checking accuracy on validation set
Got 291 / 1829 correct (15.91)

Epoch 0, Iteration 400, loss = 2.8527
Checking accuracy on validation set
Got 343 / 1829 correct (18.75)

Epoch 0, Iteration 450, loss = 3.4708
Checking accuracy on validation set
Got 371 / 1829 correct (20.28)

Epoch 1, Iteration 50, loss = 3.1043
Checking accurac

0.5817386550027337

We then decided to test a few different learning rates overnight (as it turns out, only one could get through 2 epochs in one night), and we found that increasing the learning rate actually gave better performance. It's possible that our learning rate was too low in the previous versions and that we just couldn't generalize fast enough with such a low learning rate and small batch size. But from here we can see that in only one epoch we have over 40% accuracy on a very hard dataset.