In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision.datasets as dset
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader, sampler

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from models import AlexNet, VGGNet, GoogLeNet
from utils import utils

In [2]:
def reset(m):
    """Re-initialize ``m`` in place when it exposes ``reset_parameters``.

    Meant to be handed to ``Module.apply`` so it is invoked on each
    submodule in turn. Objects without a ``reset_parameters`` attribute
    are left untouched.

    Arguments:
        m: the object (typically an ``nn.Module``) to re-initialize
    """
    if not hasattr(m, 'reset_parameters'):
        return
    m.reset_parameters()

class ChunkSampler(sampler.Sampler):
    """Yields a contiguous run of dataset indices in increasing order.

    Arguments:
        num_samples: how many consecutive indices to produce
        start: first index of the run (defaults to 0)
    """
    def __init__(self, num_samples, start=0):
        self.start = start
        self.num_samples = num_samples

    def __len__(self):
        # The sampler always produces exactly ``num_samples`` indices.
        return self.num_samples

    def __iter__(self):
        # Half-open interval [first, last) of indices to visit.
        first = self.start
        last = first + self.num_samples
        return iter(range(first, last))

In [3]:
# Tunable settings for the data pipeline, kept in one place instead of being
# repeated as literals in every call below.
DATA_DIR = 'data/'
BATCH_SIZE = 64
NUM_TRAIN = 49000
NUM_VAL = 1000

# Convert images to tensors, then normalize each RGB channel with
# mean 0.5 and std 0.5.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
cifar10_train = dset.CIFAR10(DATA_DIR, train=True, download=True,
                             transform=transform)
cifar10_test = dset.CIFAR10(DATA_DIR, train=False, download=True,
                            transform=transform)

# The validation chunk is carved out of the tail of the training set, so the
# two chunks together must fit inside it; otherwise the samplers would yield
# out-of-range indices.
assert NUM_TRAIN + NUM_VAL <= len(cifar10_train), (
    "NUM_TRAIN + NUM_VAL exceeds the size of the CIFAR-10 training set")

# Training uses indices [0, NUM_TRAIN); validation uses the next NUM_VAL
# indices of the same dataset. Both are read sequentially (no shuffling).
train_loader = DataLoader(cifar10_train, batch_size=BATCH_SIZE,
                          sampler=ChunkSampler(NUM_TRAIN, 0))
val_loader = DataLoader(cifar10_train, batch_size=BATCH_SIZE,
                        sampler=ChunkSampler(NUM_VAL, NUM_TRAIN))
test_loader = DataLoader(cifar10_test, batch_size=BATCH_SIZE)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Ask PyTorch whether CUDA is available; as the last expression of the cell,
# the boolean result is displayed as the cell output.
torch.cuda.is_available()

True

In [5]:
# Tensor type handed to the models and loss functions in the cells below.
# Use the CUDA float tensor type when a GPU is available and fall back to the
# CPU float tensor type otherwise, so the notebook also runs without a GPU.
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

# AlexNet

In [6]:
# Build the AlexNet model from the project's models package.
# NOTE(review): the first argument (10) is presumably the number of CIFAR-10
# classes — confirm against models.AlexNet.
alexnet = AlexNet.AlexNet(10, dtype)
# Cross-entropy loss, converted to the configured tensor type.
alexnet_loss_fn = nn.CrossEntropyLoss().type(dtype)
# Adam over the model's parameters with a learning rate of 1e-4. The optimizer
# must be created after the model because it captures the parameter objects.
alexnet_optimizer = optim.Adam(alexnet.parameters(),lr=1e-4)

In [7]:
# Seed PyTorch's random number generators on all devices (CPU and CUDA)
# before re-initializing, so the weights drawn by reset() are reproducible
# whether the model lives on the GPU or on the CPU.
torch.manual_seed(12345)
# Call reset() on every submodule, which re-runs reset_parameters() wherever
# it is defined. As the last expression of the cell, the returned model is
# displayed as the cell output.
alexnet.apply(reset)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(5, 5))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=True)
    (1): Linear(in_features=512, out_features=512, bias=True)
    (2): Linear(in_features=512, out_features=10, bias=T

In [8]:
# Wrap the model and its optimizer in the project's training helper.
# NOTE(review): 'trainier' looks like a misspelling of 'trainer', but it has to
# match the name defined in utils — confirm there before renaming.
alexnet_trainer = utils.trainier(alexnet, alexnet_optimizer, dtype)

In [9]:
# Train AlexNet on the training loader while evaluating on the validation
# loader. The recorded output ("Epoch 1/10 ...") indicates the first argument
# is the number of epochs.
# NOTE(review): the trailing 1 is presumably a logging/print interval in
# epochs — confirm against utils.trainier.train.
alexnet_trainer.train(10, alexnet_loss_fn, train_loader, val_loader, 1)

Epoch 1/10 => Time: 31.65sec, Train avg loss: 1.7859, train acc: 44.34%, val acc: 45.00%
Epoch 2/10 => Time: 30.99sec, Train avg loss: 1.4201, train acc: 52.89%, val acc: 52.20%
Epoch 3/10 => Time: 30.61sec, Train avg loss: 1.2540, train acc: 58.16%, val acc: 54.80%
Epoch 4/10 => Time: 29.99sec, Train avg loss: 1.1332, train acc: 62.59%, val acc: 57.20%
Epoch 5/10 => Time: 30.05sec, Train avg loss: 1.0203, train acc: 66.38%, val acc: 59.90%
Epoch 6/10 => Time: 29.59sec, Train avg loss: 0.9084, train acc: 69.83%, val acc: 59.40%
Epoch 7/10 => Time: 29.99sec, Train avg loss: 0.7991, train acc: 72.37%, val acc: 60.80%
Epoch 8/10 => Time: 30.12sec, Train avg loss: 0.7173, train acc: 73.38%, val acc: 58.90%
Epoch 9/10 => Time: 30.08sec, Train avg loss: 0.6373, train acc: 76.42%, val acc: 59.40%
Epoch 10/10 => Time: 30.12sec, Train avg loss: 0.5478, train acc: 79.86%, val acc: 60.40%


# VGGNet

In [11]:
# Build the VGG-11 model from the project's models package.
# NOTE(review): the first argument (10) is presumably the number of CIFAR-10
# classes — confirm against models.VGGNet.
vggnet = VGGNet.VGGNet11(10, dtype)
# Cross-entropy loss, converted to the configured tensor type.
vggnet_loss_fn = nn.CrossEntropyLoss().type(dtype)
# Adam over the model's parameters with a learning rate of 1e-4.
vggnet_optimizer = optim.Adam(vggnet.parameters(), lr=1e-4)
# Seed PyTorch's random number generators on all devices (CPU and CUDA)
# before re-initializing, so the weights drawn by reset() are reproducible
# whether the model lives on the GPU or on the CPU.
torch.manual_seed(12345)
# Call reset() on every submodule, which re-runs reset_parameters() wherever
# it is defined. As the last expression of the cell, the returned model is
# displayed as the cell output.
vggnet.apply(reset)

VGGNet11(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (16): Conv2d(512, 512,

In [12]:
# Wrap the model and its optimizer in the project's training helper.
# NOTE(review): 'trainier' looks like a misspelling of 'trainer', but it has to
# match the name defined in utils — confirm there before renaming.
vggnet_trainer = utils.trainier(vggnet, vggnet_optimizer, dtype)

In [13]:
# Train VGG-11 on the training loader while evaluating on the validation
# loader. The recorded output ("Epoch 1/10 ...") indicates the first argument
# is the number of epochs.
# NOTE(review): the trailing 1 is presumably a logging/print interval in
# epochs — confirm against utils.trainier.train.
vggnet_trainer.train(10, vggnet_loss_fn, train_loader, val_loader, 1)

Epoch 1/10 => Time: 35.78sec, Train avg loss: 1.7181, train acc: 47.35%, val acc: 49.00%
Epoch 2/10 => Time: 36.08sec, Train avg loss: 1.3030, train acc: 59.24%, val acc: 59.80%
Epoch 3/10 => Time: 37.01sec, Train avg loss: 1.0878, train acc: 65.30%, val acc: 63.30%
Epoch 4/10 => Time: 36.25sec, Train avg loss: 0.9309, train acc: 70.05%, val acc: 65.30%
Epoch 5/10 => Time: 36.66sec, Train avg loss: 0.7994, train acc: 76.06%, val acc: 68.60%
Epoch 6/10 => Time: 36.42sec, Train avg loss: 0.6818, train acc: 79.07%, val acc: 71.00%
Epoch 7/10 => Time: 37.16sec, Train avg loss: 0.5793, train acc: 81.18%, val acc: 70.80%
Epoch 8/10 => Time: 37.28sec, Train avg loss: 0.4818, train acc: 84.15%, val acc: 71.30%
Epoch 9/10 => Time: 37.12sec, Train avg loss: 0.3983, train acc: 85.24%, val acc: 70.80%
Epoch 10/10 => Time: 36.99sec, Train avg loss: 0.3273, train acc: 85.09%, val acc: 71.30%
