In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

import timeit

In [0]:
# Download the CIFAR-10 "python" archive from www.cs.toronto.edu and unpack it
# into the current working directory.
# NOTE(review): the dataset cell further down reads from '/content/', so this
# presumably has to run with /content as the working directory — confirm.
!wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
!tar xzf cifar-10-python.tar.gz

--2020-04-28 14:46:17--  http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Resolving www.cs.toronto.edu (www.cs.toronto.edu)... 128.100.3.30
Connecting to www.cs.toronto.edu (www.cs.toronto.edu)|128.100.3.30|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 170498071 (163M) [application/x-gzip]
Saving to: ‘cifar-10-python.tar.gz’


2020-04-28 14:46:29 (13.8 MB/s) - ‘cifar-10-python.tar.gz’ saved [170498071/170498071]



In [0]:
class ChunkSampler(sampler.Sampler):
    """Yield a contiguous run of dataset indices in increasing order.

    Arguments:
        num_samples: how many indices the sampler produces
        start: first index produced (defaults to 0)
    """
    def __init__(self, num_samples, start = 0):
        self.start = start
        self.num_samples = num_samples

    def __len__(self):
        # The sampler's length is exactly the number of indices requested.
        return self.num_samples

    def __iter__(self):
        first = self.start
        stop = first + self.num_samples
        return iter(range(first, stop))

# Split of the CIFAR-10 training file: the first NUM_TRAIN images are used for
# training and the following NUM_VAL images for validation.
NUM_TRAIN = 49000
NUM_VAL = 1000

# Root directory handed to torchvision; with download=False the extracted
# archive must already be present under it.
DATA_ROOT = '/content/'
# Mini-batch size shared by the three loaders.
BATCH_SIZE = 64

cifar10_train = dset.CIFAR10(DATA_ROOT, train=True, download=False,
                           transform=T.ToTensor())
# Fail immediately if the split does not fit: ChunkSampler emits raw indices,
# so an oversized split would otherwise only surface as an IndexError mid-epoch.
assert NUM_TRAIN + NUM_VAL <= len(cifar10_train), 'train/val split exceeds dataset size'
loader_train = DataLoader(cifar10_train, batch_size=BATCH_SIZE,
                          sampler=ChunkSampler(NUM_TRAIN, 0))

# Validation reads the same training file but starts right after the last
# training index, so the two index ranges do not overlap.
cifar10_val = dset.CIFAR10(DATA_ROOT, train=True, download=False,
                           transform=T.ToTensor())
loader_val = DataLoader(cifar10_val, batch_size=BATCH_SIZE,
                        sampler=ChunkSampler(NUM_VAL, NUM_TRAIN))

# The test file is iterated in full with the DataLoader's default ordering.
cifar10_test = dset.CIFAR10(DATA_ROOT, train=False, download=False,
                          transform=T.ToTensor())
loader_test = DataLoader(cifar10_test, batch_size=BATCH_SIZE)

In [0]:
# Number of training iterations between two consecutive loss printouts.
print_every = 100

# Tensor type used for float data that stays on the CPU.
dtype = torch.FloatTensor

# Helper for re-initialising a model: call it on a module (for example through
# `model.apply(reset)`) to restore that module's parameters to fresh values.
def reset(m):
    """Invoke `m.reset_parameters()` when `m` provides it; otherwise do nothing."""
    if not hasattr(m, 'reset_parameters'):
        return
    m.reset_parameters()

In [0]:
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single vector per sample."""
    def forward(self, x):
        """Return `x` reshaped to (N, -1), where N is the size of dimension 0.

        Arguments:
            x: tensor with at least one dimension; dimension 0 is kept as is.
        """
        # Reading only size(0) lifts the former 4-D (N, C, H, W) restriction, and
        # reshape (unlike view) also accepts non-contiguous inputs such as the
        # result of permute(), copying only when it has to.
        return x.reshape(x.size(0), -1)

In [0]:
# Verify that CUDA is properly configured and you have a GPU available.
# The check is printed explicitly: a bare expression in the middle of a cell is
# evaluated but never displayed.
print('CUDA available:', torch.cuda.is_available())

# Tensor type used for the model and its inputs. Fall back to the CPU float type
# when no GPU is present so that later `.type(gpu_dtype)` calls keep working.
gpu_dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

In [0]:
def train(model, loss_fn, optimizer, num_epochs = 1, loader = None, log_every = None):
    """Train `model` with mini-batch updates for `num_epochs` passes over a loader.

    Arguments:
        model: nn.Module to optimise.
        loss_fn: callable taking (scores, targets) and returning a scalar loss tensor.
        optimizer: torch optimizer constructed over `model`'s parameters.
        num_epochs: number of full passes over the loader.
        loader: iterable of (inputs, labels) batches; defaults to the
            module-level `loader_train`.
        log_every: print the loss every this many iterations (0 disables
            logging); defaults to the module-level `print_every`.

    Returns:
        None. Progress is reported with print().
    """
    if loader is None:
        loader = loader_train
    if log_every is None:
        log_every = print_every

    # Batches are moved to wherever the model's parameters live (and cast to
    # their dtype) so that data and weights can never end up on different devices.
    ref = next(model.parameters(), None)

    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, (x, y) in enumerate(loader):
            # Tensors track gradients themselves, so no Variable wrapper is needed.
            if ref is not None:
                x = x.to(device=ref.device, dtype=ref.dtype)
                y = y.to(device=ref.device)
            y = y.long()  # class indices are passed to the loss as int64

            scores = model(x)

            loss = loss_fn(scores, y)
            if log_every and (t + 1) % log_every == 0:
                # .item() extracts the Python float from the 0-dim loss tensor.
                print('t = %d, loss = %.4f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy(model, loader):
    """Print the classification accuracy of `model` over every batch in `loader`.

    Arguments:
        model: nn.Module mapping a batch of inputs to per-class scores of shape
            (batch, num_classes).
        loader: iterable of (inputs, labels) batches exposing `dataset.train`;
            that flag only selects which header line is printed.

    Returns:
        None. The counts and the percentage are reported with print().
    """
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)

    # Inputs follow the device and dtype of the model's own parameters.
    ref = next(model.parameters(), None)

    # `volatile=True` is ignored since PyTorch 0.4; no_grad() is what actually
    # stops autograd from recording the forward pass during evaluation.
    with torch.no_grad():
        for x, y in loader:
            if ref is not None:
                x = x.to(device=ref.device, dtype=ref.dtype)

            scores = model(x)
            # Compare on the CPU, where the loader's labels live.
            _, preds = scores.cpu().max(1)
            num_correct += (preds == y.cpu()).sum().item()
            num_samples += preds.size(0)

    # An empty loader reports 0% instead of dividing by zero.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [0]:
class Bottleneck(nn.Module):
    """Two-convolution unit whose output is concatenated with its own input.

    Arguments:
        in_dims: number of channels of the incoming feature map
        growth_rate: number of new channels the unit contributes
    """
    def __init__(self, in_dims, growth_rate):
        super().__init__()
        mid_dims = 4 * growth_rate
        stages = [
            # 1x1 convolution to 4 * growth_rate channels
            nn.BatchNorm2d(in_dims),
            nn.ReLU(),
            nn.Conv2d(in_dims, mid_dims, kernel_size=1),
            # 3x3 convolution (padding keeps the spatial size) down to growth_rate
            nn.BatchNorm2d(mid_dims),
            nn.ReLU(),
            nn.Conv2d(mid_dims, growth_rate, kernel_size=3, padding=1),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        """Return the new features stacked in front of `x` along the channel axis."""
        new_features = self.block(x)
        return torch.cat((new_features, x), dim=1)

class Transition(nn.Module):
    """Change the channel count with a 1x1 convolution, then halve the spatial size.

    Arguments:
        in_dims: number of channels of the incoming feature map
        out_dims: number of channels produced by the 1x1 convolution
    """
    def __init__(self, in_dims, out_dims):
        super().__init__()
        norm = nn.BatchNorm2d(in_dims)
        activation = nn.ReLU()
        squeeze = nn.Conv2d(in_dims, out_dims, kernel_size=1)
        pool = nn.AvgPool2d(2)
        self.trans = nn.Sequential(norm, activation, squeeze, pool)

    def forward(self, x):
        """Apply batch norm, ReLU, the 1x1 convolution and 2x2 average pooling."""
        reduced = self.trans(x)
        return reduced

In [0]:
# Step size handed to the optimizer.
learning_rate = 0.01

# Number of units in each of the four dense stages, in order.
nblocks = [6, 12, 24, 16]

class DenseNet(nn.Module):
    """Four dense stages separated by three transitions, followed by a linear classifier.

    Arguments:
        block: class called as block(in_dims, growth_rate) for every unit of a stage
        trans: class called as trans(in_dims, out_dims) between stages
        nblocks: units per stage; the first four entries are used
        growth_rate: channels each unit is accounted to add to its input
        theta: factor applied to the channel count at every transition
        num_classes: size of the final linear layer's output
    """
    def __init__(self, block, trans, nblocks, growth_rate=12, theta=0.5, num_classes=10):
        super().__init__()

        stem_dims = 2 * growth_rate
        channels = stem_dims
        pipeline = []

        # Stages 1-3 are each a dense stage followed by a transition. They are
        # stored as dense1/trans1 ... dense3/trans3 so attribute names and
        # registration order stay stable.
        for stage in (1, 2, 3):
            depth = nblocks[stage - 1]
            dense = self.dense_block(block, depth, channels, growth_rate)
            setattr(self, 'dense%d' % stage, dense)
            channels += int(depth * growth_rate)

            squeezed = int(channels * theta)
            transition = trans(channels, squeezed)
            setattr(self, 'trans%d' % stage, transition)
            channels = squeezed

            pipeline += [dense, transition]

        # The last dense stage has no transition after it.
        self.dense4 = self.dense_block(block, nblocks[3], channels, growth_rate)
        channels += int(nblocks[3] * growth_rate)
        pipeline.append(self.dense4)

        # The stem and head are built after the stages, matching the order in
        # which the layers have always been constructed.
        stem = nn.Conv2d(3, stem_dims, kernel_size=3, padding=1)
        head = [
            nn.BatchNorm2d(channels),
            nn.ReLU(),
            # NOTE(review): Linear(channels, ...) below only matches if this pool
            # leaves a 1x1 map, i.e. the map is 4x4 here — confirm for other inputs.
            nn.AvgPool2d(4),
            Flatten(),
            nn.Linear(channels, num_classes),
        ]
        self.dense_net = nn.Sequential(stem, *pipeline, *head)

    def dense_block(self, block, num_blocks, in_dims, growth_rate):
        """Stack `num_blocks` units; unit i is built with in_dims + i * growth_rate inputs."""
        units = [block(in_dims + i * growth_rate, growth_rate) for i in range(num_blocks)]
        return nn.Sequential(*units)

    def forward(self, x):
        """Run `x` through the whole pipeline and return the class scores."""
        return self.dense_net(x)

# Build the network and convert it to the GPU tensor type in one go
# (Module.type() returns the module itself).
model = DenseNet(Bottleneck, Transition, nblocks).type(gpu_dtype)

# Cross-entropy objective, converted to the same tensor type as the model.
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.type(gpu_dtype)

# Adam over every model parameter; `lr` is the optimizer's learning rate.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Ten passes over the training loader, then report validation accuracy.
train(model, loss_fn, optimizer, num_epochs=10)
check_accuracy(model, loader_val)

Starting epoch 1 / 10
t = 100, loss = 1.6864
t = 200, loss = 1.7229
t = 300, loss = 1.6110
t = 400, loss = 1.2373
t = 500, loss = 1.3040
t = 600, loss = 1.1853
t = 700, loss = 1.2401
Starting epoch 2 / 10
t = 100, loss = 0.9661
t = 200, loss = 1.0263
t = 300, loss = 1.0216
t = 400, loss = 0.8074
t = 500, loss = 0.9199
t = 600, loss = 0.7567
t = 700, loss = 0.8148
Starting epoch 3 / 10
t = 100, loss = 0.7195
t = 200, loss = 0.7940
t = 300, loss = 0.9014
t = 400, loss = 0.5596
t = 500, loss = 0.8449
t = 600, loss = 0.6544
t = 700, loss = 0.7426
Starting epoch 4 / 10
t = 100, loss = 0.5882
t = 200, loss = 0.6705
t = 300, loss = 0.7262
t = 400, loss = 0.4976
t = 500, loss = 0.7047
t = 600, loss = 0.6191
t = 700, loss = 0.6711
Starting epoch 5 / 10
t = 100, loss = 0.5267
t = 200, loss = 0.5543
t = 300, loss = 0.5690
t = 400, loss = 0.4282
t = 500, loss = 0.6193
t = 600, loss = 0.5305
t = 700, loss = 0.5804
Starting epoch 6 / 10
t = 100, loss = 0.4845
t = 200, loss = 0.4311
t = 300, loss = 0



Got 810 / 1000 correct (81.00)


In [0]:
# Keep the trained network as the final model. This binds a second name to the
# same object; no copy is made.
best_model = model
# Report accuracy on the loader built from the CIFAR-10 test file (train=False).
check_accuracy(best_model, loader_test)

Checking accuracy on test set




Got 8070 / 10000 correct (80.70)
