# Configs


In [1]:
import numpy as np
import random
import torch
import os
from datetime import datetime
import time

class Config(object):
    """Central place for the hyper-parameters and run settings of this notebook.

    Attributes:
        num_classes: number of target classes (output width of the classifier).
        batch_size: mini-batch size used by the data loaders.
        epochs / start_epoch: bounds of the training loop.
        momentum, lr, weight_decay: SGD optimizer settings.
        label_smoothing: smoothing factor handed to the cross-entropy loss.
        model_name: identifier of the run.
        gpu: whether tensors should be moved to CUDA.
        log_dir: directory the summary writer logs to (equal to ``model_name``).
    """

    def __init__(self):
        # ---- data ----
        self.num_classes = 10

        # ---- optimisation ----
        self.batch_size = 128
        self.epochs = 100
        self.start_epoch = 0
        self.momentum = 0.9
        self.lr = 1e-1
        self.weight_decay = 5e-4
        self.label_smoothing = 0

        # ---- run identity / runtime ----
        self.model_name = 'vgg_nomaxpool'
        self.gpu = True
        # The log directory is simply the model name (no prefix is prepended).
        self.log_dir = self.model_name


# Single shared settings object read by the cells below.
config = Config()

# Model

In [2]:

import torch
import torch.nn as nn
import torch.nn.functional as F

class VggNet(nn.Module):
    """VGG-style CNN: a conv/pool feature extractor followed by an MLP classifier.

    Args:
        features: layer specification, e.g. ``[64, 64, 'M', ...]``. An int adds
            a 3x3 convolution (padding 1) with that many output channels,
            followed by BatchNorm and ReLU; the string ``'M'`` adds a 2x2
            max-pool with stride 2.
        num_classes: number of output logits. ``None`` (the default) falls back
            to the notebook-level ``config.num_classes``, which is what this
            class always used before the parameter existed.
        input_size: height/width of the square input images. It is only used
            to size the first linear layer. The default of 32 together with
            three ``'M'`` entries and a final 512-channel conv reproduces the
            previously hard-coded ``512*4*4``.
    """

    def __init__(self, features, num_classes=None, input_size=32):
        super().__init__()
        if num_classes is None:
            num_classes = config.num_classes

        # Dry pass over the spec to learn the size of the flattened feature map.
        # The 3x3/padding-1 convolutions keep the spatial size; every 2x2/stride-2
        # max-pool halves it (rounding down).
        out_channels, spatial = 3, input_size
        for spec in features:
            if spec == 'M':
                spatial //= 2
            else:
                out_channels = spec
        flat_dim = out_channels * spatial * spatial

        # The classifier is created before the feature extractor so that the
        # submodule registration order (and therefore parameter order) stays
        # the same as before.
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(flat_dim, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, num_classes),
        )

        # Build the feature extractor from the 'features' parameter. A local
        # list is used so that no plain Python list is ever stored on the module.
        layers = []
        in_channels = 3
        for spec in features:
            if spec == 'M':  # max-pooling layer
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:  # convolution + batch norm + ReLU
                layers.append(nn.Conv2d(in_channels, spec, kernel_size=3, padding=1))
                layers.append(nn.BatchNorm2d(spec))
                layers.append(nn.ReLU(inplace=True))
                in_channels = spec
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.layers(x)
        # Flatten everything but the batch dimension; unlike ``view`` this also
        # works when the feature map is not contiguous.
        x = torch.flatten(x, 1)
        return self.classifier(x)

def train(train_loader, model, criterion, optimizer, epoch, writer):
    """Run one training epoch and log the epoch-level loss and top-1 accuracy.

    Args:
        train_loader: iterable yielding ``(inputs, targets)`` batches.
        model: network to optimise. Batches are moved to the device of its
            first parameter (CUDA if available when it has no parameters).
        criterion: loss callable ``criterion(output, target)``. It is assumed
            to return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``) so that it can be re-weighted per sample.
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: epoch index; used in the printed summary and as the logging step.
        writer: object exposing ``add_scalar(tag, value, step)``.
    """
    # Follow the model's device instead of hard-coding CUDA, so the same loop
    # runs on CPU as well.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    loss_sum = 0.0
    acc_sum = 0.0
    num_samples = 0

    # switch to train mode
    model.train()

    for images, target in train_loader:
        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # forward pass and loss
        output = model(images)
        loss = criterion(output, target)

        # compute gradient and do the optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate plain Python floats weighted by the batch size, so that a
        # shorter final batch does not get over-weighted in the epoch average.
        batch_size = target.size(0)
        acc = accuracy(output.detach().float(), target)[0]
        loss_sum += loss.item() * batch_size
        acc_sum += float(acc) * batch_size
        num_samples += batch_size

    # An empty loader reports 0.0 instead of raising ZeroDivisionError.
    denom = max(num_samples, 1)
    losses = loss_sum / denom
    accs = acc_sum / denom
    print('[Epoch {epoch}] Average Loss : {loss:.3f}, Average Accuracy : {acc:.3f}'
          .format(epoch=epoch, loss=losses, acc=accs))

    writer.add_scalar("Loss/train", losses, epoch)
    writer.add_scalar("Accuracy/train", accs, epoch)

def validate(val_loader, model, criterion, epoch, writer):
    """Evaluate ``model`` on ``val_loader`` and log loss and top-1 accuracy.

    Args:
        val_loader: iterable yielding ``(inputs, targets)`` batches.
        model: network to evaluate. Batches are moved to the device of its
            first parameter (CUDA if available when it has no parameters).
        criterion: loss callable ``criterion(output, target)``. It is assumed
            to return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``) so that it can be re-weighted per sample.
        epoch: epoch index used as the logging step.
        writer: object exposing ``add_scalar(tag, value, step)``.

    Returns:
        float: top-1 accuracy in percent over all samples seen
        (0.0 when the loader yields nothing).
    """
    # Follow the model's device instead of hard-coding CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    loss_sum = 0.0
    acc_sum = 0.0
    num_samples = 0

    # switch to evaluate mode
    model.eval()

    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for images, target in val_loader:
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            output = model(images)
            loss = criterion(output, target)

            # Weight every batch by its size so that a short final batch does
            # not distort the reported accuracy, which drives checkpoint selection.
            batch_size = target.size(0)
            prec1 = accuracy(output.detach().float(), target)[0]
            loss_sum += loss.item() * batch_size
            acc_sum += float(prec1) * batch_size
            num_samples += batch_size

    # An empty loader reports 0.0 instead of raising ZeroDivisionError.
    denom = max(num_samples, 1)
    losses = loss_sum / denom
    accs = acc_sum / denom
    print('[Validation] : Average Loss {loss:.3f}, Average Accuracy {acc:.3f}'
          .format(loss=losses, acc=accs))

    writer.add_scalar("Loss/val", losses, epoch)
    writer.add_scalar("Accuracy/val", accs, epoch)

    return accs
# top-k accuracy
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy, in percent, for every k in ``topk``.

    Args:
        output: score tensor; the top-k is taken along dimension 1, so it is
            indexed as (sample, class).
        target: tensor of class indices whose first dimension is the batch.
        topk: iterable of k values to evaluate.

    Returns:
        list of 0-dim float tensors, one per entry of ``topk``, each holding
        the percentage of samples whose target is among the k highest scores.
        For an empty batch every entry is 0.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        if batch_size == 0:
            # Nothing to score; avoid dividing by zero below.
            return [torch.zeros((), device=output.device) for _ in topk]

        # pred has one row per rank (best first) and one column per sample.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape, not view: for k > 1 the slice of the transposed tensor
            # is not contiguous and ``view(-1)`` raises a RuntimeError.
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

# Initialize Model

In [3]:
# Layer layout handed to VggNet: integers are convolution output channels,
# 'M' marks a max-pooling layer.
features = [
    64, 64, 128, 128, 256, 256, 256, 256, 'M',
    512, 512, 512, 512, 'M',
    512, 512, 512, 512, 'M',
]
model = VggNet(features)
count = 0  # not read anywhere in the visible notebook

# Number of trainable parameters, followed by the module structure.
print(
    "total params :",
    sum(param.numel() for param in model.parameters() if param.requires_grad),
)
print(model)

24497994


# Load Dataset

In [None]:
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.tensorboard import SummaryWriter

# Per-channel normalisation applied to both splits.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training split: random flip and padded random crop as augmentation.
# The dataset is downloaded into ./data when it is not there yet.
train_set = datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]),
    download=True,
)
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=config.batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

# Validation split: no augmentation and a fixed order.
val_set = datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ]),
)
val_loader = torch.utils.data.DataLoader(
    val_set,
    batch_size=config.batch_size,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# TensorBoard writer that the train/validate functions log into.
writer = SummaryWriter(config.log_dir, filename_suffix=config.model_name)

# Tensor with the shape of a single 3x32x32 image batch; its values are not
# initialised here, and it is moved to the GPU when the config asks for it.
input_tensor = torch.Tensor(1, 3, 32, 32)
if config.gpu:
    input_tensor = input_tensor.cuda()

Files already downloaded and verified


# Train

In [6]:
print("cuda:", torch.cuda.is_available())
best_prec1 = 0
# Wrap only once, so that re-running this cell does not nest DataParallel wrappers.
if not isinstance(model, torch.nn.DataParallel):
    model = torch.nn.DataParallel(model)
model.cuda()

# CrossEntropyLoss with smoothing
criterion = nn.CrossEntropyLoss(label_smoothing=config.label_smoothing).cuda()
# Choose optimizer
optimizer = torch.optim.SGD(model.parameters(), config.lr,
                            momentum=config.momentum,
                            weight_decay=config.weight_decay)
# Learning rate scheduler: anneal over the configured number of epochs instead
# of a hard-coded 100, so the schedule follows config.epochs when it changes.
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=config.epochs, eta_min=0)

for epoch in range(config.start_epoch, config.epochs):
    # print current learning rate
    print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch, writer)
    # learning rate decay
    lr_scheduler.step()
    # evaluate on validation set
    prec1 = validate(val_loader, model, criterion, epoch, writer)

    # remember best accuracy and save a checkpoint whenever it improves
    is_best = prec1 > best_prec1
    if is_best:
        # NOTE: this is the state dict of the DataParallel wrapper, so its keys
        # carry the wrapper's "module." prefix.
        torch.save(model.state_dict(), "best.pth")
        best_prec1 = prec1

# Push pending events to disk so that TensorBoard shows the complete run.
writer.flush()


cuda: True
current lr 1.00000e-01
Epoch: [0][0/391]	Time 10.284 (10.284)	Loss 2.3382 (2.3382)	Prec@1 12.500 (12.500)
Epoch: [0][50/391]	Time 0.250 (0.456)	Loss 2.3464 (2.3703)	Prec@1 12.500 (10.187)
Epoch: [0][100/391]	Time 0.263 (0.359)	Loss 2.2943 (2.3412)	Prec@1 14.062 (10.682)
Epoch: [0][150/391]	Time 0.262 (0.326)	Loss 2.2826 (2.3273)	Prec@1 15.625 (10.865)
Epoch: [0][200/391]	Time 0.260 (0.310)	Loss 2.2997 (2.3193)	Prec@1 10.938 (10.899)
Epoch: [0][250/391]	Time 0.259 (0.300)	Loss 2.2907 (2.3145)	Prec@1 9.375 (10.751)
Epoch: [0][300/391]	Time 0.260 (0.293)	Loss 2.2729 (2.3092)	Prec@1 10.156 (11.054)
Epoch: [0][350/391]	Time 0.260 (0.289)	Loss 2.1243 (2.2925)	Prec@1 11.719 (11.699)
epoch 0 training time consumed: 114.66s
Test: [0/79]	Time 4.049 (4.049)	Loss 2.1663 (2.1663)	Prec@1 15.625 (15.625)
Test: [50/79]	Time 0.086 (0.164)	Loss 2.1181 (2.1963)	Prec@1 19.531 (13.925)
 * Prec@1 13.930
current lr 9.99753e-02
Epoch: [1][0/391]	Time 5.387 (5.387)	Loss 2.1449 (2.1449)	Prec@1 16.406

# Check the results

In [7]:
print(best_prec1)
%load_ext tensorboard
%tensorboard --logdir "/content/vgg_nomaxpool"

95.11
