# Configs

In [2]:
import numpy as np
import random
import torch
import os
from datetime import datetime
import time

class Config:
    """Container for the hyper-parameters and run settings of this notebook.

    Every setting is a plain instance attribute assigned in ``__init__``;
    the constructor takes no arguments.
    """

    def __init__(self):
        # -- data --
        self.num_classes = 10

        # -- optimisation --
        self.batch_size = 128
        self.epochs = 200
        self.start_epoch = 0
        self.momentum = 0.9
        self.lr = 1e-1
        self.weight_decay = 5e-4
        self.label_smoothing = 0

        # -- run identity --
        self.model_name = 'resnet56_test2'

        # -- runtime / output --
        self.gpu = True
        # The log directory is simply named after the model.
        self.log_dir = '' + self.model_name


# Module-level settings object shared by the cells below.
config = Config()

# Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

from torch.autograd import Variable

def _weights_init(m):
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)

class LambdaLayer(nn.Module):
    """Wrap an arbitrary callable so it can be used as an ``nn.Module``."""

    def __init__(self, lambd):
        super().__init__()
        # Callable invoked on every forward pass.
        self.lambd = lambd

    def forward(self, x):
        """Return ``lambd(x)``."""
        fn = self.lambd
        return fn(x)

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    The shortcut branch is the identity unless ``stride != 1`` or the channel
    count changes, in which case ``option`` selects how it is adapted:

    * ``'A'`` - parameter-free: take every second pixel along both spatial
      dimensions and zero-pad the channel dimension by ``planes // 4`` on
      each side.
    * ``'B'`` - learned: strided 1x1 convolution followed by batch-norm.

    Any other ``option`` value leaves the shortcut as the identity.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1, option='A'):
        super().__init__()
        # Main branch: only the first convolution is strided.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        shape_changes = stride != 1 or in_planes != planes
        if shape_changes and option == 'A':
            # NOTE: the spatial subsampling step is fixed at 2 here and does
            # not depend on ``stride``.
            self.shortcut = LambdaLayer(
                lambda x: F.pad(
                    x[:, :, ::2, ::2],
                    (0, 0, 0, 0, planes // 4, planes // 4),
                    "constant",
                    0,
                )
            )
        elif shape_changes and option == 'B':
            out_planes = self.expansion * planes
            projection = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection, nn.BatchNorm2d(out_planes))
        else:
            # An empty Sequential acts as the identity mapping.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(hidden))
        out += self.shortcut(x)
        return F.relu(out)


class ResNet(nn.Module):
    """Three-stage residual network with a 3-channel input stem.

    Layout: a 3x3 conv stem with 16 channels, then three stages of residual
    blocks with 16, 32 and 64 base channels (the second and third stage start
    with a stride-2 block), global average pooling and a linear classifier.

    Args:
        block: Residual block class. It is called as
            ``block(in_planes, planes, stride)`` and must expose an integer
            ``expansion`` class attribute.
        num_blocks: Sequence of three ints, the number of blocks per stage.
        num_classes: Size of the classifier output. When ``None`` (default)
            the value is read from the module-level ``config.num_classes``,
            which keeps existing two-argument callers working.
    """

    def __init__(self, block, num_blocks, num_classes=None):
        super(ResNet, self).__init__()
        if num_classes is None:
            # Fall back to the notebook-wide setting.
            num_classes = config.num_classes

        # Running channel count; updated by _make_layer as stages are built.
        self.in_planes = 16

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        # The last stage emits 64 * expansion channels (64 when expansion == 1).
        self.linear = nn.Linear(64 * block.expansion, num_classes)

        self.apply(_weights_init)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: ``num_blocks`` blocks, only the first one strided."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the classifier outputs, one row per input sample."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Pool over the full spatial extent (height and width separately, so
        # non-square feature maps also collapse to a single value per channel).
        out = F.avg_pool2d(out, (out.size(2), out.size(3)))
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def resnet20():
    """Build the 20-layer network: 3 ``BasicBlock``s in each of the 3 stages."""
    blocks_per_stage = [3] * 3
    return ResNet(BasicBlock, blocks_per_stage)


def resnet32():
    """Build the 32-layer network: 5 ``BasicBlock``s in each of the 3 stages."""
    blocks_per_stage = [5] * 3
    return ResNet(BasicBlock, blocks_per_stage)


def resnet44():
    """Build the 44-layer network: 7 ``BasicBlock``s in each of the 3 stages."""
    blocks_per_stage = [7] * 3
    return ResNet(BasicBlock, blocks_per_stage)


def resnet56():
    """Build the 56-layer network: 9 ``BasicBlock``s in each of the 3 stages."""
    blocks_per_stage = [9] * 3
    return ResNet(BasicBlock, blocks_per_stage)


def resnet110():
    """Build the 110-layer network: 18 ``BasicBlock``s in each of the 3 stages."""
    blocks_per_stage = [18] * 3
    return ResNet(BasicBlock, blocks_per_stage)


def resnet1202():
    """Build the 1202-layer network: 200 ``BasicBlock``s in each of the 3 stages."""
    blocks_per_stage = [200] * 3
    return ResNet(BasicBlock, blocks_per_stage)

In [3]:
import time
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.tensorboard import SummaryWriter

def train(train_loader, model, criterion, optimizer, epoch, writer):
    """Train ``model`` for one epoch and log epoch-level metrics.

    Args:
        train_loader: Iterable of ``(input, target)`` batches; must support
            ``len()`` (used to average the metrics over batches).
        model: Network to optimise; put into training mode here.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer stepping the model parameters.
        epoch: Epoch index, used as the x-axis value for logging.
        writer: Object with an ``add_scalar(tag, value, step)`` method
            (a TensorBoard ``SummaryWriter`` in this notebook).

    Batches are moved to the current CUDA device, so a GPU is required.
    Prints the per-batch-averaged loss and top-1 accuracy and writes both to
    ``writer`` under ``Loss/train`` and ``Accuracy/train``.
    """
    losses = 0.
    accs = 0.

    # switch to train mode
    model.train()
    for images, target in train_loader:
        target = target.cuda()
        images = images.cuda()

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        output = output.float()
        loss = loss.float()

        # measure accuracy and record loss
        prec1 = accuracy(output.detach(), target)[0]
        losses += loss.item()
        accs += prec1.item()

    losses /= len(train_loader)
    accs /= len(train_loader)
    print('[Epoch {epoch}] Average Loss : {loss:.3f}, Average Accuracy : {acc:.3f}'
          .format(epoch = epoch , loss=losses, acc=accs))

    # Log the epoch average (not the last mini-batch loss) so the curve
    # matches the printed value and is comparable with "Loss/val".
    writer.add_scalar("Loss/train", losses, epoch)
    writer.add_scalar("Accuracy/train", accs, epoch)

def validate(val_loader, model, criterion, epoch, writer):
    """Evaluate ``model`` on ``val_loader`` and return the mean top-1 accuracy.

    The model is switched to eval mode and run without gradient tracking.
    Loss and accuracy are averaged over batches, written to ``writer`` as
    ``Loss/val`` and ``Accuracy/val`` at step ``epoch``, and the averaged
    accuracy (in percent) is returned. Batches are moved to CUDA.
    """
    model.eval()

    loss_sum = 0.
    acc_sum = 0.

    with torch.no_grad():
        for images, labels in val_loader:
            labels = labels.cuda()
            images = images.cuda()

            # forward pass and loss
            logits = model(images)
            batch_loss = criterion(logits, labels)

            logits = logits.float()
            batch_loss = batch_loss.float()

            # accumulate per-batch metrics
            top1 = accuracy(logits.data, labels)[0]
            loss_sum += batch_loss.item()
            acc_sum += top1.item()

        num_batches = len(val_loader)
        mean_loss = loss_sum / num_batches
        mean_acc = acc_sum / num_batches

        writer.add_scalar("Loss/val", mean_loss, epoch)
        writer.add_scalar("Accuracy/val", mean_acc, epoch)

    return mean_acc

def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for every k in ``topk``.

    Args:
        output: Score tensor with one row per sample and one column per
            class (the top-k search runs along dimension 1).
        target: 1-D tensor of class indices, one per sample.
        topk: Iterable of k values to evaluate.

    Returns:
        A list of 0-dim float tensors, one per entry of ``topk``, each
        holding the percentage of samples whose target is among the k
        highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch).
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.reshape(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # ``correct`` is not contiguous after the transpose, so ``view(-1)``
        # can raise for k > 1; ``reshape`` copies only when necessary.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


# Train

In [4]:
# Best validation top-1 accuracy (percent) seen so far.
best_prec1 = 0
model = torch.nn.DataParallel(resnet56())
print("total params : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
model.cuda()

writer = SummaryWriter(config.log_dir,filename_suffix=config.model_name)
cudnn.benchmark = True

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
# Training data: random horizontal flip and padded random crop as augmentation.
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(root='./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]),
    download=True),
    batch_size=config.batch_size, shuffle=True,
    num_workers=2, pin_memory=True)

# Validation data: no augmentation; batch size follows the shared config
# instead of a separate hard-coded literal.
val_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(root='./data', train=False, transform=transforms.Compose([
    transforms.ToTensor(),
    normalize,
    ])),
    batch_size=config.batch_size, shuffle=False,
    num_workers=2, pin_memory=True)

criterion = nn.CrossEntropyLoss().cuda()

optimizer = torch.optim.SGD(model.parameters(), config.lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

# Tie the cosine period to the configured number of epochs so the schedule
# stays correct when config.epochs is changed.
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.epochs, eta_min=0)

try:
    for epoch in range(config.start_epoch, config.epochs):
        # train for one epoch
        writer.add_scalar("lr", optimizer.param_groups[0]['lr'], epoch)
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        train(train_loader, model, criterion, optimizer, epoch, writer)
        lr_scheduler.step()

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch, writer)

        # remember best acc and save checkpoint
        # (the state dict is taken from the DataParallel wrapper)
        is_best = prec1 > best_prec1
        if is_best:
            torch.save(model.state_dict(), "best.pth")
            best_prec1 = prec1
finally:
    # Flush and close the event file even if training is interrupted
    # (e.g. by KeyboardInterrupt), so already-logged scalars are not lost.
    writer.flush()
    writer.close()

total parameters :  853018
Files already downloaded and verified
current lr 1.00000e-01
Epoch: [0][0/391]	Time 9.480 (9.480)	Data 4.945 (4.945)	Loss 5.6147 (5.6147)	Prec@1 11.719 (11.719)
Epoch: [0][50/391]	Time 0.049 (0.234)	Data 0.000 (0.097)	Loss 2.3709 (3.6014)	Prec@1 7.031 (10.585)
Epoch: [0][100/391]	Time 0.046 (0.142)	Data 0.001 (0.049)	Loss 2.2756 (2.9691)	Prec@1 14.844 (11.255)
Epoch: [0][150/391]	Time 0.048 (0.112)	Data 0.000 (0.033)	Loss 2.2613 (2.7420)	Prec@1 13.281 (11.905)
Epoch: [0][200/391]	Time 0.048 (0.096)	Data 0.000 (0.025)	Loss 2.2374 (2.6176)	Prec@1 16.406 (12.675)
Epoch: [0][250/391]	Time 0.048 (0.087)	Data 0.000 (0.020)	Loss 2.0639 (2.5094)	Prec@1 24.219 (13.969)
Epoch: [0][300/391]	Time 0.049 (0.080)	Data 0.000 (0.017)	Loss 1.7894 (2.4207)	Prec@1 28.906 (15.480)
Epoch: [0][350/391]	Time 0.050 (0.076)	Data 0.000 (0.014)	Loss 1.8703 (2.3534)	Prec@1 27.344 (16.676)
epoch 0 training time consumed: 28.85s
Test: [0/79]	Time 3.862 (3.862)	Loss 1.9695 (1.9695)	Prec@1 2

KeyboardInterrupt: 

# Check the results

In [None]:
# Best validation top-1 accuracy (in percent) recorded by the training loop.
print(best_prec1)
# Notebook line magic that launches TensorBoard.
# NOTE(review): the logdir is an empty string here, while the SummaryWriter
# above writes to config.log_dir - confirm this is the intended directory.
%tensorboard --logdir=''


93.82
