# Configs

In [8]:
import numpy as np
import random
import torch
import os
from datetime import datetime
import time

class Config(object):
    def __init__(self):    
        # input 
        self.num_classes = 10
        # training 
        self.batch_size = 128
        self.epochs = 100
        self.start_epoch = 0
        self.momentum = 0.9
        self.lr = 1e-1
        self.weight_decay = 5e-4
        self.label_smoothing = 0
        self.model_name = 'mobilnetv2'

        self.gpu = True
        self.log_dir = '' + self.model_name
config = Config()

# Model

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class LinearBottleNeck(nn.Module):
    def __init__(self, in_channels, out_channels, stride, t=6, class_num=100):
        super().__init__()

        self.residual = nn.Sequential(
            nn.Conv2d(in_channels, in_channels * t, 1),
            nn.BatchNorm2d(in_channels * t),
            nn.ReLU6(inplace=True),

            nn.Conv2d(in_channels * t, in_channels * t, 3, stride=stride, padding=1, groups=in_channels * t),
            nn.BatchNorm2d(in_channels * t),
            nn.ReLU6(inplace=True),

            nn.Conv2d(in_channels * t, out_channels, 1),
            nn.BatchNorm2d(out_channels)
        )

        self.stride = stride
        self.in_channels = in_channels
        self.out_channels = out_channels

    def forward(self, x):

        residual = self.residual(x)

        if self.stride == 1 and self.in_channels == self.out_channels:
            residual += x

        return residual

class MobileNetV2(nn.Module):

    def __init__(self, class_num=100):
        super().__init__()

        self.pre = nn.Sequential(
            nn.Conv2d(3, 32, 1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU6(inplace=True)
        )
        # reduce downsampling for cifar10
        self.stage1 = LinearBottleNeck(32, 16, 1, 1)
        self.stage2 = self._make_stage(2, 16, 24, 1, 6)
        self.stage3 = self._make_stage(3, 24, 32, 1, 6)
        self.stage4 = self._make_stage(4, 32, 64, 2, 6)
        self.stage5 = self._make_stage(3, 64, 96, 1, 6)
        self.stage6 = self._make_stage(3, 96, 160, 1, 6)
        self.stage7 = LinearBottleNeck(160, 320, 1, 6)

        self.conv1 = nn.Sequential(
            nn.Conv2d(320, 1280, 1),
            nn.BatchNorm2d(1280),
            nn.ReLU6(inplace=True)
        )

        self.conv2 = nn.Conv2d(1280, class_num, 1)

    def forward(self, x):
        x = self.pre(x)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)
        x = self.stage6(x)
        x = self.stage7(x)
        x = self.conv1(x)
        x = F.adaptive_avg_pool2d(x, 1)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)

        return x

    def _make_stage(self, repeat, in_channels, out_channels, stride, t):

        layers = []
        layers.append(LinearBottleNeck(in_channels, out_channels, stride, t))

        while repeat - 1:
            layers.append(LinearBottleNeck(out_channels, out_channels, 1, t))
            repeat -= 1

        return nn.Sequential(*layers)

def mobilenetv2():
    return MobileNetV2()

In [10]:
model = MobileNetV2()
count = 0
print("params : ",sum(p.numel() for p in model.parameters() if p.requires_grad))

2369380


In [11]:
def train(train_loader, model, criterion, optimizer, epoch, writer):
    losses = 0.
    accs = 0.

    # switch to train mode
    model.train()
    
    for i, (input, target) in enumerate(train_loader):

        target = target.cuda()
        input_var = input.cuda()
        target_var = target

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
        
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        output = output.float()
        loss = loss.float()
            # measure accuracy and record loss
        prec1 = accuracy(output.data, target)[0]
        losses += loss.item()
        accs += prec1.item()
        
    losses /= len(train_loader)
    accs /= len(train_loader)
    print('[Epoch {epoch}] Average Loss : {loss:.3f}, Average Accuracy : {acc:.3f}'
          .format(epoch = epoch , loss=losses, acc=accs))
    
    writer.add_scalar("Loss/train", losses, epoch)
    writer.add_scalar("Accuracy/train", accs, epoch)

def validate(val_loader, model, criterion, epoch, writer):
    losses = 0.
    accs = 0.

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            target = target.cuda()
            input_var = input.cuda()
            target_var = target.cuda()


            # compute output
            output = model(input_var)
            loss = criterion(output, target_var)

            output = output.float()
            loss = loss.float()

            # measure accuracy and record loss
            prec1 = accuracy(output.data, target)[0]
            losses += loss.item()
            accs += prec1.item()
            
        losses /= len(val_loader)
        accs /= len(val_loader)
        print('[Validation] : Average Loss {loss:.3f}, Average Accuracy {acc:.3f}'
            .format(loss=losses, acc=accs))
        
        writer.add_scalar("Loss/val", losses, epoch)
        writer.add_scalar("Accuracy/val", accs, epoch)
    return accs

def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [12]:
import os
import random 
import numpy as np
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.tensorboard import SummaryWriter

config = Config()
writer = SummaryWriter(config.log_dir, filename_suffix=config.model_name)

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(root='./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]),
    download=True),
    batch_size=config.batch_size, shuffle=True,
    num_workers=4, pin_memory=True)

val_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10(root='./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
        ])),
        batch_size=config.batch_size, shuffle=False,
        num_workers=4, pin_memory=True)

best_prec1 = 0
model = torch.nn.DataParallel(model)
model.cuda()

print("cuda:", torch.cuda.is_available())

criterion = nn.CrossEntropyLoss(label_smoothing=config.label_smoothing).cuda()

optimizer = torch.optim.SGD(model.parameters(), config.lr,
                                  momentum=config.momentum,
                                  weight_decay=config.weight_decay)
    
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=0)


for epoch in range(config.start_epoch, config.epochs):
    # train for one epoch
    print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
    train(train_loader, model, criterion, optimizer, epoch)
    lr_scheduler.step()

    # evaluate on validation set
    prec1 = validate(val_loader, model, criterion)

    # remember best prec@1 and save checkpoint
    is_best = prec1 > best_prec1
    if is_best:
        torch.save(model.state_dict(), "best.pth")
        best_prec1 = prec1
        
writer.flush()
writer.close()

Files already downloaded and verified
cuda: True
current lr 1.00000e-01
Epoch: [0][0/391]	Time 5.592 (5.592)	Data 4.931 (4.931)	Loss 4.7458 (4.7458)	Prec@1 0.000 (0.000)
Epoch: [0][50/391]	Time 0.374 (0.474)	Data 0.001 (0.097)	Loss 2.7813 (3.3069)	Prec@1 26.562 (20.113)
Epoch: [0][100/391]	Time 0.376 (0.425)	Data 0.001 (0.049)	Loss 2.7416 (3.0332)	Prec@1 32.812 (26.818)
Epoch: [0][150/391]	Time 0.374 (0.408)	Data 0.001 (0.033)	Loss 2.6571 (2.9139)	Prec@1 37.500 (30.324)
Epoch: [0][200/391]	Time 0.373 (0.399)	Data 0.000 (0.025)	Loss 2.6431 (2.8374)	Prec@1 40.625 (33.077)
Epoch: [0][250/391]	Time 0.375 (0.394)	Data 0.000 (0.020)	Loss 2.6864 (2.7842)	Prec@1 41.406 (35.256)
Epoch: [0][300/391]	Time 0.374 (0.391)	Data 0.001 (0.017)	Loss 2.4161 (2.7433)	Prec@1 53.906 (36.968)
Epoch: [0][350/391]	Time 0.374 (0.388)	Data 0.001 (0.014)	Loss 2.3411 (2.7078)	Prec@1 51.562 (38.709)
epoch 0 training time consumed: 153.20s
Test: [0/79]	Time 4.128 (4.128)	Loss 2.5149 (2.5149)	Prec@1 47.656 (47.656)
T

In [13]:
print(best_prec1)
%load_ext tensorboard
%tensorboard --logdir ""

94.82
