# MobileNetV2 in Cifar10 with Augmentation Techniques

## Importing Libraries,Training,Testing Datasets.

In [3]:
import torch
torch.cuda.empty_cache()
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np

from torch.utils.data import random_split

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform_Training = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_Testing = transforms.Compose([
    transforms.ToTensor(),
    
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_Training)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_Testing)

Files already downloaded and verified
Files already downloaded and verified


## Train and Test Loaders

In [4]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=5, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=5, shuffle=False)

## Building The Model


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride

        planes = expansion * in_planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = out + self.shortcut(x) if self.stride==1 else out
        return out

In [6]:
class MobileNetV2(nn.Module):
    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        cfg = [(1, 16, 1, 1),
               (6, 24, 2, 1),  
               (6, 32, 3, 2),
               (6, 64, 4, 2),
               (6, 96, 3, 1),
               (6, 160, 3, 2),
               (6, 320, 1, 1)]
        layers = []
        for expansion, out_planes, num_blocks, stride in cfg:
            strides = [stride] + [1]*(num_blocks-1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


In [7]:
def test():
    net = MobileNetV2()
    x = torch.randn(2, 3, 32, 32)  
    y = net(x)
    print(y.size()) 

test()


torch.Size([2, 10])


In [8]:
net = MobileNetV2(num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


## Training

In [9]:
torch.cuda.empty_cache()
for epoch in range(5):  
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:    
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')


[1,   200] loss: 2.329
[1,   400] loss: 2.194
[1,   600] loss: 2.148
[1,   800] loss: 2.104
[1,  1000] loss: 2.066
[1,  1200] loss: 2.044
[1,  1400] loss: 2.003
[1,  1600] loss: 1.969
[1,  1800] loss: 1.969
[1,  2000] loss: 1.938
[1,  2200] loss: 1.940
[1,  2400] loss: 1.909
[1,  2600] loss: 1.981
[1,  2800] loss: 1.920
[1,  3000] loss: 1.890
[1,  3200] loss: 1.837
[1,  3400] loss: 1.843
[1,  3600] loss: 1.869
[1,  3800] loss: 1.782
[1,  4000] loss: 1.778
[1,  4200] loss: 1.788
[1,  4400] loss: 1.816
[1,  4600] loss: 1.696
[1,  4800] loss: 1.731
[1,  5000] loss: 1.802
[1,  5200] loss: 1.736
[1,  5400] loss: 1.706
[1,  5600] loss: 1.721
[1,  5800] loss: 1.671
[1,  6000] loss: 1.677
[1,  6200] loss: 1.664
[1,  6400] loss: 1.687
[1,  6600] loss: 1.599
[1,  6800] loss: 1.639
[1,  7000] loss: 1.638
[1,  7200] loss: 1.627
[1,  7400] loss: 1.630
[1,  7600] loss: 1.545
[1,  7800] loss: 1.538
[1,  8000] loss: 1.583
[1,  8200] loss: 1.586
[1,  8400] loss: 1.536
[1,  8600] loss: 1.533
[1,  8800] 

In [10]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 70 %


In [12]:
torch.cuda.empty_cache()
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.760
[6,   400] loss: 0.693
[6,   600] loss: 0.804
[6,   800] loss: 0.799
[6,  1000] loss: 0.747
[6,  1200] loss: 0.783
[6,  1400] loss: 0.841
[6,  1600] loss: 0.751
[6,  1800] loss: 0.809
[6,  2000] loss: 0.768
[6,  2200] loss: 0.823
[6,  2400] loss: 0.758
[6,  2600] loss: 0.768
[6,  2800] loss: 0.739
[6,  3000] loss: 0.696
[6,  3200] loss: 0.802
[6,  3400] loss: 0.787
[6,  3600] loss: 0.788
[6,  3800] loss: 0.714
[6,  4000] loss: 0.746
[6,  4200] loss: 0.817
[6,  4400] loss: 0.751
[6,  4600] loss: 0.771
[6,  4800] loss: 0.776
[6,  5000] loss: 0.751
[6,  5200] loss: 0.722
[6,  5400] loss: 0.829
[6,  5600] loss: 0.715
[6,  5800] loss: 0.811
[6,  6000] loss: 0.769
[6,  6200] loss: 0.770
[6,  6400] loss: 0.781
[6,  6600] loss: 0.744
[6,  6800] loss: 0.712
[6,  7000] loss: 0.671
[6,  7200] loss: 0.691
[6,  7400] loss: 0.785
[6,  7600] loss: 0.786
[6,  7800] loss: 0.816
[6,  8000] loss: 0.709
[6,  8200] loss: 0.687
[6,  8400] loss: 0.722
[6,  8600] loss: 0.795
[6,  8800] 

In [13]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 77 %


In [14]:
torch.cuda.empty_cache()
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.559
[6,   400] loss: 0.563
[6,   600] loss: 0.618
[6,   800] loss: 0.593
[6,  1000] loss: 0.553
[6,  1200] loss: 0.544
[6,  1400] loss: 0.559
[6,  1600] loss: 0.555
[6,  1800] loss: 0.541
[6,  2000] loss: 0.527
[6,  2200] loss: 0.549
[6,  2400] loss: 0.516
[6,  2600] loss: 0.554
[6,  2800] loss: 0.512
[6,  3000] loss: 0.538
[6,  3200] loss: 0.522
[6,  3400] loss: 0.559
[6,  3600] loss: 0.541
[6,  3800] loss: 0.536
[6,  4000] loss: 0.552
[6,  4200] loss: 0.601
[6,  4400] loss: 0.529
[6,  4600] loss: 0.548
[6,  4800] loss: 0.544
[6,  5000] loss: 0.562
[6,  5200] loss: 0.535
[6,  5400] loss: 0.529
[6,  5600] loss: 0.548
[6,  5800] loss: 0.504
[6,  6000] loss: 0.584
[6,  6200] loss: 0.576
[6,  6400] loss: 0.547
[6,  6600] loss: 0.545
[6,  6800] loss: 0.498
[6,  7000] loss: 0.485
[6,  7200] loss: 0.513
[6,  7400] loss: 0.574
[6,  7600] loss: 0.522
[6,  7800] loss: 0.522
[6,  8000] loss: 0.557
[6,  8200] loss: 0.562
[6,  8400] loss: 0.557
[6,  8600] loss: 0.560
[6,  8800] 

In [15]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 79 %


In [16]:
torch.cuda.empty_cache()
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.436
[6,   400] loss: 0.421
[6,   600] loss: 0.483
[6,   800] loss: 0.380
[6,  1000] loss: 0.483
[6,  1200] loss: 0.489
[6,  1400] loss: 0.436
[6,  1600] loss: 0.441
[6,  1800] loss: 0.449
[6,  2000] loss: 0.389
[6,  2200] loss: 0.441
[6,  2400] loss: 0.393
[6,  2600] loss: 0.428
[6,  2800] loss: 0.425
[6,  3000] loss: 0.410
[6,  3200] loss: 0.407
[6,  3400] loss: 0.382
[6,  3600] loss: 0.416
[6,  3800] loss: 0.438
[6,  4000] loss: 0.451
[6,  4200] loss: 0.454
[6,  4400] loss: 0.435
[6,  4600] loss: 0.415
[6,  4800] loss: 0.398
[6,  5000] loss: 0.414
[6,  5200] loss: 0.428
[6,  5400] loss: 0.447
[6,  5600] loss: 0.417
[6,  5800] loss: 0.451
[6,  6000] loss: 0.443
[6,  6200] loss: 0.427
[6,  6400] loss: 0.483
[6,  6600] loss: 0.405
[6,  6800] loss: 0.452
[6,  7000] loss: 0.442
[6,  7200] loss: 0.510
[6,  7400] loss: 0.414
[6,  7600] loss: 0.376
[6,  7800] loss: 0.438
[6,  8000] loss: 0.446
[6,  8200] loss: 0.466
[6,  8400] loss: 0.463
[6,  8600] loss: 0.492
[6,  8800] 

## Final Evaluation

In [17]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 83 %
