# MobileNetV2 in Cifar10

## Importing Libraries,Training,Testing Datasets.

In [1]:
import torch
torch.cuda.empty_cache()
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np

from torch.utils.data import random_split

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform_Training = transforms.Compose([
   
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

])

transform_Testing = transforms.Compose([
    transforms.ToTensor(),
    
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_Training)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_Testing)

Files already downloaded and verified
Files already downloaded and verified


## Train and Test Loaders

In [2]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=5, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=5, shuffle=False)

## Building the Model

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride

        planes = expansion * in_planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = out + self.shortcut(x) if self.stride==1 else out
        return out

In [4]:
class MobileNetV2(nn.Module):
    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        cfg = [(1, 16, 1, 1),
               (6, 24, 2, 1),  
               (6, 32, 3, 2),
               (6, 64, 4, 2),
               (6, 96, 3, 1),
               (6, 160, 3, 2),
               (6, 320, 1, 1)]
        layers = []
        for expansion, out_planes, num_blocks, stride in cfg:
            strides = [stride] + [1]*(num_blocks-1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


In [5]:
def test():
    net = MobileNetV2()
    x = torch.randn(2, 3, 32, 32)  
    y = net(x)
    print(y.size()) 

test()


torch.Size([2, 10])


## Loss,Optimizer

In [6]:
net = MobileNetV2(num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


## Training

In [7]:
for epoch in range(5):  
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:    
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')


[1,   200] loss: 2.289
[1,   400] loss: 2.151
[1,   600] loss: 2.149
[1,   800] loss: 2.104
[1,  1000] loss: 2.060
[1,  1200] loss: 1.970
[1,  1400] loss: 1.988
[1,  1600] loss: 1.903
[1,  1800] loss: 1.911
[1,  2000] loss: 1.875
[1,  2200] loss: 1.882
[1,  2400] loss: 1.881
[1,  2600] loss: 1.878
[1,  2800] loss: 1.771
[1,  3000] loss: 1.807
[1,  3200] loss: 1.850
[1,  3400] loss: 1.820
[1,  3600] loss: 1.773
[1,  3800] loss: 1.706
[1,  4000] loss: 1.751
[1,  4200] loss: 1.747
[1,  4400] loss: 1.640
[1,  4600] loss: 1.703
[1,  4800] loss: 1.652
[1,  5000] loss: 1.604
[1,  5200] loss: 1.673
[1,  5400] loss: 1.627
[1,  5600] loss: 1.582
[1,  5800] loss: 1.617
[1,  6000] loss: 1.654
[1,  6200] loss: 1.583
[1,  6400] loss: 1.531
[1,  6600] loss: 1.578
[1,  6800] loss: 1.523
[1,  7000] loss: 1.414
[1,  7200] loss: 1.467
[1,  7400] loss: 1.470
[1,  7600] loss: 1.487
[1,  7800] loss: 1.504
[1,  8000] loss: 1.525
[1,  8200] loss: 1.420
[1,  8400] loss: 1.434
[1,  8600] loss: 1.425
[1,  8800] 

In [8]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 73 %


In [10]:
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.279
[6,   400] loss: 0.308
[6,   600] loss: 0.266
[6,   800] loss: 0.314
[6,  1000] loss: 0.275
[6,  1200] loss: 0.287
[6,  1400] loss: 0.332
[6,  1600] loss: 0.346
[6,  1800] loss: 0.324
[6,  2000] loss: 0.326
[6,  2200] loss: 0.293
[6,  2400] loss: 0.328
[6,  2600] loss: 0.339
[6,  2800] loss: 0.322
[6,  3000] loss: 0.311
[6,  3200] loss: 0.375
[6,  3400] loss: 0.321
[6,  3600] loss: 0.351
[6,  3800] loss: 0.380
[6,  4000] loss: 0.317
[6,  4200] loss: 0.373
[6,  4400] loss: 0.389
[6,  4600] loss: 0.360
[6,  4800] loss: 0.349
[6,  5000] loss: 0.389
[6,  5200] loss: 0.331
[6,  5400] loss: 0.371
[6,  5600] loss: 0.381
[6,  5800] loss: 0.392
[6,  6000] loss: 0.386
[6,  6200] loss: 0.335
[6,  6400] loss: 0.394
[6,  6600] loss: 0.355
[6,  6800] loss: 0.389
[6,  7000] loss: 0.356
[6,  7200] loss: 0.437
[6,  7400] loss: 0.407
[6,  7600] loss: 0.408
[6,  7800] loss: 0.372
[6,  8000] loss: 0.397
[6,  8200] loss: 0.429
[6,  8400] loss: 0.381
[6,  8600] loss: 0.350
[6,  8800] 

In [11]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 78 %


In [12]:
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.171
[6,   400] loss: 0.168
[6,   600] loss: 0.165
[6,   800] loss: 0.173
[6,  1000] loss: 0.140
[6,  1200] loss: 0.150
[6,  1400] loss: 0.113
[6,  1600] loss: 0.160
[6,  1800] loss: 0.209
[6,  2000] loss: 0.158
[6,  2200] loss: 0.153
[6,  2400] loss: 0.158
[6,  2600] loss: 0.177
[6,  2800] loss: 0.184
[6,  3000] loss: 0.157
[6,  3200] loss: 0.180
[6,  3400] loss: 0.221
[6,  3600] loss: 0.174
[6,  3800] loss: 0.202
[6,  4000] loss: 0.243
[6,  4200] loss: 0.273
[6,  4400] loss: 0.209
[6,  4600] loss: 0.236
[6,  4800] loss: 0.230
[6,  5000] loss: 0.207
[6,  5200] loss: 0.177
[6,  5400] loss: 0.217
[6,  5600] loss: 0.199
[6,  5800] loss: 0.193
[6,  6000] loss: 0.224
[6,  6200] loss: 0.165
[6,  6400] loss: 0.228
[6,  6600] loss: 0.220
[6,  6800] loss: 0.207
[6,  7000] loss: 0.206
[6,  7200] loss: 0.223
[6,  7400] loss: 0.235
[6,  7600] loss: 0.233
[6,  7800] loss: 0.195
[6,  8000] loss: 0.204
[6,  8200] loss: 0.233
[6,  8400] loss: 0.194
[6,  8600] loss: 0.196
[6,  8800] 

## Final Evaluation

In [13]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 78 %
