# MobileNetV2 in Cifar10 with Augmentation and Dropout Techniques

## Importing Libraries,Training,Testing Datasets.

In [1]:
import torch
torch.cuda.empty_cache()
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np

from torch.utils.data import random_split

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
transform_Training = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_Testing = transforms.Compose([
    transforms.ToTensor(),
    
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_Training)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_Testing)

Files already downloaded and verified
Files already downloaded and verified


## Train and Test Loaders

In [2]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=5, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=5, shuffle=False)

## Building The Model


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride

        planes = expansion * in_planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = out + self.shortcut(x) if self.stride==1 else out
        return out





In [4]:
class MobileNetV2(nn.Module):
    def __init__(self, num_classes=10, dropout_prob=0.5):  # Add dropout_prob parameter
        super(MobileNetV2, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)
        self.dropout = nn.Dropout(dropout_prob)  # Add dropout layer

    def _make_layers(self, in_planes):
        cfg = [(1, 16, 1, 1),
               (6, 24, 2, 1),  
               (6, 32, 3, 2),
               (6, 64, 4, 2),
               (6, 96, 3, 1),
               (6, 160, 3, 2),
               (6, 320, 1, 1)]
        layers = []
        for expansion, out_planes, num_blocks, stride in cfg:
            strides = [stride] + [1]*(num_blocks-1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.dropout(out)  # Apply dropout
        out = self.linear(out)
        return out

In [5]:
def test():
    net = MobileNetV2()
    x = torch.randn(2, 3, 32, 32)  
    y = net(x)
    print(y.size()) 

test()


torch.Size([2, 10])


## Optimizer,Loss

In [6]:
net = MobileNetV2(num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


## Training

In [7]:
for epoch in range(5):  
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:    
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')


[1,   200] loss: 2.373
[1,   400] loss: 2.271
[1,   600] loss: 2.168
[1,   800] loss: 2.124
[1,  1000] loss: 2.190
[1,  1200] loss: 2.110
[1,  1400] loss: 2.122
[1,  1600] loss: 2.131
[1,  1800] loss: 2.070
[1,  2000] loss: 2.090
[1,  2200] loss: 2.041
[1,  2400] loss: 2.020
[1,  2600] loss: 2.035
[1,  2800] loss: 2.034
[1,  3000] loss: 2.055
[1,  3200] loss: 2.018
[1,  3400] loss: 1.943
[1,  3600] loss: 1.935
[1,  3800] loss: 1.916
[1,  4000] loss: 1.969
[1,  4200] loss: 1.920
[1,  4400] loss: 1.888
[1,  4600] loss: 1.891
[1,  4800] loss: 1.870
[1,  5000] loss: 1.844
[1,  5200] loss: 1.830
[1,  5400] loss: 1.839
[1,  5600] loss: 1.904
[1,  5800] loss: 1.840
[1,  6000] loss: 1.815
[1,  6200] loss: 1.788
[1,  6400] loss: 1.849
[1,  6600] loss: 1.764
[1,  6800] loss: 1.748
[1,  7000] loss: 1.757
[1,  7200] loss: 1.812
[1,  7400] loss: 1.758
[1,  7600] loss: 1.686
[1,  7800] loss: 1.776
[1,  8000] loss: 1.640
[1,  8200] loss: 1.788
[1,  8400] loss: 1.695
[1,  8600] loss: 1.699
[1,  8800] 

In [8]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 69 %


In [9]:
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.816
[6,   400] loss: 0.803
[6,   600] loss: 0.772
[6,   800] loss: 0.831
[6,  1000] loss: 0.836
[6,  1200] loss: 0.768
[6,  1400] loss: 0.795
[6,  1600] loss: 0.794
[6,  1800] loss: 0.826
[6,  2000] loss: 0.866
[6,  2200] loss: 0.836
[6,  2400] loss: 0.777
[6,  2600] loss: 0.812
[6,  2800] loss: 0.766
[6,  3000] loss: 0.798
[6,  3200] loss: 0.798
[6,  3400] loss: 0.812
[6,  3600] loss: 0.808
[6,  3800] loss: 0.793
[6,  4000] loss: 0.782
[6,  4200] loss: 0.821
[6,  4400] loss: 0.906
[6,  4600] loss: 0.807
[6,  4800] loss: 0.813
[6,  5000] loss: 0.742
[6,  5200] loss: 0.791
[6,  5400] loss: 0.741
[6,  5600] loss: 0.872
[6,  5800] loss: 0.788
[6,  6000] loss: 0.825
[6,  6200] loss: 0.773
[6,  6400] loss: 0.813
[6,  6600] loss: 0.888
[6,  6800] loss: 0.912
[6,  7000] loss: 0.814
[6,  7200] loss: 0.755
[6,  7400] loss: 0.792
[6,  7600] loss: 0.789
[6,  7800] loss: 0.783
[6,  8000] loss: 0.806
[6,  8200] loss: 0.860
[6,  8400] loss: 0.825
[6,  8600] loss: 0.769
[6,  8800] 

In [10]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 77 %


In [11]:
torch.cuda.empty_cache()
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.595
[6,   400] loss: 0.573
[6,   600] loss: 0.486
[6,   800] loss: 0.555
[6,  1000] loss: 0.602
[6,  1200] loss: 0.564
[6,  1400] loss: 0.565
[6,  1600] loss: 0.545
[6,  1800] loss: 0.649
[6,  2000] loss: 0.579
[6,  2200] loss: 0.602
[6,  2400] loss: 0.579
[6,  2600] loss: 0.613
[6,  2800] loss: 0.597
[6,  3000] loss: 0.638
[6,  3200] loss: 0.622
[6,  3400] loss: 0.590
[6,  3600] loss: 0.638
[6,  3800] loss: 0.593
[6,  4000] loss: 0.550
[6,  4200] loss: 0.542
[6,  4400] loss: 0.559
[6,  4600] loss: 0.563
[6,  4800] loss: 0.535
[6,  5000] loss: 0.637
[6,  5200] loss: 0.499
[6,  5400] loss: 0.582
[6,  5600] loss: 0.580
[6,  5800] loss: 0.576
[6,  6000] loss: 0.622
[6,  6200] loss: 0.598
[6,  6400] loss: 0.607
[6,  6600] loss: 0.617
[6,  6800] loss: 0.522
[6,  7000] loss: 0.523
[6,  7200] loss: 0.579
[6,  7400] loss: 0.553
[6,  7600] loss: 0.568
[6,  7800] loss: 0.584
[6,  8000] loss: 0.586
[6,  8200] loss: 0.558
[6,  8400] loss: 0.548
[6,  8600] loss: 0.664
[6,  8800] 

In [12]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 80 %


In [13]:
torch.cuda.empty_cache()
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.438
[6,   400] loss: 0.406
[6,   600] loss: 0.499
[6,   800] loss: 0.446
[6,  1000] loss: 0.438
[6,  1200] loss: 0.474
[6,  1400] loss: 0.463
[6,  1600] loss: 0.475
[6,  1800] loss: 0.502
[6,  2000] loss: 0.539
[6,  2200] loss: 0.493
[6,  2400] loss: 0.423
[6,  2600] loss: 0.446
[6,  2800] loss: 0.498
[6,  3000] loss: 0.487
[6,  3200] loss: 0.488
[6,  3400] loss: 0.413
[6,  3600] loss: 0.428
[6,  3800] loss: 0.418
[6,  4000] loss: 0.460
[6,  4200] loss: 0.453
[6,  4400] loss: 0.504
[6,  4600] loss: 0.449
[6,  4800] loss: 0.475
[6,  5000] loss: 0.455
[6,  5200] loss: 0.452
[6,  5400] loss: 0.419
[6,  5600] loss: 0.429
[6,  5800] loss: 0.422
[6,  6000] loss: 0.493
[6,  6200] loss: 0.449
[6,  6400] loss: 0.501
[6,  6600] loss: 0.493
[6,  6800] loss: 0.453
[6,  7000] loss: 0.487
[6,  7200] loss: 0.445
[6,  7400] loss: 0.453
[6,  7600] loss: 0.480
[6,  7800] loss: 0.436
[6,  8000] loss: 0.483
[6,  8200] loss: 0.442
[6,  8400] loss: 0.470
[6,  8600] loss: 0.475
[6,  8800] 

In [14]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 82 %


In [15]:
torch.cuda.empty_cache()
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.378
[6,   400] loss: 0.357
[6,   600] loss: 0.358
[6,   800] loss: 0.391
[6,  1000] loss: 0.357
[6,  1200] loss: 0.417
[6,  1400] loss: 0.389
[6,  1600] loss: 0.325
[6,  1800] loss: 0.401
[6,  2000] loss: 0.383
[6,  2200] loss: 0.411
[6,  2400] loss: 0.358
[6,  2600] loss: 0.362
[6,  2800] loss: 0.368
[6,  3000] loss: 0.393
[6,  3200] loss: 0.379
[6,  3400] loss: 0.412
[6,  3600] loss: 0.401
[6,  3800] loss: 0.342
[6,  4000] loss: 0.373
[6,  4200] loss: 0.362
[6,  4400] loss: 0.381
[6,  4600] loss: 0.425
[6,  4800] loss: 0.435
[6,  5000] loss: 0.389
[6,  5200] loss: 0.355
[6,  5400] loss: 0.373
[6,  5600] loss: 0.398
[6,  5800] loss: 0.364
[6,  6000] loss: 0.356
[6,  6200] loss: 0.429
[6,  6400] loss: 0.351
[6,  6600] loss: 0.371
[6,  6800] loss: 0.337
[6,  7000] loss: 0.366
[6,  7200] loss: 0.385
[6,  7400] loss: 0.416
[6,  7600] loss: 0.440
[6,  7800] loss: 0.405
[6,  8000] loss: 0.342
[6,  8200] loss: 0.389
[6,  8400] loss: 0.354
[6,  8600] loss: 0.436
[6,  8800] 

In [17]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 84 %


In [18]:
torch.cuda.empty_cache()
additional_epochs = 5  # Number of additional epochs to train

for epoch in range(5, 5 + additional_epochs):  # Continue from epoch 10 to 30
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Additional Training')


[6,   200] loss: 0.312
[6,   400] loss: 0.295
[6,   600] loss: 0.320
[6,   800] loss: 0.351
[6,  1000] loss: 0.343
[6,  1200] loss: 0.316
[6,  1400] loss: 0.302
[6,  1600] loss: 0.403
[6,  1800] loss: 0.299
[6,  2000] loss: 0.334
[6,  2200] loss: 0.314
[6,  2400] loss: 0.320
[6,  2600] loss: 0.320
[6,  2800] loss: 0.313
[6,  3000] loss: 0.328
[6,  3200] loss: 0.327
[6,  3400] loss: 0.305
[6,  3600] loss: 0.309
[6,  3800] loss: 0.372
[6,  4000] loss: 0.302
[6,  4200] loss: 0.348
[6,  4400] loss: 0.331
[6,  4600] loss: 0.403
[6,  4800] loss: 0.357
[6,  5000] loss: 0.335
[6,  5200] loss: 0.336
[6,  5400] loss: 0.334
[6,  5600] loss: 0.323
[6,  5800] loss: 0.347
[6,  6000] loss: 0.340
[6,  6200] loss: 0.337
[6,  6400] loss: 0.350
[6,  6600] loss: 0.363
[6,  6800] loss: 0.342
[6,  7000] loss: 0.327
[6,  7200] loss: 0.315
[6,  7400] loss: 0.306
[6,  7600] loss: 0.308
[6,  7800] loss: 0.356
[6,  8000] loss: 0.295
[6,  8200] loss: 0.289
[6,  8400] loss: 0.351
[6,  8600] loss: 0.307
[6,  8800] 

## Final Evaluation

In [19]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 84 %
