Evaluate the performance of different types of optimizer on a LeNet-5 network using MNIST data. At least you need to evaluate SGD, AdaGrad, RMSprop. 

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
import time

In [2]:
import numpy as np
from datetime import datetime 

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from torchvision import datasets, transforms

import matplotlib.pyplot as plt

# check device
#DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# Preparing for Data
print('==> Preparing data..')

"""
# Training Data augmentation
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# Testing Data preparation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

#classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

"""

==> Preparing data..


"\n# Training Data augmentation\ntransform_train = transforms.Compose([\n    transforms.RandomCrop(32, padding=4),\n    transforms.RandomHorizontalFlip(),\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n# Testing Data preparation\ntransform_test = transforms.Compose([\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n\n#classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')\n\n"

In [4]:
# Preparing for Data
print('==> Preparing data..')

# define transforms
transforms = transforms.Compose([transforms.Resize((32, 32)),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5,), (0.5,))])


==> Preparing data..


In [5]:
#Defining the convolutional neural network
class LeNet(nn.Module):
    
    def __init__(self):
        super(LeNet, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
        
    def forward(self, x):
        out = self.pool(F.relu(self.conv1(x)))
        out = self.pool(F.relu(self.conv2(out)))
        out = torch.flatten(out, 1) # flatten all dimensions except batch
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
 
        return out

In [6]:
model1 = LeNet()

In [7]:
def count_parameters(model):
    return sum(p.numel() for p in model1.parameters() if p.requires_grad)


print(f'The model has {count_parameters(model1):,} trainable parameters')

The model has 61,706 trainable parameters


In [8]:
########################################################################
# 3. Define a Loss function and optimizer
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Let's use a Classification Cross-Entropy loss and SGD with momentum.

#import torch.optim as optim

criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


In [9]:
def train(model, device, train_loader, optimizer, epoch):
    
    model.train()
    count = 0
    train_loss = 0
    total = 0 
    correct = 0
    
    total_step = len(train_loader)
    
    for batch_idx, (data, target) in enumerate(train_loader):
        
        data, target = data.to(device), target.to(device)
        

        #forward pass
        y_pred = model(data)

        loss = criterion(y_pred, target)

        acc = calculate_accuracy(y_pred, target)
        
        
        #Backward pass
        optimizer.zero_grad()
        
        loss.backward()

        optimizer.step()
        
        
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            

In [10]:
def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [11]:
def calculate_accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim=True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

In [12]:
#checking the dataset in the code 
trainset = torchvision.datasets.MNIST(root='mnist_data', train=True, download=True, transform=transforms)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
testset = torchvision.datasets.MNIST(root='mnist_data', train=False, download=True, transform=transforms)
test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

In [13]:
#printing the dataset and dimensions to check whether we are working on the correct dataset:

for data, target in train_loader:
    print('Image batch dimensions:', data.shape)
    print('Image label dimensions:', target.shape)
    print('Class labels of the 10 examples:', target[:10])
    break
    

Image batch dimensions: torch.Size([128, 1, 32, 32])
Image label dimensions: torch.Size([128])
Class labels of the 10 examples: tensor([7, 6, 7, 2, 2, 1, 7, 0, 6, 3])


In [14]:
def main():
    time0 = time.time()
    # Training settings
    batch_size = 128
    epochs = 50
    lr = 0.05
    no_cuda = True
    save_model = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    torch.manual_seed(100)
    device = torch.device("cuda" if use_cuda else "cpu")
    
    trainset = torchvision.datasets.MNIST(root='mnist_data', train=True, download=True, transform=transforms)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
    testset = torchvision.datasets.MNIST(root='mnist_data', train=False, download=True, transform=transforms)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

    model = LeNet().to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

    for epoch in range(1, epochs + 1):
        train( model, device, train_loader, optimizer, epoch)
        test( model, device, test_loader)

    if (save_model):
        torch.save(model.state_dict(),"mnist_lenet.pt")
    time1 = time.time() 
    print ('Traning and Testing total excution time is: %s seconds ' % (time1-time0))   
if __name__ == '__main__':
    main()


Test set: Average loss: -11.3003, Accuracy: 9828/10000 (98%)


Test set: Average loss: -10.7445, Accuracy: 9857/10000 (99%)


Test set: Average loss: -12.5339, Accuracy: 9864/10000 (99%)




Test set: Average loss: -13.3539, Accuracy: 9861/10000 (99%)


Test set: Average loss: -13.2154, Accuracy: 9875/10000 (99%)


Test set: Average loss: -13.2294, Accuracy: 9888/10000 (99%)




Test set: Average loss: -14.3072, Accuracy: 9912/10000 (99%)


Test set: Average loss: -13.0424, Accuracy: 9900/10000 (99%)


Test set: Average loss: -13.3854, Accuracy: 9900/10000 (99%)


Test set: Average loss: -13.4105, Accuracy: 9890/10000 (99%)




Test set: Average loss: -12.3294, Accuracy: 9907/10000 (99%)


Test set: Average loss: -13.7528, Accuracy: 9867/10000 (99%)


Test set: Average loss: -14.8480, Accuracy: 9906/10000 (99%)




Test set: Average loss: -14.1384, Accuracy: 9896/10000 (99%)


Test set: Average loss: -14.2842, Accuracy: 9893/10000 (99%)


Test set: Average loss: -14.0241, Accuracy: 9909/10000 (99%)




Test set: Average loss: -13.5564, Accuracy: 9897/10000 (99%)


Test set: Average loss: -14.4551, Accuracy: 9906/10000 (99%)


Test set: Average loss: -13.0189, Accuracy: 9905/10000 (99%)


Test set: Average loss: -13.6335, Accuracy: 9896/10000 (99%)




Test set: Average loss: -13.8160, Accuracy: 9913/10000 (99%)


Test set: Average loss: -13.9483, Accuracy: 9899/10000 (99%)


Test set: Average loss: -14.2249, Accuracy: 9894/10000 (99%)




Test set: Average loss: -13.6853, Accuracy: 9904/10000 (99%)


Test set: Average loss: -13.2361, Accuracy: 9901/10000 (99%)


Test set: Average loss: -13.8212, Accuracy: 9885/10000 (99%)


Test set: Average loss: -13.5056, Accuracy: 9891/10000 (99%)




Test set: Average loss: -13.6450, Accuracy: 9891/10000 (99%)


Test set: Average loss: -13.4885, Accuracy: 9873/10000 (99%)


Test set: Average loss: -14.0808, Accuracy: 9915/10000 (99%)




Test set: Average loss: -13.9802, Accuracy: 9891/10000 (99%)


Test set: Average loss: -14.2093, Accuracy: 9901/10000 (99%)


Test set: Average loss: -13.5688, Accuracy: 9879/10000 (99%)




Test set: Average loss: -13.8313, Accuracy: 9907/10000 (99%)


Test set: Average loss: -14.3991, Accuracy: 9865/10000 (99%)


Test set: Average loss: -14.3648, Accuracy: 9879/10000 (99%)


Test set: Average loss: -14.1971, Accuracy: 9909/10000 (99%)




Test set: Average loss: -15.4533, Accuracy: 9882/10000 (99%)


Test set: Average loss: -14.1463, Accuracy: 9898/10000 (99%)


Test set: Average loss: -14.1596, Accuracy: 9925/10000 (99%)




Test set: Average loss: -13.9785, Accuracy: 9907/10000 (99%)


Test set: Average loss: -13.4401, Accuracy: 9902/10000 (99%)


Test set: Average loss: -14.5840, Accuracy: 9918/10000 (99%)




Test set: Average loss: -13.7782, Accuracy: 9904/10000 (99%)


Test set: Average loss: -14.5093, Accuracy: 9889/10000 (99%)


Test set: Average loss: -13.3266, Accuracy: 9897/10000 (99%)


Test set: Average loss: -13.9299, Accuracy: 9870/10000 (99%)




Test set: Average loss: -14.4572, Accuracy: 9882/10000 (99%)


Test set: Average loss: -14.3542, Accuracy: 9912/10000 (99%)


Test set: Average loss: -13.5688, Accuracy: 9883/10000 (99%)

Traning and Testing total excution time is: 2639.2156250476837 seconds 


In [15]:
!rm -r mnist_data

'rm' is not recognized as an internal or external command,
operable program or batch file.
