Evaluate the performance of different types of optimizer on a LeNet-5 network using MNIST data. At least you need to evaluate SGD, AdaGrad, RMSprop. 

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
import time

In [2]:
import numpy as np
from datetime import datetime 

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from torchvision import datasets, transforms

import matplotlib.pyplot as plt

# check device
#DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# Preparing for Data
print('==> Preparing data..')

"""
# Training Data augmentation
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# Testing Data preparation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

#classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

"""

==> Preparing data..


"\n# Training Data augmentation\ntransform_train = transforms.Compose([\n    transforms.RandomCrop(32, padding=4),\n    transforms.RandomHorizontalFlip(),\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n# Testing Data preparation\ntransform_test = transforms.Compose([\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n\n#classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')\n\n"

In [4]:
# Preparing for Data
print('==> Preparing data..')

# define transforms
transforms = transforms.Compose([transforms.Resize((32, 32)),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5,), (0.5,))])

==> Preparing data..


In [5]:
#Defining the convolutional neural network
class LeNet(nn.Module):
    
    def __init__(self):
        super(LeNet, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
        
    def forward(self, x):
        out = self.pool(F.relu(self.conv1(x)))
        out = self.pool(F.relu(self.conv2(out)))
        out = torch.flatten(out, 1) # flatten all dimensions except batch
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
 
        return out

In [6]:
model1 = LeNet()

In [7]:
def count_parameters(model):
    return sum(p.numel() for p in model1.parameters() if p.requires_grad)


print(f'The model has {count_parameters(model1):,} trainable parameters')

The model has 61,706 trainable parameters


In [8]:
########################################################################
# 3. Define a Loss function and optimizer
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Let's use a Classification Cross-Entropy loss and SGD with momentum.

#import torch.optim as optim

criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


In [9]:
def train(model, device, train_loader, optimizer, epoch):
    
    model.train()
    count = 0
    train_loss = 0
    total = 0 
    correct = 0
    
    total_step = len(train_loader)
    
    for batch_idx, (data, target) in enumerate(train_loader):
        
        data, target = data.to(device), target.to(device)
        

        #forward pass
        y_pred = model(data)

        loss = criterion(y_pred, target)

        acc = calculate_accuracy(y_pred, target)
        
        
        #Backward pass
        optimizer.zero_grad()
        
        loss.backward()

        optimizer.step()
        
        
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            

In [10]:
def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [11]:
def calculate_accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim=True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

In [12]:
def main():
    time0 = time.time()
    # Training settings
    batch_size = 128
    epochs = 50
    lr = 0.05
    no_cuda = True
    save_model = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    torch.manual_seed(100)
    device = torch.device("cuda" if use_cuda else "cpu")
    
    trainset = torchvision.datasets.MNIST(root='mnist_data', train=True, download=True, transform=transforms)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
    testset = torchvision.datasets.MNIST(root='mnist_data', train=False, download=True, transform=transforms)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

    model = LeNet().to(device)
    optimizer = optim.Adagrad(model.parameters(), lr=lr, lr_decay=0, weight_decay=5e-4, initial_accumulator_value=0, eps=1e-10)

    for epoch in range(1, epochs + 1):
        train( model, device, train_loader, optimizer, epoch)
        test( model, device, test_loader)

    if (save_model):
        torch.save(model.state_dict(),"cifar_lenet.pt")
    time1 = time.time() 
    print ('Traning and Testing total excution time is: %s seconds ' % (time1-time0))   
if __name__ == '__main__':
    main()


Test set: Average loss: -9.9542, Accuracy: 9721/10000 (97%)


Test set: Average loss: -10.6435, Accuracy: 9805/10000 (98%)


Test set: Average loss: -11.1747, Accuracy: 9841/10000 (98%)




Test set: Average loss: -11.6235, Accuracy: 9856/10000 (99%)


Test set: Average loss: -11.4778, Accuracy: 9861/10000 (99%)


Test set: Average loss: -11.7186, Accuracy: 9862/10000 (99%)




Test set: Average loss: -12.0543, Accuracy: 9874/10000 (99%)


Test set: Average loss: -12.1194, Accuracy: 9883/10000 (99%)


Test set: Average loss: -12.4526, Accuracy: 9866/10000 (99%)


Test set: Average loss: -12.3959, Accuracy: 9893/10000 (99%)




Test set: Average loss: -12.6903, Accuracy: 9891/10000 (99%)


Test set: Average loss: -12.6245, Accuracy: 9892/10000 (99%)


Test set: Average loss: -12.8951, Accuracy: 9898/10000 (99%)




Test set: Average loss: -13.3180, Accuracy: 9898/10000 (99%)


Test set: Average loss: -13.4421, Accuracy: 9902/10000 (99%)


Test set: Average loss: -13.2116, Accuracy: 9893/10000 (99%)




Test set: Average loss: -13.2425, Accuracy: 9903/10000 (99%)


Test set: Average loss: -13.5847, Accuracy: 9903/10000 (99%)


Test set: Average loss: -13.5150, Accuracy: 9893/10000 (99%)


Test set: Average loss: -13.3240, Accuracy: 9902/10000 (99%)




Test set: Average loss: -13.6279, Accuracy: 9901/10000 (99%)


Test set: Average loss: -13.7611, Accuracy: 9896/10000 (99%)


Test set: Average loss: -13.7887, Accuracy: 9901/10000 (99%)




Test set: Average loss: -13.8771, Accuracy: 9907/10000 (99%)


Test set: Average loss: -13.7429, Accuracy: 9905/10000 (99%)


Test set: Average loss: -13.8946, Accuracy: 9900/10000 (99%)


Test set: Average loss: -14.0988, Accuracy: 9898/10000 (99%)




Test set: Average loss: -14.3139, Accuracy: 9897/10000 (99%)


Test set: Average loss: -14.0278, Accuracy: 9897/10000 (99%)


Test set: Average loss: -14.4877, Accuracy: 9905/10000 (99%)




Test set: Average loss: -14.4403, Accuracy: 9909/10000 (99%)


Test set: Average loss: -14.4398, Accuracy: 9915/10000 (99%)


Test set: Average loss: -14.2596, Accuracy: 9906/10000 (99%)




Test set: Average loss: -14.5645, Accuracy: 9894/10000 (99%)


Test set: Average loss: -14.6702, Accuracy: 9895/10000 (99%)


Test set: Average loss: -14.7419, Accuracy: 9906/10000 (99%)


Test set: Average loss: -14.5549, Accuracy: 9909/10000 (99%)




Test set: Average loss: -14.7810, Accuracy: 9899/10000 (99%)


Test set: Average loss: -14.7257, Accuracy: 9914/10000 (99%)


Test set: Average loss: -14.7011, Accuracy: 9908/10000 (99%)




Test set: Average loss: -14.7079, Accuracy: 9904/10000 (99%)


Test set: Average loss: -14.9416, Accuracy: 9910/10000 (99%)


Test set: Average loss: -14.7494, Accuracy: 9903/10000 (99%)




Test set: Average loss: -15.1061, Accuracy: 9904/10000 (99%)


Test set: Average loss: -14.9959, Accuracy: 9896/10000 (99%)


Test set: Average loss: -14.9107, Accuracy: 9890/10000 (99%)


Test set: Average loss: -15.0885, Accuracy: 9896/10000 (99%)




Test set: Average loss: -15.1261, Accuracy: 9906/10000 (99%)


Test set: Average loss: -14.9362, Accuracy: 9895/10000 (99%)


Test set: Average loss: -15.1872, Accuracy: 9861/10000 (99%)

Traning and Testing total excution time is: 2282.983158349991 seconds 
