Evaluate the performance of different types of optimizer on a LeNet-5 network using MNIST data. At least you need to evaluate SGD, AdaGrad, RMSprop. 

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
import time

In [2]:
import numpy as np
from datetime import datetime 

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from torchvision import datasets, transforms

import matplotlib.pyplot as plt

# check device
#DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# Preparing for Data
print('==> Preparing data..')

"""
# Training Data augmentation
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# Testing Data preparation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

#classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

"""

==> Preparing data..


"\n# Training Data augmentation\ntransform_train = transforms.Compose([\n    transforms.RandomCrop(32, padding=4),\n    transforms.RandomHorizontalFlip(),\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n# Testing Data preparation\ntransform_test = transforms.Compose([\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n\n#classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')\n\n"

In [4]:
# Preparing for Data
print('==> Preparing data..')

# define transforms
transforms = transforms.Compose([transforms.Resize((32, 32)),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5,), (0.5,))])


==> Preparing data..


In [5]:
#Defining the convolutional neural network
class LeNet(nn.Module):
    
    def __init__(self):
        super(LeNet, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
        
    def forward(self, x):
        out = self.pool(F.relu(self.conv1(x)))
        out = self.pool(F.relu(self.conv2(out)))
        out = torch.flatten(out, 1) # flatten all dimensions except batch
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
 
        return out

In [6]:
model1 = LeNet()

In [7]:
def count_parameters(model):
    return sum(p.numel() for p in model1.parameters() if p.requires_grad)


print(f'The model has {count_parameters(model1):,} trainable parameters')

The model has 61,706 trainable parameters


In [8]:
########################################################################
# 3. Define a Loss function and optimizer
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Let's use a Classification Cross-Entropy loss and SGD with momentum.

#import torch.optim as optim

criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


In [9]:
def train(model, device, train_loader, optimizer, epoch):
    
    model.train()
    count = 0
    train_loss = 0
    total = 0 
    correct = 0
    
    total_step = len(train_loader)
    
    for batch_idx, (data, target) in enumerate(train_loader):
        
        data, target = data.to(device), target.to(device)
        

        #forward pass
        y_pred = model(data)

        loss = criterion(y_pred, target)

        acc = calculate_accuracy(y_pred, target)
        
        
        #Backward pass
        optimizer.zero_grad()
        
        loss.backward()

        optimizer.step()
        
        
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            

In [10]:
def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [11]:
def calculate_accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim=True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

In [12]:
def main():
    time0 = time.time()
    # Training settings
    batch_size = 128
    epochs = 50
    lr = 0.01 # hyper parameter 
    no_cuda = True
    save_model = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    torch.manual_seed(100)
    device = torch.device("cuda" if use_cuda else "cpu")
    
    trainset = torchvision.datasets.MNIST(root='mnist_data', train=True, download=True, transform=transforms)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
    testset = torchvision.datasets.MNIST(root='mnist_data', train=False, download=True, transform=transforms)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

    model = LeNet().to(device)
    optimizer = optim.RMSprop(model.parameters(), lr=lr, alpha = 0.9, momentum=0.9, weight_decay=5e-4, centered = False, eps=1e-10)
    # alpha - smoothing constant (default: 0.99)
    # eps (float, optional) – term added to the denominator to improve numerical stability (default: 1e-8)
    # momentum (float, optional) – momentum factor (default: 0)
    # lr (float, optional) – learning rate (default: 1e-2)
    # centered (bool, optional) – if True, compute the centered RMSProp, 
    # the gradient is normalized by an estimation of its variance
    # weight_decay (float, optional) – weight decay (L2 penalty) (default: 0)
    

    for epoch in range(1, epochs + 1):
        train( model, device, train_loader, optimizer, epoch)
        test( model, device, test_loader)

    if (save_model):
        torch.save(model.state_dict(),"cifar_lenet.pt")
    time1 = time.time() 
    print ('Traning and Testing total excution time is: %s seconds ' % (time1-time0))   
if __name__ == '__main__':
    main()


Test set: Average loss: -10.5727, Accuracy: 5578/10000 (56%)


Test set: Average loss: -21.9597, Accuracy: 8736/10000 (87%)


Test set: Average loss: -1.9435, Accuracy: 921/10000 (9%)




Test set: Average loss: -21.7622, Accuracy: 8279/10000 (83%)


Test set: Average loss: -10.4468, Accuracy: 7159/10000 (72%)


Test set: Average loss: -16.3838, Accuracy: 8332/10000 (83%)




Test set: Average loss: -13.2477, Accuracy: 7951/10000 (80%)


Test set: Average loss: -18.2968, Accuracy: 7588/10000 (76%)


Test set: Average loss: -8.4877, Accuracy: 6133/10000 (61%)


Test set: Average loss: -12.0413, Accuracy: 8081/10000 (81%)




Test set: Average loss: -16.2630, Accuracy: 7096/10000 (71%)


Test set: Average loss: -12.6306, Accuracy: 8377/10000 (84%)


Test set: Average loss: -24.9642, Accuracy: 6542/10000 (65%)




Test set: Average loss: -14.5613, Accuracy: 8428/10000 (84%)


Test set: Average loss: -19.3650, Accuracy: 8326/10000 (83%)


Test set: Average loss: -15.5661, Accuracy: 7131/10000 (71%)




Test set: Average loss: -25.6906, Accuracy: 8474/10000 (85%)


Test set: Average loss: -14.7256, Accuracy: 8413/10000 (84%)


Test set: Average loss: -21.5495, Accuracy: 8756/10000 (88%)


Test set: Average loss: -19.5772, Accuracy: 8442/10000 (84%)




Test set: Average loss: -3.0568, Accuracy: 3151/10000 (32%)


Test set: Average loss: -0.6554, Accuracy: 1028/10000 (10%)


Test set: Average loss: -10.7866, Accuracy: 7789/10000 (78%)




Test set: Average loss: -9.9251, Accuracy: 8328/10000 (83%)


Test set: Average loss: -17.6207, Accuracy: 8658/10000 (87%)


Test set: Average loss: -10.3560, Accuracy: 8446/10000 (84%)


Test set: Average loss: -18.0954, Accuracy: 9079/10000 (91%)




Test set: Average loss: -1.2659, Accuracy: 1068/10000 (11%)


Test set: Average loss: -5.9876, Accuracy: 1336/10000 (13%)


Test set: Average loss: -32.9327, Accuracy: 8367/10000 (84%)




Test set: Average loss: -18.8783, Accuracy: 8488/10000 (85%)


Test set: Average loss: -15.1750, Accuracy: 6011/10000 (60%)


Test set: Average loss: -17.6053, Accuracy: 8291/10000 (83%)




Test set: Average loss: -15.1313, Accuracy: 8717/10000 (87%)


Test set: Average loss: -21.6700, Accuracy: 7388/10000 (74%)


Test set: Average loss: -14.2985, Accuracy: 7982/10000 (80%)


Test set: Average loss: -31.1050, Accuracy: 7803/10000 (78%)




Test set: Average loss: -9.9180, Accuracy: 7590/10000 (76%)


Test set: Average loss: -23.4620, Accuracy: 8459/10000 (85%)


Test set: Average loss: -8.6433, Accuracy: 8300/10000 (83%)




Test set: Average loss: -11.2371, Accuracy: 8713/10000 (87%)


Test set: Average loss: -19.9367, Accuracy: 7530/10000 (75%)


Test set: Average loss: -17.0458, Accuracy: 8775/10000 (88%)




Test set: Average loss: -13.3216, Accuracy: 7991/10000 (80%)


Test set: Average loss: -56.6872, Accuracy: 8159/10000 (82%)


Test set: Average loss: -10.9741, Accuracy: 8609/10000 (86%)


Test set: Average loss: -13.5890, Accuracy: 8929/10000 (89%)




Test set: Average loss: -20.4825, Accuracy: 9028/10000 (90%)


Test set: Average loss: -1.0781, Accuracy: 980/10000 (10%)


Test set: Average loss: -15.1783, Accuracy: 8313/10000 (83%)

Traning and Testing total excution time is: 1960.0306355953217 seconds 


In [13]:
!rm -r mnist_data

'rm' is not recognized as an internal or external command,
operable program or batch file.
