In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn.init as init

In [2]:
# Replace the process argv with a single empty entry so the parser.parse_args()
# call further down sees no command-line options (presumably to hide the
# notebook kernel's own launch arguments -- confirm), then drop the `sys` name.
import sys; sys.argv=['']; del sys

In [3]:
class Net(nn.Module):
    """Baseline CNN with ReLU activations.

    Layout: conv(1->20, 5x5) -> ReLU -> 2x2 max-pool -> conv(20->50, 5x5) ->
    ReLU -> 2x2 max-pool -> flatten to 4*4*50 -> linear(500) -> ReLU ->
    linear(10) -> log-softmax. The 4*4*50 flatten size corresponds to
    single-channel 28x28 inputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Layers are registered in the same order as before so that seeded
        # parameter initialisation and state-dict keys stay identical.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        pooled_first = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2, stride=2)
        pooled_second = F.max_pool2d(F.relu(self.conv2(pooled_first)), kernel_size=2, stride=2)
        flattened = pooled_second.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flattened))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [4]:
class Net_BatchNorm(nn.Module):
    """CNN with batch normalisation inserted before every ReLU.

    Same convolution / linear layout as the baseline network in this
    notebook, with BatchNorm2d after each convolution and BatchNorm1d after
    the first fully connected layer. ``forward`` returns log-probabilities.
    """

    def __init__(self):
        super(Net_BatchNorm, self).__init__()
        # Registration order is preserved (conv1, batchN1, conv2, batchN2,
        # fc1, batchN3, fc2) so seeded initialisation is unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.batchN1 = nn.BatchNorm2d(num_features=20)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.batchN2 = nn.BatchNorm2d(num_features=50)
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.batchN3 = nn.BatchNorm1d(num_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        # Both convolutional stages share the shape conv -> BN -> ReLU -> pool.
        conv_stages = ((self.conv1, self.batchN1), (self.conv2, self.batchN2))
        for conv, norm in conv_stages:
            x = F.max_pool2d(F.relu(norm(conv(x))), kernel_size=2, stride=2)
        flattened = x.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.batchN3(self.fc1(flattened)))
        return F.log_softmax(self.fc2(hidden), dim=1)
        

In [5]:
class Net_BatchNorm_DropOut(nn.Module):
    """CNN combining batch normalisation with dropout (p=0.5).

    Each convolutional stage is conv -> BatchNorm2d -> ReLU -> dropout ->
    2x2 max-pool. The fully connected stage is linear -> BatchNorm1d -> ReLU
    -> linear; no dropout is applied after the first linear layer.
    ``forward`` returns log-probabilities.
    """

    def __init__(self):
        super(Net_BatchNorm_DropOut, self).__init__()
        # Registration order is preserved so seeded initialisation and
        # state-dict keys stay the same.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.batchN1 = nn.BatchNorm2d(num_features=20)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.batchN2 = nn.BatchNorm2d(num_features=50)
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.batchN3 = nn.BatchNorm1d(num_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)
        # One dropout module is shared by both convolutional stages.
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        conv_stages = ((self.conv1, self.batchN1), (self.conv2, self.batchN2))
        for conv, norm in conv_stages:
            activated = F.relu(norm(conv(x)))
            # Dropout is applied before pooling, exactly as in the original.
            x = F.max_pool2d(self.dropout(activated), kernel_size=2, stride=2)
        flattened = x.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.batchN3(self.fc1(flattened)))
        return F.log_softmax(self.fc2(hidden), dim=1)

In [6]:
class Net_dropOut(nn.Module):
    """ReLU CNN with dropout after each convolutional activation.

    Args:
        dropout: probability passed to ``nn.Dropout``; defaults to 0.5.

    Each convolutional stage is conv -> ReLU -> dropout -> 2x2 max-pool. No
    dropout is applied after the first linear layer. ``forward`` returns
    log-probabilities.
    """

    def __init__(self,dropout=0.5):
        super(Net_dropOut, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)
        # One dropout module is shared by both convolutional stages.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        for conv in (self.conv1, self.conv2):
            activated = F.relu(conv(x))
            x = F.max_pool2d(self.dropout(activated), kernel_size=2, stride=2)
        flattened = x.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flattened))
        return F.log_softmax(self.fc2(hidden), dim=1)

In [7]:
class Net_sigmoid(nn.Module):
    """Variant of the baseline CNN that uses sigmoid instead of ReLU.

    Layout: conv(1->20, 5x5) -> sigmoid -> 2x2 max-pool -> conv(20->50, 5x5)
    -> sigmoid -> 2x2 max-pool -> flatten to 4*4*50 -> linear(500) ->
    sigmoid -> linear(10) -> log-softmax.
    """

    def __init__(self):
        super(Net_sigmoid, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(torch.sigmoid(conv(x)), kernel_size=2, stride=2)
        flattened = x.view(-1, 4 * 4 * 50)
        hidden = torch.sigmoid(self.fc1(flattened))
        return F.log_softmax(self.fc2(hidden), dim=1)

In [8]:
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch and print a single summary line at its end.

    Args:
        args: parsed experiment options; not read by this function and kept
            only so existing callers keep working.
        model: network to optimise; switched to training mode.
        device: torch device each batch is moved to.
        train_loader: iterable of ``(data, target)`` batches that also
            exposes ``__len__`` and a ``dataset`` attribute.
        optimizer: optimiser stepped once per batch.
        epoch: epoch number, used only in the printed summary.

    The summary is printed once, on the final batch. It reports the number
    of samples actually processed so far. The previous
    ``batch_idx * len(data)`` expression multiplied by the size of the last
    batch, which is usually a partial one, and so under-reported the count.
    """
    model.train()
    num_batches = len(train_loader)
    dataset_size = len(train_loader.dataset)
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # Accumulate real batch sizes so a short final batch is counted correctly.
        samples_seen += len(data)
        if batch_idx == num_batches - 1:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, dataset_size,
                100. * samples_seen / dataset_size, loss.item()))

In [9]:
def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [10]:
def InitialModel(train_loader, test_loader, device, args):
    """Train and evaluate the baseline ``Net`` for ``args.epochs`` epochs.

    Uses SGD with ``args.lr`` and ``args.momentum``; after each training
    epoch the model is evaluated on ``test_loader``. Returns nothing.
    """
    network = Net().to(device)
    sgd = optim.SGD(network.parameters(), lr=args.lr, momentum=args.momentum)

    # Epochs are numbered from 1 in the printed logs.
    for epoch_number in range(1, args.epochs + 1):
        train(args, network, device, train_loader, sgd, epoch_number)
        test(args, network, device, test_loader)

In [11]:
def Model_Sigmoid(train_loader, test_loader, device, args):
    """Train and evaluate ``Net_sigmoid`` for ``args.epochs`` epochs.

    Uses SGD with ``args.lr`` and ``args.momentum``; after each training
    epoch the model is evaluated on ``test_loader``. Returns nothing.
    """
    network = Net_sigmoid().to(device)
    sgd = optim.SGD(network.parameters(), lr=args.lr, momentum=args.momentum)

    # Epochs are numbered from 1 in the printed logs.
    for epoch_number in range(1, args.epochs + 1):
        train(args, network, device, train_loader, sgd, epoch_number)
        test(args, network, device, test_loader)

In [12]:
def Model_DropOut(train_loader, test_loader, device, args,
                  dropout_rates=(0.25, 0.5, 0.75, 1)):
    """Train and evaluate a fresh ``Net_dropOut`` for each dropout rate.

    Args:
        train_loader: training batches, forwarded to ``train``.
        test_loader: evaluation batches, forwarded to ``test``.
        device: torch device the model is moved to.
        args: options providing ``lr``, ``momentum`` and ``epochs``.
        dropout_rates: iterable of dropout probabilities to sweep. The
            default reproduces the original hard-coded sweep.

    For every rate a new model and SGD optimiser are created, then the model
    is trained for ``args.epochs`` epochs and evaluated after each one.

    NOTE: the default sweep ends with a rate of 1. Per the ``nn.Dropout``
    documentation ``p`` is the probability of zeroing an element, so that run
    drops every activation in training mode and is a degenerate case. It
    stays in the default only to preserve the original experiment.
    """
    for dropout in dropout_rates:
        print("\n\n DropOut is added for dropout value::",dropout,"\n")

        # New model and optimiser per rate so the runs do not share weights.
        model = Net_dropOut(dropout).to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader)

In [13]:
def Model_BatchNorm(train_loader, test_loader, device, args):
    """Train and evaluate ``Net_BatchNorm`` for ``args.epochs`` epochs.

    Uses SGD with ``args.lr`` and ``args.momentum``; after each training
    epoch the model is evaluated on ``test_loader``. Returns nothing.
    """
    network = Net_BatchNorm().to(device)
    sgd = optim.SGD(network.parameters(), lr=args.lr, momentum=args.momentum)

    # Epochs are numbered from 1 in the printed logs.
    for epoch_number in range(1, args.epochs + 1):
        train(args, network, device, train_loader, sgd, epoch_number)
        test(args, network, device, test_loader)

In [14]:
def Model_BatchNorm_DropOut(train_loader, test_loader, device, args):
    """Train and evaluate ``Net_BatchNorm_DropOut`` for ``args.epochs`` epochs.

    Uses SGD with ``args.lr`` and ``args.momentum``; after each training
    epoch the model is evaluated on ``test_loader``. Returns nothing.
    """
    network = Net_BatchNorm_DropOut().to(device)
    sgd = optim.SGD(network.parameters(), lr=args.lr, momentum=args.momentum)

    # Epochs are numbered from 1 in the printed logs.
    for epoch_number in range(1, args.epochs + 1):
        train(args, network, device, train_loader, sgd, epoch_number)
        test(args, network, device, test_loader)

In [36]:
def weights_init_Xavier(m):
    """Re-initialise one module in place; intended for ``model.apply``.

    * ``nn.Conv2d`` / ``nn.Linear``: Xavier-normal weights, standard-normal
      bias when a bias exists.
    * ``nn.BatchNorm1d`` / ``nn.BatchNorm2d``: weight ~ N(1, 0.02) and bias
      set to 0 when the layer has affine parameters.
    * Any other module is left untouched.

    Args:
        m: the module visited by ``nn.Module.apply``.

    Layers built with ``bias=False`` or ``affine=False`` carry ``None``
    parameters. They are now skipped rather than raising ``AttributeError``;
    previously only the convolution branch had that guard.
    """
    if isinstance(m, nn.Conv2d):
        init.xavier_normal_(m.weight)
        if m.bias is not None:
            init.normal_(m.bias)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
        # weight/bias are None when the layer was created with affine=False.
        if m.weight is not None:
            init.normal_(m.weight, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        # Bias is drawn before the weight, as before, so a seeded run consumes
        # the random stream in the same order.
        if m.bias is not None:
            init.normal_(m.bias)
        init.xavier_normal_(m.weight)

In [37]:
def weights_init_Kaiming(m):
    """Re-initialise one module in place; intended for ``model.apply``.

    * ``nn.Conv2d`` / ``nn.Linear``: Kaiming-normal weights (library default
      arguments), standard-normal bias when a bias exists.
    * ``nn.BatchNorm1d`` / ``nn.BatchNorm2d``: weight ~ N(1, 0.02) and bias
      set to 0 when the layer has affine parameters.
    * Any other module is left untouched.

    Args:
        m: the module visited by ``nn.Module.apply``.

    Layers built with ``bias=False`` or ``affine=False`` carry ``None``
    parameters. They are now skipped rather than raising ``AttributeError``;
    previously only the convolution branch had that guard.
    """
    if isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)
        if m.bias is not None:
            init.normal_(m.bias)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
        # weight/bias are None when the layer was created with affine=False.
        if m.weight is not None:
            init.normal_(m.weight, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        # Bias is drawn before the weight, as before, so a seeded run consumes
        # the random stream in the same order.
        if m.bias is not None:
            init.normal_(m.bias)
        init.kaiming_normal_(m.weight)

In [38]:
def Model_BatchNorm_InitializeWeights(train_loader, test_loader, device, args, initType):
    """Train ``Net_BatchNorm`` after re-initialising its weights.

    Args:
        train_loader: training batches, forwarded to ``train``.
        test_loader: evaluation batches, forwarded to ``test``.
        device: torch device the model is moved to.
        args: options providing ``lr``, ``momentum`` and ``epochs``.
        initType: ``'Xavier'`` selects ``weights_init_Xavier``; any other
            value selects ``weights_init_Kaiming``.

    The model is trained with SGD for ``args.epochs`` epochs and evaluated
    after each one. Returns nothing.
    """
    network = Net_BatchNorm().to(device)

    # Only the exact string 'Xavier' picks Xavier; everything else is Kaiming.
    initializer = weights_init_Xavier if initType == 'Xavier' else weights_init_Kaiming
    network.apply(initializer)

    sgd = optim.SGD(network.parameters(), lr=args.lr, momentum=args.momentum)

    for epoch_number in range(1, args.epochs + 1):
        train(args, network, device, train_loader, sgd, epoch_number)
        test(args, network, device, test_loader)

In [17]:
# ---- Experiment options -------------------------------------------------
# Options are declared through argparse; parse_args() below reads sys.argv.
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
# The help text previously advertised a default of 10000; the actual default is 1000.
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
# NOTE(review): --log-interval and --save-model are parsed but no code visible
# in this notebook reads them.
parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--save-model', action='store_true', default=False,
                    help='For Saving the current Model')
args = parser.parse_args()
use_cuda = not args.no_cuda and torch.cuda.is_available()

# Seed torch's global RNG once, before any model or loader is created.
torch.manual_seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")

# ---- Data loaders -------------------------------------------------------
# Extra DataLoader options are only supplied when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# One preprocessing pipeline shared by both splits (previously written out twice).
# The Normalize constants are presumably the MNIST mean/std -- confirm.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=args.batch_size, shuffle=True, **kwargs)
# shuffle=True is kept on the test split so the sequence of random draws, and
# therefore the seeded results, stay unchanged.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)


    

In [18]:
# Baseline experiment: train and evaluate the ReLU network (`Net`) with the
# settings currently held in `args`.
print("For ReLU model:")
InitialModel(train_loader,test_loader,device, args)

For ReLU model:

Test set: Average loss: 0.1020, Accuracy: 9662/10000 (97%)


Test set: Average loss: 0.0613, Accuracy: 9826/10000 (98%)


Test set: Average loss: 0.0564, Accuracy: 9804/10000 (98%)


Test set: Average loss: 0.0409, Accuracy: 9860/10000 (99%)


Test set: Average loss: 0.0382, Accuracy: 9870/10000 (99%)


Test set: Average loss: 0.0333, Accuracy: 9896/10000 (99%)


Test set: Average loss: 0.0345, Accuracy: 9874/10000 (99%)


Test set: Average loss: 0.0390, Accuracy: 9875/10000 (99%)


Test set: Average loss: 0.0288, Accuracy: 9909/10000 (99%)


Test set: Average loss: 0.0311, Accuracy: 9896/10000 (99%)



In [19]:
# Same training setup as the baseline, but with the sigmoid-activated network
# (`Net_sigmoid`).
print("For Sigmoid model:")
Model_Sigmoid(train_loader,test_loader,device, args)

For Sigmoid model:

Test set: Average loss: 2.3136, Accuracy: 1843/10000 (18%)


Test set: Average loss: 2.2830, Accuracy: 1095/10000 (11%)


Test set: Average loss: 2.0850, Accuracy: 3225/10000 (32%)


Test set: Average loss: 0.9830, Accuracy: 7534/10000 (75%)


Test set: Average loss: 0.5749, Accuracy: 8312/10000 (83%)


Test set: Average loss: 0.4403, Accuracy: 8690/10000 (87%)


Test set: Average loss: 0.3647, Accuracy: 8925/10000 (89%)


Test set: Average loss: 0.3106, Accuracy: 9085/10000 (91%)


Test set: Average loss: 0.2711, Accuracy: 9185/10000 (92%)


Test set: Average loss: 0.2427, Accuracy: 9261/10000 (93%)



In [20]:
# Dropout sweep: Model_DropOut trains one fresh network per dropout rate.
print("Dropout added to the model::::\n")
# NOTE: this mutates the shared `args` namespace in place; every later cell
# sees epochs == 5 until `args.epochs` is assigned again.
args.epochs = 5
Model_DropOut(train_loader,test_loader,device, args)

Dropout added to the model::::



 DropOut is added for dropout value:: 0.25 


Test set: Average loss: 0.1381, Accuracy: 9739/10000 (97%)


Test set: Average loss: 0.0893, Accuracy: 9790/10000 (98%)


Test set: Average loss: 0.0594, Accuracy: 9874/10000 (99%)


Test set: Average loss: 0.0499, Accuracy: 9876/10000 (99%)


Test set: Average loss: 0.0554, Accuracy: 9881/10000 (99%)



 DropOut is added for dropout value:: 0.5 


Test set: Average loss: 0.2533, Accuracy: 9753/10000 (98%)


Test set: Average loss: 0.1766, Accuracy: 9847/10000 (98%)


Test set: Average loss: 0.1417, Accuracy: 9886/10000 (99%)


Test set: Average loss: 0.1353, Accuracy: 9884/10000 (99%)


Test set: Average loss: 0.1191, Accuracy: 9885/10000 (99%)



 DropOut is added for dropout value:: 0.75 


Test set: Average loss: 0.5044, Accuracy: 9736/10000 (97%)


Test set: Average loss: 0.4193, Accuracy: 9846/10000 (98%)


Test set: Average loss: 0.3608, Accuracy: 9867/10000 (99%)


Test set: Average loss: 0.3395, Ac

In [21]:
# Set the epoch count on the shared `args` namespace to 10 for the runs below.
args.epochs=10
# First run: batch norm combined with dropout (Net_BatchNorm_DropOut).
print("BatchNorm, Dropout of 0.5 with RELU")
Model_BatchNorm_DropOut(train_loader,test_loader,device, args)
# Second run: batch norm only (Net_BatchNorm).
print("\n \n BatchNorm with RELU")
Model_BatchNorm(train_loader,test_loader,device, args)

BatchNorm, Dropout of 0.5 with RELU

Test set: Average loss: 0.5209, Accuracy: 8732/10000 (87%)


Test set: Average loss: 0.4454, Accuracy: 8811/10000 (88%)


Test set: Average loss: 0.4157, Accuracy: 8870/10000 (89%)


Test set: Average loss: 0.3678, Accuracy: 8996/10000 (90%)


Test set: Average loss: 0.3490, Accuracy: 9052/10000 (91%)


Test set: Average loss: 0.3954, Accuracy: 8801/10000 (88%)


Test set: Average loss: 0.4347, Accuracy: 8554/10000 (86%)


Test set: Average loss: 0.4944, Accuracy: 8176/10000 (82%)


Test set: Average loss: 0.4362, Accuracy: 8529/10000 (85%)


Test set: Average loss: 0.4641, Accuracy: 8384/10000 (84%)


 
 BatchNorm with RELU

Test set: Average loss: 0.0571, Accuracy: 9845/10000 (98%)


Test set: Average loss: 0.0387, Accuracy: 9891/10000 (99%)


Test set: Average loss: 0.0325, Accuracy: 9898/10000 (99%)


Test set: Average loss: 0.0270, Accuracy: 9912/10000 (99%)


Test set: Average loss: 0.0260, Accuracy: 9922/10000 (99%)


Test set: Average loss: 

In [39]:
# Batch-norm network whose layers are re-initialised with weights_init_Xavier
# before training.
print("BatchNorm,Initialization of weights with Xavier for Conv and Linear with RELU")
Model_BatchNorm_InitializeWeights(train_loader,test_loader,device, args,'Xavier')

BatchNorm,Initialization of weights with Xavier for Conv and Linear with RELU

Test set: Average loss: 0.0626, Accuracy: 9822/10000 (98%)


Test set: Average loss: 0.0457, Accuracy: 9868/10000 (99%)


Test set: Average loss: 0.0397, Accuracy: 9884/10000 (99%)


Test set: Average loss: 0.0384, Accuracy: 9885/10000 (99%)


Test set: Average loss: 0.0311, Accuracy: 9899/10000 (99%)


Test set: Average loss: 0.0306, Accuracy: 9900/10000 (99%)


Test set: Average loss: 0.0281, Accuracy: 9909/10000 (99%)


Test set: Average loss: 0.0279, Accuracy: 9913/10000 (99%)


Test set: Average loss: 0.0281, Accuracy: 9912/10000 (99%)


Test set: Average loss: 0.0270, Accuracy: 9913/10000 (99%)



In [40]:
# Batch-norm network whose layers are re-initialised with weights_init_Kaiming
# before training.
print("BatchNorm,Initialization of weights with Kaiming for Conv and Linear with RELU")
Model_BatchNorm_InitializeWeights(train_loader,test_loader,device, args,'Kaiming')  

BatchNorm,Initialization of weights with Kaiming for Conv and Linear with RELU

Test set: Average loss: 0.0839, Accuracy: 9771/10000 (98%)


Test set: Average loss: 0.0609, Accuracy: 9826/10000 (98%)


Test set: Average loss: 0.0504, Accuracy: 9853/10000 (99%)


Test set: Average loss: 0.0441, Accuracy: 9878/10000 (99%)


Test set: Average loss: 0.0404, Accuracy: 9881/10000 (99%)


Test set: Average loss: 0.0374, Accuracy: 9883/10000 (99%)


Test set: Average loss: 0.0347, Accuracy: 9893/10000 (99%)


Test set: Average loss: 0.0342, Accuracy: 9896/10000 (99%)


Test set: Average loss: 0.0325, Accuracy: 9894/10000 (99%)


Test set: Average loss: 0.0330, Accuracy: 9900/10000 (99%)

