In [1]:
from __future__ import print_function

import numpy as np
import matplotlib.pyplot as plt
import copy

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.optim.lr_scheduler as LR

import torchvision
import torchvision.transforms as transforms

In [2]:
transform = transforms.ToTensor()


trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)

batch_size = 200

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

print('==>>> total training batch number: {}'.format(len(trainloader)))
print('==>>> total testing batch number: {}'.format(len(testloader)))

# for i, data in enumerate(trainloader, 0):
#     print(len(data))

==>>> total training batch number: 300
==>>> total testing batch number: 50


In [3]:
# def show_batch(batch):
#     im = torchvision.utils.make_grid(batch)
#     plt.imshow(np.transpose(im.numpy(), (1, 2, 0)))
#     plt.show()


# dataiter = iter(trainloader)
# images, labels = dataiter.next()

# print('Labels: ', labels)
# print('Batch shape: ', images.size())
# show_batch(images)

# images.view(batch_size, -1).size()

In [4]:
INPUTSIZE = 28*28
NBLAYERS = 9
LAYERSIZE = 60

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, LAYERSIZE)
        self.fc2 = nn.Linear(LAYERSIZE, LAYERSIZE)
        self.fc3 = nn.Linear(LAYERSIZE, LAYERSIZE)

    def forward(self, x):
        u1 = self.fc1(x.view(batch_size, -1))
        y1 = F.tanh(u1)
        u2 = self.fc2(y1)
        y2 = F.tanh(u2)
        u3 = self.fc3(y2)
        y3 = F.relu(u3)
        return y3
    
class BatchNormNet(nn.Module):
    def __init__(self):
        super(BatchNormNet, self).__init__()
        self.fc1 = nn.Linear(28*28, LAYERSIZE)
        self.bn1 = nn.BatchNorm1d(LAYERSIZE)
        self.fc2 = nn.Linear(LAYERSIZE, LAYERSIZE)
        self.bn2 = nn.BatchNorm1d(LAYERSIZE)
        self.fc3 = nn.Linear(LAYERSIZE, LAYERSIZE)
        self.bn3 = nn.BatchNorm1d(LAYERSIZE)

    def forward(self, x):
        u1 = self.fc1(x.view(batch_size, -1))
        y1 = F.tanh(self.bn1(u1))
        u2 = self.fc2(y1)
        y2 = F.tanh(self.bn2(u2))
        u3 = self.fc3(y2)
        y3 = F.relu(self.bn3(u3))
        return y3

    
criterion = nn.CrossEntropyLoss()
# eta = 0.002
    
def train_model(network, optimization, seed, IP=True):
    torch.manual_seed(seed)
    
    for epoch in range(20):  # loop over the dataset multiple times
        running_loss = 0.0
        
        for i, data in enumerate(trainloader, 0):

            # get the inputs
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimization.zero_grad()

            # forward + backward + optimize
            y3 = network(inputs)
            loss = criterion(y3, labels)
            loss.backward()
            optimization.step()

            # print statistics
            running_loss += loss.item()
            if i % 100 == 99:    # print every 2000 mini-batches
    #             print(net.fc2.weight)
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
            
    print("Finished training!\n")

In [5]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

seed = 250

# #Train IP Model
# torch.manual_seed(seed)
# IPnet = Net()
# IPnet = IPnet.to(device)

# optimizer1 = optim.Adam(IPnet.parameters(), lr=0.001)
# print("Training IP Net")
# train_model(IPnet, optimizer1, seed, True)

# #Train Standard Model
# torch.manual_seed(seed)
# net = Net()
# net = net.to(device)

# optimizer2 = optim.Adam(net.parameters(), lr=0.001)
# print("Training Standard Net")
# train_model(net, optimizer2, seed, False)

#Train Batch Normalised Model
torch.manual_seed(seed)
Batch_Net = BatchNormNet()
Batch_Net = Batch_Net.to(device)

optimizer = optim.Adam(filter(lambda p: p.requires_grad, Batch_Net.parameters()), lr=3e-2)
print("Training Batch Normalised Net")
train_model(Batch_Net, optimizer, seed)

#Train Deep Standard Model
torch.manual_seed(seed)
Standard_Net = Net()
Standard_Net = Standard_Net.to(device)

optimizer = optim.Adam(filter(lambda p: p.requires_grad, Standard_Net.parameters()), lr=3e-2)
print("Training Standard Net")
train_model(Standard_Net, optimizer, seed)

Training Batch Normalised Net
[1,   100] loss: 0.049
[1,   200] loss: 0.013
[1,   300] loss: 0.010
[2,   100] loss: 0.007
[2,   200] loss: 0.007
[2,   300] loss: 0.006
[3,   100] loss: 0.005
[3,   200] loss: 0.005
[3,   300] loss: 0.005
[4,   100] loss: 0.004
[4,   200] loss: 0.004
[4,   300] loss: 0.004
[5,   100] loss: 0.003
[5,   200] loss: 0.004
[5,   300] loss: 0.004
[6,   100] loss: 0.003
[6,   200] loss: 0.003
[6,   300] loss: 0.003
[7,   100] loss: 0.003
[7,   200] loss: 0.003
[7,   300] loss: 0.003
[8,   100] loss: 0.002
[8,   200] loss: 0.002
[8,   300] loss: 0.003
[9,   100] loss: 0.002
[9,   200] loss: 0.002
[9,   300] loss: 0.003
[10,   100] loss: 0.002
[10,   200] loss: 0.002
[10,   300] loss: 0.002
[11,   100] loss: 0.002
[11,   200] loss: 0.002
[11,   300] loss: 0.002
[12,   100] loss: 0.002
[12,   200] loss: 0.002
[12,   300] loss: 0.002
[13,   100] loss: 0.002
[13,   200] loss: 0.002
[13,   300] loss: 0.002
[14,   100] loss: 0.001
[14,   200] loss: 0.002
[14,   300] l

In [6]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        y3  = Batch_Net(images)
        _, predicted = torch.max(y3.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the batch normalised network on the 10000 test images: %d %%' % (
    100 * correct / total))

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        y3  = Standard_Net(images)
        _, predicted = torch.max(y3.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the standard network on the 10000 test images: %d %%' % (
    100 * correct / total))

# val1, ind1 = DIPnet.fc1.weight.max(0)
# max_weight = val1.max(0)
# print(max_weight)

# val1, ind1 = Dnet.fc1.weight.max(0)
# max_weight = val1.max(0)
# print(max_weight)

# print("\n", DIPnet.bias1.data)

# print("\n", DIPnet.gain1.data)
# print(Ey1[0])

Accuracy of the batch normalised network on the 10000 test images: 96 %
Accuracy of the standard network on the 10000 test images: 93 %
