### Basic PyTorch tutorial

This tutorial will serve as a simple introduction to PyTorch. It will cover the following: 
1. Declare and initialize a neural network (Multi-layer perceptron)
2. Load train/test data (MNIST)
3. Train a neural network to perform the task
4. Evaluate test accuracy

In [1]:
## IMPORTING A BUNCH OF PYTORCH LIBRARIES 

import numpy as np
import torch
import torchvision 
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import pickle
import seaborn as sns; sns.set()
import copy, random
import torch.nn as nn
import torch.nn.functional as F

#### Loading train/test data (MNIST)

In [2]:
trainset =   torchvision.datasets.MNIST('MNIST/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ]))

trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

testset = torchvision.datasets.MNIST('MNIST/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ]))

testloader = torch.utils.data.DataLoader( testset, batch_size=4, shuffle=True)

classes = ('0','1', '2', '3', '4','5', '6', '7', '8', '9')

#### Declaring a neural network (Multi-layer perceptron)

In [3]:
# MLP DEFINITION

### This network has 4 layers: 
### Input [784 x 1] --> Hidden-1 [300 x 1] --> Hidden-2 [100 x 1] --> Output [10 x 1]

import torch.nn as nn
import torch.nn.functional as F

class MLP_MNIST(nn.Module):
    
    def __init__(self):
        super(MLP_MNIST, self).__init__()
        random_seed = 1;
        torch.manual_seed(random_seed)
        
        self.fc = nn.Sequential(
            nn.Linear(784,300),
            nn.ReLU(True),
            nn.Linear(300,100),
            nn.ReLU(True),
            nn.Linear(100,10)
            )
        
    def forward(self,x):
        
        x = self.fc(x);
        return x
        


In [4]:
# Initializing the weights and biases of the neural network. This is a Xavier initialization; 
# Optimal initialization of NN's is an entire field in itself :D 

def initialize_network(net):

    random_seed = 1;
    torch.manual_seed(random_seed)
    for m in net.modules():
        torch.manual_seed(random_seed)
        if isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight);
            nn.init.uniform_(m.bias);
    
    return net
    '''
    for name, param in autoenc_ch.named_parameters():
        if param.requires_grad:
            print(name, param.data)
    '''


In [5]:
### Function to evaluate the test accuracy of the network on test-data from MNIST

def generalization_acc(net_mod):

    correct = 0;
    total = 0
    batch_size = 4;
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.view(batch_size, -1)
            outputs = net_mod(images);
            _, predicted = torch.max(outputs.data,1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    #print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
    return (100 * correct / total);

In [6]:
### Function to evaluate the train-accuracy of the network on the train-data from MNIST
## (NOTE): There may be cases where the network has a very high train-accuracy but low test-accuracy --> commonly called overfitting

def train_accuracyMNIST(net_mod):

    correct = 0;
    total = 0
    batch_size = 4;
    with torch.no_grad():
        for data in trainloader:
            images, labels = data
            images = images.view(batch_size, -1)
            outputs = net_mod(images);
            _, predicted = torch.max(outputs.data,1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 60000 train images: %d %%' % (
        100 * correct / total))
    return (100 * correct / total);

In [7]:
### Defining a Loss function for the neural network. 
# here, we use the cross-entropy loss. One can use L2 loss (any loss that is appropriate for the task at hand)

import torch.optim as optim

criterion = nn.CrossEntropyLoss();
num_epochs = 2;

In [13]:
### Function for training neural network 

def trainNetwork_MNIST(net, wtsDict, num_epochs=2):

    ## net: corresponds to the neural network that requires training
    ## wtsDict: dictionary of weights, if you want to store the weights of intermediate networks while training (not reqd for conventional purposes)
    ## num_epochs: number of epochs networks are to be trained for. One epoch is training the network on all train-datapoints in the batch
    
    
    #storeWts_manifold2(net, wtsDict)
    
    ## Choosing an optimizer. Here, we choose Stochastic gradient descent (SGD) with learning rate=0.001 and momentum = 0.9
    optimizer = optim.SGD(net.parameters(),lr=0.001, momentum=0.9)
    
    for epoch in range(num_epochs):

        running_loss = 0.0;
        batch_size = 4;

        ctr_run = 0;
        for i in range(0, 60000, batch_size):
                
            image_input = torch.empty(batch_size,784)
            labels = torch.empty(batch_size);

            for j in range(batch_size):
                img, label = trainset[i+j]
                img = img[0,:,:];
                img = img.view(1,784)
        
                image_input[j,:] = img;
                labels[j] = label;

            output = net(image_input);
            loss = criterion(output, labels.long());

            # =====================backward==================
            optimizer.zero_grad()
            loss.backward()
            #plot_grad_flow(autoenc_ch.named_parameters)
            optimizer.step()

            running_loss += loss.item()
            if ctr_run % 2000 == 1999: # print every 8000 samples
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, ctr_run + 1, running_loss / 2000))
                running_loss = 0.0;
                #storeWts_manifold2(net, wtsDict); 
                #calcLoss_network.append(calculateLoss_network(net))
                
            ctr_run += 1;

        acc = train_accuracyMNIST(net);
        print("within func")
        
        
        ## I terminate training when accuracy reaches 98%; 
        ## One could train for a fixed number of epochs or until the network reaches a particular accuracy.
        if num_epochs != 2:
            # Evaluate if train accuracy is greater than 98%
            if acc>98:
                #storeWts_manifold2(net, wtsDict)
                return (epoch+1, acc)
        
    return (epoch+1, acc)


In [14]:
## Calling all the functions [to train the net]

net_MNIST = MLP_MNIST();

wtsDictionary_landscape = {};
ctr = 1;
for n,p in net_MNIST.named_parameters():
        
    temp = list(p.view(1,-1).size());
    wtsDictionary_landscape[ctr] = np.array([]).reshape(0,temp[1]);
    ctr += 1
    
net_train = copy.deepcopy(net_MNIST);
[returnTime, acc, calcLoss_network] = trainNetwork_MNIST(net_train, wtsDictionary_landscape, 5)




[1,  2000] loss: 0.589
[1,  4000] loss: 0.291
[1,  6000] loss: 0.208
[1,  8000] loss: 0.181
[1, 10000] loss: 0.157
[1, 12000] loss: 0.147
[1, 14000] loss: 0.129
Accuracy of the network on the 60000 train images: 96 %
within func
[2,  2000] loss: 0.103
[2,  4000] loss: 0.097
[2,  6000] loss: 0.092
[2,  8000] loss: 0.087
[2, 10000] loss: 0.084
[2, 12000] loss: 0.083
[2, 14000] loss: 0.074
Accuracy of the network on the 60000 train images: 97 %
within func
[3,  2000] loss: 0.065
[3,  4000] loss: 0.056
[3,  6000] loss: 0.056
[3,  8000] loss: 0.054
[3, 10000] loss: 0.056
[3, 12000] loss: 0.054
[3, 14000] loss: 0.048
Accuracy of the network on the 60000 train images: 98 %
within func


ValueError: not enough values to unpack (expected 3, got 2)