<a href="https://colab.research.google.com/github/Aparna-Sakshi/DLFA/blob/main/DLFA_assignment_1_part1_18MA20007.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment-1 : Training a neural network using back propagation

---


### Name: Aparna Sakshi
### Roll No.:18MA20007

#### Importing all the libraries

In [None]:
%matplotlib inline
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torchvision import transforms,datasets
import torch.optim as optim
from torch.autograd import Variable
import torchvision



import matplotlib.pyplot as plt
import time

import torch
import torchvision

#### Downloading MNIST Dataset

In [None]:
# Images are resized to 28x28 (the size the training code flattens to 28*28) and converted to tensors.
apply_transform = transforms.Compose([transforms.Resize(28), transforms.ToTensor()])
BatchSizes = [256,512,1024] # change according to system specs
colours=['blue', 'red', 'green']  # one plot colour per entry of BatchSizes
no_of_batch_sizes=len(BatchSizes)

# The datasets do not depend on the batch size, so they are built once and
# shared by every DataLoader instead of being re-created on each iteration.
trainset = datasets.MNIST(root='./MNIST', train=True, download=True, transform=apply_transform)
# Held-out test split; it uses the same transform as the training split (no augmentation is applied).
testset = datasets.MNIST(root='./MNIST', train=False, download=True, transform=apply_transform)

trainsets=[]
trainLoaders=[]
testsets=[]
testLoaders=[]

for BatchSize in BatchSizes:
  # Training batches are reshuffled every epoch; test batches keep a fixed order.
  trainLoader = torch.utils.data.DataLoader(trainset, batch_size=BatchSize,
                                            shuffle=True, num_workers=4)
  testLoader = torch.utils.data.DataLoader(testset, batch_size=BatchSize,
                                           shuffle=False, num_workers=4)

  # The parallel lists keep index i aligned with BatchSizes[i].
  trainsets.append(trainset)
  trainLoaders.append(trainLoader)
  testsets.append(testset)
  testLoaders.append(testLoader)



In [None]:
# Report how many samples sit behind each train/test loader (one pair per batch size).
for i, _ in enumerate(BatchSizes):
  n_train = len(trainLoaders[i].dataset)
  n_test = len(testLoaders[i].dataset)
  print('No. of samples in train set '+str(i)+" is "+str(n_train))
  print('No. of samples in test set '+str(i)+" is "+str(n_test))


No. of samples in train set 0 is 60000
No. of samples in test set 0 is 10000
No. of samples in train set 1 is 60000
No. of samples in test set 1 is 10000
No. of samples in train set 2 is 60000
No. of samples in test set 2 is 10000


## Feed Forward Neural Network

In [None]:
class FFNN(nn.Module):
  """Feed-forward classifier: 784 inputs -> 100 sigmoid units -> 10 class scores.

  `forward` returns raw, unnormalised class scores (logits). The notebook
  trains this model with `nn.CrossEntropyLoss`, which applies log-softmax
  itself; applying softmax inside the network as well normalises twice and
  flattens the gradients. Use `predict_proba` when probabilities are needed.
  """
  def __init__(self):
    super(FFNN,self).__init__()
    self.fc1 = nn.Linear(784,100)
    self.sigmoid=nn.Sigmoid()
    self.fc2 = nn.Linear(100, 10)

  def forward(self, x):
    """Return class logits of shape (batch, 10) for inputs of shape (batch, 784)."""
    x=self.fc1(x)
    x=self.sigmoid(x)
    # No softmax here: the loss function expects logits, and argmax over
    # logits picks the same class as argmax over probabilities.
    return self.fc2(x)

  def predict_proba(self, x):
    """Return per-class probabilities (each row sums to 1) for inputs of shape (batch, 784)."""
    return F.softmax(self.forward(x), dim=1)

In [None]:
# Build the network and show its layer summary.
net = FFNN()
print(net)

# Detect CUDA once; the training and evaluation helpers read this flag.
use_gpu = torch.cuda.is_available()
if use_gpu:
  print('GPU is available')
  net = net.cuda()

FFNN(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (sigmoid): Sigmoid()
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)
GPU is available


#### Training the model using Stochastic Gradient Descent

In [None]:
#SGD for one batch size and one learning rate
def training_SGD(num_epochs, trainLoader, learning_rate, net, critreion, adam, decay):
  """Train `net` with mini-batch gradient descent and return per-epoch metrics.

  The parameters are updated after every batch, so the batch size of
  `trainLoader` actually influences training.

  Args:
    num_epochs: number of full passes over `trainLoader`.
    trainLoader: iterable yielding `(inputs, labels)` batches; inputs are
      flattened to 28*28 features before being fed to `net`.
    learning_rate: initial learning rate.
    net: model to train (updated in place).
    critreion: loss callable taking `(outputs, labels)`.
    adam: if true use Adam, otherwise SGD with momentum 0.9.
    decay: if true multiply the learning rate by 0.99 after every epoch.

  Returns:
    `(train_loss, train_acc)`: two lists with one float per epoch, holding the
    mean batch loss and the fraction of correctly classified samples.

  Raises:
    ValueError: if `trainLoader` yields no batches.
  """
  if adam:
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
  else:
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)

  # Feed batches to whichever device the model lives on, instead of relying
  # on a notebook-level GPU flag.
  first_param = next(net.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  train_loss = []
  train_acc = []
  for epoch in range(num_epochs):
    running_loss = 0.0
    running_corr = 0
    num_samples = 0
    num_batches = 0
    for inputs, labels in trainLoader:
      inputs, labels = inputs.to(device), labels.to(device)
      #Initializing model gradients to zero
      optimizer.zero_grad()
      #Data feed-forward through the network
      outputs = net(inputs.view(-1,28*28))
      #Finding the loss
      loss = critreion(outputs, labels)
      #Calculating gradients and updating the parameters for this batch
      loss.backward()
      optimizer.step()
      #Accumulate plain Python numbers so no autograd graph is kept alive
      running_loss += loss.item()
      #Predicted class is the one with the maximum score
      preds = torch.argmax(outputs, dim=1)
      running_corr += (preds == labels).sum().item()
      num_samples += labels.size(0)
      num_batches += 1

    if num_batches == 0:
      raise ValueError('trainLoader produced no batches')

    # Decay the learning rate in place: this keeps the optimizer's type
    # (SGD or Adam) and its internal state such as momentum buffers.
    if decay:
      learning_rate*=0.99
      for group in optimizer.param_groups:
        group['lr'] = learning_rate

    epoch_loss = running_loss/num_batches#mean batch loss for this epoch
    epoch_acc = running_corr/num_samples#fraction of samples classified correctly

    train_loss.append(epoch_loss)#Saving the loss over epochs
    train_acc.append(epoch_acc)#saving the accuracy over epochs

    print('Epoch{:.0f}/{:.0f}: Training loss: {:.4f}|Training Accuracy: {:.4f}'.format(epoch+1,num_epochs,epoch_loss,epoch_acc*100))
  return train_loss,train_acc




#### Plotting loss and accuracy vs. epoch for various batch sizes

In [None]:
#plotting loss and accuracy Vs no. of epochs for one learning rate
def plot_data(losses,accs,no_of_batch_sizes,num_epochs,learning_rate):
  """Plot training loss and accuracy per epoch, one curve per batch size.

  Args:
    losses: `losses[i]` is the sequence of per-epoch losses for batch size i.
    accs: `accs[i]` is the sequence of per-epoch accuracies for batch size i.
    no_of_batch_sizes: number of curves to draw; curve i is labelled with the
      notebook-level `BatchSizes[i]` and drawn in `colours[i]`.
    num_epochs: number of points on the x axis (epochs are numbered from 0).
    learning_rate: shown in the figure title.
  """
  fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=[15,5])
  # A figure-level title; a plain plt.title() before any subplot exists would
  # attach the title to a stray full-figure axes instead.
  fig.suptitle("Train data set with Learning rate: "+str( learning_rate))

  epochs = range(num_epochs)
  for i in range(no_of_batch_sizes):
    label = 'Batch size='+str(BatchSizes[i])
    # float() also accepts 0-d tensors; the marker keeps single-epoch runs
    # visible (a one-point line without a marker draws nothing).
    loss_ax.plot(epochs, [float(v) for v in losses[i]], linestyle='-', marker='o',
                 color=colours[i], label=label)
    acc_ax.plot(epochs, [float(v) for v in accs[i]], linestyle='-', marker='o',
                color=colours[i], label=label)

  loss_ax.legend(loc='upper right')
  loss_ax.set_xlabel('Epochs')
  loss_ax.set_ylabel('Training loss/error')

  acc_ax.legend(loc='upper right')
  acc_ax.set_xlabel('Epochs')
  acc_ax.set_ylabel('Training Accuracy')

In [None]:
def _plot_test_metrics(x_values, test_losses, test_accs, x_label):
  """Draw test loss (left panel) and test accuracy (right panel) against `x_values`."""
  # float() turns 0-d tensors (including ones that track gradients or live
  # on the GPU) into plain numbers that matplotlib can plot.
  loss_values = [float(v) for v in test_losses]
  acc_values = [float(v) for v in test_accs]

  fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=[15,5])
  loss_ax.plot(x_values, loss_values, 'r-', label='Loss/error')
  loss_ax.legend(loc='upper right')
  loss_ax.set_xlabel(x_label)
  loss_ax.set_ylabel('loss')

  acc_ax.plot(x_values, acc_values, 'g-', label='Accuracy')
  acc_ax.legend(loc='upper right')
  acc_ax.set_xlabel(x_label)
  acc_ax.set_ylabel('accuracy')

#test data vs batch size
def plot_data2(no_batch_sizes, test_losses, test_accs):
  """Plot test loss and accuracy against batch size.

  Args:
    no_batch_sizes: x values, i.e. the batch sizes themselves.
    test_losses: one test loss per batch size.
    test_accs: one test accuracy per batch size.
  """
  _plot_test_metrics(no_batch_sizes, test_losses, test_accs, 'batch size')

#test data vs learning rate
def plot_data3(no_learning_rates, test_losses, test_accs):
  """Plot test loss and accuracy against learning rate.

  Args:
    no_learning_rates: x values, i.e. the learning rates themselves.
    test_losses: one test loss per learning rate.
    test_accs: one test accuracy per learning rate.
  """
  _plot_test_metrics(no_learning_rates, test_losses, test_accs, 'learning rate')

In [None]:
def test_SGD(testLoader, net, batch_size, learning_rate):
  correct_pred=0
  running_loss=0
  total_loss=0
  
  for i,data in enumerate(testLoader):
      inputs,labels = data
      if use_gpu:
          inputs, labels = inputs.cuda(),labels.cuda()
      # Feedforward train data batch through model
      output = net(inputs.view(-1,28*28))
      loss = critreion(output, labels)
      total_loss += loss
      # Predicted class is the one with maximum probability
      preds = torch.argmax(output,dim=1)
      correct_pred += torch.sum(preds==labels)
      
    

  test_accuracy = correct_pred.item()/10000.0
  print('Testing accuracy (Batch size={:.0f}, lr={:.2f}) = {:.4f}'.format(batch_size, learning_rate, test_accuracy*100)) 
  return total_loss,test_accuracy
  

In [None]:
# Experiment configuration shared by the runs below.
num_epochs = 1
learning_rates = [0.025,0.05,0.1,0.2,0.5]
critreion=nn.CrossEntropyLoss()
# Loader for the largest batch size (last entry of trainLoaders).
trainLoader=trainLoaders[-1]
def SDG_for_different_LR(num_epochs, trainLoaders,testLoaders ,learning_rates, critreion, adam=False, decay=False):
  """Train and evaluate one fresh network per (learning rate, batch size) pair and plot the results.

  Args:
    num_epochs: epochs used for every training run.
    trainLoaders: training loaders; entry i belongs to the notebook-level `BatchSizes[i]`.
    testLoaders: test loaders, aligned with `trainLoaders`.
    learning_rates: learning rates to sweep over.
    critreion: loss callable handed to `training_SGD`.
    adam: forwarded to `training_SGD` (Adam instead of SGD with momentum).
    decay: forwarded to `training_SGD` (per-epoch learning-rate decay).

  Side effects:
    Prints progress and draws, for every learning rate, the training curves
    and the test metrics per batch size; finally draws the test metrics per
    learning rate, one figure per batch size.
  """
  lr_banner = "---------------------------------------------------------------------------------------------------------------------"
  batch_banner = "______________________________________________________________"
  use_gpu = torch.cuda.is_available()

  losses=[]
  accs=[]
  for learning_rate in learning_rates:
    # Architecture summary, printed once per learning rate; every run below
    # trains its own freshly initialised copy of this model.
    print(FFNN())
    if use_gpu:
      print('GPU is available')
    print(lr_banner)
    print("Learning rate: ", learning_rate)
    print(lr_banner)
    train_losses=[]
    train_accs=[]
    test_losses=[]
    test_accs=[]

    for i in range(no_of_batch_sizes):
      # A new network per batch size: reusing one model would start the later
      # runs from already-trained weights and confound the comparison.
      net = FFNN()
      if use_gpu:
        net=net.cuda()
      print(batch_banner)
      print("Batch size: ", BatchSizes[i])
      print(batch_banner)
      train_loss,train_acc = training_SGD(num_epochs, trainLoaders[i], learning_rate, net, critreion, adam, decay)
      test_loss,test_acc=test_SGD(testLoaders[i], net,BatchSizes[i], learning_rate)
      train_losses.append(train_loss)
      train_accs.append(train_acc)
      # Store plain floats: float() also accepts a 0-d tensor, and matplotlib
      # cannot plot tensors that track gradients or live on the GPU.
      test_losses.append(float(test_loss))
      test_accs.append(float(test_acc))
    losses.append(test_losses)
    accs.append(test_accs)
    plot_data(train_losses,train_accs,no_of_batch_sizes,num_epochs,learning_rate)
    plot_data2(BatchSizes, test_losses, test_accs)

  # One figure per batch size: test metrics as a function of the learning rate.
  for i in range(no_of_batch_sizes):
    losses_for_batch = [per_lr[i] for per_lr in losses]
    accs_for_batch = [per_lr[i] for per_lr in accs]
    plot_data3(learning_rates, losses_for_batch, accs_for_batch)
    

    
    
  




In [None]:
# Stochastic gradient descent with momentum (no Adam, no learning-rate decay)
SDG_for_different_LR(
    num_epochs=num_epochs,
    trainLoaders=trainLoaders,
    testLoaders=testLoaders,
    learning_rates=learning_rates,
    critreion=critreion,
)

In [None]:
# Stochastic gradient descent with per-epoch learning-rate decay
SDG_for_different_LR(
    num_epochs=num_epochs,
    trainLoaders=trainLoaders,
    testLoaders=testLoaders,
    learning_rates=learning_rates,
    critreion=critreion,
    decay=True,
)

In [None]:
# Same sweep using the Adam optimizer
SDG_for_different_LR(
    num_epochs=num_epochs,
    trainLoaders=trainLoaders,
    testLoaders=testLoaders,
    learning_rates=learning_rates,
    critreion=critreion,
    adam=True,
)