## Standard Neural Network Implementation using MNIST Dataset

In [2]:
# imports
# create a full connected network
# set device
# Hyperparameters
# Initialize network
# Loss and optimizer
# Train Network
# Check accuracy on a test set to see how good our model is

In [3]:
# let's create a fully connected network
import torch
class Standard(torch.nn.Module):
  def __init__(self , input_size , hidden_size , number_of_classes):
    super(Standard , self).__init__()
    self.linear1 = torch.nn.Linear(in_features = input_size , out_features = hidden_size)
    self.activation1 = torch.nn.ReLU()
    self.linear2 = torch.nn.Linear(in_features = hidden_size , out_features = number_of_classes)
    self.activation2 = torch.nn.Sigmoid()
  def forward(self , x):
    x = x.view(x.size(0) , -1)
    x = self.activation1(self.linear1(x))
    outs = self.activation2(self.linear2(x))
    return outs

In [4]:
# let's create a sample network
model = Standard(784 , 50 , 10)
inputs = torch.rand(64 , 784)
print("The shape of our mode is : " , model(inputs).shape)

The shape of our mode is :  torch.Size([64, 10])


In [5]:
# creating a dataset
import torch
import torchvision
from torchvision.transforms import transforms
train_set= torchvision.datasets. MNIST(root = './root', train = True ,  download = True , transform = transforms.ToTensor())
test_set = torchvision.datasets.MNIST(root = './root' , train = False , download = True , transform = transforms.ToTensor())

# loading the data 
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_set , batch_size = batch_size ,shuffle = True , num_workers = 0 , pin_memory = True )
test_loader =  torch.utils.data.DataLoader(test_set , batch_size =  batch_size , shuffle = True , num_workers = 0 , pin_memory = True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./root/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./root/MNIST/raw/train-images-idx3-ubyte.gz to ./root/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./root/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./root/MNIST/raw/train-labels-idx1-ubyte.gz to ./root/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./root/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./root/MNIST/raw/t10k-images-idx3-ubyte.gz to ./root/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./root/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./root/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./root/MNIST/raw



In [6]:
# set device
def get_default_device():
  if torch.cuda.is_available():
    return torch.device("cuda")
  else:
    return torch.device("cpu")

my_device= get_default_device()

In [7]:
def to_device(data , device):
  return data.to(device)

In [8]:
# let's create a Device data loader class to wrap up the data loader
# class DeviceDataLoader():
#   def __init__(self , loader , device):
#     self.loader = loader
#     self.device = device
#   def __iter__(self):
#     for data , labels in self.loader:
#       yield to_device(data , self.device)
#       yield to_device(labels , self.device)

#   def __len__(self):
#     return len(self.loader)

In [9]:
# # let's create an object
# train_loader = DeviceDataLoader(loader = train_loader , device = my_device)
# test_loader = DeviceDataLoader(loader = test_loader , device =  my_device )
# print(train_loader.device)
# print(test_loader.device)

In [10]:
# Initialize the network
input_size = 784
hidden_size = 50
output_size = 10
model = Standard(input_size = input_size , hidden_size = hidden_size , number_of_classes = output_size)
model = to_device(data = model , device = my_device)

In [11]:
# define the loss and the optimizer
learning_rate = 0.01
cross_entropy = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters() , lr = learning_rate)

In [12]:
# let's train it on the MNIST dataset 
num_epochs  = 1
num_correct = 0
num_samples = 0
for epochs in range(num_epochs):
  for data,labels in train_loader:
    data = to_device(data , my_device)
    labels = to_device(labels , my_device)
    prediction = model(data)
    crossentropy_loss = torch.nn.CrossEntropyLoss()
    loss = crossentropy_loss(prediction , labels)
    # computing the gradients with respect to the weights
    loss.backward()
    # define optimizer
    opt = torch.optim.Adam(model.parameters() , lr = 0.01)
    # updating the weights 
    opt.step()
    # resetting the gradients to zero
    opt.zero_grad()

    # check the correct samples
    _,scores = prediction.max(1)
    num_correct += (scores == labels).sum().item()
    num_samples += labels.size(0)

training_accuracy = (num_correct / num_samples)*100
print("The los after one epoch is :" , loss.item())
print("print the number of correct examples :" , num_correct)
print("print the number of samples in the training set :" , num_samples)
print("The training accuracy will be :" , training_accuracy)

The los after one epoch is : 1.5497691631317139
print the number of correct examples : 53667
print the number of samples in the training set : 60000
The training accuracy will be : 89.445


There are certain layers that behave differently in training and evaluation models such as the dropout and batch normalize layer. 

`Dropout layer :- ` The drop out layer use for regularization , with the argument 'p' that denotes the drop probability , which is used to determine the probability of dropping the units during training. 

* One important point to remember is that units may drop randomly during training only , for the evaluation or inference phase , all the hidden units must be active. To ensure that the overall activations are on the same scale during training and predictions , the activations of the active neurons have to be scaled appropriately. 

* When calling this layer , its behaviour can be controlled via model.train() and model.eval() to specify whether this call will be made during training and during inference , when using drop out , alternating between these two modes is crucial to ensure that is behaves correctly , for instance , nodes are only randomly dropped during training not evaluation or inference.

`Batch Normalization Layer :- ` The PyTorch API provides a class , nn.BatchNorm2d() that we can use a layer when defining our models. Note that the behaviour of updating the learnable parameters depends on whether the model is a training model or not , these parameters are learned only during training are then used for noramlization during evaluation. because normalization used for evaluation(testing)

* Note that using the designated setting for training model.train() and evaluation model.eval() will automatically set the mode for the drop out layer and batch normalization layers and rescale appropriately so that we do not have to worry about that at all. 

Using `with torch.no_grad():-` The validation loop looks very similar to training but is somewhat simplified. The key difference is that validation is read only , specifically the loss value returned is not used(We don't use the loss functions for gradient purpose) and weights are not updated. 

In [13]:
# Check the accuracy on the test set 
def check_accuracy(loader , model):
  num_correct = 0
  num_samples = 0
  model.eval()

  with torch.no_grad():
    for inputs , labels in loader:
      inputs = to_device(inputs , my_device)
      labels = to_device(labels , my_device)
      scores = model(inputs)
      _ , predictions = scores.max(1)
      num_correct += (predictions == labels).sum().item()
      num_samples += labels.size(0)
  
  print("The accuracy of this model on the test is ")
  acc = (num_correct / num_samples)*100
  return acc

test_accuracy = check_accuracy(test_loader , model = model)
print("our test accuracy is :" , test_accuracy)

The accuracy of this model on the test is 
our test accuracy is : 92.63


In [14]:
# let train our model for multiple epochs 
num_of_epochs = 100
for epochs in range(num_of_epochs):
  for batch_idxs , (inputs , labels) in enumerate(train_loader):
    inputs = to_device(inputs , my_device)
    labels = to_device(labels , my_device)
    predictions = model(inputs)
    loss = cross_entropy(predictions , labels)
    # compute the gradients w.r.t the weights 
    loss.backward()
    # update the weights
    optim.step()
    # resetting the gradients
    optim.zero_grad()
    # check the correct samples
    _,scores = predictions.max(1)
    num_correct += (scores == labels).sum().item()
    num_samples += labels.size(0)
  # print("Print the progress")
  if(epochs+1)%10==0:
      print('Epoch [{}/{}], loss:{:.4f}'.format(epochs+1 , num_of_epochs , loss.item()))
      
training_accuracy = (num_correct / num_samples)*100

print("The number of correct examples after 100 epochs is :" , num_correct)
print("The number of samples in the training set is :" , num_samples)
print("The training accuracy after 100 epochs is :" , training_accuracy)

Epoch [10/100], loss:1.5214
Epoch [20/100], loss:1.4955
Epoch [30/100], loss:1.4820
Epoch [40/100], loss:1.5010
Epoch [50/100], loss:1.4697
Epoch [60/100], loss:1.4654
Epoch [70/100], loss:1.5010
Epoch [80/100], loss:1.4654
Epoch [90/100], loss:1.4654
Epoch [100/100], loss:1.5052
The number of correct examples after 100 epochs is : 5711001
The number of samples in the training set is : 6060000
The training accuracy after 100 epochs is : 94.24094059405941


In [15]:
# lets test our model's accuray on a test set after 100 epochs
test_accuracy = check_accuracy(test_loader , model = model)
print("our test accuracy is :" , test_accuracy)

The accuracy of this model on the test is 
our test accuracy is : 94.56
