# Simple Convolutional Neural Network Layers using MNIST dataset

In [3]:
# imports
# create a convolutional neural network
# set device
# Hyperparameters
# Initialize network
# Loss and optimizer
# Train Network
# Check accuracy on a test set to see how good our model is

In [4]:
# let's create a convolutional neural networks
import torch
class ConvNet(torch.nn.Module):
  def __init__(self , input_channels , hidden_size ,number_classes):
    super(ConvNet , self).__init__()
    # conv == > ReLU ==> Pooling 
    self.conv1 = torch.nn.Conv2d(in_channels = input_channels , out_channels = 10 , kernel_size = (3 , 3) , stride = (1 , 1) , padding = (1 , 1))
    self.activation1 = torch.nn.ReLU()
    self.pool1 = torch.nn.MaxPool2d(kernel_size = (2,2) , stride = (2 , 2))


    # flatten it out 
    self.linear1 = torch.nn.Linear(in_features = 10*14*14 , out_features = hidden_size)
    self.activation3 = torch.nn.ReLU()
    self.linear2 = torch.nn.Linear(in_features= hidden_size , out_features =number_classes)
    self.activation4 = torch.nn.Softmax(dim = 1)

  def forward(self , x):
    x = self.pool1(self.activation1(self.conv1(x)))
    x = x.view(x.size(0) , -1)
    x = self.activation3(self.linear1(x))
    outs = self.activation4(self.linear2(x))
    return outs

In [5]:
# let's create a fake dataset for now
input_channels = 1
number_classes = 10
hidden_size = 20
convolution = ConvNet(input_channels , hidden_size , number_classes)
# let's create a fake dataset
x = torch.rand(64 , 1 , 28 , 28)
print(convolution(x).shape)

torch.Size([64, 10])


In [6]:
# creating a dataset
import torchvision
from torchvision.transforms import transforms
data_train = torchvision.datasets.MNIST(root = './data' , train = True , download = True , transform = transforms.ToTensor())
data_test = torchvision.datasets.MNIST(root = './data' , train = False , download = False , transform = transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [7]:
# loading the data
batch_size =  64
train_load = torch.utils.data.DataLoader(data_train , batch_size = batch_size , shuffle = True , num_workers = 0 , pin_memory = True)
test_load = torch.utils.data.DataLoader(data_test , batch_size = batch_size , shuffle = True , num_workers = True , pin_memory = True)

In [9]:
# get default device
def get_default_device():
  if torch.cuda.is_available():
    return torch.device("cuda")
  else:
    return torch.device('cpu')

my_device = get_default_device()
print(my_device)

cuda


In [10]:
def to_device(data , device):
  return data.to(device)

In [11]:
# initialize the network
input_channels = 1
hidden_size = 20
number_of_classes = 10
model = ConvNet(input_channels = input_channels , hidden_size = hidden_size , number_classes = number_of_classes)
model = to_device(model , my_device)

In [12]:
# define the loss and the optimizer
learning_rate = 0.01
crossentropyloss = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters() ,lr = learning_rate)

In [13]:
# let's train it with epochs
num_epochs = 1
num_correct = 0
total_samples = 0
for epoch in range(num_epochs):
  for inputs , targets in train_load:
    # moving the batches to the GPU
    inputs = to_device(inputs , my_device)
    targets = to_device(targets , my_device)
    prediction = model(inputs)
    # loss function
    loss = crossentropyloss(prediction , targets)
    # derivative of the gradient
    loss.backward()
    # updating the weights
    optim.step()
    # resetting the gradients
    optim.zero_grad()

    _ , scores= prediction.max(1)
    num_correct += (scores == targets).sum().item()
    total_samples += targets.size(0)

acc = (num_correct / total_samples)*100
print("The number of correct predicitons are :" , num_correct)
print("The total number of samples are : ", total_samples)
print("The accuracy of our model in one epochs is :" , acc)

The number of correct predicitons are : 49977
The total number of samples are :  60000
The accuracy of our model in one epochs is : 83.295


In [18]:
# Model Evaluation

Once the model is fit , it can be evaluated on the test dataset. This can be achieved by using the DataLoader for the test set , then computing the predictions to the expected values of the test set and calculating the performance metric. 

In [19]:
# Model predictions

A fit model can be used to make a predictions on a new data. Forexample:- we might have a single image or a single row of data and want to make a prediction. 

In [16]:
# Let's evaluate our model by one epoch
num_correct = 0
num_samples = 0
model.eval()

for inputs , targets in test_load:
  inputs = to_device(inputs , my_device)
  targets = to_device(targets , my_device)
  
  scores = model(inputs)
  _ , predictions = scores.max(dim = 1)
  num_correct += (predictions ==targets).sum().item()
  num_samples += targets.size(0)
acc = ( num_correct / num_samples) * 100
print("The number of correct predictions on the test are :" , num_correct)
print("The number of samples on the test set are :" , num_samples)
print("The accuracy of our model on a test set are :" , acc )

The number of correct predictions on the test are : 8633
The number of samples on the test set are : 10000
The accuracy of our model on a test set are : 86.33


There are certain layer that behave differently in training and evaluation modes , such as the dropout and batch normalize layers. 

`Dropout Layer ` :- The dropout layer use for regualarization with argument 'p' that denotes the drop probability , which is used to determine the probability of dropping the units during trainnig. 

* One important point to remember is that units may drop randomly during training only , for the evaluation or inference phase , all the hidden units must be active. To ensure that the overall activations are on the same scale during training and predictions , the activations of the active neurons have to be scaled approppriately. 

* When calling this layer , it's behaviour can be controlled via model.train() and model.eval() to specify whether this call will be made during training and during inference. when using dropout , alternating between these two modes is crucial to ensure that it behaves correctly , for instance , nodes are only randomly dropped during training not evaluation or inference. 

`Batch Normalization Layer ` :- The PyTorch API provides a class , nn.BatchNorm2d() that we can use a layer when defining our models , note that the behaviour of updating the learnable parameters depends on the wheather the model is training model or not. These parameters are learned only during training are then used for normalization during evaluation. because normalization used also for evaluation.

* Note that using the designated setting for training model.train() and evaluation model.eval() will automatically set the mode for the dropout layer and batch normalization layers and rescale appropriately so that we do not have to worry about that at all. 

`with torch.no_grad()` The validation loop looks very similar to training but is somewhat simplified. The key difference is that the validation is read only , specifically the loss value returned is not used (No gradient on those loss functions for gradient purpose) and the weights are not updated. 

In [17]:
# let's train the model with multiple epochs
num_epochs = 100
num_correct = 0
num_samples = 0
for epochs in range(num_epochs):
  for inputs , targets in train_load:
    inputs = to_device(inputs , my_device)
    targets = to_device(targets , my_device)
    predictions = model(inputs)
    # calculate the loss
    loss = crossentropyloss(predictions , targets)
    # calculate the gradient
    loss.backward()
    # update the weights
    optim.step()
    # resetting the gradients
    optim.zero_grad()

    _ , scores = predictions.max(dim = 1)
    num_correct += (scores == targets ).sum().item()
    num_samples += targets.size(0)
    acc = (num_correct / num_samples)*100


  if(epochs+1)%10==0:
    print("Epoch [{}/{}], loss:{:.4f} , acc:{:.4f}" .format(epochs+1 , num_epochs , loss.item() , acc))


Epoch [10/100], loss:1.4612 , acc:96.4457
Epoch [20/100], loss:1.4612 , acc:96.6442
Epoch [30/100], loss:1.4612 , acc:96.7147
Epoch [40/100], loss:1.4924 , acc:96.7198
Epoch [50/100], loss:1.5862 , acc:96.6809
Epoch [60/100], loss:1.4924 , acc:96.6364
Epoch [70/100], loss:1.4612 , acc:96.6370
Epoch [80/100], loss:1.5549 , acc:96.6381
Epoch [90/100], loss:1.4612 , acc:96.6428
Epoch [100/100], loss:1.4924 , acc:96.6142


In [24]:
# Check the accuracy on the test set 
def check_accuracy(loader , model):
  num_correct = 0
  num_samples = 0
  model.eval()

  with torch.no_grad():
    for inputs , labels in loader:
      inputs = to_device(inputs , my_device)
      labels = to_device(labels , my_device)
      scores = model(inputs)
      _ , predictions = scores.max(1)
      num_correct += (predictions == labels).sum().item()
      num_samples += labels.size(0)
  
  print("The accuracy of this model on the test is ")
  acc = (num_correct / num_samples)*100
  return acc

test_accuracy = check_accuracy(test_load, model = model)
print("our test accuracy is :" , test_accuracy)

The accuracy of this model on the test is 
our test accuracy is : 96.31
