# Import the packages


In [None]:
import torch
import torchvision
from torch import nn, optim
from torchsummary import summary

# Declare variables for the CNN
- **Epoch** is the number of complete passes of the entire training dataset through the neural network. Within a
pass, every batch goes through one forward and one backward propagation.
- **Batch Size** is the number of samples to work through before updating the weights and biases
associated with the model.
- **Learning Rate** controls how much to change the model parameters in response to the prediction
error each time the model weights are updated.

In [None]:
# Seed PyTorch's global RNG first so that weight initialisation (and any
# shuffling driven by the global generator) starts from the same state on
# every "Restart & Run All".
seed = 42
torch.manual_seed(seed)

# Hyperparameters shared by the cells below.
batch_size = 32       # samples per DataLoader batch / optimizer step
epoch = 30            # number of full passes over the training set
learning_rate = 0.01  # step size passed to the optimizer

# Load the training set and validation set using Dataset and DataLoader


In [None]:
# ToTensor turns each MNIST image into a tensor the network can consume.
trans = torchvision.transforms.ToTensor()

# Training loader: reshuffle the samples every epoch so the optimizer does not
# see the exact same sequence of mini-batches on each pass.
train_data = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST(
    'mnist_data', train=True, download=True, transform=trans
    ), batch_size=batch_size, shuffle=True
    )

# Validation loader: sample order does not affect accuracy, so it stays
# sequential (the DataLoader default).
val_data = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST(
    'mnist_data', train=False, download=True, transform=trans
    ), batch_size=batch_size)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# Define the CNN using ReLU function for image classification

In [None]:
class ConvNet(nn.Module):
  """Small two-stage convolutional classifier with a single linear head.

  Each stage is convolution -> ReLU -> 2x2 max-pooling. The linear layer takes
  6*6*6 flattened features, which is what a 1x28x28 input produces
  (spatial size 28 -> 28 -> 14 -> 12 -> 6, with 6 output channels).
  """

  def __init__(self):
    super().__init__()
    # Stage 1: 1 -> 3 channels; padding=1 keeps the spatial size unchanged.
    self.conv1 = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=1)
    # Stage 2: 3 -> 6 channels; unpadded, so each spatial dim shrinks by 2.
    self.conv2 = nn.Conv2d(3, 6, kernel_size=3, stride=1)
    # Parameter-free layers, reused by both stages.
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.relu = nn.ReLU()
    # Classification head: 216 flattened features -> 10 output scores.
    self.linear1 = nn.Linear(6 * 6 * 6, 10)

  def forward(self, x):
    """Run a batch `x` through both stages and return the (batch, 10) scores."""
    stage1 = self.pool(self.relu(self.conv1(x)))
    stage2 = self.pool(self.relu(self.conv2(stage1)))
    # Collapse channel and spatial dims into one feature axis per sample.
    flat = stage2.view(stage2.shape[0], -1)
    return self.linear1(flat)

# Define a function for validating the model

In [None]:
def validate(model, data):
  """Return the classification accuracy of `model` over `data`, in percent.

  The model is evaluated in eval mode with gradient tracking disabled; its
  previous train/eval mode is restored before returning. Batches are moved to
  the device that holds the model's parameters, so the function works on both
  CPU and GPU.

  Args:
    model: an `nn.Module` mapping a batch of images to per-class scores of
      shape (batch, num_classes).
    data: iterable of `(images, labels)` batches, e.g. a DataLoader.

  Returns:
    A 0-dim tensor holding `100 * correct / total`.

  Raises:
    ZeroDivisionError: if `data` yields no batches.
  """
  # Follow the model's device instead of assuming CUDA; only a parameter-free
  # model needs the explicit fallback.
  first_param = next(model.parameters(), None)
  if first_param is not None:
    device = first_param.device
  else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  was_training = model.training
  model.eval()
  total = 0
  correct = 0
  try:
    # No autograd graph is needed for evaluation; skipping it saves memory.
    with torch.no_grad():
      for images, labels in data:
        images = images.to(device)
        labels = labels.to(device)
        y_pred = model(images)
        # Index of the highest score along the class axis is the prediction.
        _, pred = torch.max(y_pred, 1)
        total += y_pred.size(0)
        correct += torch.sum(pred == labels)
  finally:
    # Hand the model back in the mode the caller left it in.
    model.train(was_training)
  return correct * 100 / total

# Initialize the neural network and optimizer

In [None]:
# Prefer the GPU, but fall back to the CPU so this cell also runs on machines
# without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
convnet = ConvNet().to(device)
# Adam updates every parameter of the network with the configured step size.
optimizer = optim.Adam(convnet.parameters(), lr=learning_rate)
# Cross-entropy between the network's raw scores and the integer class labels.
cross_entropy = nn.CrossEntropyLoss()

# Print the Model Summary

In [None]:
# MNIST images are 1x28x28. That is the input size whose flattened features
# (6*6*6) match the network's linear layer; a 224x224 input would flatten to
# 6*55*55 features and not fit it.
summary(convnet, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 3, 224, 224]              30
              ReLU-2          [-1, 3, 224, 224]               0
         MaxPool2d-3          [-1, 3, 112, 112]               0
            Conv2d-4          [-1, 6, 110, 110]             168
              ReLU-5          [-1, 6, 110, 110]               0
         MaxPool2d-6            [-1, 6, 55, 55]               0
            Linear-7                   [-1, 10]           2,170
Total params: 2,368
Trainable params: 2,368
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forward/backward pass size (MB): 3.83
Params size (MB): 0.01
Estimated Total Size (MB): 4.03
----------------------------------------------------------------


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


# Display the validation accuracy on each epoch

In [None]:
# Move batches to whichever device holds the model's parameters.
device = next(convnet.parameters()).device

for n in range(epoch):
  convnet.train()  # make sure the model is in training mode for this pass
  # Accumulate the sample-weighted loss on the device so that no host/device
  # synchronisation is forced on every step.
  running_loss = torch.zeros((), device=device)
  seen = 0
  for images, labels in train_data:
    images = images.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()
    prediction = convnet(images)
    loss = cross_entropy(prediction, labels)
    loss.backward()
    optimizer.step()
    running_loss += loss.detach() * labels.size(0)
    seen += labels.size(0)
  # Report the mean loss over the whole epoch rather than the last mini-batch
  # only, which is a noisy indicator; max() guards against an empty loader.
  mean_loss = float(running_loss) / max(seen, 1)
  accuracy = float(validate(convnet, val_data))
  print("Epoch:", n+1, "Loss: ", mean_loss, "Accuracy:", accuracy)

Epoch: 1 Loss:  0.0835513323545456 Accuracy: 88.06999969482422
Epoch: 2 Loss:  0.1373533010482788 Accuracy: 89.77999877929688
Epoch: 3 Loss:  0.23270569741725922 Accuracy: 90.91999816894531
Epoch: 4 Loss:  0.2337682545185089 Accuracy: 93.18999481201172
Epoch: 5 Loss:  0.08620554953813553 Accuracy: 94.07999420166016
Epoch: 6 Loss:  0.05838495492935181 Accuracy: 94.33999633789062
Epoch: 7 Loss:  0.10215658694505692 Accuracy: 94.7699966430664
Epoch: 8 Loss:  0.10404697060585022 Accuracy: 94.72000122070312
Epoch: 9 Loss:  0.09468015283346176 Accuracy: 94.3499984741211
Epoch: 10 Loss:  0.10854273289442062 Accuracy: 94.18999481201172
Epoch: 11 Loss:  0.1658000349998474 Accuracy: 93.55999755859375
Epoch: 12 Loss:  0.18976950645446777 Accuracy: 93.3699951171875
Epoch: 13 Loss:  0.1318638026714325 Accuracy: 94.32999420166016
Epoch: 14 Loss:  0.11216580867767334 Accuracy: 94.8499984741211
Epoch: 15 Loss:  0.09310924261808395 Accuracy: 94.91999816894531
Epoch: 16 Loss:  0.0903550460934639 Accurac

# Observations

Using ReLU as the activation function increased the accuracy of the convolutional neural network.

This is because of the **non-saturation of its gradient**, which greatly accelerates the convergence of stochastic gradient descent compared to the sigmoid / tanh functions.