In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

BATCH_SIZE = 128
EPOCHS = 2
LR = 0.001
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_set = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform)
test_set  = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform)

100%|██████████| 9.91M/9.91M [00:00<00:00, 55.1MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.78MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 14.9MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 9.69MB/s]


In [2]:
# Convert whole dataset to tensors
X_train = train_set.data.float().unsqueeze(1) / 255.0      # (N,1,28,28)
y_train = torch.tensor(train_set.targets, dtype=torch.long)

X_test  = test_set.data.float().unsqueeze(1) / 255.0
y_test  = torch.tensor(test_set.targets, dtype=torch.long)

  y_train = torch.tensor(train_set.targets, dtype=torch.long)
  y_test  = torch.tensor(test_set.targets, dtype=torch.long)


In [3]:
#Create the batchs

X_batches = X_train.split(BATCH_SIZE, dim=0)
y_batches = y_train.split(BATCH_SIZE, dim=0)

In [4]:
class SimpleCNN(nn.Module):
  '''
    Simple CNN with 4 convolutional layers and ReLU activation

  '''


  def __init__(self, input_channels, output_channels, hidden_channels=32):
    super(SimpleCNN, self).__init__()
    self.conv1 = nn.Conv2d(input_channels, hidden_channels, kernel_size=3, stride=2)
    self.batchnorm1 = nn.BatchNorm2d(hidden_channels)
    self.conv2 = nn.Conv2d(hidden_channels, hidden_channels*2, kernel_size=3, stride=2)
    self.batchnorm2 = nn.BatchNorm2d(hidden_channels*2)
    self.conv3 = nn.Conv2d(hidden_channels*2, hidden_channels, kernel_size=3, stride=2)
    self.batchnorm3 = nn.BatchNorm2d(hidden_channels)
    self.conv4 = nn.Conv2d(hidden_channels, output_channels, kernel_size=2, stride=2)
    self.batchnorm4 = nn.BatchNorm2d(output_channels)

    self.activation = nn.ReLU()

    self.flatten = nn.Flatten()

  def forward(self, x):
    x = self.conv1(x)
    x = self.batchnorm1(x)
    x = self.activation(x)
    x = self.conv2(x)
    x = self.batchnorm2(x)
    x = self.activation(x)
    x = self.conv3(x)
    x = self.batchnorm3(x)
    x = self.activation(x)
    x = self.conv4(x)

    return self.flatten(x)


In [5]:
# Adam optimizer was used and CrossEntropy Loss for 10 categories.

classifier = SimpleCNN(input_channels=1, output_channels=10).to(DEVICE)
opt = optim.Adam(lr=LR, params=classifier.parameters())
loss_fn = nn.CrossEntropyLoss()

In [6]:
# Model training

losses = []

for epoch in range(EPOCHS):

  classifier.train()

  for x, y in zip(X_batches, y_batches):
    # Move input and target to the correct device
    x = x.to(DEVICE)
    y = y.to(DEVICE)

    output = classifier(x)
    loss = loss_fn(output, y)

    opt.zero_grad()
    loss.backward()
    opt.step()

    losses.append(loss.item())

  print(f'At epoch: {epoch}, Average loss: {np.mean(losses)}')


At epoch: 0, Average loss: 0.28834598723338295
At epoch: 1, Average loss: 0.17777651842874187


In [8]:
# Execution of the model in the test dataset and accuracy calculation.

result = []

with torch.no_grad():
  classifier.eval()

  for x_test in X_test:

    out = classifier(x_test.unsqueeze(0).to(DEVICE))

    result.append(torch.argmax(out).item())

result = [1 if x == y else 0 for x, y in zip(result, y_test)]

print(f'Accuracy with {EPOCHS} epochs is {(sum(result)/len(result))*100}%')

Accuracy with 2 epochs is 98.22999999999999%
