# Lab 2 - Digit Recognition Using CNN

In [None]:
# Importing the packages needed to build and train the CNN
import torch
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim
import os
import glob
import numpy as np  
from skimage import io 
from torch.utils.data import Dataset, DataLoader
from skimage import transform
from torch.utils.data import random_split
from torchvision import transforms, utils

In [1]:
# Mounting Google Drive (Colab only) at /content/drive; the MNIST image
# folders read later in this notebook live under that mount point
from google.colab import files,drive
drive.mount('/content/drive')

Mounted at /content/drive


In [38]:
# Selecting the compute device: the first CUDA GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
  device = torch.device('cuda:0')
else:
  device = torch.device('cpu')

# Defining network architecture 
class Net(nn.Module):
  """Small CNN classifier: two conv/ReLU/max-pool stages, then three linear layers.

  The network takes single-channel images and produces 10 class scores.
  fc1 expects 30*5*5 = 750 flattened features, which is what a 28x28 input
  yields (28 -> 24 after conv1 -> 12 after pooling -> 10 after conv2 -> 5
  after pooling, with 30 channels).
  """

  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 6, 5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 30, 3)
    self.fc1 = nn.Linear(30*5*5, 100)
    self.fc2 = nn.Linear(100, 75)
    self.fc3 = nn.Linear(75, 10)

  def forward(self, x):
    """Run a batch through the network.

    Args:
      x: batch of single-channel images, laid out as (batch, 1, height, width).

    Returns:
      Tensor of shape (batch, 10) holding per-class log-probabilities.
    """
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    # Flattening everything except the batch axis for the linear layers
    x = x.view(-1, self.num_flat_features(x))
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    # dim=1 is the class axis; leaving dim implicit is deprecated and warns
    # on every call
    return F.log_softmax(self.fc3(x), dim=1)

  def num_flat_features(self, x):
    """Return the number of elements per sample, i.e. the product of all dims but the first."""
    num_features = 1
    for extent in x.size()[1:]:
      num_features *= extent
    return num_features

# Instantiating the network, moving it to the selected device, and printing its layers
net = Net()
net = net.to(device)
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 30, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=750, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=75, bias=True)
  (fc3): Linear(in_features=75, out_features=10, bias=True)
)


In [39]:
# Creating a customized dataset
class MNISTDataset(Dataset):
  """Dataset over the .jpg images of a single digit directory.

  The last component of the directory path is used as the integer label,
  e.g. a directory ending in '/3' yields label 3 for all of its images.
  """

  def __init__(self, dir, transform=None, max_files=100):
    """Index the images of one digit directory.

    Args:
      dir: directory containing the .jpg images; its last path component
        must parse as an int when items are fetched.
      transform: optional callable applied to each {'image', 'label'} dict.
      max_files: maximum number of images to use from the directory, or
        None for no limit. The default of 100 matches the previous
        hard-coded cap.
    """
    self.dir = dir
    self.transform = transform
    self.max_files = max_files
    # Listing the directory once, sorted, so that indices map to the same
    # file on every access instead of re-globbing (in unspecified order) on
    # each __len__/__getitem__ call
    self.files = sorted(glob.glob(os.path.join(dir, '*.jpg')))[:max_files]

  def __len__(self):
    """Return the number of indexed images."""
    return len(self.files)

  def _digit_label(self):
    """Return the integer label taken from the directory's last path component."""
    # normpath drops a trailing separator, which would otherwise leave an
    # empty last component
    return int(os.path.basename(os.path.normpath(self.dir)).strip())

  def __getitem__(self, idx):
    """Return {'image': ndarray, 'label': 0-d ndarray} for index idx, after the optional transform."""
    if torch.is_tensor(idx):
      idx = idx.tolist()

    # glob results already include the directory, so they are used as-is;
    # joining them onto self.dir again would break relative directories
    image = io.imread(self.files[idx])
    label = np.array(self._digit_label())
    instance = {'image':image,'label':label}

    if self.transform:
      instance = self.transform(instance)

    return instance

In [40]:
# Defining custom rescale transformation
class Rescale(object):
  """Resize the image of a sample dict; the label is passed through unchanged.

  Args:
    output_size: an int or a tuple. With an int, the shorter image side is
      scaled to that value and the other side keeps the aspect ratio. With
      a tuple, it is used directly as (height, width).
  """

  def __init__(self, output_size):
    assert isinstance(output_size, (int, tuple))
    self.output_size = output_size

  def _target_shape(self, h, w):
    """Return the integer (height, width) that an h x w image is resized to."""
    if isinstance(self.output_size, int):
      if h > w:
        new_h, new_w = self.output_size*h/w, self.output_size
      else:
        new_h, new_w = self.output_size, self.output_size*w/h
    else:
      new_h, new_w = self.output_size

    return int(new_h), int(new_w)

  # Rescaling an instance with output size
  def __call__(self, sample):
    """Return a new {'image', 'label'} dict with the image resized."""
    image, label = sample['image'], sample['label']
    # Height and width are the two leading axes, the same axes the 2-tuple
    # given to resize applies to; shape[-2:] would read (width, channels)
    # for an H x W x C image
    h, w = image.shape[:2]
    new_image = transform.resize(image, self._target_shape(h, w))

    return {'image': new_image, 'label':label}

# Converting images and labels to tensors
class ToTensor(object):
  """Turn the numpy arrays of a sample dict into torch tensors."""

  def __call__(self, sample):
    """Return {'image', 'label'} tensors; the image gets a leading axis of size 1."""
    picture = sample['image']
    target = sample['label']
    height, width = picture.shape[0], picture.shape[1]
    # Reshaping (height, width) to (1, height, width)
    channel_first = picture.reshape((1, height, width))
    image_tensor = torch.from_numpy(channel_first)
    label_tensor = torch.from_numpy(target)
    return {'image': image_tensor, 'label': label_tensor}

In [41]:
# Batch size shared by the training and validation loaders
batch_size = 32

# Building one dataset per digit directory (0-9), each resized to 28 and converted to tensors
list_datasets = [
  MNISTDataset('/content/drive/My Drive/MNIST/trainingset/' + str(digit),
               transform = transforms.Compose([Rescale(28), ToTensor()]))
  for digit in range(10)
]

# Concatenating the per-digit datasets into a single dataset and reporting its size
dataset = torch.utils.data.ConcatDataset(list_datasets)
print(len(dataset))

# Splitting randomly: 70% of the samples for training, the remainder for validation
train_size = int(len(dataset)*0.7)
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset,[train_size, val_size])

# Wrapping both splits in shuffling dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=1)

1000


In [42]:
# Defining epochs, learning rate, optimizer and loss function for training
epochs = 5
lr = 1e-3
optimizer = optim.Adam(net.parameters(), lr = lr, weight_decay = 1e-5)
# The network already outputs log-probabilities; CrossEntropyLoss applies
# log-softmax again, which leaves log-probabilities unchanged
criterion = nn.CrossEntropyLoss()

for epoch in range(epochs):
  # Training the network for one pass over the training set
  net.train()

  running_loss = 0.0
  for batch_idx, batch in enumerate(train_dataloader):
    inputs = batch["image"].to(device, dtype = torch.float)
    targets = batch["label"].to(device, dtype = torch.long)

    # Forward pass, backpropagation and parameter update
    optimizer.zero_grad()
    predicted_outputs = net(inputs)
    loss = criterion(predicted_outputs, targets)
    loss.backward()
    optimizer.step()

    # Reporting the mean training loss over every 10 batches
    running_loss += loss.item()
    if (batch_idx + 1) % 10 == 0:
      print('epoch %d, batch: %d, training loss: %.3f'%(epoch + 1, batch_idx + 1, running_loss/10))
      running_loss = 0.0

  # Evaluating the network on the validation set
  net.eval()

  # Per-digit counters of correct predictions and of samples seen
  correct = [0.0]*10
  total = [0.0]*10

  with torch.no_grad():
    for batch_idx, batch in enumerate(val_dataloader):
      images = batch["image"].to(device, dtype = torch.float)
      labels = batch["label"].to(device, dtype = torch.long)
      predicted_outputs = net(images)

      # The predicted class is the index of the largest output per sample
      _, predicted_labels = torch.max(predicted_outputs, 1)
      c = (predicted_labels == labels)

      # Counting all correctly predicted labels, per digit
      for label, is_correct in zip(labels.tolist(), c.tolist()):
        correct[label] += is_correct
        total[label] += 1

  # Reporting the validation accuracy per digit
  for i in range(10):
    if total[i] == 0:
      # The random split can leave a digit without validation samples;
      # dividing by zero here would abort training
      print('Validation accuracy for digit %d: n/a (no validation samples)'%i)
    else:
      print('Validation accuracy for digit %d: %.2f'%(i, 100*correct[i]/total[i]))
    



epoch 1, batch: 10, training loss: 2.293
epoch 1, batch: 20, training loss: 2.246
Validation accuracy for digit 0: 96.77
Validation accuracy for digit 1: 90.91
Validation accuracy for digit 2: 70.97
Validation accuracy for digit 3: 68.75
Validation accuracy for digit 4: 63.33
Validation accuracy for digit 5: 0.00
Validation accuracy for digit 6: 90.48
Validation accuracy for digit 7: 62.96
Validation accuracy for digit 8: 50.00
Validation accuracy for digit 9: 33.33
epoch 2, batch: 10, training loss: 2.021
epoch 2, batch: 20, training loss: 1.674
Validation accuracy for digit 0: 96.77
Validation accuracy for digit 1: 90.91
Validation accuracy for digit 2: 80.65
Validation accuracy for digit 3: 81.25
Validation accuracy for digit 4: 96.67
Validation accuracy for digit 5: 43.33
Validation accuracy for digit 6: 61.90
Validation accuracy for digit 7: 85.19
Validation accuracy for digit 8: 0.00
Validation accuracy for digit 9: 3.70
epoch 3, batch: 10, training loss: 1.074
epoch 3, batch: 20