# Classifier for MNIST

Design a 3-layer neural network for the task of classifying handwritten digits. The number of classes is 10.

Pre-processed data available at https://drive.google.com/drive/folders/11gOutVaBOxEROpNgZCmfE-ALrfI5ZeKr?usp=sharing

# Architecture

In [2]:
import torch
import torch.nn as nn

In [12]:
class Classifier(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output layer.

    ``forward`` returns raw logits; no softmax is applied inside the model.
    """

    def __init__(self, input_size, hidden_1_size, hidden_2_size, output_size):
        """Create the three linear layers and Xavier-initialise their weights.

        Args:
            input_size: number of features in each input vector.
            hidden_1_size: width of the first hidden layer.
            hidden_2_size: width of the second hidden layer.
            output_size: number of logits produced per sample (stored as
                ``num_classes``).
        """
        super().__init__()

        # Remember the layer sizes on the instance.
        self.input_size = input_size
        self.hidden_1_size = hidden_1_size
        self.hidden_2_size = hidden_2_size
        self.num_classes = output_size

        # nn.Linear is a feed-forward layer holding a weight matrix and a bias.
        self.fc1 = nn.Linear(input_size, hidden_1_size)
        self.fc2 = nn.Linear(hidden_1_size, hidden_2_size)
        self.relu = nn.ReLU()
        self.fc3 = nn.Linear(hidden_2_size, output_size)

        # Weight initialisation (biases keep the nn.Linear defaults).
        for layer in (self.fc1, self.fc2, self.fc3):
            nn.init.xavier_uniform_(layer.weight)

    def forward(self, x):
        """Return the logits for ``x``.

        ``x`` is fed straight into the first linear layer, so its last
        dimension must equal ``input_size``; no flattening happens here.
        """
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        # Softmax is intentionally omitted: the output is left as logits.
        return self.fc3(hidden)


# Training

In [4]:
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

In [6]:
# Read the training file: each line is comma-separated integers, the first
# being the class label and the remainder the data point.
data_points, class_labels = [], []

with open('../Lectures/Data/mnist_train_file.txt') as fs:
    for line in fs:
        label, *datapoint = map(int, line.strip().split(','))
        class_labels.append(label)
        data_points.append(datapoint)

In [29]:
# Read the test file: each line is comma-separated integers, the first
# being the class label and the remainder the data point.
data_points_test, class_labels_test = [], []

with open('../Lectures/Data/mnist_test_file.txt') as fs:
    for line in fs:
        label, *datapoint = map(int, line.strip().split(','))
        class_labels_test.append(label)
        data_points_test.append(datapoint)

In [7]:
class Mnist_dataset(Dataset):
    """Map-style dataset pairing each data point with its class label."""

    def __init__(self, data_points, class_labels):
        """Store the samples and labels.

        Args:
            data_points: indexable collection; ``data_points[i]`` holds the
                numeric values of sample ``i``.
            class_labels: indexable collection; ``class_labels[i]`` is the
                integer class of sample ``i``.
        """
        # Zero-argument super() resolves to the class after Mnist_dataset in
        # the MRO (i.e. Dataset). Passing Dataset explicitly started the
        # lookup *after* Dataset and skipped it.
        super().__init__()
        self.data = data_points
        self.labels = class_labels

    def __len__(self):
        """Return the number of samples, taken from the label collection."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(values, label)`` for the sample at ``index``.

        ``values`` is a float tensor built from ``data[index]``; ``label`` is
        a long tensor of shape ``[1]`` wrapping ``labels[index]``.
        """
        values = torch.FloatTensor(self.data[index])
        # The label is wrapped in a list so that it becomes a 1-element tensor.
        label = torch.LongTensor([self.labels[index]])
        return values, label

In [8]:
from torch.optim import Adam

In [28]:
def train(clf, train_data, batch_size, epochs, learning_rate=0.0001, *, shuffle=False):
    """Train ``clf`` in place with Adam and cross-entropy loss.

    Args:
        clf: the model to optimise; called on each batch to produce logits.
        train_data: dataset yielding ``(values, label)`` pairs where the
            batched labels have shape ``[batch, 1]``.
        batch_size: number of samples per optimisation step.
        epochs: number of full passes over ``train_data``.
        learning_rate: step size handed to Adam.
        shuffle: when true, the DataLoader reshuffles the samples at the
            start of every epoch. Defaults to False, which keeps the
            dataset order.

    Returns:
        None. The parameters of ``clf`` are updated as a side effect.
    """
    optimizer = Adam(clf.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    # The loader does not change between epochs, so build it once.
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)

    # Make sure the model is in training mode even if the caller previously
    # switched it to eval mode.
    clf.train()

    for _ in range(epochs):  # the model is trained over multiple epochs
        for batch, labels in train_dataloader:
            logits = clf(batch)
            # CrossEntropyLoss wants class indices of shape [batch], not [batch, 1].
            labels = labels.squeeze(1)
            loss = criterion(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            

# Evaluation

In [35]:
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

In [36]:
def evaluate(clf, test_data):
    """Print and return the classification accuracy of ``clf`` on ``test_data``.

    Args:
        clf: the model to evaluate; it is switched to eval mode and left
            in that mode.
        test_data: dataset yielding ``(values, label)`` pairs where the
            batched labels have shape ``[batch, 1]``.

    Returns:
        The value computed by ``accuracy_score`` over the whole dataset.
    """
    # Evaluate over batches of examples, which reduces the run time.
    test_dataloader = DataLoader(test_data, batch_size=100)
    clf.eval()
    true_class = []
    inferred_class = []

    # No gradients are needed for inference, so skip building the autograd graph.
    with torch.no_grad():
        for batch, labels in test_dataloader:
            # Perform the forward pass.
            logits = clf(batch)
            # Collect the true class values across all batches.
            true_class.extend(labels.squeeze(1).tolist())
            # Softmax turns the logits into class probabilities...
            probabilities = F.softmax(logits, dim=1)
            # ...and argmax picks the most probable class for each sample.
            inferred_class.extend(torch.argmax(probabilities, dim=1).tolist())

    # Once every batch has been processed, calculate the accuracy from the
    # two lists of true and inferred classes.
    accuracy = accuracy_score(true_class, inferred_class)
    print(f"accuracy on test set: {accuracy}")
    return accuracy
    

# Putting it all together

In [37]:
# The sequence of functions to be executed is provided below.
# Fill in the required arguments.

# Hyper-parameters
batch_size = 50
epochs = 5

# Model: 28*28 inputs, hidden layers of 2048 and 256 units, 10 output classes.
clf = Classifier(input_size=28*28, hidden_1_size=2048, hidden_2_size=256, output_size=10)

# Wrap the loaded lists as datasets.
train_data = Mnist_dataset(data_points=data_points, class_labels=class_labels)
test_data = Mnist_dataset(data_points=data_points_test, class_labels=class_labels_test)

train(clf, train_data, batch_size, epochs)
evaluate(clf, test_data)

accuracy on test set: 0.9621


# Playing around a bit

1. Read the documentation and try optimizers other than Adam (which is used above), e.g. SGD or RMSprop.

2. Try different batch sizes. How does this affect performance?

3. Try with different hidden layer sizes.