The MNIST database is a large collection of handwritten digits that is commonly used for training various image processing systems. It contains 70,000 images of handwritten digits, split into a training set of 60,000 images and a test set of 10,000 images.

In [201]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter

In [202]:
'''Preprocessing applied to every image when it is read from the dataset:
1. ToTensor() converts the PIL image into a float tensor laid out as (channels, height, width) and rescales
   the pixel values from the integer range 0..255 to the range 0.0..1.0. MNIST is grayscale, so there is a
   single channel (not three RGB channels).
2. Normalize((0.5,), (0.5,)) then computes (x - 0.5) / 0.5 for that single channel, which moves the values
   from 0..1 to -1..1 so that the inputs are centred around zero.'''

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # one-element tuples: one mean/std per channel
])

In [203]:
'''As the dataset consists of no validation set, we will be splitting the training data into 
train and validation set. We will keep 20% of the training set as the validation set.'''

#Downloading the dataset
trainset = datasets.MNIST('.', download=True, train=True, transform=transform)
testset = datasets.MNIST('.', download=True, train=False, transform=transform)

batch_size = 64  #no. of images we want to read in one go.
validation_split = .2
shuffle_dataset = True
random_seed= 42

# The samplers and the shuffling DataLoader below draw from torch's global RNG, not NumPy's.
# Seeding torch as well makes the batch order (and anything initialised afterwards from the
# same RNG) repeatable from run to run; seeding NumPy alone only fixes the train/val split.
torch.manual_seed(random_seed)


# Creating data indices for training and validation splits:
dataset_size = len(trainset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))  # number of samples held out for validation
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
# The first `split` (shuffled) indices form the validation set, the remainder the training set.
train_indices, val_indices = indices[split:], indices[:split]


# Creating PT data samplers and loaders:
# Both loaders read from `trainset`; the samplers restrict each one to its own disjoint index subset.
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)


#Loading the dataset
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, sampler=train_sampler)
val_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, sampler=val_sampler)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)


In [204]:
# Report how many samples ended up in each of the three splits.
for caption, members in (
    ("train set : ", train_sampler),
    ("validation set : ", val_sampler),
    ("test set : ", testset),
):
    print(caption, len(members))

train set :  48000
validation set :  12000
test set :  10000


In [205]:
class LogisticRegression(nn.Module):
    """Multinomial logistic regression implemented as a single linear layer.

    ``forward`` returns the raw output of the linear layer; no softmax is
    applied inside the model.

    Args:
        in_features: Length of each input feature vector. Defaults to 784,
            the value this notebook uses for flattened images.
        num_classes: Number of output scores per sample. Defaults to 10.
    """

    def __init__(self, in_features=784, num_classes=10):
        super().__init__()
        self.linear = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Apply the linear layer to ``x`` (last dimension must be ``in_features``)."""
        return self.linear(x)
    
#Defining the function to calculate accuracy

def calc_accuracy(pred, label):
    """Return the fraction of samples whose top-scoring class matches ``label``.

    Args:
        pred: 2-D tensor of scores, one row per sample and one column per class.
        label: Tensor of class indices, one entry per row of ``pred``.

    Returns:
        A Python float: number of matching rows divided by the number of rows.
    """
    _, predicted = pred.max(dim=1)  # index of the largest score in each row
    n_correct = predicted.eq(label).sum().item()
    return n_correct / predicted.size(0)
    

In [214]:
# Build the classifier and display its layer layout.
model = LogisticRegression()
print(model)

# Loss and optimiser: cross-entropy (nn.LogSoftmax() + nn.NLLLoss() in one module),
# minimised with plain stochastic gradient descent over all model parameters.
learning_rate = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

LogisticRegression(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)


In [215]:
# Training the model
#
# Each epoch runs one optimisation pass over train_loader followed by one evaluation pass over
# val_loader, then logs the per-batch averages of the losses and of the validation accuracy
# to TensorBoard under ./runs/lr_0.01.

epoch_no = 30
writer = SummaryWriter(log_dir="./runs/lr_0.01")
for epoch in range(epoch_no):
    # ---- training pass ----
    epoch_train_loss = 0

    model.train()     # put the model in training mode
    for images, label in train_loader:
        # Collapse everything after the batch dimension into one feature vector per image.
        features = torch.flatten(images, start_dim=1)
        optimizer.zero_grad()  # setting the gradient to zero else they get accumulated
        # Feed the features as they are. They must be on the same scale as in the validation
        # pass below; multiplying them by batch_size here would train the model on inputs
        # 64x larger than the ones it is later evaluated on.
        pred = model(features)   # forward pass : calls the forward method defined in model
        train_loss = criterion(pred, label)  # computing loss using cross entropy
        train_loss.backward()        # backward pass : calculating the gradient
        optimizer.step()       # updating the theta/parameter
        epoch_train_loss += train_loss.item()
    epoch_train_loss = epoch_train_loss / len(train_loader)

    # ---- validation pass ----
    running_val_loss = 0
    running_accuracy = 0
    model.eval()
    # No parameters are updated here, so skip gradient tracking to save memory and time.
    with torch.no_grad():
        for images, label in val_loader:
            features = torch.flatten(images, start_dim=1)
            pred = model(features)   # forward pass
            val_loss = criterion(pred, label)
            accuracy = calc_accuracy(pred, label)
            running_val_loss += val_loss.item()
            running_accuracy += accuracy
    running_val_loss = running_val_loss/len(val_loader)
    avg_accuracy = running_accuracy/len(val_loader)

    # Epochs are logged 1-based so the TensorBoard x-axis starts at 1.
    writer.add_scalar('Loss/train', epoch_train_loss, epoch+1)
    writer.add_scalar('Loss/validation', running_val_loss, epoch+1)
    writer.add_scalar('Accuracy/validation', avg_accuracy, epoch+1)

# Make sure every logged scalar is written to disk once training has finished.
writer.flush()