#Multi-layer NN for Digit Classification using MNIST

- MNIST
- Dataloader to load dataset
- Apply Transformation
- Implement Multilayer NN with activation functions
- Set up loss and optimizer
- Training Loop (Batch Training)
- Evaluate Model
- GPU Support
- TensorBoard
- Save & Load Model

#Setting up TensorBoard:

In [None]:
!pip install tensorboard

In [3]:
# Load the TensorBoard notebook extension

%load_ext tensorboard

In [4]:
!rm -rf ./myLogs/ # Clear any logs from previous runs

In [None]:
%tensorboard --logdir myLogs/fit #To start tensorBoard

#Importing Libraries:

In [37]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from torch.utils.tensorboard import SummaryWriter
import sys

In [17]:
writer = SummaryWriter("myLogs/fit/MNIST")

#Device Configuration:

In [18]:
#device config
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#if gpu available name it cuda otherwise cpu
print(device)

cpu


#Hyper Parameters:

In [19]:
#hyper parameters
input_size = 784 #because image size is 28 * 28 = 784
hidden_size = 100
num_classes = 10 #digits 0 - 9
num_epochs = 2
batch_size = 100
learning_rate = 0.001

#Importing MNIST Dataset:

In [20]:
#MNIST
#training set
train_dataset = torchvision.datasets.MNIST(
    root="/data", #path for storing dataset
    train=True, #implies it is training dataset
    transform=transforms.ToTensor(), #applying transformation
    download=True) #download dataset if it's not available

In [21]:
#testing set
test_dataset = torchvision.datasets.MNIST(
    root="/data", #path for accessing dataset
    train=False, #implies it is testing dataset
    transform=transforms.ToTensor()) #applying transformation
    #dataset already downloaded

In [22]:
#DataLoader
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False)

In [23]:
examples = iter(train_loader)
samples, labels = next(examples)
print(samples.shape, labels.shape) #batch size is 100, channel is 1, 28 x 28 image size

torch.Size([100, 1, 28, 28]) torch.Size([100])


In [None]:
for i in range(6):
  plt.subplot(2,3, i+1) #divides the fig in 2 rows, 3 columns
  #i+1 specifies position of current sub plot
  plt.imshow(samples[i][0], cmap='gray') #cmap = gray, displays image in grayscale
#plt.show()

In [None]:
img_grid = torchvision.utils.make_grid(samples)
writer.add_image("mnist_images", img_grid) #adding image to tensorboard
writer.close()

In [None]:
%tensorboard --logdir myLogs/fit #To start tensorBoard

#Neural Network:

In [27]:
class NN(nn.Module):
  def __init__(self, input_size, hidden_size, num_classes):
    super(NN, self).__init__()
    self.l1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.l2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    out = self.l1(x)
    out = self.relu(out)
    out = self.l2(out)
    #no need to apply softmax
    #we will use cross entropy loss function which automatically applies softmax
    return out

In [28]:
model = NN(input_size, hidden_size, num_classes).to(device)

#Loss & Optimizer:

In [29]:
#Loss & Optimizer:
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [30]:
writer.add_graph(model, samples.reshape(-1, 28*28)) #adding model graph to tensorboard
writer.close()

In [None]:
%tensorboard --logdir myLogs/fit #To start tensorBoard

#Training Loop:

In [32]:
n_total_steps = len(train_loader) #60000 (total samples) / 100 (batch size) = 600

running_loss = 0.0
running_correct = 0

for epoch in range(num_epochs): #iterate through epochs
  running_loss = 0.0  # Track loss for the epoch
  for i, (images, labels) in enumerate(train_loader): #iterate through batches
    #shape: [100, 1, 28, 28]
    #input_size = 784
    #image tensor needs [100, 784]

    #reshaping:
    images = images.reshape(-1, 28*28).to(device) #pushes to gpu if available
    labels = labels.to(device)

    #forward
    outputs = model(images)
    loss = criterion(outputs, labels)

    #backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

    _, predictions = torch.max(outputs, 1)

    running_correct += (predictions == labels).sum().item()

    if(i+1) % 100 == 0:
      print(f'epoch: {epoch + 1} / {num_epochs}, step: {i+1} / {n_total_steps}, loss = {loss.item():.4f}')
      writer.add_scalar('Training Loss', running_loss / 100, epoch * n_total_steps + i) #For each 100 steps, add training loss to tensorboard
      writer.add_scalar('Accuracy', running_correct / 100, epoch * n_total_steps + i) #add accuracy to tensorboard
      running_loss = 0.0
      running_correct = 0

epoch: 1 / 2, step: 100 / 600, loss = 0.4212
epoch: 1 / 2, step: 200 / 600, loss = 0.2478
epoch: 1 / 2, step: 300 / 600, loss = 0.4007
epoch: 1 / 2, step: 400 / 600, loss = 0.3441
epoch: 1 / 2, step: 500 / 600, loss = 0.3218
epoch: 1 / 2, step: 600 / 600, loss = 0.2702
epoch: 2 / 2, step: 100 / 600, loss = 0.2043
epoch: 2 / 2, step: 200 / 600, loss = 0.1693
epoch: 2 / 2, step: 300 / 600, loss = 0.1559
epoch: 2 / 2, step: 400 / 600, loss = 0.2066
epoch: 2 / 2, step: 500 / 600, loss = 0.2217
epoch: 2 / 2, step: 600 / 600, loss = 0.3011


In [34]:
writer.close()

#Evaluate Model:

In [38]:
#for precision & recall:
labels = []
preds = []

In [None]:
#test
with torch.no_grad():
  n_correct = 0 #no. of correct predictions
  n_samples = 0
  n_class_correct = [0 for i in range(10)]
  n_class_samples = [0 for i in range(10)]

  for images, labels1 in test_loader:
    images = images.reshape(-1, 28 * 28).to(device)
    labels1 = labels1.to(device)
    outputs = model(images)

    #torch.max returns max value(highest probability) and its index(class label)
    _, predictions = torch.max(outputs, 1)
    n_samples += labels1.shape[0] #no. of samples in current batch
    n_correct += (predictions == labels1).sum().item() #+1 for each correct prediction

    labels.append(predictions)

    #since output of model is in raw values, we need to convert it to probabilites (value b/w 0 & 1)
    class_preds = [F.softmax(output, dim=0) for output in outputs] #using softmax to convert to probabilities
    preds.append(class_preds)

  labels = torch.cat(labels) #converting from list to 1D tensor
  preds = torch.cat([torch.stack(batch) for batch in preds]) #converting to 2D tensor

  classes = range(10)

  for i in classes:
    labels_i = labels == i
    preds_i = preds[:, i]
    writer.add_pr_curve(str(i), labels_i, preds_i, global_step=0)
    writer.close

  acc = 100.0 * n_correct / n_samples
  print(f'accuracy = {acc}')

#Save & Load Model:

In [None]:
# Save model parameters
torch.save(model.state_dict(), "MNIST_Model_Parameters.pt")

In [None]:
#Load model:
loaded_model = NN(input_size, hidden_size, num_classes)
loaded_model.load_state_dict(torch.load("MNIST_Model_Parameters.pt"))
loaded_model.eval()

  loaded_model.load_state_dict(torch.load("MNIST_Model_Parameters.pt"))


NN(
  (l1): Linear(in_features=784, out_features=100, bias=True)
  (relu): ReLU()
  (l2): Linear(in_features=100, out_features=10, bias=True)
)

In [None]:
# Test the loaded model
with torch.no_grad():
  n_correct = 0 #no. of correct predictions
  n_samples = 0

  for images, labels in test_loader:
    images = images.reshape(-1, 28 * 28).to(device)
    labels = labels.to(device)
    outputs = model(images)

    #torch.max returns max value(highest probability) and its index(class label)
    _, predictions = torch.max(outputs, 1)
    n_samples += labels.shape[0] #no. of samples in current batch
    n_correct += (predictions == labels).sum().item() #+1 for each correct prediction

  acc = 100.0 * n_correct / n_samples
  print(f'accuracy = {acc}')

accuracy = 95.28
