In [None]:
# installing wandb
!pip install wandb -qU

In [None]:
# Authenticate with Weights & Biases and open a run that records the
# hyperparameters read later in the notebook (epochs, batch_size, lr).
import wandb

wandb.login()

wandb.init(
    project='MLP on MNIST',
    config=dict(epochs=25, batch_size=100, lr=0.001),
)

# Shorthand handle for the run's hyperparameters
config = wandb.config

In [None]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [None]:
# Layer widths of the network
input_size = 28 * 28  # one input neuron per pixel of a 28 x 28 image
hidden_size = 400     # roughly the midpoint between input and output widths: (784 + 10) / 2
output_size = 10      # number of output neurons

In [None]:
# Load the MNIST train and test splits through torchvision (downloaded into
# ./data when requested via download=True); ToTensor() is applied to each image.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())

test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

In [None]:
# create training and testing data batches

# Training batches are drawn in a new random order on every pass.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=config.batch_size,
    shuffle=True
)

# Summed evaluation metrics do not depend on sample order, so the test set is
# iterated in its stored order instead of being reshuffled on every pass.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=config.batch_size,
    shuffle=False
)

In [None]:
# Define our network architecture
class MNISTNet(nn.Module):
  """Fully connected classifier: two ReLU-activated hidden layers of equal
  width followed by a linear output layer (no activation on the output)."""

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
    self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
    self.fc3 = nn.Linear(in_features=hidden_size, out_features=output_size)
    self.relu = nn.ReLU()
    # replace the default weights of the linear layers
    self.init_weights()

  def init_weights(self):
    """Re-initialise the weight matrix of fc1, fc2 and fc3 (in that order)
    with Kaiming-normal values; biases are left as created by nn.Linear."""
    for layer in (self.fc1, self.fc2, self.fc3):
      nn.init.kaiming_normal_(layer.weight)

  def forward(self, x):
    """Run x through fc1 -> ReLU -> fc2 -> ReLU -> fc3 and return the result."""
    hidden = self.relu(self.fc1(x))
    hidden = self.relu(self.fc2(hidden))
    return self.fc3(hidden)

In [None]:
# Build the model, place it on the GPU when one is available, and create the
# loss function and optimizer.
CUDA = torch.cuda.is_available()

network = MNISTNet(input_size, hidden_size, output_size)
network = network.cuda() if CUDA else network

# cross-entropy loss, optimised with Adam at the configured learning rate
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=network.parameters(), lr=config.lr)

In [None]:
# Training loop: one optimisation pass over train_loader per epoch, with the
# test set evaluated and reported on every 5th epoch.
for epoch in range(config.epochs):
  # Validation below switches the model to eval mode, so switch back to
  # training mode at the start of every epoch.
  network.train()

  # correct training predictions and number of samples seen this epoch
  correct_train = 0
  total_train = 0
  # sum of the per-batch losses for this epoch
  updating_loss = 0.0

  # Training
  for images, labels in train_loader:
    # Batches come out of the loader as [batch, 1, 28, 28]
    # (one colour channel, 28 x 28 resolution); the network expects
    # flat vectors, so reshape to [batch, 784].
    images = images.view(-1, 28*28)

    # move the batch to the GPU when one is in use
    if CUDA:
      images = images.cuda()
      labels = labels.cuda()

    # feedforward
    outputs = network(images)

    # calculating the loss
    loss = loss_fn(outputs, labels)
    updating_loss += loss.item()

    # Backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Accumulate plain Python numbers (not tensors) for the accuracy
    predicted_index = outputs.argmax(dim=1)
    correct_train += (predicted_index == labels).sum().item()
    total_train += labels.size(0)

  # Accuracy is a percentage of the samples seen, so it stays correct for
  # any batch size (dividing by the number of batches only yields a
  # percentage when every batch holds exactly 100 samples).
  train_accuracy = 100.0 * correct_train / total_train
  train_loss = updating_loss / len(train_loader)

  params = {
      "Epoch": epoch+1,
      "Accuracy": train_accuracy,
      "Loss": train_loss
  }
  wandb.log(params)

  print(f"Epoch: {epoch+1}/{config.epochs}, Accuracy: {train_accuracy}, Loss: {train_loss}")

  # Validation: results are only reported every 5th epoch, so the test set
  # is only evaluated on those epochs.
  if (epoch+1)%5 == 0:
    # putting the model into evaluation mode
    network.eval()

    test_correct_predictions = 0
    total_test = 0
    test_loss = 0.0

    # no gradients are needed for evaluation
    with torch.no_grad():
      for images, labels in test_loader:
        if CUDA:
          images = images.cuda()
          labels = labels.cuda()

        images = images.view(-1, 28*28)

        outputs = network(images)

        test_loss += loss_fn(outputs, labels).item()

        predicted_test_index = outputs.argmax(dim=1)
        test_correct_predictions += (predicted_test_index == labels).sum().item()
        total_test += labels.size(0)

    test_accuracy = 100.0 * test_correct_predictions / total_test
    mean_test_loss = test_loss / len(test_loader)

    print('\n')

    params = {
        "Test Accuracy": test_accuracy,
        "Test Loss": mean_test_loss
    }
    wandb.log(params)

    print(f"Epoch: {epoch+1}/{config.epochs}, Test Accuracy: {test_accuracy}, Test Loss: {mean_test_loss}")

    print('\n')



wandb.finish()
print('Training is Done')

Epoch: 1/25, Accury: 99.53166961669922, Loss: 0.01460577951807257
Epoch: 2/25, Accury: 99.46833038330078, Loss: 0.014856943211513377
Epoch: 3/25, Accury: 99.6050033569336, Loss: 0.012367723842695947
Epoch: 4/25, Accury: 99.62833404541016, Loss: 0.011147954458325936
Epoch: 5/25, Accury: 99.625, Loss: 0.011820815770823476


Epoch: 5/25, Test Accury: 98.36000061035156, Test Loss: 0.07589730351959588


Epoch: 6/25, Accury: 99.68000030517578, Loss: 0.009318926352282991
Epoch: 7/25, Accury: 99.67333221435547, Loss: 0.01067682987684293
Epoch: 8/25, Accury: 99.788330078125, Loss: 0.006395650459397378
Epoch: 9/25, Accury: 99.60166931152344, Loss: 0.012167764481869956
Epoch: 10/25, Accury: 99.72166442871094, Loss: 0.008771116198013261


Epoch: 10/25, Test Accury: 98.0999984741211, Test Loss: 0.09817238001152873


Epoch: 11/25, Accury: 99.74333190917969, Loss: 0.008064390485060358
Epoch: 12/25, Accury: 99.74833679199219, Loss: 0.007662966642583342
Epoch: 13/25, Accury: 99.79166412353516, Loss: 0.

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Accuracy,▁▅▆▇▇▇▇██████████████████████████
Epoch,▁▁▂▂▂▂▃▃▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
Loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Test Accuracy,▁▆█▇▇▇▆
Test Loss,▅▁▁▄▅▆█

0,1
Accuracy,99.83
Epoch,25.0
Loss,0.00625
Test Accuracy,98.01
Test Loss,0.12787


Training is Done


In [None]:
# Write the trained weights to disk, wrapped in a dict under 'model_state_dict'
torch.save(obj={'model_state_dict': network.state_dict()}, f='mlp_mnist.pth')

In [None]:
# Save model checkpoints


# Load checkpoints
# Start training for new 500k data.
