In [0]:
#importing all the required libraries

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np

In [0]:
# number of subprocesses to use for data loading
num_workers = 0

# how many samples per batch to load
batch_size = 20

# percentage of training set to use as validation
valid_size = 0.2

# defining our transforms
transform = transforms.ToTensor()

# getting the data
train_data = datasets.MNIST('data', train=True, download=True,
                            transform=transform)
test_data = datasets.MNIST('data', train=False, download=True,
                            transform=transform)

# obtain training indices that will be used for validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           sampler=train_sampler,
                                           num_workers=num_workers)

valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           sampler=valid_sampler,
                                           num_workers=num_workers)

test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                           num_workers=num_workers)

In [5]:
# define the structure of the neural net
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()

    self.fc1 = nn.Linear(28 * 28, 512)
    self.fc2 = nn.Linear(512, 512)
    self.fc3 = nn.Linear(512, 10)
    self.dropout = nn.Dropout(0.2)

  # forward method of the class
  # it defines how the input will be passed through the different layers
  def forward(self, x):
    x = x.view(-1, 28 * 28)
    x = F.relu(self.fc1(x))
    x = self.dropout(x)
    x = F.relu(self.fc2(x))
    x = self.dropout(x)
    x = self.fc3(x)

    return x

model = Net()
print(model)

Net(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


In [0]:
# loss function (categorical loss entropy)
criterion = nn.CrossEntropyLoss()

# optimizer function (Stochastic Gradient Descent with learn rate = 0.01)
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [12]:
# create the training and the validation loop
EPOCHS = 50
valid_loss_min = np.Inf # giving validation loss 'Infinity'

for epoch in range(EPOCHS):
  train_loss = 0.0
  valid_loss = 0.0

  # set the model in train mode for train pass
  model.train()

  for features, labels in train_loader:
    # clear the gradients of all optimized variable
    optimizer.zero_grad()
    # make a forward pass: compute the predicted values by giving inputs
    output = model(features)
    # calculate the loss
    loss = criterion(output, labels)
    # backward pass: compute gradient of loss with respect to models parameters
    loss.backward()
    # perform a single optimization step
    optimizer.step()
    # update training loss
    train_loss += loss.item()

  # set the model in eval mode for validation pass
  model.eval()
  for feature, labels in valid_loader:
    output = model(features)
    # calculate the loss
    loss = criterion(output, labels)
    # update the validation loss
    valid_loss = loss.item()

  train_loss = train_loss / len(train_loader)
  valid_loss = valid_loss / len(valid_loader)

  print(f"Epoch :{epoch + 1:2d}/{EPOCHS}\t Training loss : {train_loss:.6f}"
        f"\tValidation loss: {valid_loss:.6f}")
  
  # save the model if validation loss has decreased
  if valid_loss <= valid_loss_min:
    print(f"Validation loss decreased ({valid_loss_min:.6f}) ------>"
          f"({valid_loss:.6f})\t Saving Model....\n")
    torch.save(model.state_dict(), 'model.pth') 
    valid_loss_min = valid_loss

Epoch : 1/50	 Training loss : 0.216909	Validation loss: 0.017518
Validation loss decreased (inf) ------>(0.017518)	 Saving Model....

Epoch : 2/50	 Training loss : 0.184629	Validation loss: 0.013592
Validation loss decreased (0.017518) ------>(0.013592)	 Saving Model....

Epoch : 3/50	 Training loss : 0.163380	Validation loss: 0.016941
Epoch : 4/50	 Training loss : 0.145481	Validation loss: 0.015427
Epoch : 5/50	 Training loss : 0.130361	Validation loss: 0.014969
Epoch : 6/50	 Training loss : 0.117077	Validation loss: 0.014717
Epoch : 7/50	 Training loss : 0.106629	Validation loss: 0.016813
Epoch : 8/50	 Training loss : 0.099433	Validation loss: 0.016447
Epoch : 9/50	 Training loss : 0.090074	Validation loss: 0.017203
Epoch :10/50	 Training loss : 0.084577	Validation loss: 0.018346
Epoch :11/50	 Training loss : 0.078516	Validation loss: 0.018851
Epoch :12/50	 Training loss : 0.072886	Validation loss: 0.015333
Epoch :13/50	 Training loss : 0.069211	Validation loss: 0.021249
Epoch :14/50

In [13]:
# load the model with lowest validation loss
model.load_state_dict(torch.load('model.pth'))

<All keys matched successfully>

In [14]:
# initialize lists to monitor test loss and accuracy
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

model.eval()

for data, target in test_loader:
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    # calculate the loss
    loss = criterion(output, target)
    # update test loss 
    test_loss += loss.item()*data.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)
    # compare predictions to true label
    correct = np.squeeze(pred.eq(target.data.view_as(pred)))
    # calculate test accuracy for each object class
    for i in range(len(target)):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

# calculate and print avg test loss
test_loss = test_loss/len(test_loader.sampler)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            str(i), 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.155779

Test Accuracy of     0: 98% (968/980)
Test Accuracy of     1: 98% (1115/1135)
Test Accuracy of     2: 94% (976/1032)
Test Accuracy of     3: 97% (980/1010)
Test Accuracy of     4: 95% (935/982)
Test Accuracy of     5: 91% (813/892)
Test Accuracy of     6: 95% (917/958)
Test Accuracy of     7: 95% (980/1028)
Test Accuracy of     8: 93% (914/974)
Test Accuracy of     9: 93% (948/1009)

Test Accuracy (Overall): 95% (9546/10000)
