Import data in PNG form and split into Train, Test, Validate and load


In [1]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import ToTensor, Normalize, Compose, Grayscale

train_set = '../Data/mnist-png-format/train'
test_set = '../Data/mnist-png-format/test'
 
batch_size = 256
workers = 8

transforms = Compose([
                        Grayscale(num_output_channels=1),   # PNG file is RGB 3 layer convert to greyscal 1 layer
                        ToTensor(),           
                        Normalize(mean=(0.5), std=(0.5)), 
                        ])

train_dataset = ImageFolder(train_set, transform=transforms)
test_dataset = ImageFolder(test_set, transform=transforms)

# Split Train dataset set into two (Train=85/Validation=15% split)
train_size = int(len(train_dataset) * 0.85) 
validation_size = (len(train_dataset) - train_size) 
train, validation = random_split(train_dataset, [train_size, validation_size])

# load data into usable format, mix/shuffle data so data is not in order 
train_data = DataLoader(train, batch_size = batch_size, shuffle = True, num_workers = workers)
val_data  = DataLoader(validation, batch_size = batch_size, shuffle = True, num_workers = workers)
test_data  = DataLoader(test_dataset, batch_size = batch_size, shuffle = True, num_workers = workers)

In [2]:
from torch import nn, optim
import torch

class SingleLayerModel(nn.Module):
    def __init__(self, neurons):
        super().__init__()
        self.hidden_1 = nn.Linear(28*28, neurons) # 28x28 Input image, number of neurons
        self.output = nn.Linear(neurons, 10) # 10 class output 0-9


    def forward(self, x):
        x = x.view(x.shape[0], -1) 
        x = nn.functional.relu(self.hidden_1(x))
        y = self.output(x)
        return y

In [3]:
# Optimisable parameters
learning_rate = [0.0001, 0.001, 0.01]
train_epochs = 5
neurons = [10, 30, 50, 100]

ModelMLP  = SingleLayerModel(neurons[3])
optimiser = optim.Adam(ModelMLP.parameters(), lr=learning_rate[0])
loss_func = nn.CrossEntropyLoss()

In [6]:
def evaluate_model(model, eval_dataset):
    model.eval() # Set model mode to evaluation not training
    correct_batch = 0
    loss_batch = 0
    with torch.no_grad():                 # Gradient graph not require for evaluation should reduce memory usage
      for (imgs, labels) in eval_dataset: # loop through each batch  
        pred_y = model(imgs)
        loss = loss_func(pred_y, labels)

        pred = torch.max(pred_y, 1)[1]
        correct_batch += (pred == labels).sum().item() # Calculate correct prediction where prediction == label 
        loss_batch += loss.item()

    return correct_batch / len(eval_dataset.dataset), loss_batch / len(eval_dataset.dataset)

In [7]:
  for i in range(train_epochs): # training epochs
    correct_batch = 0
    loss_batch = 0

    for (imgs, labels) in train_data: # loop through each batch  
      ModelMLP.train()
      pred_y = ModelMLP(imgs)
      loss = loss_func(pred_y, labels)

      optimiser.zero_grad()
      loss.backward()
      optimiser.step()

      pred = torch.max(pred_y, 1)[1]
      correct_batch += (pred == labels).sum().item() # Calculate correct prediction where prediction == label 
      loss_batch += loss.item()

    val_acc, val_loss = evaluate_model(ModelMLP, val_data)

    print('Train accuracy = {0:.2%}, Train loss = {1:.6f} | \
           Validation accuracy = {2:.2%}, Validation loss = {3:.6f}'.format(correct_batch / len(train_data.dataset), loss_batch / len(train_data.dataset), val_acc, val_loss ))


Train accuracy = 85.54%, Train loss = 0.003 |          Validation accuracy = 87.34%, Validation loss = 0.002
Train accuracy = 87.97%, Train loss = 0.002 |          Validation accuracy = 88.53%, Validation loss = 0.002
Train accuracy = 89.09%, Train loss = 0.002 |          Validation accuracy = 89.63%, Validation loss = 0.002
Train accuracy = 89.90%, Train loss = 0.001 |          Validation accuracy = 90.46%, Validation loss = 0.001
Train accuracy = 90.40%, Train loss = 0.001 |          Validation accuracy = 90.66%, Validation loss = 0.001


In [34]:
test, _ = evaluate_model(ModelMLP, test_data)
print('Test accuracy = {0:.2%}'.format(test))

Test accuracy = 93.03%
