In [None]:
# Necessary imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from prettytable import PrettyTable
from tqdm.auto import tqdm
import time
import itertools

In [None]:
# Device agnostic code: prefer the GPU when PyTorch can see one, otherwise use the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Fix seed for reproducibility
SEED = 42
torch.manual_seed(SEED)
# Seed every visible GPU, not just the current one (silently ignored when CUDA is unavailable)
torch.cuda.manual_seed_all(SEED)

In [None]:
# Convert every image to a tensor, then normalise each colour channel with fixed mean / std values
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5071, 0.4867, 0.4408), std=(0.2675, 0.2565, 0.2761)),
])

# Download CIFAR-100 Dataset from PyTorch (training split first, then testing split).
# Both splits live under the "data" root directory and share the same transform.
training_data, testing_data = (
    datasets.CIFAR100(root="data", train=is_training_split, download=True, transform=transform)
    for is_training_split in (True, False)
)

print(f"Length of training data: {len(training_data)}")
print(f"Length of testing data: {len(testing_data)}")

Files already downloaded and verified
Files already downloaded and verified
Length of training data: 50000
Length of testing data: 10000


In [None]:
# transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2470, 0.2435, 0.2616))])

# # Download CiFAR-10 Dataset from PyTorch
# training_data = datasets.CIFAR10(
#                 root="data",                        # Set root directory of data
#                 train=True,                         # Get training dataset
#                 download=True,                      # Download the data
#                 transform=transform)                # Transform the dataset into tensors and normalise

# testing_data = datasets.CIFAR10(
#                 root="data",                        # Set root directory of data
#                 train=False,                        # Get testing dataset
#                 download=True,                      # Download the data
#                 transform=transform)                # Transform the dataset into tensors and normalise

# print(f"Length of training data: {len(training_data)}")
# print(f"Length of testing data: {len(testing_data)}")

In [None]:
# Shape of an image ([colour channels, height, width])
# image, label = training_data[0]
# print(f"Image Shape: {image.shape}")
# print(f"Label: {training_data.classes[label]}")

# plt.figure(figsize=(3,3))
# plt.imshow(transforms.ToPILImage()(image))
# plt.title(training_data.classes[label])
# plt.axis(False)

In [None]:
NUM_CLASSES = len(training_data.classes)
print(NUM_CLASSES)

# Randomly hold out 10,000 of the training images for validation; the other 40,000 are used for training
training_set, validation_set = torch.utils.data.random_split(training_data, [40000, 10000])

# Put training, validation and testing data in dataloaders for efficient training.
# Only the training loader is shuffled.
training_dataloader, validation_dataloader, testing_dataloader = (
    DataLoader(dataset=split, batch_size=32, shuffle=reshuffle)
    for split, reshuffle in ((training_set, True), (validation_set, False), (testing_data, False))
)

100


In [None]:
# Define VGG-11 architecture
# 8 conv layers -> Flatten -> 3 FC layers (11 weight layers in total); the model returns raw scores, no softmax
# VGG_11 = 64, MaxPool, 128, MaxPool, 256, 256, MaxPool, 512, 512, MaxPool, 512, 512, MaxPool

class VGG_11(nn.Module):
    """VGG-11 with batch normalisation after every convolution, sized for 32x32 inputs.

    Args:
        in_features: Number of channels of the input image.
        num_classes: Number of outputs of the final fully connected layer.
    """

    def __init__(self, in_features = 3, num_classes = 100):
        super().__init__()

        # An int is the output width of one conv -> batch norm -> ReLU group,
        # "M" is a max pool with kernel size 2 and stride 2.
        layout = [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"]

        stack = []
        channels = in_features
        for entry in layout:
            if entry == "M":
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            # Every conv2d layer has a kernel size 3, stride 1 and padding 1
            stack.append(nn.Conv2d(in_channels=channels, out_channels=entry, kernel_size=3, stride=1, padding=1))
            stack.append(nn.BatchNorm2d(num_features=entry))
            stack.append(nn.ReLU())
            channels = entry
        self.conv_layers = nn.Sequential(*stack)

        # The 5 max pools halve a 32x32 image down to 1x1, so the flattened size is
        # last_out_channels*(32/2**5)*(32/2**5) = 512*1*1
        hidden = 4096
        self.fc = nn.Sequential(
            nn.Linear(in_features=channels*1*1, out_features=hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=hidden, out_features=hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=hidden, out_features=num_classes)
            )

    def forward(self, x):
        """Run the conv stack, flatten per sample, and apply the fully connected head."""
        features = self.conv_layers(x)
        flat = features.reshape(features.shape[0], -1)
        return self.fc(flat)

In [None]:
# Define VGG-13 architecture
# 10 conv layers -> Flatten -> 3 FC layers (13 weight layers in total); the model returns raw scores, no softmax
# VGG_13 = 64, 64, MaxPool, 128, 128, MaxPool, 256, 256, MaxPool, 512, 512, MaxPool, 512, 512, MaxPool

class VGG_13(nn.Module):
    """VGG-13 with batch normalisation after every convolution, sized for 32x32 inputs.

    Args:
        in_features: Number of channels of the input image.
        num_classes: Number of outputs of the final fully connected layer.
    """

    def __init__(self, in_features = 3, num_classes = 100):
        super().__init__()

        # An int is the output width of one conv -> batch norm -> ReLU group,
        # "M" is a max pool with kernel size 2 and stride 2.
        layout = [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"]

        stack = []
        channels = in_features
        for entry in layout:
            if entry == "M":
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            # Every conv2d layer has a kernel size 3, stride 1 and padding 1
            stack.append(nn.Conv2d(in_channels=channels, out_channels=entry, kernel_size=3, stride=1, padding=1))
            stack.append(nn.BatchNorm2d(num_features=entry))
            stack.append(nn.ReLU())
            channels = entry
        self.conv_layers = nn.Sequential(*stack)

        # The 5 max pools halve a 32x32 image down to 1x1, so the flattened size is
        # last_out_channels*(32/2**5)*(32/2**5) = 512*1*1
        hidden = 4096
        self.fc = nn.Sequential(
            nn.Linear(in_features=channels*1*1, out_features=hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=hidden, out_features=hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=hidden, out_features=num_classes)
            )

    def forward(self, x):
        """Run the conv stack, flatten per sample, and apply the fully connected head."""
        features = self.conv_layers(x)
        flat = features.reshape(features.shape[0], -1)
        return self.fc(flat)

In [None]:
# Define VGG-16 architecture
# 13 conv layers -> Flatten -> 3 FC layers (16 weight layers in total); the model returns raw scores, no softmax
# VGG_16 = 64, 64, MaxPool, 128, 128, MaxPool, 256, 256, 256, MaxPool, 512, 512, 512, MaxPool, 512, 512, 512, MaxPool

class VGG_16(nn.Module):
    """VGG-16 with batch normalisation after every convolution, sized for 32x32 inputs.

    Args:
        in_features: Number of channels of the input image.
        num_classes: Number of outputs of the final fully connected layer.
    """

    def __init__(self, in_features = 3, num_classes = 100):
        super().__init__()

        # An int is the output width of one conv -> batch norm -> ReLU group,
        # "M" is a max pool with kernel size 2 and stride 2.
        layout = [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"]

        stack = []
        channels = in_features
        for entry in layout:
            if entry == "M":
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            # Every conv2d layer has a kernel size 3, stride 1 and padding 1
            stack.append(nn.Conv2d(in_channels=channels, out_channels=entry, kernel_size=3, stride=1, padding=1))
            stack.append(nn.BatchNorm2d(num_features=entry))
            stack.append(nn.ReLU())
            channels = entry
        self.conv_layers = nn.Sequential(*stack)

        # The 5 max pools halve a 32x32 image down to 1x1, so the flattened size is
        # last_out_channels*(32/2**5)*(32/2**5) = 512*1*1
        hidden = 4096
        self.fc = nn.Sequential(
            nn.Linear(in_features=channels*1*1, out_features=hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=hidden, out_features=hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=hidden, out_features=num_classes)
            )

    def forward(self, x):
        """Run the conv stack, flatten per sample, and apply the fully connected head."""
        features = self.conv_layers(x)
        flat = features.reshape(features.shape[0], -1)
        return self.fc(flat)

In [None]:
# Define VGG-19 architecture
# 16 conv layers -> Flatten -> 3 FC layers (19 weight layers in total); the model returns raw scores, no softmax
# VGG_19 = 64, 64, MaxPool, 128, 128, MaxPool, 256, 256, 256, 256, MaxPool, 512, 512, 512, 512, MaxPool, 512, 512, 512, 512, MaxPool

class VGG_19(nn.Module):
    """VGG-19 with batch normalisation after every convolution, sized for 32x32 inputs.

    Args:
        in_features: Number of channels of the input image.
        num_classes: Number of outputs of the final fully connected layer.
    """

    def __init__(self, in_features = 3, num_classes = 100):
        super().__init__()

        # An int is the output width of one conv -> batch norm -> ReLU group,
        # "M" is a max pool with kernel size 2 and stride 2.
        layout = [
            64, 64, "M",
            128, 128, "M",
            256, 256, 256, 256, "M",
            512, 512, 512, 512, "M",
            512, 512, 512, 512, "M",
        ]

        stack = []
        channels = in_features
        for entry in layout:
            if entry == "M":
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            # Every conv2d layer has a kernel size 3, stride 1 and padding 1
            stack.append(nn.Conv2d(in_channels=channels, out_channels=entry, kernel_size=3, stride=1, padding=1))
            stack.append(nn.BatchNorm2d(num_features=entry))
            stack.append(nn.ReLU())
            channels = entry
        self.conv_layers = nn.Sequential(*stack)

        # The 5 max pools halve a 32x32 image down to 1x1, so the flattened size is
        # last_out_channels*(32/2**5)*(32/2**5) = 512*1*1
        hidden = 4096
        self.fc = nn.Sequential(
            nn.Linear(in_features=channels*1*1, out_features=hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=hidden, out_features=hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=hidden, out_features=num_classes)
            )

    def forward(self, x):
        """Run the conv stack, flatten per sample, and apply the fully connected head."""
        features = self.conv_layers(x)
        flat = features.reshape(features.shape[0], -1)
        return self.fc(flat)

In [None]:
# Function to print out layer and number of parameters in a table
def model_summary(model):
    """Print one table row per trainable parameter tensor, followed by the total count.

    Args:
        model: Object exposing `named_parameters()`; parameters whose
            `requires_grad` is False are left out of both the table and the total.
    """
    # (parameter name, number of elements) for every trainable tensor, in model order
    trainable = [
        (name, tensor.numel())
        for name, tensor in model.named_parameters()
        if tensor.requires_grad
    ]

    summary = PrettyTable(["Layers", "Parameters"])
    for name, count in trainable:
        summary.add_row([name, count])

    print(summary)
    print(f"Total Params: {sum(count for _, count in trainable)}")

In [None]:
# Returns the time difference between training start time and training end time
def print_train_time(start: float, end: float) -> float:
    """Return the elapsed time between two timestamps.

    Despite its name, this function prints nothing; the caller formats and
    prints the returned value.

    Args:
        start: Timestamp taken before training (callers pass `time.time()`).
        end: Timestamp taken after training, in the same unit as `start`.

    Returns:
        `end - start`.
    """
    return end - start

In [None]:
# print(next(iter(training_dataloader))[0].shape)

In [None]:
def accuracy_fn(predicted,actual):
    """Return the fraction of samples whose highest-scoring class matches the label.

    Args:
        predicted: Tensor of scores with one row per sample; the arg-max is taken over dim 1.
        actual: Tensor of target class indices, one per row of `predicted`.

    Returns:
        A new 0-dim tensor holding correct / number of rows.
    """
    predictions = predicted.max(dim=1).indices
    hits = (predictions == actual).sum().item()
    return torch.tensor(hits / len(predictions))

In [None]:
# Training loop
def train_and_validate(model, training_set, validation_set, epochs, learning_rate, best_acc = None):
    """Train `model` with Adam and cross-entropy loss, validating after every epoch.

    Args:
        model: Network to optimise. Batches are moved to the global `device`,
            so the model is expected to live there already.
        training_set: Iterable of (image, label) batches used for optimisation.
        validation_set: Iterable of (image, label) batches used for evaluation.
        epochs: Number of passes over `training_set`; must be at least 1.
        learning_rate: Learning rate handed to the Adam optimiser.
        best_acc: Currently unused; kept so existing callers keep working
            (it was only read by a disabled checkpoint-saving step).

    Returns:
        Tuple `(training_loss, validation_loss, validation_acc)` from the last
        epoch, each a 0-dim tensor averaged over samples.

    Raises:
        ValueError: If `epochs` is smaller than 1 (there would be nothing to return).
    """
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    loss_fn = nn.CrossEntropyLoss()
    optim = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

    for epoch in tqdm(range(epochs)):
        # Switch dropout / batch norm back to training behaviour once per epoch;
        # the validation pass below puts the model in eval mode.
        model.train()
        training_loss, training_samples = 0, 0

        # Iterate over all training batches
        for image, label in training_set:
            image, label = image.to(device), label.to(device)

            # Pass training batch into model for prediction
            pred = model(image)
            loss = loss_fn(pred, label)

            optim.zero_grad()
            loss.backward()
            optim.step()

            # Detach before accumulating so the running total carries no autograd
            # history, and weight by batch size so a smaller final batch does not
            # skew the average.
            batch_size = label.size(0)
            training_loss += loss.detach() * batch_size
            training_samples += batch_size

        # Calculate average training loss per sample
        training_loss /= training_samples

        validation_loss, validation_acc, validation_samples = 0, 0, 0

        # eval() switches dropout / batch norm to inference behaviour;
        # inference_mode() turns off gradient tracking.
        model.eval()
        with torch.inference_mode():
            for image, label in validation_set:
                image, label = image.to(device), label.to(device)

                pred = model(image)
                batch_size = label.size(0)

                validation_loss += loss_fn(pred, label) * batch_size
                validation_acc += accuracy_fn(pred, label) * batch_size
                validation_samples += batch_size

            # Keep the in-place divisions inside inference_mode: the accumulators
            # are inference tensors and may not be modified in place outside it.
            validation_loss /= validation_samples
            validation_acc /= validation_samples

    return training_loss, validation_loss, validation_acc

In [None]:
# Hyperparameters: candidate values explored by the sweeps below
LEARNING_RATE = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
EPOCHS = list(range(10, 51, 10))
BATCH_SIZE = [32 * 2 ** doubling for doubling in range(5)]

In [None]:
# Grid search and record performance of every combination

training_losses = []
validation_losses = []
accuracies = []
combinations = []
training_time = []
best_accuracy = 0

for epoch, batch_size, lr in itertools.product(EPOCHS, BATCH_SIZE, LEARNING_RATE):
    # A fresh random 40,000 / 10,000 train/validation split is drawn for every combination
    training_split, validation_split = torch.utils.data.random_split(training_data, [40000, 10000])

    # Put training and validation data in dataloaders for efficient training
    training_dataloader = DataLoader(dataset=training_split, batch_size=batch_size, shuffle=True)
    validation_dataloader = DataLoader(dataset=validation_split, batch_size=batch_size, shuffle=False)

    # Every combination starts from a newly initialised VGG-11
    model = VGG_11(in_features=3, num_classes=NUM_CLASSES).to(device=device)

    start = time.time()
    train_loss, validation_loss, accuracy = train_and_validate(model=model, training_set=training_dataloader, validation_set=validation_dataloader, epochs=epoch, learning_rate=lr, best_acc=best_accuracy)
    end = time.time()
    time_diff = print_train_time(start, end)

    # Track the best validation accuracy seen so far across combinations
    if accuracy > best_accuracy:
        best_accuracy = accuracy
    print(f"Accuracy: {accuracy:.5f} | Epoch: {epoch} | Batch Size: {batch_size} | Learning Rate: {lr} | Training time: {time_diff:.5f} seconds")

    # Store plain Python numbers so the result lists do not hold on to tensors
    accuracies.append(accuracy.item())
    training_losses.append(train_loss.item())
    validation_losses.append(validation_loss.item())
    combinations.append([epoch, batch_size, lr])
    training_time.append(time_diff)

In [None]:
# Changing epoch, fix batch size 128 (BATCH_SIZE[2]) and learning rate 0.001 (LEARNING_RATE[2])
training_losses = []
validation_losses = []
accuracies = []

for epoch in EPOCHS:
    # A fresh random 40,000 / 10,000 train/validation split is drawn for every run
    training_split, validation_split = torch.utils.data.random_split(training_data, [40000, 10000])

    # Put training and validation data in dataloaders for efficient training
    training_dataloader = DataLoader(dataset=training_split, batch_size=BATCH_SIZE[2], shuffle=True)
    validation_dataloader = DataLoader(dataset=validation_split, batch_size=BATCH_SIZE[2], shuffle=False)

    # Every run starts from a newly initialised VGG-11
    model = VGG_11(in_features=3, num_classes=NUM_CLASSES).to(device=device)
    train_loss, validation_loss, accuracy = train_and_validate(model=model, training_set=training_dataloader, validation_set=validation_dataloader, epochs=epoch, learning_rate=LEARNING_RATE[2])

    accuracies.append(accuracy.item())
    training_losses.append(train_loss.item())
    validation_losses.append(validation_loss.item())

print(f"training_losses = {training_losses}")
print()
print(f"validation_losses = {validation_losses}")
print()
print(f"validation_accuracy = {accuracies}")

In [None]:
# Changing batch size, fix epoch 30 (EPOCHS[2]) and learning rate 0.001 (LEARNING_RATE[2])
training_losses = []
validation_losses = []
accuracies = []

for batch_size in BATCH_SIZE:
    # A fresh random 40,000 / 10,000 train/validation split is drawn for every run
    training_split, validation_split = torch.utils.data.random_split(training_data, [40000, 10000])

    # Put training and validation data in dataloaders for efficient training
    training_dataloader = DataLoader(dataset=training_split, batch_size=batch_size, shuffle=True)
    validation_dataloader = DataLoader(dataset=validation_split, batch_size=batch_size, shuffle=False)

    # Every run starts from a newly initialised VGG-11
    model = VGG_11(in_features=3, num_classes=NUM_CLASSES).to(device=device)
    train_loss, validation_loss, accuracy = train_and_validate(model=model, training_set=training_dataloader, validation_set=validation_dataloader, epochs=EPOCHS[2], learning_rate=LEARNING_RATE[2])

    accuracies.append(accuracy.item())
    training_losses.append(train_loss.item())
    validation_losses.append(validation_loss.item())

print(f"training_losses = {training_losses}")
print()
print(f"validation_losses = {validation_losses}")
print()
print(f"validation_accuracy = {accuracies}")

In [None]:
# Changing learning rate, fix batch size 128 (BATCH_SIZE[2]) and epoch 30 (EPOCHS[2])
training_losses = []
validation_losses = []
accuracies = []

for lr in LEARNING_RATE:
    # A fresh random 40,000 / 10,000 train/validation split is drawn for every run
    training_split, validation_split = torch.utils.data.random_split(training_data, [40000, 10000])

    # Put training and validation data in dataloaders for efficient training
    training_dataloader = DataLoader(dataset=training_split, batch_size=BATCH_SIZE[2], shuffle=True)
    validation_dataloader = DataLoader(dataset=validation_split, batch_size=BATCH_SIZE[2], shuffle=False)

    # Every run starts from a newly initialised VGG-11
    model = VGG_11(in_features=3, num_classes=NUM_CLASSES).to(device=device)
    train_loss, validation_loss, accuracy = train_and_validate(model=model, training_set=training_dataloader, validation_set=validation_dataloader, epochs=EPOCHS[2], learning_rate=lr)

    accuracies.append(accuracy.item())
    training_losses.append(train_loss.item())
    validation_losses.append(validation_loss.item())

print(f"training_losses = {training_losses}")
print()
print(f"validation_losses = {validation_losses}")
print()
print(f"validation_accuracy = {accuracies}")

In [None]:
def test(model, testing_dataloader):
    """Evaluate `model` on every batch of `testing_dataloader` and print the results.

    Loss and accuracy are averaged over samples (each batch is weighted by its
    size), so a smaller final batch does not skew the reported numbers.

    Args:
        model: Network to evaluate. Batches are moved to the global `device`,
            so the model is expected to live there already.
        testing_dataloader: Iterable of (image, label) batches.
    """
    testing_loss, testing_acc, testing_samples = 0, 0, 0

    # eval() switches dropout / batch norm to inference behaviour;
    # inference_mode() turns off gradient tracking.
    model.eval()
    loss_fn = nn.CrossEntropyLoss()
    with torch.inference_mode():
        for image, label in testing_dataloader:
            image, label = image.to(device), label.to(device)

            pred = model(image)
            batch_size = label.size(0)

            testing_loss += loss_fn(pred, label) * batch_size
            testing_acc += accuracy_fn(pred, label) * batch_size
            testing_samples += batch_size

        # Keep the in-place divisions inside inference_mode: the accumulators
        # are inference tensors and may not be modified in place outside it.
        testing_loss /= testing_samples
        testing_acc /= testing_samples
        print(f"Testing Loss: {testing_loss:.5f} | Testing Accuracy: {testing_acc:.5f} ")

In [None]:
# Train and test each VGG variant in turn (50 epochs, learning rate 0.0001).
# NOTE(review): training_dataloader / validation_dataloader are whatever earlier cells last
# assigned; the sweep cells rebind both names, so confirm which split and batch size is intended.
trained_models = []
for architecture in (VGG_11, VGG_13, VGG_16, VGG_19):
    # Build, train and test one model completely before the next one is created
    network = architecture(in_features=3, num_classes=NUM_CLASSES).to(device=device)
    train_loss, validation_loss, accuracy = train_and_validate(model=network, training_set=training_dataloader, validation_set=validation_dataloader, epochs=50, learning_rate=0.0001)
    test(network, testing_dataloader)
    trained_models.append(network)

# Keep each trained model available under its own name
vgg_11, vgg_13, vgg_16, vgg_19 = trained_models

In [None]:
# Validation accuracies per trial, one value per entry of EPOCHS
# (presumably copied from five separate runs of the epoch sweep - confirm)
accuracy_1 = [0.2562302350997925, 0.2601028497517109, 0.2763053774833679, 0.2838211953639984, 0.2811511158943176]
accuracy_2 = [0.2678995132446289, 0.27713607512414455, 0.2965783178806305, 0.26869067549705505, 0.2773931920528412]
accuracy_3 = [0.2591969966888428, 0.2447586953639984, 0.275158230029046535, 0.2849090099334717, 0.28016218543052673]
accuracy_4 = [0.20886075496673584, 0.2873813211917877, 0.2727452516555786, 0.2913370132446289, 0.2861946225166321]
accuracy_5 = [0.26780062913894653, 0.2951938211917877, 0.26028481125831604, 0.23289161920547485, 0.28243669867515564]

# Plot one line per trial
plt.figure(figsize=(10, 6))
for trial_number, trial_accuracy in enumerate((accuracy_1, accuracy_2, accuracy_3, accuracy_4, accuracy_5), start=1):
    plt.plot(EPOCHS, trial_accuracy, label=f'Trial {trial_number}')

# Add labels and title. The fixed values are read from the same constants the epoch sweep
# uses (BATCH_SIZE[2], LEARNING_RATE[2]) so the title cannot disagree with the experiment.
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title(f'Accuracy against varying epochs, batch size={BATCH_SIZE[2]}, learning rate={LEARNING_RATE[2]}')
plt.xticks(EPOCHS)
plt.legend()
plt.grid(True)

# Show plot
plt.show()

In [None]:
# Validation accuracies per trial, one value per entry of BATCH_SIZE
# (presumably copied from five separate runs of the batch size sweep - confirm)
accuracy_1 = [0.22314296662807465, 0.2811504900455475, 0.29311707615852356, 0.3106445372104645, 0.31591796875]
accuracy_2 = [0.007987220771610737, 0.007663216441869736, 0.2525711953639984, 0.2911132872104645, 0.3158145546913147]
accuracy_3 = [0.008087060414254665, 0.26532644033432007, 0.2906447649002075, 0.263671875, 0.29326745867729187]
accuracy_4 = [0.008087060414254665, 0.2518909275531769, 0.2705696225166321, 0.30878907442092896, 0.3126436173915863]
accuracy_5 = [0.0083865812048316, 0.008061305619776249, 0.2795688211917877, 0.28789061307907104, 0.29856961965560913]

# Plot one line per trial
plt.figure(figsize=(10, 6))
for trial_number, trial_accuracy in enumerate((accuracy_1, accuracy_2, accuracy_3, accuracy_4, accuracy_5), start=1):
    plt.plot(BATCH_SIZE, trial_accuracy, label=f'Trial {trial_number}')

# Add labels and title. The fixed values are read from the same constants the batch size
# sweep uses (EPOCHS[2], LEARNING_RATE[2]) so the title cannot disagree with the experiment.
plt.xlabel('Batch size')
plt.ylabel('Accuracy')
plt.title(f'Accuracy against varying batch size, epochs={EPOCHS[2]}, learning rate={LEARNING_RATE[2]}')
plt.xticks(BATCH_SIZE)
plt.legend()
plt.grid(True)

# Show plot
plt.show()

In [None]:
# Validation accuracies per trial, one value per entry of LEARNING_RATE
# (presumably copied from five separate runs of the learning rate sweep - confirm)
accuracy_1 = [0.16139240562915802, 0.3088409900665283, 0.24792325496673584, 0.009394778870046139, 0.009493670426309109]
accuracy_2 = [0.1451740562915802, 0.2861946225166321, 0.2593947649002075, 0.008900316432118416, 0.010087025351822376]
accuracy_3 = [0.1610957235097885, 0.296281635761261, 0.2454509437084198, 0.009394778870046139, 0.009493670426309109]
accuracy_4 = [0.16070015728473663, 0.3125988841056824, 0.2470332235097885, 0.009394778870046139, 0.009493670426309109]
accuracy_5 = [0.14003165066242218, 0.3005340099334717, 0.2750197649002075, 0.008504746481776237, 0.00899920891970396]

# Plot one line per trial
plt.figure(figsize=(10, 6))
for trial_number, trial_accuracy in enumerate((accuracy_1, accuracy_2, accuracy_3, accuracy_4, accuracy_5), start=1):
    plt.plot(LEARNING_RATE, trial_accuracy, label=f'Trial {trial_number}')

# The learning rates span four orders of magnitude; a logarithmic x-axis spaces them
# evenly instead of squeezing the three smallest values against the left edge.
plt.xscale('log')

# Add labels and title
plt.xlabel('Learning rate')
plt.ylabel('Accuracy')
plt.title('Accuracy against learning rate, epochs=30, batch size=128')
plt.legend()
plt.grid(True)

# Show plot
plt.show()

In [None]:
# Accuracy values for each trial, one entry per learning rate in LEARNING_RATE
accuracy_1 = [0.16139240562915802, 0.3088409900665283, 0.24792325496673584, 0.009394778870046139, 0.009493670426309109]
accuracy_2 = [0.1451740562915802, 0.2861946225166321, 0.2593947649002075, 0.008900316432118416, 0.010087025351822376]
accuracy_3 = [0.1610957235097885, 0.296281635761261, 0.2454509437084198, 0.009394778870046139, 0.009493670426309109]
accuracy_4 = [0.16070015728473663, 0.3125988841056824, 0.2470332235097885, 0.009394778870046139, 0.009493670426309109]
accuracy_5 = [0.14003165066242218, 0.3005340099334717, 0.2750197649002075, 0.008504746481776237, 0.00899920891970396]

# Build a table with one row per learning rate and one column per trial
table = PrettyTable()
table.field_names = ['Learning Rate'] + [f"Trial {trial}" for trial in range(1, 6)]

# zip(*trials) regroups the per-trial lists into per-learning-rate tuples
trials = (accuracy_1, accuracy_2, accuracy_3, accuracy_4, accuracy_5)
for lr, per_trial in zip(LEARNING_RATE, zip(*trials)):
    table.add_row([lr, *(f"{value:.5f}" for value in per_trial)])

print(table)


### Results of grid search

In [None]:
################################################################ EPOCH 10 ########################################################################################
# Accuracy: 0.13379 | Epoch: 10 | Batch Size: 32 | Learning Rate: 1e-05 | Training time: 390.81456 seconds
# Accuracy: 0.31729 | Epoch: 10 | Batch Size: 32 | Learning Rate: 0.0001 | Training time: 386.47828 seconds
# Accuracy: 0.00809 | Epoch: 10 | Batch Size: 32 | Learning Rate: 0.001 | Training time: 349.35272 seconds
# Accuracy: 0.00819 | Epoch: 10 | Batch Size: 32 | Learning Rate: 0.01 | Training time: 328.37998 seconds
# Accuracy: 0.00998 | Epoch: 10 | Batch Size: 32 | Learning Rate: 0.1 | Training time: 336.23687 seconds
# Accuracy: 0.10360 | Epoch: 10 | Batch Size: 64 | Learning Rate: 1e-05 | Training time: 262.23336 seconds
# Accuracy: 0.27697 | Epoch: 10 | Batch Size: 64 | Learning Rate: 0.0001 | Training time: 257.01243 seconds
# Accuracy: 0.20213 | Epoch: 10 | Batch Size: 64 | Learning Rate: 0.001 | Training time: 247.56458 seconds
# Accuracy: 0.00796 | Epoch: 10 | Batch Size: 64 | Learning Rate: 0.01 | Training time: 242.45815 seconds
# Accuracy: 0.01045 | Epoch: 10 | Batch Size: 64 | Learning Rate: 0.1 | Training time: 247.35445 seconds
# Accuracy: 0.09424 | Epoch: 10 | Batch Size: 128 | Learning Rate: 1e-05 | Training time: 198.99599 seconds
# Accuracy: 0.24604 | Epoch: 10 | Batch Size: 128 | Learning Rate: 0.0001 | Training time: 202.52672 seconds
# Accuracy: 0.22725 | Epoch: 10 | Batch Size: 128 | Learning Rate: 0.001 | Training time: 227.00513 seconds
# Accuracy: 0.00870 | Epoch: 10 | Batch Size: 128 | Learning Rate: 0.01 | Training time: 246.28857 seconds
# Accuracy: 0.01019 | Epoch: 10 | Batch Size: 128 | Learning Rate: 0.1 | Training time: 242.73577 seconds
# Accuracy: 0.08271 | Epoch: 10 | Batch Size: 256 | Learning Rate: 1e-05 | Training time: 208.47278 seconds
# Accuracy: 0.22813 | Epoch: 10 | Batch Size: 256 | Learning Rate: 0.0001 | Training time: 208.42504 seconds
# Accuracy: 0.25811 | Epoch: 10 | Batch Size: 256 | Learning Rate: 0.001 | Training time: 190.55384 seconds
# Accuracy: 0.00820 | Epoch: 10 | Batch Size: 256 | Learning Rate: 0.01 | Training time: 167.63105 seconds
# Accuracy: 0.00947 | Epoch: 10 | Batch Size: 256 | Learning Rate: 0.1 | Training time: 167.61161 seconds
# Accuracy: 0.06955 | Epoch: 10 | Batch Size: 512 | Learning Rate: 1e-05 | Training time: 161.50041 seconds
# Accuracy: 0.14685 | Epoch: 10 | Batch Size: 512 | Learning Rate: 0.0001 | Training time: 160.72876 seconds
# Accuracy: 0.22937 | Epoch: 10 | Batch Size: 512 | Learning Rate: 0.001 | Training time: 157.15444 seconds
# Accuracy: 0.00795 | Epoch: 10 | Batch Size: 512 | Learning Rate: 0.01 | Training time: 153.05228 seconds
# Accuracy: 0.00972 | Epoch: 10 | Batch Size: 512 | Learning Rate: 0.1 | Training time: 151.64169 seconds

################################################################ EPOCH 20 ########################################################################################
# Accuracy: 0.19838 | Epoch: 20 | Batch Size: 32 | Learning Rate: 1e-05 | Training time: 726.25195 seconds
# Accuracy: 0.34255 | Epoch: 20 | Batch Size: 32 | Learning Rate: 0.0001 | Training time: 768.37213 seconds
# Accuracy: 0.00779 | Epoch: 20 | Batch Size: 32 | Learning Rate: 0.001 | Training time: 725.62046 seconds
# Accuracy: 0.00968 | Epoch: 20 | Batch Size: 32 | Learning Rate: 0.01 | Training time: 708.59335 seconds
# Accuracy: 0.01018 | Epoch: 20 | Batch Size: 32 | Learning Rate: 0.1 | Training time: 707.16604 seconds
# Accuracy: 0.16551 | Epoch: 20 | Batch Size: 64 | Learning Rate: 1e-05 | Training time: 528.99893 seconds
# Accuracy: 0.31389 | Epoch: 20 | Batch Size: 64 | Learning Rate: 0.0001 | Training time: 525.96276 seconds
# Accuracy: 0.24871 | Epoch: 20 | Batch Size: 64 | Learning Rate: 0.001 | Training time: 515.40858 seconds
# Accuracy: 0.01015 | Epoch: 20 | Batch Size: 64 | Learning Rate: 0.01 | Training time: 506.41893 seconds
# Accuracy: 0.00936 | Epoch: 20 | Batch Size: 64 | Learning Rate: 0.1 | Training time: 501.42039 seconds
# Accuracy: 0.11808 | Epoch: 20 | Batch Size: 128 | Learning Rate: 1e-05 | Training time: 391.94045 seconds
# Accuracy: 0.28135 | Epoch: 20 | Batch Size: 128 | Learning Rate: 0.0001 | Training time: 399.79583 seconds
# Accuracy: 0.31557 | Epoch: 20 | Batch Size: 128 | Learning Rate: 0.001 | Training time: 382.17563 seconds
# Accuracy: 0.00920 | Epoch: 20 | Batch Size: 128 | Learning Rate: 0.01 | Training time: 399.33035 seconds
# Accuracy: 0.01058 | Epoch: 20 | Batch Size: 128 | Learning Rate: 0.1 | Training time: 399.91044 seconds
# Accuracy: 0.09775 | Epoch: 20 | Batch Size: 256 | Learning Rate: 1e-05 | Training time: 326.40026 seconds
# Accuracy: 0.26201 | Epoch: 20 | Batch Size: 256 | Learning Rate: 0.0001 | Training time: 320.21635 seconds
# Accuracy: 0.28740 | Epoch: 20 | Batch Size: 256 | Learning Rate: 0.001 | Training time: 314.28493 seconds
# Accuracy: 0.01006 | Epoch: 20 | Batch Size: 256 | Learning Rate: 0.01 | Training time: 320.44222 seconds
# Accuracy: 0.00840 | Epoch: 20 | Batch Size: 256 | Learning Rate: 0.1 | Training time: 322.88630 seconds
# Accuracy: 0.09339 | Epoch: 20 | Batch Size: 512 | Learning Rate: 1e-05 | Training time: 274.62500 seconds
# Accuracy: 0.23515 | Epoch: 20 | Batch Size: 512 | Learning Rate: 0.0001 | Training time: 278.62935 seconds
# Accuracy: 0.32491 | Epoch: 20 | Batch Size: 512 | Learning Rate: 0.001 | Training time: 291.49685 seconds
# Accuracy: 0.00922 | Epoch: 20 | Batch Size: 512 | Learning Rate: 0.01 | Training time: 293.07171 seconds
# Accuracy: 0.00970 | Epoch: 20 | Batch Size: 512 | Learning Rate: 0.1 | Training time: 306.68558 seconds

################################################################ EPOCH 30 ########################################################################################
# Accuracy: 0.24631 | Epoch: 30 | Batch Size: 32 | Learning Rate: 1e-05 | Training time: 1096.74656 seconds
# Accuracy: 0.34505 | Epoch: 30 | Batch Size: 32 | Learning Rate: 0.0001 | Training time: 1130.36975 seconds
# Accuracy: 0.00769 | Epoch: 30 | Batch Size: 32 | Learning Rate: 0.001 | Training time: 1057.11082 seconds
# Accuracy: 0.00978 | Epoch: 30 | Batch Size: 32 | Learning Rate: 0.01 | Training time: 1061.04983 seconds
# Accuracy: 0.01068 | Epoch: 30 | Batch Size: 32 | Learning Rate: 0.1 | Training time: 1061.26722 seconds
# Accuracy: 0.18840 | Epoch: 30 | Batch Size: 64 | Learning Rate: 1e-05 | Training time: 795.11669 seconds
# Accuracy: 0.33081 | Epoch: 30 | Batch Size: 64 | Learning Rate: 0.0001 | Training time: 778.28329 seconds
# Accuracy: 0.00796 | Epoch: 30 | Batch Size: 64 | Learning Rate: 0.001 | Training time: 727.85373 seconds
# Accuracy: 0.00896 | Epoch: 30 | Batch Size: 64 | Learning Rate: 0.01 | Training time: 724.01872 seconds
# Accuracy: 0.00945 | Epoch: 30 | Batch Size: 64 | Learning Rate: 0.1 | Training time: 723.94992 seconds
# Accuracy: 0.15279 | Epoch: 30 | Batch Size: 128 | Learning Rate: 1e-05 | Training time: 594.74479 seconds
# Accuracy: 0.28125 | Epoch: 30 | Batch Size: 128 | Learning Rate: 0.0001 | Training time: 585.78692 seconds
# Accuracy: 0.26404 | Epoch: 30 | Batch Size: 128 | Learning Rate: 0.001 | Training time: 567.03618 seconds
# Accuracy: 0.00860 | Epoch: 30 | Batch Size: 128 | Learning Rate: 0.01 | Training time: 555.54059 seconds
# Accuracy: 0.00920 | Epoch: 30 | Batch Size: 128 | Learning Rate: 0.1 | Training time: 555.84220 seconds
# Accuracy: 0.12070 | Epoch: 30 | Batch Size: 256 | Learning Rate: 1e-05 | Training time: 513.62879 seconds
# Accuracy: 0.25273 | Epoch: 30 | Batch Size: 256 | Learning Rate: 0.0001 | Training time: 506.31074 seconds
# Accuracy: 0.28203 | Epoch: 30 | Batch Size: 256 | Learning Rate: 0.001 | Training time: 492.19768 seconds
# Accuracy: 0.00791 | Epoch: 30 | Batch Size: 256 | Learning Rate: 0.01 | Training time: 478.15668 seconds
# Accuracy: 0.00742 | Epoch: 30 | Batch Size: 256 | Learning Rate: 0.1 | Training time: 479.02802 seconds
# Accuracy: 0.10943 | Epoch: 30 | Batch Size: 512 | Learning Rate: 1e-05 | Training time: 466.44419 seconds
# Accuracy: 0.25018 | Epoch: 30 | Batch Size: 512 | Learning Rate: 0.0001 | Training time: 463.56500 seconds
# Accuracy: 0.30844 | Epoch: 30 | Batch Size: 512 | Learning Rate: 0.001 | Training time: 453.54411 seconds
# Accuracy: 0.06221 | Epoch: 30 | Batch Size: 512 | Learning Rate: 0.01 | Training time: 445.76804 seconds
# Accuracy: 0.01104 | Epoch: 30 | Batch Size: 512 | Learning Rate: 0.1 | Training time: 443.24127 seconds

################################################################ EPOCH 40 ########################################################################################
# Accuracy: 0.27286 | Epoch: 40 | Batch Size: 32 | Learning Rate: 1e-05 | Training time: 1448.21678 seconds
# Accuracy: 0.36082 | Epoch: 40 | Batch Size: 32 | Learning Rate: 0.0001 | Training time: 1472.62633 seconds
# Accuracy: 0.00849 | Epoch: 40 | Batch Size: 32 | Learning Rate: 0.001 | Training time: 1474.68807 seconds
# Accuracy: 0.00899 | Epoch: 40 | Batch Size: 32 | Learning Rate: 0.01 | Training time: 1456.57691 seconds
# Accuracy: 0.00929 | Epoch: 40 | Batch Size: 32 | Learning Rate: 0.1 | Training time: 1422.02242 seconds
# Accuracy: 0.22731 | Epoch: 40 | Batch Size: 64 | Learning Rate: 1e-05 | Training time: 1123.10497 seconds
# Accuracy: 0.33529 | Epoch: 40 | Batch Size: 64 | Learning Rate: 0.0001 | Training time: 1044.17301 seconds
# Accuracy: 0.25746 | Epoch: 40 | Batch Size: 64 | Learning Rate: 0.001 | Training time: 997.02242 seconds
# Accuracy: 0.01035 | Epoch: 40 | Batch Size: 64 | Learning Rate: 0.01 | Training time: 1002.52730 seconds
# Accuracy: 0.01005 | Epoch: 40 | Batch Size: 64 | Learning Rate: 0.1 | Training time: 1049.70041 seconds
# Accuracy: 0.17514 | Epoch: 40 | Batch Size: 128 | Learning Rate: 1e-05 | Training time: 809.14336 seconds
# Accuracy: 0.31220 | Epoch: 40 | Batch Size: 128 | Learning Rate: 0.0001 | Training time: 782.10480 seconds
# Accuracy: 0.25771 | Epoch: 40 | Batch Size: 128 | Learning Rate: 0.001 | Training time: 762.52917 seconds
# Accuracy: 0.00920 | Epoch: 40 | Batch Size: 128 | Learning Rate: 0.01 | Training time: 749.90570 seconds
# Accuracy: 0.00850 | Epoch: 40 | Batch Size: 128 | Learning Rate: 0.1 | Training time: 829.31714 seconds
# Accuracy: 0.13965 | Epoch: 40 | Batch Size: 256 | Learning Rate: 1e-05 | Training time: 705.97505 seconds
# Accuracy: 0.27187 | Epoch: 40 | Batch Size: 256 | Learning Rate: 0.0001 | Training time: 801.56022 seconds
# Accuracy: 0.27490 | Epoch: 40 | Batch Size: 256 | Learning Rate: 0.001 | Training time: 849.67602 seconds
# Accuracy: 0.00957 | Epoch: 40 | Batch Size: 256 | Learning Rate: 0.01 | Training time: 841.51689 seconds
# Accuracy: 0.01025 | Epoch: 40 | Batch Size: 256 | Learning Rate: 0.1 | Training time: 754.96118 seconds
# Accuracy: 0.13145 | Epoch: 40 | Batch Size: 512 | Learning Rate: 1e-05 | Training time: 711.09558 seconds
# Accuracy: 0.26050 | Epoch: 40 | Batch Size: 512 | Learning Rate: 0.0001 | Training time: 632.59679 seconds
# Accuracy: 0.30994 | Epoch: 40 | Batch Size: 512 | Learning Rate: 0.001 | Training time: 617.61975 seconds
# Accuracy: 0.00790 | Epoch: 40 | Batch Size: 512 | Learning Rate: 0.01 | Training time: 598.88243 seconds
# Accuracy: 0.00973 | Epoch: 40 | Batch Size: 512 | Learning Rate: 0.1 | Training time: 620.02222 seconds

################################################################ EPOCH 50 ########################################################################################
# Accuracy: 0.30521 | Epoch: 50 | Batch Size: 32 | Learning Rate: 1e-05 | Training time: 1864.02039 seconds
# Accuracy: 0.37610 | Epoch: 50 | Batch Size: 32 | Learning Rate: 0.0001 | Training time: 1893.24597 seconds
# Accuracy: 0.19599 | Epoch: 50 | Batch Size: 32 | Learning Rate: 0.001 | Training time: 1821.38071 seconds
# Accuracy: 0.00899 | Epoch: 50 | Batch Size: 32 | Learning Rate: 0.01 | Training time: 1775.12522 seconds
# Accuracy: 0.01018 | Epoch: 50 | Batch Size: 32 | Learning Rate: 0.1 | Training time: 1783.44322 seconds
# Accuracy: 0.25518 | Epoch: 50 | Batch Size: 64 | Learning Rate: 1e-05 | Training time: 1332.24501 seconds
# Accuracy: 0.33430 | Epoch: 50 | Batch Size: 64 | Learning Rate: 0.0001 | Training time: 1298.01856 seconds
# Accuracy: 0.26354 | Epoch: 50 | Batch Size: 64 | Learning Rate: 0.001 | Training time: 1247.96640 seconds
# Accuracy: 0.00936 | Epoch: 50 | Batch Size: 64 | Learning Rate: 0.01 | Training time: 1201.61187 seconds
# Accuracy: 0.01025 | Epoch: 50 | Batch Size: 64 | Learning Rate: 0.1 | Training time: 1209.36929 seconds
# Accuracy: 0.18147 | Epoch: 50 | Batch Size: 128 | Learning Rate: 1e-05 | Training time: 993.38018 seconds
# Accuracy: 0.29618 | Epoch: 50 | Batch Size: 128 | Learning Rate: 0.0001 | Training time: 976.19054 seconds
# Accuracy: 0.28313 | Epoch: 50 | Batch Size: 128 | Learning Rate: 0.001 | Training time: 952.03889 seconds
# Accuracy: 0.00860 | Epoch: 50 | Batch Size: 128 | Learning Rate: 0.01 | Training time: 930.11472 seconds
# Accuracy: 0.00722 | Epoch: 50 | Batch Size: 128 | Learning Rate: 0.1 | Training time: 932.50398 seconds
# Accuracy: 0.14316 | Epoch: 50 | Batch Size: 256 | Learning Rate: 1e-05 | Training time: 855.88286 seconds
# Accuracy: 0.29473 | Epoch: 50 | Batch Size: 256 | Learning Rate: 0.0001 | Training time: 848.92746 seconds
# Accuracy: 0.28740 | Epoch: 50 | Batch Size: 256 | Learning Rate: 0.001 | Training time: 824.78791 seconds
# Accuracy: 0.00889 | Epoch: 50 | Batch Size: 256 | Learning Rate: 0.01 | Training time: 802.71485 seconds
# Accuracy: 0.00840 | Epoch: 50 | Batch Size: 256 | Learning Rate: 0.1 | Training time: 810.44887 seconds
# Accuracy: 0.12437 | Epoch: 50 | Batch Size: 512 | Learning Rate: 1e-05 | Training time: 782.24551 seconds
# Accuracy: 0.26908 | Epoch: 50 | Batch Size: 512 | Learning Rate: 0.0001 | Training time: 778.42888 seconds
# Accuracy: 0.28914 | Epoch: 50 | Batch Size: 512 | Learning Rate: 0.001 | Training time: 768.94911 seconds
# Accuracy: 0.00846 | Epoch: 50 | Batch Size: 512 | Learning Rate: 0.01 | Training time: 878.58997 seconds
# Accuracy: 0.00858 | Epoch: 50 | Batch Size: 512 | Learning Rate: 0.1 | Training time: 745.37519 seconds

### Results of different VGG models and regularisation

In [None]:
# 50 epochs, 32 batch size, 0.0001 learning rate

# Dropout 0.5
# vgg_11 = Testing Loss: 5.15961 | Testing Accuracy: 0.41880
# vgg_13 = Testing Loss: 4.63639 | Testing Accuracy: 0.45356
# vgg_16 = Testing Loss: 3.53128 | Testing Accuracy: 0.39660
# vgg_19 = Testing Loss: 3.15414 | Testing Accuracy: 0.36104

# BatchNorm + Dropout 0.5
# vgg_11 = Testing Loss: 4.05076 | Testing Accuracy: 0.46176
# vgg_13 = Testing Loss: 3.88811 | Testing Accuracy: 0.48413
# vgg_16 = Testing Loss: 3.85989 | Testing Accuracy: 0.47584
# vgg_19 = Testing Loss: 4.33979 | Testing Accuracy: 0.42003

# BatchNorm
# vgg_11 = Testing Loss: 4.43091 | Testing Accuracy: 0.45597
# vgg_13 = Testing Loss: 4.28165 | Testing Accuracy: 0.47973
# vgg_16 = Testing Loss: 4.46354 | Testing Accuracy: 0.46496
# vgg_19 = Testing Loss: 5.54824 | Testing Accuracy: 0.39746

# No regularisation
# vgg_11 = Testing Loss: 7.46899 | Testing Accuracy: 0.36022
# vgg_13 = Testing Loss: 6.41228 | Testing Accuracy: 0.38878
# vgg_16 = Testing Loss: 7.58147 | Testing Accuracy: 0.32059
# vgg_19 = Testing Loss: 5.84439 | Testing Accuracy: 0.27396