In [71]:
# Necessary imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
from prettytable import PrettyTable
from tqdm.auto import tqdm
import time

In [72]:
# Device-agnostic setup: run on the GPU when CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [73]:
# Hyperparameters
LEARNING_RATE = 1e-3  # learning rate handed to the Adam optimiser in the training cell
EPOCHS = 50  # number of passes over the training dataloader
BATCH_SIZE = 256  # batch size shared by the training, validation and testing dataloaders

In [74]:
# transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5071, 0.4867, 0.4408), std=(0.2675, 0.2565, 0.2761))])

# # Download CiFAR-100 Dataset from PyTorch
# training_data = datasets.CIFAR100(
#                 root="data",                        # Set root directory of data
#                 train=True,                         # Get training dataset
#                 download=True,                      # Download the data
#                 transform=transform)                # Transform the dataset into tensors and normalise

# testing_data = datasets.CIFAR100(
#                 root="data",                        # Set root directory of data
#                 train=False,                        # Get testing dataset
#                 download=True,                      # Download the data
#                 transform=transform)                # Transform the dataset into tensors and normalise        

# print(f"Length of training data: {len(training_data)}")
# print(f"Length of testing data: {len(testing_data)}")

In [75]:
# Preprocessing: convert each image to a tensor, then normalise it channel by channel
to_tensor = transforms.ToTensor()
normalise = transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2470, 0.2435, 0.2616))
transform = transforms.Compose([to_tensor, normalise])

# Download CIFAR-10 from torchvision; both splits share the same root directory,
# download flag and preprocessing, and differ only in the `train` switch
cifar10_kwargs = dict(root="data", download=True, transform=transform)
training_data = datasets.CIFAR10(train=True, **cifar10_kwargs)    # training split
testing_data = datasets.CIFAR10(train=False, **cifar10_kwargs)    # testing split

print(f"Length of training data: {len(training_data)}")
print(f"Length of testing data: {len(testing_data)}")

Files already downloaded and verified
Files already downloaded and verified
Length of training data: 50000
Length of testing data: 10000


In [76]:
# Shape of an image ([colour channels, height, width])
# image, label = training_data[0]
# print(f"Image Shape: {image.shape}")
# print(f"Label: {training_data.classes[label]}")

# plt.figure(figsize=(3,3))
# plt.imshow(transforms.ToPILImage()(image))
# plt.title(training_data.classes[label])
# plt.axis(False)

In [77]:
# Number of target classes, read from the dataset before it is split below
NUM_CLASSES = len(training_data.classes)
print(NUM_CLASSES)

# Hold out one fifth of the training images for validation. The sizes are derived
# from the dataset length (40000 / 10000 for the 50000-image training set) and the
# split uses a seeded generator so the same partition is produced on every run.
# NOTE: `training_data` is rebound to the training subset here, so re-run the
# dataset-loading cell before re-running this one.
SPLIT_SEED = 42
num_validation = len(training_data) // 5
num_training = len(training_data) - num_validation
training_data, validation_data = torch.utils.data.random_split(
    training_data,
    [num_training, num_validation],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Put training, validation and testing data in dataloaders for efficient batching;
# only the training batches are shuffled
training_dataloader = DataLoader(dataset=training_data, batch_size=BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(dataset=validation_data, batch_size=BATCH_SIZE, shuffle=False)
testing_dataloader = DataLoader(dataset=testing_data, batch_size=BATCH_SIZE, shuffle=False)

10


In [78]:
# Define VGG-11 architecture
# 8 convolutional layers -> Flatten -> 3 FC layers (8 + 3 = 11 weight layers)
# Conv layout: 64, MaxPool, 128, MaxPool, 256, 256, MaxPool, 512, 512, MaxPool, 512, 512, MaxPool

class VGG_11(nn.Module):
    """VGG-11 style classifier.

    Args:
        in_features: number of channels of the input image.
        num_classes: size of the final linear layer's output.

    The classifier's first linear layer takes 512 features, i.e. it expects the
    conv stack to end in a 512x1x1 map (32x32 inputs halved by five poolings).
    `forward` returns the output of the last linear layer; no softmax is applied.
    """

    def __init__(self, in_features = 3, num_classes = 100):
        super().__init__()

        # Integers are conv output widths (3x3 kernel, stride 1, padding 1, followed
        # by ReLU); "M" is a 2x2 max pool with stride 2
        layout = (64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M")

        stack = []
        channels = in_features
        for entry in layout:
            if entry == "M":
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            stack.append(nn.Conv2d(in_channels=channels, out_channels=entry, kernel_size=3, stride=1, padding=1))
            stack.append(nn.ReLU())
            channels = entry
        self.conv_layers = nn.Sequential(*stack)

        # Three fully connected layers follow the last max pool.
        # Flattened size = last_out_channels * (height / 2**5) * (width / 2**5)
        #                = 512 * (32 / 32) * (32 / 32) = 512 for 32x32 inputs
        hidden = 4096
        self.fc = nn.Sequential(
            nn.Linear(512 * 1 * 1, hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, x):
        """Run the conv stack, flatten everything but the batch dimension, then classify."""
        features = self.conv_layers(x)
        flat = features.reshape(features.shape[0], -1)
        return self.fc(flat)

In [79]:
# Define VGG-13 architecture
# 10 convolutional layers -> Flatten -> 3 FC layers (10 + 3 = 13 weight layers)
# Conv layout: 64, 64, MaxPool, 128, 128, MaxPool, 256, 256, MaxPool, 512, 512, MaxPool, 512, 512, MaxPool

class VGG_13(nn.Module):
    """VGG-13 style classifier.

    Args:
        in_features: number of channels of the input image.
        num_classes: size of the final linear layer's output.

    The classifier's first linear layer takes 512 features, i.e. it expects the
    conv stack to end in a 512x1x1 map (32x32 inputs halved by five poolings).
    `forward` returns the output of the last linear layer; no softmax is applied.
    """

    def __init__(self, in_features = 3, num_classes = 100):
        super().__init__()

        # Integers are conv output widths (3x3 kernel, stride 1, padding 1, followed
        # by ReLU); "M" is a 2x2 max pool with stride 2
        layout = (64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M")

        stack = []
        channels = in_features
        for entry in layout:
            if entry == "M":
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            stack.append(nn.Conv2d(in_channels=channels, out_channels=entry, kernel_size=3, stride=1, padding=1))
            stack.append(nn.ReLU())
            channels = entry
        self.conv_layers = nn.Sequential(*stack)

        # Three fully connected layers follow the last max pool.
        # Flattened size = last_out_channels * (height / 2**5) * (width / 2**5)
        #                = 512 * (32 / 32) * (32 / 32) = 512 for 32x32 inputs
        hidden = 4096
        self.fc = nn.Sequential(
            nn.Linear(512 * 1 * 1, hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, x):
        """Run the conv stack, flatten everything but the batch dimension, then classify."""
        features = self.conv_layers(x)
        flat = features.reshape(features.shape[0], -1)
        return self.fc(flat)

In [80]:
# Define VGG-16 architecture
# 13 convolutional layers -> Flatten -> 3 FC layers (13 + 3 = 16 weight layers)
# Conv layout: 64, 64, MaxPool, 128, 128, MaxPool, 256, 256, 256, MaxPool, 512, 512, 512, MaxPool, 512, 512, 512, MaxPool

class VGG_16(nn.Module):
    """VGG-16 style classifier.

    Args:
        in_features: number of channels of the input image.
        num_classes: size of the final linear layer's output.

    The classifier's first linear layer takes 512 features, i.e. it expects the
    conv stack to end in a 512x1x1 map (32x32 inputs halved by five poolings).
    `forward` returns the output of the last linear layer; no softmax is applied.
    """

    def __init__(self, in_features = 3, num_classes = 100):
        super().__init__()

        # Integers are conv output widths (3x3 kernel, stride 1, padding 1, followed
        # by ReLU); "M" is a 2x2 max pool with stride 2
        layout = (
            64, 64, "M",
            128, 128, "M",
            256, 256, 256, "M",
            512, 512, 512, "M",
            512, 512, 512, "M",
        )

        stack = []
        channels = in_features
        for entry in layout:
            if entry == "M":
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            stack.append(nn.Conv2d(in_channels=channels, out_channels=entry, kernel_size=3, stride=1, padding=1))
            stack.append(nn.ReLU())
            channels = entry
        self.conv_layers = nn.Sequential(*stack)

        # Three fully connected layers follow the last max pool.
        # Flattened size = last_out_channels * (height / 2**5) * (width / 2**5)
        #                = 512 * (32 / 32) * (32 / 32) = 512 for 32x32 inputs
        hidden = 4096
        self.fc = nn.Sequential(
            nn.Linear(512 * 1 * 1, hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, x):
        """Run the conv stack, flatten everything but the batch dimension, then classify."""
        features = self.conv_layers(x)
        flat = features.reshape(features.shape[0], -1)
        return self.fc(flat)

In [81]:
# Print a table of trainable parameter tensors and their sizes, then the grand total
def model_summary(model):
    """Print each trainable parameter of `model` with its element count.

    Parameters whose `requires_grad` is False are left out of both the table
    and the total. Nothing is returned; the table and the total are printed.
    """
    # (name, element count) for every trainable parameter, in registration order
    trainable = [
        (name, tensor.numel())
        for name, tensor in model.named_parameters()
        if tensor.requires_grad
    ]

    table = PrettyTable(["Layers", "Parameters"])
    for name, count in trainable:
        table.add_row([name, count])

    print(table)
    print(f"Total Params: {sum(count for _, count in trainable)}")

In [82]:
# Prints the time difference between training start time and training end time
def print_train_time(start: float, end: float) -> float:
    """Print and return the elapsed training time.

    Args:
        start: timestamp taken before training (e.g. from `time.time()`), in seconds.
        end: timestamp taken after training, in seconds.

    Returns:
        `end - start`, the elapsed time in seconds.
    """
    total = end - start
    print(f"Training time: {total:.3f} seconds")
    return total

In [83]:
# print(next(iter(training_dataloader))[0].shape)

In [84]:
def accuracy(predicted,actual):
    """Return the fraction of rows whose highest-scoring column equals the label.

    Args:
        predicted: 2-D tensor of scores, one row per sample.
        actual: tensor of target class indices, one per row of `predicted`.

    Returns:
        A 0-dim tensor holding correct / number of samples.
    """
    # Column index of the largest score in each row
    predictions = torch.max(predicted, dim=1).indices
    num_correct = (predictions == actual).sum().item()
    return torch.tensor(num_correct / len(predictions))

In [85]:
# Choose the architecture to train: keep exactly one constructor un-commented
# model = VGG_11(in_features=3, num_classes=NUM_CLASSES).to(device=device)
# model = VGG_16(in_features=3, num_classes=NUM_CLASSES).to(device=device)
model = VGG_13(in_features=3, num_classes=NUM_CLASSES)
model = model.to(device=device)  # move the parameters to the selected device

# Show the per-layer parameter counts of the chosen model
model_summary(model=model)

+-----------------------+------------+
|         Layers        | Parameters |
+-----------------------+------------+
|  conv_layers.0.weight |    1728    |
|   conv_layers.0.bias  |     64     |
|  conv_layers.2.weight |   36864    |
|   conv_layers.2.bias  |     64     |
|  conv_layers.5.weight |   73728    |
|   conv_layers.5.bias  |    128     |
|  conv_layers.7.weight |   147456   |
|   conv_layers.7.bias  |    128     |
| conv_layers.10.weight |   294912   |
|  conv_layers.10.bias  |    256     |
| conv_layers.12.weight |   589824   |
|  conv_layers.12.bias  |    256     |
| conv_layers.15.weight |  1179648   |
|  conv_layers.15.bias  |    512     |
| conv_layers.17.weight |  2359296   |
|  conv_layers.17.bias  |    512     |
| conv_layers.20.weight |  2359296   |
|  conv_layers.20.bias  |    512     |
| conv_layers.22.weight |  2359296   |
|  conv_layers.22.bias  |    512     |
|      fc.0.weight      |  2097152   |
|       fc.0.bias       |    4096    |
|      fc.3.weight      |

In [86]:
# # Training loop
# loss_fn = nn.CrossEntropyLoss()
# optim = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# train_time_start = time.time()
# for epoch in tqdm(range(EPOCHS)):
#     training_loss = 0

#     # Enumerate over all batches
#     for image, label in training_dataloader:
#         image, label = image.to(device), label.to(device)
#         model.train()

#         # Pass training batch into model for prediction
#         pred = model(image)
#         loss = loss_fn(pred, label)
#         training_loss += loss
        
#         optim.zero_grad()
#         loss.backward()
#         optim.step()

#     # Calculate average training loss
#     training_loss /= len(training_dataloader)

#     validation_loss, validation_acc = 0, 0

#     # Disable weight updating
#     model.eval()
#     with torch.inference_mode():
#         for image, label in validation_dataloader:
#             image, label = image.to(device), label.to(device)

#             pred = model(image)
#             loss = loss_fn(pred, label)
#             validation_loss += loss
            
#             validation_acc += accuracy(pred, label)

#         validation_loss /= len(validation_dataloader)
#         validation_acc /= len(validation_dataloader)
    
#     if (epoch % 10) == 0:
#         print(f"Training Loss: {training_loss:.5f} | Validation Loss: {validation_loss:.5f} | Validation Accuracy: {validation_acc:.5f}")

# train_time_end = time.time()
# total_time = print_train_time(start=train_time_start, end=train_time_end)

In [87]:
# Training loop
loss_fn = nn.CrossEntropyLoss()
# optim = torch.optim.SGD(params=model.parameters(), lr=0.1)
optim = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

train_time_start = time.time()
for epoch in tqdm(range(EPOCHS)):
    # ---- Training pass ----
    # Switch to training mode once per epoch (the validation pass below sets eval mode)
    model.train()
    training_loss, training_samples = 0.0, 0

    for image, label in training_dataloader:
        image, label = image.to(device), label.to(device)
        batch_size = label.size(0)

        # Pass training batch into model for prediction
        pred = model(image)
        loss = loss_fn(pred, label)

        optim.zero_grad()
        loss.backward()
        optim.step()

        # Accumulate a plain Python float so no autograd history is carried between
        # steps; weight by batch size because the last batch may be smaller
        training_loss += loss.item() * batch_size
        training_samples += batch_size

    # Average training loss per sample
    training_loss /= training_samples

    # ---- Validation pass ----
    validation_loss, validation_acc, validation_samples = 0.0, 0.0, 0

    # Evaluation mode (affects the dropout layers); inference_mode disables gradient tracking
    model.eval()
    with torch.inference_mode():
        for image, label in validation_dataloader:
            image, label = image.to(device), label.to(device)
            batch_size = label.size(0)

            pred = model(image)
            loss = loss_fn(pred, label)

            # Both metrics are per-batch means, so re-weight them by the batch size
            validation_loss += loss.item() * batch_size
            validation_acc += accuracy(pred, label).item() * batch_size
            validation_samples += batch_size

    # Per-sample averages over the whole validation set
    validation_loss /= validation_samples
    validation_acc /= validation_samples

    if (epoch % 10) == 0:
        print(f"Training Loss: {training_loss:.5f} | Validation Loss: {validation_loss:.5f} | Validation Accuracy: {validation_acc:.5f}")

train_time_end = time.time()
total_time = print_train_time(start=train_time_start, end=train_time_end)

  2%|▏         | 1/50 [00:21<17:41, 21.66s/it]

Training Loss: 1.98167 | Validation Loss: 1.75815 | Validation Accuracy: 0.29912


 22%|██▏       | 11/50 [03:46<13:15, 20.39s/it]

Training Loss: 0.39555 | Validation Loss: 0.86869 | Validation Accuracy: 0.74385


 42%|████▏     | 21/50 [07:10<09:51, 20.39s/it]

Training Loss: 0.12935 | Validation Loss: 1.01095 | Validation Accuracy: 0.75244


 62%|██████▏   | 31/50 [10:33<06:27, 20.38s/it]

Training Loss: 0.07469 | Validation Loss: 1.28119 | Validation Accuracy: 0.76709


 82%|████████▏ | 41/50 [13:57<03:03, 20.38s/it]

Training Loss: 0.07608 | Validation Loss: 1.59647 | Validation Accuracy: 0.76650


100%|██████████| 50/50 [17:01<00:00, 20.43s/it]

Training time: 1021.328 seconds





In [89]:
# Evaluate the trained model on the testing dataloader
testing_loss, testing_acc, testing_samples = 0.0, 0.0, 0

# Evaluation mode (affects the dropout layers); inference_mode disables gradient tracking
model.eval()
with torch.inference_mode():
    for image, label in testing_dataloader:
        image, label = image.to(device), label.to(device)
        batch_size = label.size(0)

        pred = model(image)
        loss = loss_fn(pred, label)

        # Both metrics are per-batch means; weight them by the batch size so a
        # smaller final batch does not skew the dataset-level average
        testing_loss += loss.item() * batch_size
        testing_acc += accuracy(pred, label).item() * batch_size
        testing_samples += batch_size

# Per-sample averages over the whole test set
testing_loss /= testing_samples
testing_acc /= testing_samples

# Keep the shorter names bound as well, now holding the final metrics
test_loss, test_acc = testing_loss, testing_acc
print(f"Testing Loss: {testing_loss:.4f} | Testing Accuracy: {testing_acc:.4f} ")

Testing Loss: 1.8873 | Testing Accuracy: 0.7559 
