#### This notebook trains our baseline model

Imports

In [1]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import os
# Switch the working directory to ../models before the project-local imports below.
# Every relative path used later in this notebook ('../data', './saved_models/...')
# is therefore resolved from that directory as well.
os.chdir("../models")
# Project-local modules — presumably resolved via the new working directory; TODO confirm
# how sys.path is set up for this notebook.
from model import CustomCNN
from common_utils import set_seed, EarlyStopper, train, get_mean_rgb, CustomTransform

# Seed once, before the model and data loaders are created, so runs are repeatable.
# NOTE(review): which RNGs set_seed covers is defined in common_utils, not visible here.
set_seed(42)

Initialise model and dataset

In [2]:
# Network trained in this notebook.
model = CustomCNN()

# Arguments shared by every Flowers102 instantiation below.
flowers_kwargs = {"root": '../data', "download": True}

# NOTE: the 'test' split is deliberately used for training and the 'train' split for
# testing; the original author attributes this to the Flowers102 train and test splits
# being swapped.

# Mean RGB values are computed on the untransformed (ToTensor only) training images
# and then handed to CustomTransform.
mean_rgb = get_mean_rgb(
    datasets.Flowers102(split='test', transform=transforms.ToTensor(), **flowers_kwargs)
)
transform = CustomTransform(mean_rgb)

# Datasets are built in the order training, validation, testing.
train_dataset, val_dataset, test_dataset = (
    datasets.Flowers102(split=split_name, transform=transform, **flowers_kwargs)
    for split_name in ('test', 'val', 'train')
)

# Only the training loader shuffles; the evaluation loaders keep dataset order.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader, test_dataloader = (
    DataLoader(eval_dataset, batch_size=32, shuffle=False)
    for eval_dataset in (val_dataset, test_dataset)
)

Specify hyperparameters, select the compute device and create the directory for saved models

In [3]:
lr = 0.001 # learning rate

# Pick the best available accelerator, falling back to the CPU.
if torch.cuda.is_available(): # nvidia gpu
    device = torch.device("cuda")
elif torch.backends.mps.is_available(): # apple gpu
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Move the model to its device BEFORE building the optimiser, so the optimiser is
# constructed over parameters that already live where training will happen.
model.to(device)
optimiser = torch.optim.Adam(model.parameters(), lr=lr) # initialise optimiser
loss = torch.nn.CrossEntropyLoss() # initialise loss function

epochs = 100 # maximum number of epochs
early_stopper = EarlyStopper(patience=10) # initialise early stopper

# Baseline models are saved under a device-specific sub-directory
# (./saved_models/baseline_model/<cuda|mps|cpu>), which is passed to train().
baseline_model_path = "./saved_models/baseline_model"
device_type = device.type # "cuda", "mps" or "cpu"
device_path = os.path.join(baseline_model_path, device_type)

# makedirs creates any missing parent (including ./saved_models) and is a no-op when
# the directory already exists, so this cell is safe to re-run on a fresh checkout.
os.makedirs(device_path, exist_ok=True)

Train the model

In [4]:
# Train the model. The three returned sequences (training loss, validation loss,
# validation accuracy) are what the plotting cell further down draws against epoch.
train_loss_list, val_loss_list, val_acc_list = train(
    model,
    train_dataloader,
    val_dataloader,
    optimiser,
    loss,
    device,
    epochs,
    early_stopper,
    device_path,
)

Epoch 1/100:   0%|          | 0/193 [00:00<?, ?it/s]

Epoch 1/100: 100%|██████████| 193/193 [01:17<00:00,  2.49it/s, Training loss=4.4661]


Epoch 1/100 took 83.72s | Train loss: 4.4661 | Val loss: 4.6956 | Val accuracy: 0.98% | EarlyStopper count: 0


Epoch 2/100: 100%|██████████| 193/193 [01:13<00:00,  2.63it/s, Training loss=4.3915]


Epoch 2/100 took 80.09s | Train loss: 4.3915 | Val loss: 4.6821 | Val accuracy: 1.96% | EarlyStopper count: 0


Epoch 3/100: 100%|██████████| 193/193 [01:13<00:00,  2.64it/s, Training loss=4.0872]


Epoch 3/100 took 79.55s | Train loss: 4.0872 | Val loss: 4.3190 | Val accuracy: 2.55% | EarlyStopper count: 0


Epoch 4/100: 100%|██████████| 193/193 [01:13<00:00,  2.64it/s, Training loss=3.9260]


Epoch 4/100 took 79.39s | Train loss: 3.9260 | Val loss: 4.2229 | Val accuracy: 2.45% | EarlyStopper count: 0


Epoch 5/100: 100%|██████████| 193/193 [01:17<00:00,  2.49it/s, Training loss=3.7876]


Epoch 5/100 took 83.79s | Train loss: 3.7876 | Val loss: 4.0558 | Val accuracy: 5.29% | EarlyStopper count: 0


Epoch 6/100: 100%|██████████| 193/193 [01:13<00:00,  2.64it/s, Training loss=3.6581]


Epoch 6/100 took 79.54s | Train loss: 3.6581 | Val loss: 3.8943 | Val accuracy: 5.88% | EarlyStopper count: 0


Epoch 7/100: 100%|██████████| 193/193 [01:12<00:00,  2.66it/s, Training loss=3.5408]


Epoch 7/100 took 78.83s | Train loss: 3.5408 | Val loss: 3.7368 | Val accuracy: 8.82% | EarlyStopper count: 0


Epoch 8/100: 100%|██████████| 193/193 [01:13<00:00,  2.64it/s, Training loss=3.4373]


Epoch 8/100 took 79.23s | Train loss: 3.4373 | Val loss: 3.6399 | Val accuracy: 9.22% | EarlyStopper count: 0


Epoch 9/100: 100%|██████████| 193/193 [01:12<00:00,  2.65it/s, Training loss=3.3196]


Epoch 9/100 took 79.05s | Train loss: 3.3196 | Val loss: 3.4839 | Val accuracy: 11.37% | EarlyStopper count: 0


Epoch 10/100: 100%|██████████| 193/193 [01:13<00:00,  2.62it/s, Training loss=3.1896]


Epoch 10/100 took 80.33s | Train loss: 3.1896 | Val loss: 3.5068 | Val accuracy: 12.84% | EarlyStopper count: 0


Epoch 11/100: 100%|██████████| 193/193 [01:13<00:00,  2.64it/s, Training loss=3.0766]


Epoch 11/100 took 79.49s | Train loss: 3.0766 | Val loss: 3.3398 | Val accuracy: 15.20% | EarlyStopper count: 1


Epoch 12/100: 100%|██████████| 193/193 [01:16<00:00,  2.51it/s, Training loss=2.9701]


Epoch 12/100 took 83.21s | Train loss: 2.9701 | Val loss: 3.3001 | Val accuracy: 17.16% | EarlyStopper count: 0


Epoch 13/100: 100%|██████████| 193/193 [01:20<00:00,  2.41it/s, Training loss=2.8453]


Epoch 13/100 took 86.56s | Train loss: 2.8453 | Val loss: 3.2458 | Val accuracy: 20.29% | EarlyStopper count: 0


Epoch 14/100: 100%|██████████| 193/193 [01:19<00:00,  2.43it/s, Training loss=2.7633]


Epoch 14/100 took 86.82s | Train loss: 2.7633 | Val loss: 3.1829 | Val accuracy: 21.37% | EarlyStopper count: 0


Epoch 15/100:  15%|█▍        | 28/193 [00:12<01:13,  2.25it/s, Training loss=2.6252]


KeyboardInterrupt: 

Plot training loss, validation loss and validation accuracy

In [None]:
# Plot the learning curves returned by train(): first both losses on shared axes,
# then validation accuracy on its own figure. Explicit Figure/Axes objects are used
# (instead of the implicit pyplot "current axes") and each figure gets a title so it
# can be understood on its own when the notebook is skimmed.
import matplotlib.pyplot as plt

fig_loss, ax_loss = plt.subplots()
ax_loss.plot(train_loss_list, label="train loss")
ax_loss.plot(val_loss_list, label="val loss")
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("Loss")
ax_loss.set_title("Baseline model: training vs validation loss")
ax_loss.legend()
plt.show()

fig_acc, ax_acc = plt.subplots()
ax_acc.plot(val_acc_list, label="val accuracy")
ax_acc.set_xlabel("Epoch")
ax_acc.set_ylabel("Accuracy")
ax_acc.set_title("Baseline model: validation accuracy")
ax_acc.legend()
plt.show()