Part 2: Model Deployment

In [1]:
'''
Since we have already done the EDA and some preprocessing in the EDA notebook, we now move on to model training. We have also reloaded
everything used previously so that we can continue here, and added some imports needed for model training.

'''

# PyTorch
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn # for loading model, model training
import torch.optim as optim # model training

# Torchvision
from torchvision.datasets import Flowers102
from torchvision import models

# Albumentations
import albumentations as aug
from albumentations.pytorch import ToTensorV2

# Others
import numpy as np
import matplotlib.pyplot as plt


In [3]:
from google.colab import drive # Colab helper used to attach Google Drive to this runtime
drive.mount('/content/drive')  # mount Drive at /content/drive; later cells read and write under /content/drive/MyDrive





Mounted at /content/drive


In [13]:
'''
We now save each split of the dataset to Drive so that we don't have to download it every time; we can just load it from Drive.

'''

from torchvision.datasets import Flowers102
import torch
import os


# Folder on the mounted Drive that holds both the raw Flowers102 files and the
# pickled per-split dataset objects.
save_dir = '/content/drive/MyDrive/flowers102'
os.makedirs(save_dir, exist_ok=True)  # ensure folder exists

splits = ['train', 'val', 'test']

for split in splits:
    # The dataset object records image *paths* and labels; the images themselves
    # are opened lazily on access. Rooting the download inside `save_dir` keeps
    # those paths valid in later sessions, whereas a relative root on the Colab
    # VM disk disappears when the runtime is recycled.
    # NOTE(review): with download=True torchvision is expected to skip the
    # download when the files are already present and verified — confirm.
    ds = Flowers102(root=save_dir, split=split, download=True)
    # Persist the split object under a per-split filename, e.g. train_dataset.pt.
    torch.save(ds, os.path.join(save_dir, f'{split}_dataset.pt'))


From here until model training, we load the Albumentations transforms, the dataset wrapper, the DataLoaders, and the preprocessing from the previous notebook so that we can continue with model training; everything here is the same as before.

In [14]:
# Training-only pipeline: random flips, rotations and brightness/contrast jitter
# show the model more variety; validation/test data must not use this pipeline.
train_augment = aug.Compose(
    [
        # Bring every image to the 224x224 input size.
        aug.Resize(height=224, width=224),
        # Mirror left/right half of the time.
        aug.HorizontalFlip(p=0.5),
        # Rotate by up to +/-15 degrees half of the time.
        aug.Rotate(limit=15, p=0.5),
        # Randomly adjust brightness and contrast for 30% of the samples.
        aug.RandomBrightnessContrast(p=0.3),
        # Per-channel normalisation with the standard mean/std values.
        aug.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        # Convert the NumPy image into a tensor.
        ToTensorV2(),
    ]
)

In [15]:
"""
The validation and test sets get no augmenting transforms (only resizing and normalization), to keep them as close to the original images as possible.
"""

# Deterministic pipeline shared by the validation and test splits: resize,
# normalise, convert to tensor — no random augmentation.
val_test_noaug = aug.Compose(
    [
        aug.Resize(height=224, width=224),  # same input size as the training pipeline
        aug.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # standard mean/std values
        ToTensorV2(),  # NumPy array -> tensor
    ]
)


In [16]:


class Albumentations(Dataset):
    """
    Wrapper that applies an Albumentations-style transform to an indexable dataset.

    The wrapped dataset must return ``(image, label)`` pairs when indexed. The
    image is converted to a NumPy array and, when a transform is configured,
    passed to it as ``transform(image=array)``; the ``"image"`` entry of the
    returned mapping becomes the sample's image.

    Args:
        dataset: indexable object supporting ``len()`` and yielding ``(image, label)``.
        transform: optional callable following the Albumentations calling
            convention; ``None`` disables transformation.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx`` with the transform applied, if any."""
        image, label = self.dataset[idx]
        image = np.array(image)  # e.g. PIL image -> NumPy array
        # Compare against None explicitly: a transform object whose truth value
        # is False (for instance one defining __len__ that returns 0) must still
        # be applied rather than silently skipped.
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        return image, label


In [18]:
'''
Load the dataset from drive
Same from previous notebook eda

'''

save_dir = '/content/drive/MyDrive/flowers102'

# SECURITY: weights_only=False performs a full pickle load, and unpickling can
# execute arbitrary code — only load files you created yourself and trust.
# It is required here because each .pt file holds a pickled Flowers102 dataset
# object, not just raw tensors.
# The torch.serialization.safe_globals allow-list only applies to
# weights_only=True loads, so wrapping these calls in it had no effect and it
# is intentionally not used.
train = torch.load(f'{save_dir}/train_dataset.pt', weights_only=False)  # load each split
val   = torch.load(f'{save_dir}/val_dataset.pt', weights_only=False)
test  = torch.load(f'{save_dir}/test_dataset.pt', weights_only=False)

# Attach the transform pipelines: random augmentation for training only,
# deterministic preprocessing for validation and test.
train_data = Albumentations(train, train_augment)
val_data   = Albumentations(val, val_test_noaug)
test_data  = Albumentations(test, val_test_noaug)



In [19]:
"""
Create PyTorch DataLoaders for training, validation, and testing.
"""

# All three loaders use batches of 16 samples (larger batches need more memory)
# and 2 worker processes that prepare batches in parallel.

# Training: sample order is re-shuffled every epoch.
train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True, num_workers=2)

# Validation: fixed order.
val_loader = DataLoader(dataset=val_data, batch_size=16, shuffle=False, num_workers=2)

# Test: fixed order.
test_loader = DataLoader(dataset=test_data, batch_size=16, shuffle=False, num_workers=2)


Everything up to this point is carried over from the previous notebook; we now move on to model deployment.

In [27]:
import random

# Reproducibility function

def set_seed(seed=42):
    """
    Seed every random number generator used in this notebook.

    Fixing the seeds keeps training runs comparable from one execution to the
    next; without them, repeated runs may produce different results.

    Args:
        seed (int): value used to seed Python's `random`, NumPy and PyTorch
            (CPU and all CUDA devices).
    """
    # Python and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU first, then every CUDA device
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Ask cuDNN for deterministic kernels and switch off its auto-tuner
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


set_seed()

In [29]:

# Model setup

# Load a ResNet50 pre-trained on ImageNet.
# Reason for choosing it: deep enough to capture complex flower features, and
# its low-level features are already well learned, so it is easy to fine-tune.
# The `weights=` argument replaces the deprecated `pretrained=True` flag;
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to select.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

In [30]:
# Fine-tuning setup: every parameter outside "layer4" (the last convolutional
# block) is frozen so the pre-trained features are kept, which cuts training
# time and limits overfitting. Only layer4 and the new classifier are trained.
for name, param in model.named_parameters():
    if "layer4" in name:
        continue  # leave the last block trainable
    param.requires_grad = False

# Swap the classifier head for a fresh linear layer with 102 outputs
# (one per flower class); its parameters are trainable by default.
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=102)

In [33]:
from collections import Counter

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Loss function with class weighting

# Collect the training labels from the underlying (un-augmented) dataset.
# Iterating `train_data` would decode and augment every image only to throw
# the pixels away.
# NOTE(review): `_labels` is a private attribute that torchvision's Flowers102
# is expected to expose — confirm for the installed version. When it is absent
# we fall back to iterating the raw dataset.
labels_list = getattr(train_data.dataset, "_labels", None)
if labels_list is None:
    labels_list = [label for _, label in train_data.dataset]
labels_list = [int(label) for label in labels_list]

class_counts = Counter(labels_list)  # occurrences of each class
total_count = sum(class_counts.values())  # total number of training samples

# Inverse-frequency weighting needs at least one sample per class; fail with a
# clear message instead of an opaque ZeroDivisionError.
missing_classes = [i for i in range(102) if class_counts[i] == 0]
if missing_classes:
    raise ValueError(f"No training samples for classes: {missing_classes}")

# Inverse frequency weighting: rarer classes get larger weights.
class_weights = [total_count / class_counts[i] for i in range(102)]
class_weights = torch.tensor(class_weights).float().to(device)

# Weighted cross-entropy: errors on under-represented classes cost more.
criterion = nn.CrossEntropyLoss(weight=class_weights)

In [35]:

# Optimizer & scheduler

# AdamW updates only the parameters that are still trainable (the frozen
# layers are excluded), using the gradients from backpropagation.
optimizer = optim.AdamW(
    (p for p in model.parameters() if p.requires_grad),
    lr=1e-4,
)

# Halve the learning rate when the monitored value (the validation loss,
# lower is better) stops improving; patience is 3 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
)

In [43]:
from tqdm import tqdm # for seeing real time progress
# Training function

def _run_epoch(model, loader, criterion, device, optimizer=None, desc=""):
    """Run one pass over `loader`; train when `optimizer` is given, else evaluate.

    Returns:
        tuple: (mean loss per sample, accuracy) over the whole pass.

    Raises:
        ValueError: if the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)  # train() for training passes, eval() otherwise
    loss_sum, correct, total = 0.0, 0, 0

    # Gradients are only tracked during training passes.
    with torch.set_grad_enabled(training):
        for images, labels in tqdm(loader, desc=desc):
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()  # clear gradients
            outputs = model(images)  # forward pass
            loss = criterion(outputs, labels)  # compute loss
            if training:
                loss.backward()  # backpropagation
                optimizer.step()  # update weights

            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size  # accumulate batch loss
            correct += (outputs.argmax(dim=1) == labels).sum().item()  # correct predictions
            total += batch_size  # accumulate total samples

    if total == 0:
        raise ValueError(f"DataLoader yielded no samples ({desc or 'epoch'})")
    return loss_sum / total, correct / total


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=15, patience=3, save_path='best_model.pt', device=None):
    """
    Train a model with early stopping and checkpointing.

    Each epoch runs one training pass and one validation pass, prints the
    metrics, steps the scheduler with the validation loss, and saves the
    model's state_dict whenever the validation loss improves. Training stops
    early after `patience` consecutive epochs without improvement.

    Note: a ReduceLROnPlateau-style scheduler only lowers the learning rate
    after more than its own patience of non-improving epochs, so an
    early-stopping `patience` that is not larger than the scheduler's can end
    training before the learning rate is ever reduced.

    Args:
        model: PyTorch model to train
        train_loader, val_loader: DataLoaders for training and validation
        criterion: loss function
        optimizer: optimizer
        scheduler: learning rate scheduler whose step() accepts the validation loss
        num_epochs: maximum number of epochs
        patience: early stopping patience (in epochs)
        save_path: path to save the best model
        device: device the batches are moved to; defaults to the device of the
            model's first parameter (CPU if the model has no parameters)

    Returns:
        dict: per-epoch lists under the keys "train_loss", "train_acc",
        "val_loss" and "val_acc".

    Raises:
        ValueError: if either DataLoader yields no samples.
    """
    if device is None:
        # Follow the model instead of relying on a notebook-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}
    best_val_loss = float('inf')  # initialize best validation loss
    epochs_no_improve = 0  # counter for early stopping

    for epoch in range(num_epochs):
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device,
            optimizer=optimizer, desc=f"Epoch {epoch+1}/{num_epochs} - Train",
        )
        val_loss, val_acc = _run_epoch(
            model, val_loader, criterion, device,
            desc=f"Epoch {epoch+1}/{num_epochs} - Val",
        )

        history["train_loss"].append(train_loss)
        history["train_acc"].append(train_acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        print(f"Epoch {epoch+1}: Train Loss {train_loss:.4f} Accuracy {train_acc:.4f} | Val Loss {val_loss:.4f} Accuracy {val_acc:.4f}")

        # Scheduler step
        scheduler.step(val_loss)  # adjust LR if validation loss plateaus

        # Early stopping & checkpoint
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), save_path)  # save best model
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    return history

In [45]:
# model training

# Fine-tune the network; the best checkpoint (lowest validation loss) is
# written to Drive so it survives the Colab session.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=15,
    patience=3,
    save_path='/content/drive/MyDrive/flowers102_resnet50_best.pt',
)


Epoch 1/15 - Train:  28%|██▊       | 18/64 [01:50<04:42,  6.14s/it]


KeyboardInterrupt: 