In [1]:
# Import os for file operations
import os

# Import numpy for numerical operations
import numpy as np

# Import random for random number generation
import random
 
# Import PIL and matplotlib for image operations
from PIL import Image
import matplotlib.pyplot as plt

# Import torch for GPU Data processing
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchsummary import summary

# Import torchvision for  Data transformation
from torchvision import transforms
import torchvision.models as torchmodels


# Import tqdm for visualizing progress bar
from tqdm.notebook import tqdm

# Import xmltodict for parsing xml files
import xmltodict

In [2]:

# Check and print GPU information if available
if torch.cuda.is_available():
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
    device = torch.device("cuda")
else:
    print("No GPU available, using CPU")
    device = torch.device("cpu")

print(f"Device set to: {device}")

Using GPU: NVIDIA GeForce RTX 3060
Device set to: cuda


In [3]:
def split_dataset(data_dir, train_ratio=0.7, val_ratio=0.15, num_classes=None, seed=None):

    # Set seed for reproducibility if provided
    if seed is not None:
        random.seed(seed)
    # Otherwise, set a random seed
    else:
        random.seed()

    # Get the sorted list of classifications
    classes = sorted(os.listdir(data_dir))

    # Check if a specified number of classes is provided
    if num_classes is not None:
        # Make an upper bound to the number of classes
        classes = classes[:num_classes]

    # Create a dictionary mapping the class breed names to integers
    breed_dict = {breed: i for i, breed in enumerate(classes)}

    # Initialize the 3 splits sets
    train_set = []
    val_set = []
    test_set = []

    # Iterate over the classes
    for breed in classes:
        # Get the list of images for the current class
        img_list = os.listdir(os.path.join(data_dir, breed))
        # Make sure only images are captured
        img_list = [img for img in img_list if img.endswith('.jpg')]
        
        # Shuffle the list of images
        random.shuffle(img_list)

        # Calculate the split indexes
        num_train = int(len(img_list) * train_ratio)
        num_val = int(len(img_list) * val_ratio)
        num_test = len(img_list) - num_train - num_val
        
        # Split the images into the 3 sets
        train_set += [(os.path.join(data_dir, breed, img), breed_dict[breed]) for img in img_list[:num_train]]
        val_set += [(os.path.join(data_dir, breed, img), breed_dict[breed]) for img in img_list[num_train:num_train + num_val]]
        test_set += [(os.path.join(data_dir, breed, img), breed_dict[breed]) for img in img_list[num_train + num_val:]]

    return train_set, val_set, test_set, breed_dict

In [4]:
class DataLoaderClassification(Dataset):
    def __init__(self, image_set, breed_dict, transform=None):
        
        self.image_set = image_set
        self.transform = transform
        self.breed_dict = breed_dict

    def __len__(self):
        return len(self.image_set)

    
    def __getitem__(self, idx):
        img_path = self.image_set[idx][0]
        
        # Make sure the image file exists
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")
    
        # Open the image
        img = Image.open(img_path)
        
        # Get the respective bounding box file path
        bbox_path = img_path.replace('Images', 'Annotation').removesuffix('.jpg') 
        
        # Check if the bounding box file exists
        if not os.path.exists(bbox_path):
            raise FileNotFoundError(f"Bounding box file not found: {bbox_path}")
        
        # Parse the bounding box XML file
        with open(bbox_path) as fd:
            doc = xmltodict.parse(fd.read())
        
        # Handle multiple objects in the annotation
        objects = doc['annotation']['object']
        if isinstance(objects, list):
            bndbox = objects[0]['bndbox']  # Use the first object's bounding box
        else:
            bndbox = objects['bndbox']
        
        xmin = int(bndbox['xmin'])
        ymin = int(bndbox['ymin'])
        xmax = int(bndbox['xmax'])
        ymax = int(bndbox['ymax'])
            
        # Crop the image using the bounding box coordinates
        img = img.crop((xmin, ymin, xmax, ymax))
        
        # Apply transformation if any
        if self.transform:
            img = self.transform(img)
        
        # Get the label from the image path using the breed_to_label dictionary
        label = self.breed_dict[img_path.split('/')[-2]]
        
        return img, label

In [5]:
# Define the mean and std standards to be used consistently 
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Define the transformation to be applied on the training set
train_transform = transforms.Compose([
    # We perform data augmentation in the training set to reduce overfitting
    #transforms.RandomResizedCrop(128),
    #transforms.RandomHorizontalFlip(),
    #transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.Resize((128, 128)),
    transforms.Lambda(lambda img: img.convert('RGB')),  
    transforms.ToTensor(),
    # Normalize the image to the defined mean and std
    #transforms.Normalize(mean, std)
])
# Note we resize before converting to tensor, since in PIL format it is easier to resize and less computationally expensive


# Define the transformation to be applied on the validation set
val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.Lambda(lambda img: img.convert('RGB')), 
    transforms.ToTensor(),
    #transforms.Normalize(mean, std)
])


# Define the dataset image directory
directory = '/opt/nfs/shared/images/ImageNetDogs/Images'

# Split the dataset using the split_dataset function
train_set, val_set, test_set, breed_dict = split_dataset(directory, train_ratio=0.7, val_ratio=0.15)

# Set the number of classes
num_classes = len(breed_dict)

# Reverse dictionary to map labels to breed names
label_dict = {label: breed for breed, label in breed_dict.items()}

# Initialize the DataLoader class instances for the train, val and test sets
train_loader = DataLoaderClassification(train_set, breed_dict, transform=train_transform)
val_loader = DataLoaderClassification(val_set, breed_dict, transform=val_transform)
test_loader = DataLoaderClassification(test_set, breed_dict, transform=val_transform)

# Define the batch size
batch_size = 64

# Create the DataLoader objects for the train, val and test sets
train_loader = DataLoader(train_loader, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_loader, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_loader, batch_size=batch_size, shuffle=False)


# Verify the data loaders
total_samples = int(os.popen(f'find {directory} -type f | wc -l').read().strip())
train_samples = len(train_loader.dataset)
val_samples = len(val_loader.dataset)
test_samples = len(test_loader.dataset)
print(f"Total samples in the dataset: {total_samples}")
print(f"Samples in the training set: {train_samples}")
print(f"Samples in the validation set: {val_samples}")
print(f"Samples in the test set: {test_samples}")

if total_samples == (train_samples + val_samples + test_samples):
    print("Data split successful")
else:
    print("Data split unsuccessful")
    print(f"Total samples is meant to be {total_samples} but got {train_samples + val_samples + test_samples}")

Total samples in the dataset: 20580
Samples in the training set: 14355
Samples in the validation set: 3025
Samples in the test set: 3200
Data split successful


In [6]:
def evaluate_model(model, val_loader, criterion, device):
    model.eval()
    running_val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)

    val_loss = running_val_loss / len(val_loader)
    val_accuracy = correct_val / total_val
    return val_loss, val_accuracy

In [7]:


def compare_models(models_paths, test_loader):
    # Define the criterion
    criterion = nn.CrossEntropyLoss()

    # Create a list to store accuracy and loss for each model
    accuracies = []
    losses = []

    # Iterate over the models
    for model_path in models_paths:
        # Load the model
        checkpoint = torch.load(model_path)
        model = checkpoint['model_state_dict']
        model = model.to(device)

        # Evaluate the model using the evaluate_model function
        test_loss, test_accuracy = evaluate_model(model, test_loader, criterion, device)

        # Append the accuracy and loss to the lists
        accuracies.append(test_accuracy)
        losses.append(test_loss)
        
    # Plot the accuracy and loss for each model
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))

    # Plot the accuracy
    axs[0].barh(range(len(models_paths)), accuracies, color='skyblue')
    axs[0].set_yticks(range(len(models_paths)))
    axs[0].set_yticklabels([model_path.split('/')[-2] for model_path in models_paths])
    axs[0].set_xlabel('Accuracy')
    axs[0].set_title('Model Accuracy Comparison')

    # Plot the loss
    axs[1].barh(range(len(models_paths)), losses, color='salmon')
    axs[1].set_yticks(range(len(models_paths)))
    axs[1].set_yticklabels([model_path.split('/')[-2] for model_path in models_paths])
    axs[1].set_xlabel('Loss')
    axs[1].set_title('Model Loss Comparison')

    plt.tight_layout()
    plt.show()

    # Print the accuracy and loss for each model
    for model_path, accuracy, loss in zip(models_paths, accuracies, losses):
        print(f"Model: {model_path.split('/')[-2]}")
        print(f"Accuracy: {accuracy:.2f}")
        print(f"Loss: {loss:.4f}")
        print()
    
    return 

# Define the paths of the models
models_paths = [
    './Model/Simple-CNN/simple-model-best.pth',
    './Model/Improved-CNN/improved-model-best.pth',
    './Model/Deep-CNN/deep-model-best.pth',
    './Model/VGG16/vgg16-bn-model-best.pth',
    './Model/VGG16/vgg16-1-model-best.pth',
    './Model/VGG16/vgg16-2-model-best.pth'
]

# Assuming test_loader and device are already defined
compare_models(models_paths, test_loader)

AttributeError: 'collections.OrderedDict' object has no attribute 'to'