<a href="https://colab.research.google.com/github/Dom-Isbis/ImageClassificationCIFAR-10/blob/main/COMP472_CIFAR_10_Image_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# Managing imports
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Subset
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA

# Dataset Overview

* Initialize a transform pipeline that resizes the input images and applies ImageNet normalization.
* Build an experiment subset covering all 10 object classes, with 500 training images and 100 test images per class.

In [24]:
# Preprocessing pipeline: resize to 224 x 224, convert to a tensor,
# then normalize each channel with the ImageNet mean / std.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
transform = transforms.Compose(preprocessing_steps)

# Load the CIFAR-10 train and test splits from ./data (download=True),
# attaching the same preprocessing transform to both.
cifar10_options = dict(root='./data', download=True, transform=transform)
train_dataset = torchvision.datasets.CIFAR10(train=True, **cifar10_options)
test_dataset = torchvision.datasets.CIFAR10(train=False, **cifar10_options)

# Select the first `num_per_class` samples of every class (e.g. 500 train / 100 test)
def get_class_indices(dataset, num_per_class, num_classes=10):
    """Return the indices of the first `num_per_class` samples of each class.

    Args:
        dataset: Either an object exposing a `targets` sequence of integer
            labels (as torchvision's CIFAR10 does), or any iterable of
            `(sample, label)` pairs.
        num_per_class: Maximum number of indices to keep for each class.
        num_classes: Number of classes; labels must lie in
            `range(num_classes)`. Defaults to 10.

    Returns:
        A flat list of dataset indices grouped by class: all indices kept for
        class 0 first, then class 1, and so on. Within a class the indices
        are in dataset order.

    Raises:
        KeyError: If a label outside `range(num_classes)` is encountered.
    """
    if num_per_class <= 0 or num_classes <= 0:
        return []

    # Prefer the label list when the dataset exposes one: iterating the
    # dataset itself loads and transforms every image only to discard it.
    labels = getattr(dataset, 'targets', None)
    if labels is None:
        labels = (label for _, label in dataset)

    class_indices = {i: [] for i in range(num_classes)}
    unfilled_classes = num_classes

    for idx, label in enumerate(labels):
        bucket = class_indices[int(label)]
        if len(bucket) < num_per_class:
            bucket.append(idx)
            if len(bucket) == num_per_class:
                unfilled_classes -= 1
                # Every class has its quota: no need to scan the rest.
                if unfilled_classes == 0:
                    break

    selected_indices = [idx for indices in class_indices.values() for idx in indices]

    return selected_indices

# Pick 500 training and 100 test samples per class.
train_indices = get_class_indices(train_dataset, 500)
test_indices = get_class_indices(test_dataset, 100)

# Wrap the selected indices as Subset views over the full CIFAR-10 datasets.
train_subset = Subset(train_dataset, train_indices)
test_subset = Subset(test_dataset, test_indices)

# Seed the shuffle so that a fresh "Restart & Run All" visits the training
# samples in the same order every time (the data is still shuffled).
SHUFFLE_SEED = 42
shuffle_generator = torch.Generator().manual_seed(SHUFFLE_SEED)

train_loader = torch.utils.data.DataLoader(
    train_subset, batch_size=64, shuffle=True, num_workers=2, generator=shuffle_generator
)
test_loader = torch.utils.data.DataLoader(test_subset, batch_size=64, shuffle=False, num_workers=2)

print(f'Training subset size: {len(train_subset)}')
print(f'Test subset size: {len(test_subset)}')

Files already downloaded and verified
Files already downloaded and verified
Training subset size: 5000
Test subset size: 1000


In [27]:
# Pick the compute device: GPU when CUDA is available, otherwise CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Pre-trained ResNet-18 (ImageNet weights, selected through the `weights` parameter)
pretrained = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Keep every child module except the last one (the fc classification layer),
# then move the resulting feature extractor to the chosen device
feature_layers = list(pretrained.children())[:-1]
resnet18 = nn.Sequential(*feature_layers).to(device)

def extract_features(loader, model, device=None):
    """Run `model` over every batch of `loader` and collect the flattened outputs.

    Args:
        loader: Iterable yielding `(inputs, targets)` batches of tensors.
        model: `nn.Module` used as the feature extractor. It is switched to
            eval mode and left in that mode.
        device: Device the inputs are moved to before the forward pass.
            Defaults to the device of the model's first parameter, or CPU
            when the model has no parameters.

    Returns:
        A `(features, labels)` tuple of CPU tensors. `features` holds one
        flattened output row per sample; `labels` holds the concatenated
        targets. Both follow the order in which `loader` yielded the batches.
    """
    # Resolve the device from the model instead of relying on a notebook global.
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    features = []
    labels = []

    with torch.no_grad():  # Disable gradient calculation for speed
        for inputs, targets in loader:
            outputs = model(inputs.to(device))  # Extract features
            # reshape (unlike view) also works when the output is non-contiguous
            features.append(outputs.reshape(outputs.size(0), -1).cpu())
            labels.append(targets.cpu())

    features = torch.cat(features, dim=0)
    labels = torch.cat(labels, dim=0)

    return features, labels

# Run the feature extractor over the training loader, then over the test loader
train_features, train_labels = extract_features(loader=train_loader, model=resnet18)
test_features, test_labels = extract_features(loader=test_loader, model=resnet18)

# Report the shape of each extracted feature tensor
print(f'Training features shape: {train_features.shape}')
print(f'Test features shape: {test_features.shape}')

Training features shape: torch.Size([5000, 512])
Test features shape: torch.Size([1000, 512])


In [28]:
# Convert tensors to numpy arrays
train_features_np = train_features.numpy()
test_features_np = test_features.numpy()

# Apply PCA to reduce the features to 50 dimensions.
# The projection is fitted on the training features only and then reused
# unchanged for the test features.
# random_state pins the result when PCA picks a randomized SVD solver, so
# re-running the notebook yields the same projection.
pca = PCA(n_components=50, random_state=42)
train_features_pca = pca.fit_transform(train_features_np)
test_features_pca = pca.transform(test_features_np)

print(f'Training features after PCA: {train_features_pca.shape}')
print(f'Test features after PCA: {test_features_pca.shape}')

Training features after PCA: (5000, 50)
Test features after PCA: (1000, 50)
