In [None]:
from __future__ import annotations

import os 
from pathlib import Path 
from typing import Any, Callable 
from torchvision.datasets.folder import default_loader
from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg
from torchvision.datasets.vision import VisionDataset
from sklearn.metrics import *
import torch.nn as nn

class FGVCAircraft(VisionDataset):
    """FGVC Aircraft image classification dataset.

    Each image is labelled at one of three hierarchy levels, selected with
    ``annotation_level``:

    - ``variant``, e.g. Boeing 737-700. A variant collapses all the models that are visually
        indistinguishable into one class. The dataset comprises 100 different variants.
    - ``family``, e.g. Boeing 737. The dataset comprises 70 different families.
    - ``manufacturer``, e.g. Boeing. The dataset comprises 30 different manufacturers.

    Args:
        root: Directory that contains (or will receive) the ``fgvc-aircraft-2013b`` folder.
        split: One of ``"train"``, ``"val"``, ``"trainval"`` or ``"test"``.
        annotation_level: One of ``"variant"``, ``"family"`` or ``"manufacturer"``.
        transform: Optional callable applied to every loaded image.
        target_transform: Optional callable applied to every integer label.
        download: When True, download and extract the archive under ``root``
            unless the data folder already exists.
        loader: Callable that receives an image path and returns the image.

    Raises:
        RuntimeError: If the data folder is still missing after the optional download.
    """

    _URL = "https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/archives/fgvc-aircraft-2013b.tar.gz"

    # File listing the class names for each annotation level.
    _CLASS_FILES = {
        "variant": "variants.txt",
        "family": "families.txt",
        "manufacturer": "manufacturers.txt",
    }

    def __init__(
        self,
        root: str | Path,
        split: str = "trainval",
        annotation_level: str = "variant",
        transform: Callable | None = None,
        target_transform: Callable | None = None,
        download: bool = False,
        loader: Callable[[str], Any] = default_loader,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        self._split = verify_str_arg(split, "split", ("train", "val", "trainval", "test"))
        self._annotation_level = verify_str_arg(
            annotation_level, "annotation_level", ("variant", "family", "manufacturer")
        )

        self._data_path = os.path.join(self.root, "fgvc-aircraft-2013b")
        if download:
            self._download()

        if not self._check_exists():
            raise RuntimeError("Dataset not found. You can use download=True to download it")

        annotation_file = os.path.join(
            self._data_path, "data", self._CLASS_FILES[self._annotation_level]
        )
        with open(annotation_file) as f:
            # Ignore blank lines so a trailing newline cannot create an empty class.
            self.classes = [name for name in (line.strip() for line in f) if name]

        self.class_to_idx = dict(zip(self.classes, range(len(self.classes))))

        image_data_folder = os.path.join(self._data_path, "data", "images")
        labels_file = os.path.join(self._data_path, "data", f"images_{self._annotation_level}_{self._split}.txt")

        self._image_files = []
        self._labels = []

        with open(labels_file) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # Class names may contain spaces, so split only on the first one.
                image_name, label_name = line.split(" ", 1)
                self._image_files.append(os.path.join(image_data_folder, f"{image_name}.jpg"))
                self._labels.append(self.class_to_idx[label_name])
        self.loader = loader

    def __len__(self) -> int:
        """Return the number of images in the selected split."""
        return len(self._image_files)

    def __getitem__(self, idx: int) -> tuple[Any, Any]:
        """Load the image at ``idx`` and return ``(image, label)`` after the optional transforms."""
        image_file, label = self._image_files[idx], self._labels[idx]
        image = self.loader(image_file)

        # Compare against None: a callable container may be falsy yet still valid.
        if self.transform is not None:
            image = self.transform(image)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

    def _download(self) -> None:
        """
        Download the FGVC Aircraft dataset archive and extract it under root.
        """
        if self._check_exists():
            return
        download_and_extract_archive(self._URL, self.root)

    def _check_exists(self) -> bool:
        """Return True when the extracted data folder is present."""
        return os.path.isdir(self._data_path)
    
        
class RemoveCopyrightBanner:
    """Transform that crops a fixed-height strip off the bottom of an image.

    Args:
        banner_height: Number of pixel rows to remove from the bottom edge.

    Raises:
        ValueError: If ``banner_height`` is negative (a negative value would
            extend the crop box below the image instead of trimming it).
    """

    def __init__(self, banner_height=20):
        if banner_height < 0:
            raise ValueError(f"banner_height must be non-negative, got {banner_height}")
        self.banner_height = banner_height

    def __call__(self, img):
        """
        img: PIL Image
        Returns cropped PIL Image with bottom banner removed.

        Images that are not taller than the banner are returned unchanged,
        because cropping them would leave an empty (or inverted) box.
        """
        width, height = img.size
        if height <= self.banner_height:
            return img
        return img.crop((0, 0, width, height - self.banner_height))

In [None]:
# main.ipynb

# libraries used for CNN Classifier Model
import random
import torch
import torch.nn as nn
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
import seaborn as sns

# Seed every RNG the pipeline touches so augmentations, shuffling and weight
# initialisation are repeatable after Restart & Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Preprocessing constants shared by the training and validation pipelines.
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # channel statistics used with torchvision's pretrained models
IMAGENET_STD = [0.229, 0.224, 0.225]
IMAGE_SIZE = 224
BANNER_HEIGHT = 20        # pixel rows cropped off the bottom of every image
DATA_ROOT = "/content"    # Colab working folder

# Transform Datasets for Training and Validation Use
train_transform = transforms.Compose([
    RemoveCopyrightBanner(BANNER_HEIGHT),
    transforms.RandomResizedCrop(
        size=IMAGE_SIZE,
        scale=(0.8, 1.0)   # zoom in/out slightly
    ),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(             # simulate colour effects of weather/ bad lighting
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.05
    ),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Deterministic pipeline: no augmentation, only banner removal, resize and normalisation.
val_transform = transforms.Compose([
    RemoveCopyrightBanner(banner_height=BANNER_HEIGHT),
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Create the dataset, automatically download if missing
train_dataset = FGVCAircraft(
    root=DATA_ROOT,
    split="train",               # train/val/test
    annotation_level="variant",  # variant/family/manufacturer
    transform=train_transform,
    download=True                # triggers download if not found
)

val_dataset = FGVCAircraft(      # validation split (the "test" split is not used here)
    root=DATA_ROOT,
    split="val",
    annotation_level="variant",
    transform=val_transform,
    download=True
)

# Access length
print(f"{len(train_dataset)} examples for training and {len(val_dataset)} for validation")

In [None]:
# Verify transformation: display one augmented training sample with its label.
img, label = train_dataset[0]
class_name = train_dataset.classes[label]

# Unnormalize for display (inverse of the Normalize step: x * std + mean)
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
img = img * std + mean

# Convert from CHW to HWC and clamp into the displayable [0, 1] range.
fig, ax = plt.subplots()
ax.imshow(img.permute(1, 2, 0).clamp(0, 1))
ax.set_title(f"Variant: {class_name}")
ax.axis("off")
# Explicit show() so the cell renders the figure instead of echoing the axis limits.
plt.show()


In [None]:
# Verify data in evaluation dataset: display the first validation sample with its label.
img, label = val_dataset[0]
class_name = val_dataset.classes[label]

# Unnormalize for display (inverse of the Normalize step: x * std + mean)
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
img = img * std + mean

# Convert from CHW → HWC and clamp into the displayable [0, 1] range.
fig, ax = plt.subplots()
ax.imshow(img.permute(1, 2, 0).clamp(0, 1))
ax.set_title(f"Variant: {class_name}")
ax.axis("off")
# Explicit show() so the cell renders the figure instead of echoing the axis limits.
plt.show()

In [None]:
import torch.nn as nn
class DeepCNN(nn.Module):
    """Convolutional classifier built from four down-sampling stages.

    Every stage applies (3x3 conv -> batch norm -> ReLU) twice and then halves
    the spatial size with a 2x2 max-pool. The resulting 512-channel map is
    globally average-pooled, flattened and fed to one linear layer that emits
    ``num_classes`` logits.
    """

    # Channel width entering the network, followed by the width after each stage.
    _STAGE_CHANNELS = (3, 64, 128, 256, 512)

    def __init__(self, num_classes=8):
        super().__init__()

        widths = self._STAGE_CHANNELS
        # For a 224x224 input the stages produce 112, 56, 28 and 14 pixel maps.
        stages = [
            self._make_stage(c_in, c_out)
            for c_in, c_out in zip(widths[:-1], widths[1:])
        ]
        self.features = nn.Sequential(*stages)

        head = [
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(widths[-1], num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    @staticmethod
    def _make_stage(in_channels, out_channels):
        """Build one stage: two conv/BN/ReLU triples followed by a 2x2 max-pool."""
        layers = []
        for source_channels in (in_channels, out_channels):
            layers.extend([
                nn.Conv2d(source_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ])
        layers.append(nn.MaxPool2d(2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map an image batch to class logits."""
        return self.classifier(self.features(x))

In [None]:
from torchvision import models

num_classes = len(train_dataset.classes)
print(num_classes)

# Pick the compute device once; everything below reuses this single value.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pretrained resnet18
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of
# `weights=`; the original call is kept so older installs keep working.
resnet18 = models.resnet18(pretrained=True)

# Freeze the pretrained backbone BEFORE attaching the new head. Setting
# `module.requires_grad = True` only creates a plain attribute on the module and
# does not touch its parameters, so the head must not pass through this loop.
for param in resnet18.parameters():
    param.requires_grad = False

# Replace the last fully connected layer; its fresh parameters are trainable by default.
resnet18.fc = nn.Linear(resnet18.fc.in_features, num_classes)

# Move to GPU if available
resnet18 = resnet18.to(device)

# Loss & optimizer
# NOTE: this optimizer only updates `resnet18.fc`; training any other model
# requires an optimizer built from that model's own parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet18.fc.parameters(), lr=1e-4)

# define batch size
BATCH_SIZE = 32

# create dataloaders
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE
)


# function to train model for 1 epoch
def train_one_epoch(model, loader, optimizer, criterion, device=None):
    """Run one optimisation pass over ``loader`` and report loss and accuracy.

    Args:
        model: Module to train; switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter, so no global state is needed.

    Returns:
        Tuple ``(mean loss per batch, fraction of correctly classified samples)``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        device = next(model.parameters()).device

    model.train()
    loss_sum = 0.0
    n_batches = 0
    n_correct = 0
    n_seen = 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        n_batches += 1
        preds = outputs.argmax(dim=1)
        n_correct += (preds == labels).sum().item()
        n_seen += labels.size(0)

    if n_seen == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("loader yielded no samples; cannot compute epoch metrics")

    return loss_sum / n_batches, n_correct / n_seen


def validate(model, loader, criterion, device=None):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter, so no global state is needed.

    Returns:
        Tuple ``(mean loss per batch, fraction of correctly classified samples)``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        device = next(model.parameters()).device

    model.eval()
    loss_sum = 0.0
    n_batches = 0
    n_correct = 0
    n_seen = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            loss_sum += loss.item()
            n_batches += 1
            preds = outputs.argmax(dim=1)
            n_correct += (preds == labels).sum().item()
            n_seen += labels.size(0)

    if n_seen == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("loader yielded no samples; cannot compute validation metrics")

    return loss_sum / n_batches, n_correct / n_seen

# Smoke test: fetch a single training batch and move its images to the device.
sample_batch = next(iter(train_loader))
images, labels = sample_batch
images = images.to(device)

In [None]:
# Run training loop
model = DeepCNN(num_classes).to(device)
EPOCHS = 5

# The optimizer must own the parameters of the model being trained; one built
# from a different network's parameters would leave this model's weights untouched.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Lowest validation loss seen so far; infinity guarantees the first epoch is saved.
best_val_loss = float("inf")

for epoch in range(EPOCHS):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = validate(model, val_loader, criterion)

    print(
        f"Epoch {epoch+1}/{EPOCHS} | "
        f"Train Acc: {train_acc:.3f} | "
        f"Val Acc: {val_acc:.3f}"
    )

    # Checkpoint only the weights (state_dict) whenever validation loss improves.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_cnn.pth")
        print(f"Saved new best model at epoch {epoch+1}")

IndentationError: expected an indented block after 'if' statement on line 15 (2452869365.py, line 16)

In [None]:
# Evaluation Layer

@torch.no_grad()
def get_predictions(model, loader, device=None):
    """Collect ground-truth labels and predicted class indices for every sample.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the image batches are moved to. Defaults to the device
            of the model's first parameter, so no global state is needed.

    Returns:
        Tuple ``(labels, predictions)`` of 1-D NumPy arrays in loader order.
        Both arrays are empty when the loader yields nothing.
    """
    if device is None:
        device = next(model.parameters()).device

    model.eval()

    label_chunks = []
    pred_chunks = []

    for images, labels in loader:
        outputs = model(images.to(device))
        pred_chunks.append(outputs.argmax(dim=1).cpu().numpy())
        # Labels are only collected, never compared on-device, so skip the transfer.
        label_chunks.append(labels.cpu().numpy())

    if not pred_chunks:
        # np.concatenate rejects an empty list; return empty arrays instead.
        return np.empty(0, dtype=np.int64), np.empty(0, dtype=np.int64)

    return np.concatenate(label_chunks), np.concatenate(pred_chunks)

In [None]:
y_true, y_pred = get_predictions(model, val_loader)

# Pin the label set to the dataset's classes so the report always lines up with
# the class names, even if some class never occurs in y_true or y_pred.
class_names = train_dataset.classes
class_ids = list(range(len(class_names)))

accuracy = accuracy_score(y_true, y_pred)

# zero_division=0 scores never-predicted classes as 0 (the same value the default
# yields) without emitting one warning per class.
precision = precision_score(y_true, y_pred, average="weighted", zero_division=0)
recall = recall_score(y_true, y_pred, average="weighted", zero_division=0)
f1 = f1_score(y_true, y_pred, average="weighted", zero_division=0)

print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-score:  {f1:.4f}")

print(classification_report(
    y_true,
    y_pred,
    labels=class_ids,
    target_names=class_names,
    zero_division=0
))

In [None]:
# Confusion Matrix
# Pass the full label set explicitly so the matrix always has one row/column per
# class and therefore matches the tick labels below.
class_names = train_dataset.classes
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    cm,
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    square=True,
    cbar=True,
    ax=ax
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix – Aircraft Classification")
plt.show()

In [None]:
# Positions in the evaluation set where the prediction disagrees with the label.
prediction_is_wrong = y_true != y_pred
misclassified_idx = np.flatnonzero(prediction_is_wrong)
print("Total misclassified images:", len(misclassified_idx))

# visual representation of misclassified images
def show_misclassified(dataset, y_true, y_pred, indices, n=5):
    """Plot a random selection of misclassified samples with true/predicted names.

    Args:
        dataset: Indexable dataset returning ``(normalized image tensor, label)``.
            Its ``classes`` attribute supplies the class names when present.
        y_true: Sequence of true class indices, aligned with ``dataset``.
        y_pred: Sequence of predicted class indices, aligned with ``dataset``.
        indices: Positions of the misclassified samples.
        n: Maximum number of samples to draw; capped at ``len(indices)``.

    Returns:
        None. Shows a matplotlib figure, or prints a notice when there is
        nothing to display.
    """
    candidates = list(indices)
    # random.sample raises when asked for more items than exist, so cap n.
    n = min(n, len(candidates))
    if n <= 0:
        print("No misclassified images to display.")
        return

    samples = random.sample(candidates, n)

    # Prefer the names of the dataset being shown; fall back to the training
    # dataset for inputs that do not expose ``classes``.
    class_names = getattr(dataset, "classes", None)
    if class_names is None:
        class_names = train_dataset.classes

    # Constants that undo the Normalize step (x * std + mean); hoisted out of the loop.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    plt.figure(figsize=(15, 3))
    for i, idx in enumerate(samples):
        img, _ = dataset[idx]
        img = img * std + mean

        plt.subplot(1, n, i + 1)
        plt.imshow(img.permute(1, 2, 0).clamp(0, 1))  # CHW -> HWC
        plt.title(
            f"True: {class_names[y_true[idx]]}\n"
            f"Pred: {class_names[y_pred[idx]]}"
        )
        plt.axis("off")

    plt.show()

In [None]:
# Demo Model
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load saved model
# "best_cnn.pth" holds a state_dict (written via torch.save(model.state_dict(), ...)),
# not a pickled module, so rebuild the architecture first and load the weights into it.
model = DeepCNN(num_classes=len(train_dataset.classes))
state_dict = torch.load("best_cnn.pth", map_location=device)  # map_location allows GPU checkpoints on CPU
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

In [None]:
from PIL import Image
from torchvision import transforms

# Path to your real-world aircraft image
img_path = "test_aircraft.jpg" # replace w any img from evaluation dataset
# The context manager closes the file handle; convert() returns an independent copy.
with Image.open(img_path) as raw_image:
    img = raw_image.convert("RGB")

# Reuse the validation pipeline itself so inference preprocessing is guaranteed to
# match what the model was evaluated with (a hand-copied pipeline can drift from it).
transform = val_transform

img_tensor = transform(img).unsqueeze(0)  # add batch dimension
img_tensor = img_tensor.to(device)

In [None]:
# Forward pass without gradient tracking; the top-scoring logit gives the class index.
with torch.no_grad():
    outputs = model(img_tensor)
predicted_class = outputs.argmax(dim=1).item()

# Map index to class name
idx_to_class = dict(enumerate(train_dataset.classes))
print("Predicted Aircraft Variant:", idx_to_class[predicted_class])


In [None]:
import matplotlib.pyplot as plt

# Undo the Normalize step (x * std + mean) so the tensor can be drawn as an image.
std = torch.tensor([0.229, 0.224, 0.225])[:, None, None]
mean = torch.tensor([0.485, 0.456, 0.406])[:, None, None]

first_in_batch = img_tensor.cpu()[0]
img_show = first_in_batch * std + mean

# CHW -> HWC, clamped into the displayable [0, 1] range.
displayable = img_show.permute(1, 2, 0).clamp(0, 1)
plt.imshow(displayable)
plt.title(f"Predicted: {idx_to_class[predicted_class]}")
plt.axis("off")
plt.show()
