In [None]:
# Mount Google Drive inside the Colab VM so the dataset folder stored there is reachable.
from google.colab import drive
drive.mount('/content/drive')
# Copy the dataset folder from Drive to /content/COMP9517_ZXCZH; the training cell
# reads its train/ and test/ sub-folders from that location.
!cp -r /content/drive/MyDrive/COMP9517_ZXCZH /content/

Mounted at /content/drive


In [None]:
!pip install efficientnet_pytorch

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from efficientnet_pytorch import EfficientNet
from tqdm import tqdm

# Compute backend: prefer Apple MPS, then CUDA, and fall back to the CPU.
if torch.backends.mps.is_available():
    backend_name = "mps"
elif torch.cuda.is_available():
    backend_name = "cuda"
else:
    backend_name = "cpu"
device = torch.device(backend_name)

# Dataset locations: ImageFolder-style directories for the two splits.
train_dir, test_dir = '/content/COMP9517_ZXCZH/train', '/content/COMP9517_ZXCZH/test'

# Channel-wise mean / std shared by both pipelines (the usual ImageNet statistics).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Data augmentation pipeline (training only): random 224x224 crops of the resized
# image plus flip / rotation / colour jitter.
train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

# Deterministic evaluation pipeline.
# Resize(224) with an int only fixes the SHORTER edge, so a non-square image would
# yield a non-square tensor: batches of mixed sizes cannot be stacked, and the input
# would differ from the 224x224 crops seen in training. CenterCrop(224) guarantees a
# 224x224 input and is a no-op for square images.
test_transforms = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

# Datasets: one sub-folder per class under each split directory.
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transforms)

# Loader settings common to both splits; only the training loader shuffles.
loader_kwargs = dict(batch_size=64, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, **loader_kwargs)

# Model initialization
# Seed torch's global RNG before anything random happens, so that weight
# initialisation and the DataLoader shuffle order are repeatable between runs.
# (GPU kernels may still introduce small non-deterministic differences.)
# NOTE(review): presumably the 15-class head is freshly initialised because it does
# not match the pretrained classifier — confirm against efficientnet_pytorch.
SEED = 42
torch.manual_seed(SEED)

model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=15)
model.to(device)

# Training configuration
NUM_EPOCHS = 10
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-4)

# Mixed precision is only enabled when training on CUDA. On MPS/CPU the scaler and
# the autocast context become no-ops instead of emitting warnings.
# torch.amp.GradScaler / torch.autocast replace the deprecated torch.cuda.amp.*
# entry points (see the FutureWarning printed by the original run).
use_amp = device.type == "cuda"
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

for epoch in range(NUM_EPOCHS):
    # ---- training stage ----
    model.train()
    train_loss, correct, total = 0.0, 0, 0
    with tqdm(train_loader, unit="batch") as pbar:
        pbar.set_description(f"Epoch {epoch+1}/{NUM_EPOCHS}")
        # num_batches counts the batches processed so far (1-based); it is the
        # denominator of the running mean loss shown in the progress bar.
        for num_batches, (images, labels) in enumerate(pbar, start=1):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            # Forward pass under autocast (mixed precision when use_amp is True)
            with torch.autocast(device_type="cuda", enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            # Scale the loss and backpropagate
            scaler.scale(loss).backward()

            # Optimizer step (gradients are unscaled by the scaler first)
            scaler.step(optimizer)

            # Update the loss-scale factor for the next iteration
            scaler.update()

            # Running statistics for this epoch
            train_loss += loss.item()
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Mean loss per batch. Dividing by the real batch count stays correct
            # when the last batch is smaller than the configured batch size and
            # cannot divide by zero (the former `total//64` could do both).
            pbar.set_postfix({
                "loss": f"{train_loss/num_batches:.3f}",
                "acc": f"{100*correct/total:.1f}%"
            })

    # ---- validation stage ----
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    # Print the epoch summary
    print(f"Val Acc: {100*val_correct/val_total:.1f}%")
    print("-" * 50)

# Overfitting check: compare the accuracy counters left over from the last epoch's
# training pass with those of its validation pass.
final_train_acc, final_val_acc = (
    100 * hits / seen
    for hits, seen in ((correct, total), (val_correct, val_total))
)
print(f"\nTraining accuracy: {final_train_acc:.1f}% | Verification accuracy: {final_val_acc:.1f}%")

# A train/validation gap above 10 percentage points is reported as overfitting.
accuracy_gap = final_train_acc - final_val_acc
verdict = "Overfitting detected!" if accuracy_gap > 10 else "Normal training"
print(verdict)

  scaler = torch.cuda.amp.GradScaler()


Loaded pretrained weights for efficientnet-b0


  with torch.cuda.amp.autocast():
Epoch 1/10: 100%|██████████| 150/150 [01:11<00:00,  2.10batch/s, loss=0.666, acc=84.0%]


Val Acc: 95.4%
--------------------------------------------------


Epoch 2/10: 100%|██████████| 150/150 [01:08<00:00,  2.18batch/s, loss=0.138, acc=95.7%]


Val Acc: 96.7%
--------------------------------------------------


Epoch 3/10: 100%|██████████| 150/150 [01:08<00:00,  2.20batch/s, loss=0.090, acc=97.3%]


Val Acc: 97.2%
--------------------------------------------------


Epoch 4/10: 100%|██████████| 150/150 [01:09<00:00,  2.17batch/s, loss=0.067, acc=97.9%]


Val Acc: 97.6%
--------------------------------------------------


Epoch 5/10: 100%|██████████| 150/150 [01:09<00:00,  2.15batch/s, loss=0.057, acc=98.2%]


Val Acc: 97.4%
--------------------------------------------------


Epoch 6/10: 100%|██████████| 150/150 [01:10<00:00,  2.13batch/s, loss=0.047, acc=98.4%]


Val Acc: 97.7%
--------------------------------------------------


Epoch 7/10: 100%|██████████| 150/150 [01:09<00:00,  2.16batch/s, loss=0.044, acc=98.5%]


Val Acc: 96.6%
--------------------------------------------------


Epoch 8/10: 100%|██████████| 150/150 [01:11<00:00,  2.11batch/s, loss=0.035, acc=99.0%]


Val Acc: 97.5%
--------------------------------------------------


Epoch 9/10: 100%|██████████| 150/150 [01:09<00:00,  2.16batch/s, loss=0.032, acc=99.1%]


Val Acc: 97.4%
--------------------------------------------------


Epoch 10/10: 100%|██████████| 150/150 [01:13<00:00,  2.05batch/s, loss=0.030, acc=99.1%]


Val Acc: 97.2%
--------------------------------------------------

Training accuracy: 99.1% | Verification accuracy: 97.2%
Normal training


In [None]:
!pip install grad-cam

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random

from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

# Grad-CAM target layer: the projection conv of the final entry in `model._blocks`.
# NOTE(review): efficientnet_pytorch models also appear to have a `_conv_head` conv
# after the blocks, so this is not strictly the last convolution — confirm this is
# the layer you want to explain.
target_layers = [model._blocks[-1]._project_conv]

# Class names (indexed by class id) and the parallel path / label tuples of the test split
class_names = train_dataset.classes
image_paths, labels = zip(*test_dataset.samples)

# Mean / std used by the Normalize step of test_transforms; needed to undo the
# normalisation when turning the model input back into a displayable image.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

# Seed the sampler so the same example images are drawn on every run
random.seed(42)

# Inference mode is loop-invariant, so set it once
model.eval()

# Build the Grad-CAM object once and let the context manager release its hooks,
# instead of creating a new instance for every class.
with GradCAM(model=model, target_layers=target_layers) as cam:
    for class_idx, class_name in enumerate(class_names):
        print(f"Grad-CAM for {class_name}...")

        # Get all image paths for the current class
        class_image_paths = [path for path, label in zip(image_paths, labels) if label == class_idx]
        if not class_image_paths:
            print(f"No images found for class {class_name}")
            continue

        # Randomly select one image; the context manager closes the file handle
        img_path = random.choice(class_image_paths)
        with Image.open(img_path) as img_file:
            pil_img = img_file.convert("RGB")
        input_tensor = test_transforms(pil_img).unsqueeze(0).to(device)

        # Get model prediction for the image
        with torch.no_grad():
            pred_class = model(input_tensor).argmax(dim=1).item()

        # Heatmap for the predicted class (needs gradients, so outside no_grad)
        grayscale_cam = cam(input_tensor=input_tensor, targets=[ClassifierOutputTarget(pred_class)])[0]

        # Rebuild the display image from the exact tensor the model saw. The former
        # `pil_img.resize((224, 224))` only matched the heatmap for square images,
        # because test_transforms' Resize(224) keeps the aspect ratio.
        img_np = (
            (input_tensor[0].detach().cpu() * norm_std + norm_mean)
            .clamp(0, 1)
            .permute(1, 2, 0)
            .numpy()
        )
        visualization = show_cam_on_image(img_np, grayscale_cam, use_rgb=True)

        # Display results: model input on the left, Grad-CAM overlay on the right
        fig, (ax_input, ax_cam) = plt.subplots(1, 2, figsize=(10, 4))
        ax_input.set_title(f"Original - Label: {class_name}")
        ax_input.imshow(img_np)
        ax_input.axis('off')

        ax_cam.set_title(f"Grad-CAM - Predicted: {class_names[pred_class]}")
        ax_cam.imshow(visualization)
        ax_cam.axis('off')
        plt.show()
        plt.close(fig)  # avoid accumulating open figures across classes


# Output: one Grad-CAM explainability sample image per class
(The outputs have been hidden.)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageFilter
import random
import torch
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

# Shared inputs for the perturbation experiment in this cell.
class_names = train_dataset.classes                       # class names, indexed by class id
image_paths, labels = tuple(zip(*test_dataset.samples))   # parallel tuples: file path, class id
# Layer explained by Grad-CAM: projection conv of the last entry in model._blocks
target_layers = [model._blocks[-1]._project_conv]

def add_noise(img_np, mean=0, std=0.1):
    """Return ``img_np`` plus Gaussian noise, clipped to the range [0, 1].

    Args:
        img_np: NumPy array holding the image; the noise has the same shape.
        mean: Mean of the Gaussian noise.
        std: Standard deviation of the Gaussian noise.

    Returns:
        A new array; ``img_np`` itself is not modified. The noise is drawn from
        NumPy's global random state.
    """
    gaussian = np.random.normal(loc=mean, scale=std, size=img_np.shape)
    return (img_np + gaussian).clip(0, 1)

def add_blur(pil_img, radius=4):
    """Return a Gaussian-blurred copy of a PIL image.

    Args:
        pil_img: Image object exposing ``.filter`` (a PIL image in this notebook).
        radius: Blur radius passed to ``ImageFilter.GaussianBlur``. Defaults to 4,
            the value that used to be hard-coded, so existing calls behave the same.

    Returns:
        Whatever ``pil_img.filter`` returns for the Gaussian blur filter.
    """
    return pil_img.filter(ImageFilter.GaussianBlur(radius=radius))

def add_occlusion(img_np, box_size=50):
    """Return a copy of ``img_np`` with one randomly placed box set to zero.

    Args:
        img_np: Image array of shape (H, W) or (H, W, C).
        box_size: Side length of the occluding square in pixels. It is clamped to
            the image height / width, so an image smaller than the box is fully
            occluded instead of raising ``ValueError`` from ``random.randint``.

    Returns:
        A new array with the box zeroed out; the input array is left untouched.
        The box position is drawn from the global ``random`` module (x first,
        then y).
    """
    h, w = img_np.shape[:2]
    box_h, box_w = min(box_size, h), min(box_size, w)
    x = random.randint(0, w - box_w)
    y = random.randint(0, h - box_h)
    # Work on a copy so the caller's array is never modified in place
    occluded = img_np.copy()
    occluded[y:y + box_h, x:x + box_w] = 0
    return occluded

# Show one image per class: original plus noisy / blurred / occluded versions,
# each with its Grad-CAM overlay and the class predicted for that version.

# Seed both RNGs used below (image choice + occlusion box, Gaussian noise) so the
# figures are the same on every run.
random.seed(42)
np.random.seed(42)

# Inference mode is loop-invariant, so set it once
model.eval()

# Build the Grad-CAM object once and let the context manager release its hooks,
# instead of creating a new instance for every class.
with GradCAM(model=model, target_layers=target_layers) as cam:
    for class_idx, class_name in enumerate(class_names):
        print(f"Grad-CAM for {class_name}...")

        class_image_paths = [path for path, label in zip(image_paths, labels) if label == class_idx]
        if not class_image_paths:
            print(f"No images found for class {class_name}")
            continue

        # Select one random image; the context manager closes the file handle
        img_path = random.choice(class_image_paths)
        with Image.open(img_path) as img_file:
            pil_img = img_file.convert("RGB")
        original_img = pil_img.resize((224, 224))
        original_np = np.array(original_img) / 255.0

        # Generate three perturbed versions.
        # Float -> uint8 uses rounding: plain astype() truncates, so a value such as
        # 57.9999 (float error from /255 then *255) would silently become 57.
        noisy_np = add_noise(np.copy(original_np))
        noisy_img = Image.fromarray(np.rint(noisy_np * 255).astype(np.uint8))

        blurred_img = add_blur(original_img)
        blurred_np = np.array(blurred_img) / 255.0

        occluded_np = add_occlusion(np.copy(original_np))
        occluded_img = Image.fromarray(np.rint(occluded_np * 255).astype(np.uint8))

        # (title, float image for display/overlay, PIL image fed to the model)
        versions = [
            ("Original", original_np, original_img),
            ("Noisy", noisy_np, noisy_img),
            ("Blurred", blurred_np, blurred_img),
            ("Occluded", occluded_np, occluded_img)
        ]

        # Top row: images; bottom row: matching Grad-CAM overlays
        fig, axes = plt.subplots(2, 4, figsize=(16, 8))
        for i, (title, img_np, img_pil) in enumerate(versions):
            input_tensor = test_transforms(img_pil).unsqueeze(0).to(device)
            with torch.no_grad():
                pred_class = model(input_tensor).argmax(dim=1).item()

            # Heatmap for the class predicted on this version (needs gradients)
            grayscale_cam = cam(input_tensor=input_tensor, targets=[ClassifierOutputTarget(pred_class)])[0]
            cam_result = show_cam_on_image(img_np, grayscale_cam, use_rgb=True)

            # Display original or perturbed image
            axes[0, i].set_title(f"{title} Image")
            axes[0, i].imshow(img_np)
            axes[0, i].axis('off')

            # Display heatmap
            axes[1, i].set_title(f"{title} Grad-CAM\nPred: {class_names[pred_class]}")
            axes[1, i].imshow(cam_result)
            axes[1, i].axis('off')

        fig.tight_layout()
        plt.show()
        plt.close(fig)  # avoid accumulating open figures across classes
