In [None]:
# Mount Google Drive at /content/drive; the archive and extraction paths used
# by later cells live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
pip install grad-cam

Collecting grad-cam
  Using cached grad-cam-1.5.4.tar.gz (7.8 MB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ttach (from grad-cam)
  Downloading ttach-0.0.3-py3-none-any.whl.metadata (5.2 kB)
Downloading ttach-0.0.3-py3-none-any.whl (9.8 kB)
Building wheels for collected packages: grad-cam
  Building wheel for grad-cam (pyproject.toml) ... [?25l[?25hdone
  Created wheel for grad-cam: filename=grad_cam-1.5.4-py3-none-any.whl size=39648 sha256=afd171562d8aaa7dbccc27fd07b98859b740b08b451a34dc513a9ef0932717ec
  Stored in directory: /root/.cache/pip/wheels/50/b0/82/1f97b5348c7fe9f0ce0ba18497202cafa5dec4562bd5292680
Successfully built grad-cam
Installing collected packages: ttach, grad-cam
Successfully installed grad-cam-1.5.4 ttach-0.0.3


In [None]:
pip install --upgrade grad-cam



In [None]:
import os
import tarfile
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Locations on the mounted Drive: the compressed archive and the folder the
# images are expected to end up in.
tar_path = r'/content/drive/MyDrive/CSE366/1/101_ObjectCategories.tar.gz'
extract_path = r'/content/drive/MyDrive/CSE366/2/101_ObjectCategories'

# Unpack only once: if the target folder already exists the archive is left alone.
if os.path.exists(extract_path):
    print("Dataset already extracted.")
else:
    # The archive is unpacked into the *parent* of extract_path
    # (presumably its top-level folder is '101_ObjectCategories' -- verify).
    destination = os.path.dirname(extract_path)
    with tarfile.open(tar_path, mode='r:gz') as archive:
        archive.extractall(path=destination)
    print("Extraction complete.")

# Root folder that every later cell reads the images from.
root_dir = extract_path

Extraction complete.


In [None]:
# Inspect the dataset structure: list the entries directly under root_dir and
# show how many there are plus the first five names.
classes = os.listdir(root_dir)
num_found, preview = len(classes), classes[:5]
print(f"Found {num_found} classes: {preview}...")


Found 102 classes: ['BACKGROUND_Google', 'Faces', 'Faces_easy', 'Leopards', 'Motorbikes']...


In [None]:
# Drop the 'BACKGROUND_Google' entry from the in-memory `classes` list.
# NOTE: this only edits the Python list; the folder itself stays on disk, so
# anything that scans root_dir directly has to exclude it separately.
try:
    position = classes.index('BACKGROUND_Google')
except ValueError:
    # Entry not present: nothing to exclude, nothing to report.
    position = None

if position is not None:
    print("Excluding 'BACKGROUND_Google' class.")
    del classes[position]

Excluding 'BACKGROUND_Google' class.


In [None]:
# Transformations
# Augmentation + preprocessing pipeline applied to every image loaded with
# `transform`. The order of the random steps is kept exactly as-is because it
# determines how the random number stream is consumed.
transform = transforms.Compose(
    [
        # Fixed input resolution.
        transforms.Resize(size=(128, 128)),
        # Random geometric changes.
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(degrees=30),
        # Random photometric change.
        transforms.ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4,
            hue=0.1,
        ),
        # A second, milder geometric perturbation (rotate / shift / zoom).
        transforms.RandomAffine(
            degrees=15,
            translate=(0.1, 0.1),
            scale=(0.9, 1.1),
        ),
        transforms.RandomGrayscale(p=0.2),
        # PIL image -> float tensor, then per-channel standardisation with the
        # mean/std values conventionally used for ImageNet-pretrained models.
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
from PIL import Image
import os

# Sanity-check every file below root_dir: try to open it with PIL and run
# PIL's integrity check. Files that fail are only reported, never removed.
for folder, _subfolders, filenames in os.walk(root_dir):
    for filename in filenames:
        try:
            img_path = os.path.join(folder, filename)
            with Image.open(img_path) as candidate:
                candidate.verify()  # raises if the file is not a readable image
        except Exception as e:
            print(f"Invalid file: {img_path}, Error: {e}")

In [None]:
# Load dataset
class Caltech101Folder(datasets.ImageFolder):
    """ImageFolder that leaves out the 'BACKGROUND_Google' clutter directory.

    A plain ImageFolder turns every sub-directory of the root into a class, so
    the background folder would silently become class 0. Overriding
    `find_classes` removes it before any files are indexed and keeps the
    remaining class indices contiguous (0 .. n-1).
    """

    EXCLUDED_CLASSES = frozenset({'BACKGROUND_Google'})

    def find_classes(self, directory):
        """Return (class names, class->index map) without the excluded folders."""
        found, _ = super().find_classes(directory)
        kept = [name for name in found if name not in self.EXCLUDED_CLASSES]
        return kept, {name: index for index, name in enumerate(kept)}


# Deterministic preprocessing for validation / test: same resize and
# normalisation as the training pipeline, but without any random augmentation,
# so evaluation numbers are comparable between runs.
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Two views of the same files: `dataset` applies the augmenting `transform`
# (training), `eval_dataset` applies the deterministic one (validation / test).
# This scans the folder twice; the assertion guards against the two scans
# disagreeing about the sample order.
dataset = Caltech101Folder(root=root_dir, transform=transform)
eval_dataset = Caltech101Folder(root=root_dir, transform=eval_transform)
assert dataset.samples == eval_dataset.samples, "dataset views are out of sync"

# Verify class mappings
print(f"Class-to-index mapping: {dataset.class_to_idx}")

# Split dataset (80 / 10 / 10). The generator is seeded so the same images end
# up in the same split on every run.
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
split_generator = torch.Generator().manual_seed(42)
train_data, val_split, test_split = random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)
assert len(train_data) + len(val_split) + len(test_split) == len(dataset)

# Validation / test reuse the indices chosen above but read the images through
# the non-augmented view.
val_data = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_data = torch.utils.data.Subset(eval_dataset, test_split.indices)

# Data loaders (only the training data is shuffled)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32)
test_loader = DataLoader(test_data, batch_size=32)

Class-to-index mapping: {'BACKGROUND_Google': 0, 'Faces': 1, 'Faces_easy': 2, 'Leopards': 3, 'Motorbikes': 4, 'accordion': 5, 'airplanes': 6, 'anchor': 7, 'ant': 8, 'barrel': 9, 'bass': 10, 'beaver': 11, 'binocular': 12, 'bonsai': 13, 'brain': 14, 'brontosaurus': 15, 'buddha': 16, 'butterfly': 17, 'camera': 18, 'cannon': 19, 'car_side': 20, 'ceiling_fan': 21, 'cellphone': 22, 'chair': 23, 'chandelier': 24, 'cougar_body': 25, 'cougar_face': 26, 'crab': 27, 'crayfish': 28, 'crocodile': 29, 'crocodile_head': 30, 'cup': 31, 'dalmatian': 32, 'dollar_bill': 33, 'dolphin': 34, 'dragonfly': 35, 'electric_guitar': 36, 'elephant': 37, 'emu': 38, 'euphonium': 39, 'ewer': 40, 'ferry': 41, 'flamingo': 42, 'flamingo_head': 43, 'garfield': 44, 'gerenuk': 45, 'gramophone': 46, 'grand_piano': 47, 'hawksbill': 48, 'headphone': 49, 'hedgehog': 50, 'helicopter': 51, 'ibis': 52, 'inline_skate': 53, 'joshua_tree': 54, 'kangaroo': 55, 'ketch': 56, 'lamp': 57, 'laptop': 58, 'llama': 59, 'lobster': 60, 'lotus'

In [None]:
# Step 2: Model Selection
# ImageNet-pretrained ResNet-50 whose final fully connected layer is replaced
# by a fresh linear head sized to the classes detected in `dataset`.
num_classes = len(dataset.classes)

# `pretrained=True` is deprecated in torchvision; the weights enum below selects
# the same checkpoint that `pretrained=True` used to load.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, num_classes)  # Adjust for detected classes


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 128MB/s]


In [None]:
# Step 3: Training
NUM_EPOCHS = 10
LOG_EVERY = 50  # print progress every N batches instead of on every batch

# Move the model to its device first, then build the optimizer over the
# parameters that live there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_batches = len(train_loader)

for epoch in range(NUM_EPOCHS):
    model.train()
    train_loss = 0.0
    batches_done = 0     # batches that contributed to train_loss
    batches_skipped = 0  # batches whose forward/backward pass raised

    for batch_idx, (images, labels) in enumerate(train_loader):
        if (batch_idx + 1) % LOG_EVERY == 0 or batch_idx + 1 == num_batches:
            print(f"Epoch {epoch + 1}: processed batch {batch_idx + 1}/{num_batches}")
        try:
            images, labels = images.to(device), labels.to(device)

            # zero_grad() at the top also clears gradients left behind by a
            # batch that failed half-way through its backward pass.
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        except Exception as e:
            # Best effort: keep training, but record the failure so that it is
            # visible in the epoch summary and does not distort the mean loss.
            batches_skipped += 1
            print(f"Skipping problematic batch {batch_idx + 1}: {e}")
            continue

        train_loss += loss.item()
        batches_done += 1

    if batches_done == 0:
        raise RuntimeError(
            f"Epoch {epoch + 1}: no batch was processed successfully "
            f"({batches_skipped} skipped)."
        )

    # Average over the batches that actually ran, not over len(train_loader).
    summary = f"Epoch {epoch + 1}, Loss: {train_loss / batches_done:.4f}"
    if batches_skipped:
        summary += f" ({batches_skipped} batch(es) skipped)"
    print(summary)


Processing batch 1/229
Processing batch 2/229
Processing batch 3/229
Processing batch 4/229
Processing batch 5/229
Processing batch 6/229
Processing batch 7/229
Processing batch 8/229
Processing batch 9/229
Processing batch 10/229
Processing batch 11/229
Processing batch 12/229
Processing batch 13/229
Processing batch 14/229
Processing batch 15/229
Processing batch 16/229
Processing batch 17/229
Processing batch 18/229
Processing batch 19/229
Processing batch 20/229
Processing batch 21/229
Processing batch 22/229
Processing batch 23/229
Processing batch 24/229
Processing batch 25/229
Processing batch 26/229
Processing batch 27/229
Processing batch 28/229
Processing batch 29/229
Processing batch 30/229
Processing batch 31/229
Processing batch 32/229
Processing batch 33/229
Processing batch 34/229
Processing batch 35/229
Processing batch 36/229
Processing batch 37/229
Processing batch 38/229
Processing batch 39/229
Processing batch 40/229
Processing batch 41/229
Processing batch 42/229
P

In [None]:
# Step 4: Validation
# One pass over val_loader without gradient tracking; reports the mean of the
# per-batch losses and the fraction of correctly classified samples.
model.eval()
val_loss, val_correct, val_total = 0.0, 0, 0

with torch.no_grad():
    for batch_images, batch_labels in val_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        logits = model(batch_images)

        val_loss += criterion(logits, batch_labels).item()
        # Predicted class = index of the largest logit in each row.
        hits = logits.argmax(dim=1) == batch_labels
        val_correct += hits.sum().item()
        val_total += batch_labels.size(0)

mean_val_loss = val_loss / len(val_loader)
val_accuracy = val_correct / val_total
print(f"Validation Loss: {mean_val_loss:.4f}, Accuracy: {val_accuracy:.4f}")


In [None]:
# Step 5: Testing
# Collect ground-truth and predicted class indices over the whole test loader,
# then summarise them with scikit-learn.
model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        logits = model(batch_images.to(device))
        predicted = logits.argmax(dim=1)
        y_true.extend(batch_labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

# Confusion matrix and classification report
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", cm)
report = classification_report(y_true, y_pred)
print("Classification Report:\n", report)

In [None]:
# Step 6: t-SNE Visualization
# Project the model's outputs for the test set to 2-D and colour each point by
# its ground-truth class index.
model.eval()  # do not rely on an earlier cell having left the model in eval mode
features = []
labels_list = []

with torch.no_grad():
    for images, labels in test_loader:
        output = model(images.to(device))
        features.append(output.cpu())
        labels_list.append(labels)

features = torch.cat(features).numpy()
labels_list = torch.cat(labels_list).numpy()

tsne = TSNE(n_components=2, random_state=42)
reduced_features = tsne.fit_transform(features)

# 'tab10' only has ten colours, so with roughly a hundred classes many of them
# would share one colour; a wide-range colormap keeps neighbouring class
# indices distinguishable.
fig, ax = plt.subplots(figsize=(10, 8))
points = ax.scatter(
    reduced_features[:, 0],
    reduced_features[:, 1],
    c=labels_list,
    cmap='nipy_spectral',
    s=12,
)
fig.colorbar(points, ax=ax, label='Class index')
ax.set(
    title='t-SNE of model outputs on the test set',
    xlabel='t-SNE dimension 1',
    ylabel='t-SNE dimension 2',
)
plt.show()


In [None]:
# Step 7: Grad-CAM Visualization
# Overlay a Grad-CAM heat-map (last block of ResNet layer4) on the first image
# of the first test batch, using that image's ground-truth label as the target.

# Per-channel statistics matching the Normalize step of the input pipeline;
# used to undo the normalisation for display.
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

# The context manager releases the hooks GradCAM registers on the model.
with GradCAM(model=model, target_layers=[model.layer4[-1]]) as cam:
    for images, labels in test_loader:
        # Only the first image is visualised, so only that image is sent
        # through the network: one input, one target.
        input_tensor = images[:1].to(device)
        targets = [ClassifierOutputTarget(labels[0].item())]

        try:
            # CHW tensor -> HWC array, then invert the normalisation to recover
            # true colours. Clipping guarantees the [0, 1] float range that
            # show_cam_on_image expects (min-max rescaling would distort the
            # colours and divide by zero on a constant image).
            input_image = input_tensor[0].permute(1, 2, 0).cpu().numpy()
            input_image = np.clip(input_image * IMAGENET_STD + IMAGENET_MEAN, 0.0, 1.0)

            grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0]
            cam_image = show_cam_on_image(input_image, grayscale_cam, use_rgb=True)
            plt.imshow(cam_image)
            plt.title(f"Grad-CAM for Class: {labels[0].item()}")
            plt.axis('off')
            plt.show()
        except Exception as e:
            print(f"Error during Grad-CAM generation: {e}")
        break  # a single example is enough