In [1]:
# Import Libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from torchcam.methods import GradCAM
from torchcam.utils import overlay_mask
from PIL import Image
# import os

# Enable synchronous CUDA for debugging
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# Verify CUDA setup
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Device Name: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


CUDA Available: True
CUDA Device Name: NVIDIA GeForce RTX 4050 Laptop GPU


In [2]:
# Data Loading and Preprocessing
# Assuming CIFAR-10; replace with actual dataset from the provided link
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

In [3]:
# Define CNN Architecture
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        # Input: 3x32x32 (RGB images)
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)  # Output: 32x32x32
        self.pool = nn.MaxPool2d(2, 2)               # Output: 32x16x16
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1) # Output: 64x16x16
        # Pooling: 64x8x8
        self.fc1 = nn.Linear(64 * 8 * 8, 128)        # Fully connected layer
        self.fc2 = nn.Linear(128, 10)                # Output: 10 classes
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # No softmax here; applied in loss function
        return x



# Justification:
 - 32 filters in conv1 to capture basic features (edges, textures).
 - 64 filters in conv2 for more complex patterns.
 - 3x3 kernels are standard, with padding to preserve spatial dimensions.
 - Max pooling reduces size, controlling overfitting.
 - FC layers reduce features to 128, then to 10 classes.


In [4]:
# Initialize Model, Loss, and Optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [5]:
import torch
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Device Count: {torch.cuda.device_count()}")
print(f"CUDA Device Name: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

CUDA Available: True
CUDA Device Count: 1
CUDA Device Name: NVIDIA GeForce RTX 4050 Laptop GPU


In [6]:
# Training Loop
num_epochs = 5
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {running_loss / len(trainloader):.3f}')


Epoch 1, Loss: 1.348
Epoch 2, Loss: 0.970
Epoch 3, Loss: 0.810
Epoch 4, Loss: 0.703
Epoch 5, Loss: 0.601


In [None]:

# Evaluation
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy on test set: {100 * correct / total:.2f}%')


Accuracy on test set: 70.75%


: 

In [None]:

# Grad-CAM Visualization
# Choose Grad-CAM because it works with any CNN layer and highlights class-specific regions
cam_extractor = GradCAM(model, target_layer='conv2')  # Focus on last conv layer
sample_image, sample_label = next(iter(testloader))
sample_image, sample_label = sample_image[0].unsqueeze(0).to(device), sample_label[0].item()

# Forward pass
model.eval()
out = model(sample_image)
pred_class = out.argmax().item()

# Generate CAM
cam = cam_extractor(class_idx=pred_class, scores=out)
cam = cam[0].cpu().numpy()  # First image in batch

# Normalize CAM
cam = (cam - cam.min()) / (cam.max() - cam.min())

# Overlay CAM on original image
sample_img_np = sample_image[0].cpu().numpy().transpose(1, 2, 0)  # CHW to HWC
sample_img_np = (sample_img_np * 0.5 + 0.5)  # Denormalize

plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.imshow(sample_img_np)
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(sample_img_np)
plt.imshow(cam, cmap='jet', alpha=0.5)
plt.title(f'Grad-CAM (Class {pred_class})')
plt.axis('off')
plt.show()

# Grad-CAM Explanation:
# - Math: Grad-CAM uses gradients of the target class score w.r.t. feature maps,
#   weighted by global average pooling, to highlight important regions.
# - Choice: More flexible than vanilla CAM, applicable to any layer, robust for visualization.

TypeError: Invalid shape (1, 16, 16) for image data