<a href="https://colab.research.google.com/github/LaurentTits/ResponsibleTrainingDeepLearning/blob/main/CAMmethod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import cv2
import numpy as np
import os
from PIL import Image

# --- CONFIGURATION ---
# Optimisation hyperparameters.
BATCH_SIZE = 32
LR = 1e-4
EPOCHS = 10

# Size of the classification head (mini-ImageNet has 100 classes).
NUM_CLASSES = 100

# Weight applied to the CAM loss term when it is added to the
# classification loss.
ALPHA = 1.0

# Run on the GPU when PyTorch reports one, otherwise on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- DATASET PREPARATION ---
class MiniImageNetDataset(Dataset):
    """Image-folder dataset pairing every image with a bounding-box mask.

    Directory layout read by ``__init__``::

        root/<class_dir>/<stem>.jpg
        root/<class_dir>/<stem>_bbox.npy

    Only images that have a sibling ``<stem>_bbox.npy`` file are kept.

    Attributes:
        data: image paths, in sorted (deterministic) order.
        labels: integer class index for each entry of ``data``.
        bbox_masks: path of the ``.npy`` mask for each entry of ``data``.
    """

    # Extensions treated as images. Everything else -- in particular the
    # ``*_bbox.npy`` mask files that live next to the images -- is skipped.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

    def __init__(self, root, transform=None):
        """Scan ``root`` and index every image that has a mask file.

        Args:
            root: directory containing one sub-directory per class.
            transform: optional callable applied to the PIL image in
                ``__getitem__``.

        Raises:
            FileNotFoundError: if ``root`` does not exist (from ``os.listdir``).
        """
        self.root = root
        self.transform = transform
        self.data = []
        self.labels = []
        self.bbox_masks = []

        # Sort so that sample order does not depend on the filesystem.
        class_names = sorted(
            name for name in os.listdir(root)
            if os.path.isdir(os.path.join(root, name))
        )
        class_to_idx = self._build_class_index(class_names)

        for label in class_names:
            label_dir = os.path.join(root, label)
            class_idx = class_to_idx[label]
            for img_file in sorted(os.listdir(label_dir)):
                stem, ext = os.path.splitext(img_file)
                # Without this filter a ``*_bbox.npy`` file would be
                # registered as an image of itself.
                if ext.lower() not in self.IMAGE_EXTENSIONS:
                    continue
                img_path = os.path.join(label_dir, img_file)
                # Build the mask name from the file stem only, so that a
                # ".jpg" occurring in a directory name is left untouched.
                bbox_path = os.path.join(label_dir, stem + "_bbox.npy")

                if os.path.exists(bbox_path):
                    self.data.append(img_path)
                    self.labels.append(class_idx)
                    self.bbox_masks.append(bbox_path)

    @staticmethod
    def _build_class_index(class_names):
        """Map class directory names to integer class indices.

        Numeric directory names keep their own value. If any name is not
        numeric, all classes are numbered by their position in
        ``class_names`` instead.
        """
        try:
            return {name: int(name) for name in class_names}
        except ValueError:
            return {name: idx for idx, name in enumerate(class_names)}

    def __len__(self):
        """Return the number of indexed (image, mask) pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(img, (label, mask))`` where ``img`` is the RGB image after the
            optional ``transform``, ``label`` is the integer class index and
            ``mask`` is the ``.npy`` content as a float32 tensor.
        """
        img = Image.open(self.data[idx]).convert("RGB")
        label = self.labels[idx]
        # NOTE(review): the mask is returned at its stored resolution; batching
        # presumably requires all masks to share one shape -- confirm.
        bbox = np.load(self.bbox_masks[idx])

        if self.transform:
            img = self.transform(img)

        return img, (label, torch.tensor(bbox, dtype=torch.float32))

# Input pipeline: resize to 224x224, convert to a tensor, then normalise each
# channel with the per-channel mean/std given below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# The root below is a placeholder and must point at an existing directory;
# the dataset constructor lists it immediately.
train_dataset = MiniImageNetDataset(
    "path_to_mini_imagenet/train",
    transform=transform,
)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

# --- VGG16 MODEL WITH CAM ---
class VGGWithCAM(nn.Module):
    """VGG16 feature extractor + global average pooling + linear classifier.

    ``forward`` returns both the class logits and the feature maps produced by
    the convolutional stack, so a class activation map can be computed from
    the feature maps and the weights of ``self.classifier``.
    """

    def __init__(self, num_classes=NUM_CLASSES):
        """Build the model from an ImageNet-pretrained VGG16.

        Args:
            num_classes: number of outputs of the linear classifier.
        """
        super().__init__()
        # torchvision >= 0.13 deprecates ``pretrained=True`` in favour of the
        # ``weights=`` enum; keep the old keyword as a fallback for older
        # installations that do not ship the enum.
        if hasattr(models, "VGG16_Weights"):
            self.vgg = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
        else:
            self.vgg = models.vgg16(pretrained=True)
        # NOTE(review): ``self.vgg`` also keeps torchvision's own classifier,
        # which ``forward`` never uses; it is retained only so the attribute
        # stays available to existing code.
        self.features = self.vgg.features
        # One weight per feature channel and class; 512 is the channel count
        # the pooled feature vector must have.
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return ``(logits, features)`` for a batch of images.

        Args:
            x: image batch fed to the convolutional stack
                (presumably ``(N, 3, H, W)`` -- confirm against the loader).

        Returns:
            logits: ``(N, num_classes)`` classifier outputs.
            features: un-pooled output of ``self.features`` for ``x``.
        """
        features = self.features(x)
        # Global average pooling collapses each channel to a single value.
        pooled = F.adaptive_avg_pool2d(features, (1, 1))
        pooled = torch.flatten(pooled, 1)
        logits = self.classifier(pooled)
        return logits, features

# --- COMPUTE CAM ---
def compute_cam(feature_maps, class_idx, classifier_weight=None):
    """Compute a min-max normalised class activation map for one sample.

    Args:
        feature_maps: tensor whose first entry ``feature_maps[0]`` has shape
            ``(C, H, W)``; only that first entry is used.
        class_idx: index (int or 0-dim tensor) of the class whose classifier
            weights are used to combine the channels.
        classifier_weight: optional ``(num_classes, C)`` weight matrix. When
            omitted, the weights of the module-level ``model.classifier`` are
            used, which requires ``model`` to be defined before the call.

    Returns:
        ``(H, W)`` tensor: the weighted channel sum, passed through ReLU and
        rescaled by its own min/max (a constant map yields all zeros).
    """
    if classifier_weight is None:
        # Backward-compatible default: read the weights from the global model.
        classifier_weight = model.classifier.weight
    weights = classifier_weight[class_idx]
    cam = torch.einsum("chw,c->hw", feature_maps[0], weights)
    cam = F.relu(cam)
    # The epsilon keeps the division finite when the map is constant.
    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-6)
    return cam

# --- PREPROCESS BOUNDING BOX ---
def preprocess_bbox(mask, size=(7, 7), blur_kernel=5):
    """Resize, blur and min-max normalise a bounding-box mask.

    Args:
        mask: 2-D mask as a NumPy array or a ``torch.Tensor`` (any device).
        size: target size handed to ``cv2.resize``.
        blur_kernel: side length of the square Gaussian kernel handed to
            ``cv2.GaussianBlur``.

    Returns:
        float32 tensor of the resized, blurred mask rescaled by its own
        min/max. It is placed on the device of ``mask`` when ``mask`` is a
        tensor, otherwise on the default device.
    """
    device = None
    if isinstance(mask, torch.Tensor):
        # OpenCV works on host NumPy arrays, so bring the tensor back to the
        # CPU and remember where the result has to go.
        device = mask.device
        mask = mask.detach().cpu().numpy()
    # Cast to float32 up front so masks stored as bool or integer arrays are
    # accepted as well.
    mask = np.ascontiguousarray(mask, dtype=np.float32)
    mask = cv2.resize(mask, size, interpolation=cv2.INTER_LINEAR)
    mask = cv2.GaussianBlur(mask, (blur_kernel, blur_kernel), 0)
    # The epsilon keeps the division finite for a constant mask.
    mask = (mask - mask.min()) / (mask.max() - mask.min() + 1e-6)
    return torch.tensor(mask, dtype=torch.float32, device=device)

# --- CUSTOM LOSS FUNCTION ---
class CustomLoss(nn.Module):
    """Cross-entropy loss plus a weighted CAM-vs-bounding-box MSE term."""

    def __init__(self, alpha=ALPHA):
        """Store the weight ``alpha`` applied to the CAM term."""
        super().__init__()
        self.classification_loss = nn.CrossEntropyLoss()
        self.alpha = alpha

    def forward(self, logits, features, targets, gt_bbox_masks):
        """Return ``cls_loss + alpha * mean_cam_mse`` for one batch.

        Args:
            logits: classifier outputs passed to ``nn.CrossEntropyLoss``.
            features: per-sample feature maps; ``features[i]`` is handed to
                ``compute_cam`` together with ``targets[i]``.
            targets: class index per sample.
            gt_bbox_masks: per-sample bounding-box masks (tensors or arrays).

        Returns:
            Scalar loss tensor.
        """
        cls_loss = self.classification_loss(logits, targets)

        cam_losses = []
        for feature_map, target, gt_mask in zip(features, targets, gt_bbox_masks):
            cam = compute_cam(feature_map.unsqueeze(0), target)
            # The mask preprocessing runs through OpenCV, which needs a host
            # NumPy array rather than a (possibly CUDA) tensor.
            if isinstance(gt_mask, torch.Tensor):
                gt_mask = gt_mask.detach().cpu().numpy()
            blurred_bbox = preprocess_bbox(gt_mask)
            # Put the target on the CAM's device/dtype before comparing.
            blurred_bbox = blurred_bbox.to(device=cam.device, dtype=cam.dtype)
            cam_losses.append(F.mse_loss(cam, blurred_bbox))

        # Average the per-sample CAM losses over the batch.
        mse_loss = sum(cam_losses) / len(targets)
        return cls_loss + self.alpha * mse_loss

# --- TRAINING SETUP ---
# Build the network and move it to DEVICE before the optimizer collects its
# parameters.
model = VGGWithCAM(num_classes=NUM_CLASSES)
model = model.to(DEVICE)
optimizer = optim.Adam(params=model.parameters(), lr=LR)
criterion = CustomLoss(ALPHA)

# --- TRAINING LOOP ---
for epoch in range(EPOCHS):
    # Switch the model to training mode at the start of every epoch.
    model.train()
    total_loss = 0

    for images, (labels, gt_masks) in train_loader:
        # Move the whole batch to the device the model lives on.
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        gt_masks = gt_masks.to(DEVICE)

        # One optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        logits, features = model(images)
        loss = criterion(logits, features, labels, gt_masks)
        loss.backward()
        optimizer.step()

        # Accumulate the Python float so no graph is kept alive.
        total_loss += loss.item()

    # Report the mean per-batch loss of this epoch.
    print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {total_loss / len(train_loader):.4f}")


FileNotFoundError: [Errno 2] No such file or directory: 'path_to_mini_imagenet/train'