In [1]:
import torch
import cv2
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
from ultralytics import YOLO

# ---------------- CONFIG ----------------
image_path = "../data/images/test/00000000_jpg.rf.5d9188ebfd1f9ae1b9989c8ffc6bd7c4.jpg"
category = "shark"
classes = ['fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish', 'stingray']
model_path = "../models/fish_yolov86/weights/best.pt"


In [2]:
# Load YOLOv8 model
model = YOLO(model_path)
model.eval()  # inference mode

# Get device (CPU or GPU)
device = next(model.model.parameters()).device
print("Using device:", device)


Using device: cpu


In [3]:
def preprocess_image(img_path):
    img = Image.open(img_path).convert("RGB")
    img_resized = img.resize((640, 640))
    x = torch.tensor(np.array(img_resized)).permute(2,0,1).unsqueeze(0).float() / 255.0
    x.requires_grad = True
    return x, np.array(img)

x, orig_img = preprocess_image(image_path)
x = x.to(device)  # move input to same device as model


In [4]:
feature_maps = {}
gradients = {}

# Pick a YOLOv8 backbone conv layer (first conv block)
target_layer = model.model.model[0]

def forward_hook(module, input, output):
    feature_maps['hooked'] = output

def backward_hook(module, grad_in, grad_out):
    gradients['hooked'] = grad_out[0]

target_layer.register_forward_hook(forward_hook)
target_layer.register_backward_hook(backward_hook)


<torch.utils.hooks.RemovableHandle at 0x7fca45b5ea10>

In [5]:
# Run model.predict() to get real detection scores
dets = model.predict(image_path, verbose=False)[0]  # first image
boxes = dets.boxes
obj_scores = boxes.conf
class_indices = boxes.cls

class_idx = classes.index(category)
mask = (class_indices == class_idx)

if mask.sum() > 0:
    score = (obj_scores[mask]).max().item()
else:
    score = 0.0

print(f"Category '{category}' confidence score: {score:.4f}")


Category 'shark' confidence score: 0.9030


In [7]:
# Move input tensor to the same device as the model
device = next(model.model.parameters()).device
x = x.to(device)

# Forward pass through YOLO model
outputs = model.model(x)

# Backward pass: ensure scalar_score is on same device
model.model.zero_grad()
scalar_score = torch.tensor(score, requires_grad=True).to(device)
scalar_score.backward(retain_graph=True)


  self._maybe_warn_non_full_backward_hook(args, result, grad_fn)


RuntimeError: Cannot use both regular backward hooks and full backward hooks on a single Module. Please use only one of them.

In [8]:
fmap = feature_maps['hooked'][0]  # [C,H,W]
grad = gradients['hooked'][0]     # [C,H,W]

# Global average pooling of gradients
weights = torch.mean(grad, dim=(1,2))

# Weighted combination of feature maps
gradcam_map = torch.zeros(fmap.shape[1:], dtype=torch.float32, device=device)
for i in range(weights.shape[0]):
    gradcam_map += weights[i] * fmap[i]

# ReLU and normalize
gradcam_map = torch.relu(gradcam_map)
gradcam_map -= gradcam_map.min()
gradcam_map /= gradcam_map.max()

# Resize to original image size
gradcam_resized = cv2.resize(gradcam_map.detach().cpu().numpy(), (orig_img.shape[1], orig_img.shape[0]))


KeyError: 'hooked'

In [None]:
def overlay_heatmap(img, heatmap, alpha=0.4):
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    superimposed_img = cv2.addWeighted(img, alpha, heatmap, 1 - alpha, 0)
    return superimposed_img

output_img = overlay_heatmap(orig_img, gradcam_resized)
plt.figure(figsize=(8,8))
plt.imshow(output_img)
plt.axis('off')
plt.title(f"GradCAM: {category}, Score: {score:.4f}")
plt.show()

# Optional: save
cv2.imwrite("heatmap/yolov8_gradcam.jpg", cv2.cvtColor(output_img, cv2.COLOR_RGB2BGR))


In [2]:
import cv2
import torch
from ultralytics import YOLO

# ==========================
# CONFIG
# ==========================
image_path = "../data/images/test/00000000_jpg.rf.5d9188ebfd1f9ae1b9989c8ffc6bd7c4.jpg"
target_class_name = "penguin"

# Load YOLOv8 model
model = YOLO("../models/fish_yolov86/weights/best.pt")
device = next(model.model.parameters()).device

# --------------------------
# Forward hook for GradCAM
# --------------------------
layer_name = "model.16.conv"
target_layer = dict(model.model.named_modules())[layer_name]
activations, gradients = {}, {}

def forward_hook(module, input, output):
    activations['feat'] = output

def backward_hook(module, grad_input, grad_output):
    gradients['grad'] = grad_output[0]

f_hook = target_layer.register_forward_hook(forward_hook)
b_hook = target_layer.register_full_backward_hook(backward_hook)

# --------------------------
# Detect object and crop bbox
# --------------------------
results = model.predict(image_path, imgsz=640)
names = model.names

bbox = None
for det in results[0].boxes:
    cls_id = int(det.cls[0])
    if names[cls_id] == target_class_name:
        bbox = det.xyxy[0].cpu().numpy().astype(int)
        break

if bbox is None:
    raise ValueError(f"'{target_class_name}' not detected!")

# Crop bbox
img = cv2.imread(image_path)
x1, y1, x2, y2 = bbox
cropped_img = img[y1:y2, x1:x2]

# --------------------------
# Preprocess for model
# --------------------------
cropped_rgb = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB)
cropped_rgb = cv2.resize(cropped_rgb, (640, 640))
x = torch.from_numpy(cropped_rgb).float() / 255.0
x = x.permute(2,0,1).unsqueeze(0).to(device)
x.requires_grad_(True)

# --------------------------
# Forward & backward
# --------------------------
outputs = model.model(x)  # direct forward
score = outputs[0].mean()  # simple scalar for GradCAM
model.model.zero_grad()
score.backward()  # now gradients are stored

# --------------------------
# Compute GradCAM score (mean of gradients)
# --------------------------
grad_map = gradients['grad'][0]  # [C,H,W]
feat_map = activations['feat'][0]  # [C,H,W]
pooled_grads = torch.mean(grad_map, dim=(1,2))
gradcam_score = torch.sum(pooled_grads * torch.mean(feat_map, dim=(1,2))).item()

print(f"GradCAM score for '{target_class_name}': {gradcam_score:.6f}")

# Remove hooks
f_hook.remove()
b_hook.remove()



image 1/1 /workspace/Jaasia/Fish_Detection/explainable-ai/../data/images/test/00000000_jpg.rf.5d9188ebfd1f9ae1b9989c8ffc6bd7c4.jpg: 448x640 1 shark, 24.5ms
Speed: 1.0ms preprocess, 24.5ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)


ValueError: 'penguin' not detected!

In [2]:
for name, layer in model.model.named_modules():
    if isinstance(layer, torch.nn.Conv2d):
        print(name, layer)


model.0.conv Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
model.1.conv Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
model.2.cv1.conv Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
model.2.cv2.conv Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
model.2.m.0.cv1.conv Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.2.m.0.cv2.conv Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.2.m.1.cv1.conv Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.2.m.1.cv2.conv Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.3.conv Conv2d(96, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
model.4.cv1.conv Conv2d(192, 192, kernel_size=(1, 1), stride=(1, 1))
model.4.cv2.conv Conv2d(576, 192, kernel_size=(1, 1), stride=(1, 1))
model.4.m.0.cv1.conv Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.4.m.0.cv2.conv Conv2d(96, 96, kernel_size=(3, 3),