In [1]:
import torch
import cv2
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
from ultralytics import YOLO

# ---------------- CONFIG ----------------
# Image to explain and the class whose detection score is inspected.
image_path = "../data/images/test/00000000_jpg.rf.5d9188ebfd1f9ae1b9989c8ffc6bd7c4.jpg"
category = "shark"

# Class names; the list position of a name is used as its class index.
classes = [
    'fish',
    'jellyfish',
    'penguin',
    'puffin',
    'shark',
    'starfish',
    'stingray',
]

# Trained YOLOv8 checkpoint.
model_path = "../models/fish_yolov86/weights/best.pt"


In [2]:
# Load YOLOv8 model from the trained checkpoint
model = YOLO(model_path)

# Put the underlying torch module in inference mode. This is called on
# `model.model` rather than on the YOLO wrapper so the switch does not depend
# on how the installed ultralytics release implements `eval()`/`train()` on
# the wrapper.
# NOTE(review): in some ultralytics releases the wrapper overrides `train()`
# as the training entry point, which `nn.Module.eval()` would invoke —
# confirm against the installed version.
model.model.eval()

# Get device (CPU or GPU) by reading it off the first model parameter
device = next(model.model.parameters()).device
print("Using device:", device)


Using device: cpu


In [3]:
def preprocess_image(img_path, size=(640, 640)):
    """Load an image and convert it into a gradient-enabled input tensor.

    Args:
        img_path: Path (or file object) accepted by ``PIL.Image.open``.
        size: ``(width, height)`` the image is resized to before being
            converted to a tensor. Defaults to ``(640, 640)``.

    Returns:
        tuple: ``(x, orig)`` where ``x`` is a float32 tensor of shape
        ``(1, 3, height, width)`` scaled to ``[0, 1]`` with
        ``requires_grad=True``, and ``orig`` is the un-resized RGB image as
        a NumPy array.
    """
    # The context manager releases the file handle once the pixels have been
    # decoded; convert() returns an independent in-memory image.
    with Image.open(img_path) as src:
        img = src.convert("RGB")
    img_resized = img.resize(size)
    # HWC uint8 -> 1xCxHxW float in [0, 1]
    x = torch.tensor(np.array(img_resized)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
    # Track gradients on the input so backward passes reach the hooked layer.
    x.requires_grad = True
    return x, np.array(img)

x, orig_img = preprocess_image(image_path)
# Move the input to the model's device first, then (re-)mark it as requiring
# grad. Calling .to(device) on a tensor that already requires grad yields a
# non-leaf copy on any device other than the source one, and non-leaf tensors
# do not get `.grad` populated by backward().
x = x.detach().to(device).requires_grad_(True)


In [4]:
# Filled in by the hooks below; both use the single key 'hooked'.
feature_maps = {}
gradients = {}

# Pick a YOLOv8 backbone conv layer (first conv block)
target_layer = model.model.model[0]

def forward_hook(module, inputs, output):
    """Store the hooked layer's forward output in ``feature_maps['hooked']``."""
    feature_maps['hooked'] = output

def backward_hook(module, grad_in, grad_out):
    """Store the gradient w.r.t. the hooked layer's output in ``gradients['hooked']``."""
    gradients['hooked'] = grad_out[0]

# Remove hooks left behind by a previous run of this cell; otherwise every
# re-run stacks another pair of hooks on the same layer.
for _stale_handle in globals().get("hook_handles", []):
    _stale_handle.remove()

# register_full_backward_hook replaces the deprecated register_backward_hook,
# which is not reliable for modules made of several autograd ops.
# Handles are kept so the hooks can be removed later with handle.remove().
hook_handles = [
    target_layer.register_forward_hook(forward_hook),
    target_layer.register_full_backward_hook(backward_hook),
]


<torch.utils.hooks.RemovableHandle at 0x7f44ddfa6310>

In [5]:
# Run the regular prediction pipeline and keep the result for the first
# (only) image so real detection confidences can be inspected.
dets = model.predict(image_path, verbose=False)[0]
boxes = dets.boxes
obj_scores, class_indices = boxes.conf, boxes.cls

# Select the detections whose predicted class is the configured category.
class_idx = classes.index(category)
mask = class_indices == class_idx

# Highest confidence among matching detections, or 0.0 when there are none.
score = obj_scores[mask].max().item() if mask.any() else 0.0

print(f"Category '{category}' confidence score: {score:.4f}")


Category 'shark' confidence score: 0.9030


In [2]:
# Print the qualified name and configuration of every Conv2d in the network,
# to help choose a layer to hook.
for name, layer in model.model.named_modules():
    if not isinstance(layer, torch.nn.Conv2d):
        continue
    print(name, layer)


model.0.conv Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
model.1.conv Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
model.2.cv1.conv Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
model.2.cv2.conv Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
model.2.m.0.cv1.conv Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.2.m.0.cv2.conv Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.2.m.1.cv1.conv Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.2.m.1.cv2.conv Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.3.conv Conv2d(96, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
model.4.cv1.conv Conv2d(192, 192, kernel_size=(1, 1), stride=(1, 1))
model.4.cv2.conv Conv2d(576, 192, kernel_size=(1, 1), stride=(1, 1))
model.4.m.0.cv1.conv Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
model.4.m.0.cv2.conv Conv2d(96, 96, kernel_size=(3, 3),