# Preliminary


Here we install all imports and other necessary components.


## Imports


In [83]:
!pip install ultralytics
!pip install imageio
!pip install numpy
!pip install opencv-python
!pip install tqdm

import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.2.80 🚀 Python-3.10.1 torch-2.4.0 CPU (Apple M1)
Setup complete ✅ (8 CPUs, 16.0 GB RAM, 284.5/460.4 GB disk)


In [84]:
import os
import cv2
import glob
import numpy as np
import xml.etree.ElementTree as ET

from IPython import display

display.clear_output()

from ultralytics import YOLO
from IPython.display import display, Image
from pathlib import Path
from tqdm import tqdm

In [4]:
# Check what GPU is available
!nvidia-smi

zsh:1: command not found: nvidia-smi


## Directories


In [5]:
BASE_DIR = Path("/Users/jan/Documents/code/cv/project")
VIDEO_DIR = BASE_DIR / "data/fishclef_2015_release/training_set/videos"
GT_DIR = BASE_DIR / "data/fishclef_2015_release/training_set/gt"
IMG_DIR = BASE_DIR / "train_img/"
GMM_DIR = BASE_DIR / "train_gmm/"
OPTICAL_DIR = BASE_DIR / "train_optical/"
GMM_OPTICAL_DIR = BASE_DIR / "train_gmm_optical/"

SPECIES_LIST = [
    "abudefduf vaigiensis",
    "acanthurus nigrofuscus",
    "amphiprion clarkii",
    "chaetodon lununatus",
    "chaetodon speculum",
    "chaetodon trifascialis",
    "chromis chrysura",
    "dascyllus aruanus",
    "dascyllus reticulatus",
    "hemigumnus malapterus",
    "myripristis kuntee",
    "neoglyphidodon nigroris",
    "pempheris vanicolensis",
    "plectrogly-phidodon dickii",
    "zebrasoma scopas",
]

UNKNOWN_LABEL = 15

FOREGROUND_DETECTOR_PARAMS = {
    "history": 250,
    "varThreshold": 16,
    "detectShadows": True,
}
BLOB_ANALYSIS_PARAMS = {"min_area": 200}
STRUCTURING_ELEMENT_OPEN = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
STRUCTURING_ELEMENT_CLOSE = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
FRAME_RESIZE = (640, 640)
FRAME_ADJUST_GAMMA = 1.5
FARNEBACK_PARAMS = {
    "pyr_scale": 0.5,
    "levels": 3,
    "winsize": 15,
    "iterations": 3,
    "poly_n": 5,
    "poly_sigma": 1.2,
    "flags": 0,
}

---


# Create training data


In [45]:
if os.path.exists(IMG_DIR) == False:
    os.mkdir(IMG_DIR)

if os.path.exists(GMM_DIR) == False:
    os.mkdir(GMM_DIR)

if os.path.exists(OPTICAL_DIR) == False:
    os.mkdir(OPTICAL_DIR)

if os.path.exists(GMM_OPTICAL_DIR) == False:
    os.mkdir(GMM_OPTICAL_DIR)

In [12]:
def adjust_gamma(image, gamma=1.0):
    """
    Adjust the gamma of an image.

    Parameters:
    - image (numpy.ndarray): The input image on which gamma correction is to be applied.
    - gamma (float): The gamma value for correction. Default is 1.0. Values less than 1.0 will make the image darker,
                     while values greater than 1.0 will make the image lighter.

    Returns:
    - numpy.ndarray: The gamma-corrected image.
    """
    # Calculate the inverse of the gamma value
    invGamma = 1.0 / gamma

    # Build a lookup table mapping pixel values [0, 255] to their adjusted gamma values
    table = np.array([(i / 255.0) ** invGamma * 255 for i in range(256)], dtype="uint8")

    # Apply the gamma correction using the lookup table
    return cv2.LUT(image, table)

In [None]:
def save_gmm_annotation(annotation_filename, bboxes, image_width, image_height):
    """
    Save annotations in YOLO format for Gaussian Mixture Model (GMM) detected bounding boxes.

    Parameters:
    - annotation_filename (str): The file to save annotations.
    - bboxes (list of tuples): List of bounding boxes, where each bounding box is represented as a tuple (x, y, width, height).
    - image_width (int): Width of the image.
    - image_height (int): Height of the image.

    Returns:
    - None
    """
    # Open the annotation file in write mode
    with open(annotation_filename, "w") as file:
        # Iterate over each bounding box
        for x, y, width, height in bboxes:
            # Normalize the coordinates
            x_center = (x + width / 2.0) / image_width
            y_center = (y + height / 2.0) / image_height
            width /= image_width
            height /= image_height

            # Write the normalized coordinates to the file in YOLO format
            file.write(f"0 {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

In [None]:
def save_annotation(name, annotation_file_path, bboxes, image_width, image_height):
    """
    Save annotations in YOLO format for each frame.

    Parameters:
    - name (str): Base name for the annotation files.
    - annotation_file_path (Path): Path object representing the directory to save annotation files.
    - bboxes (list of dict): List of bounding boxes, where each bounding box is represented as a dictionary with keys 'frame_id', 'fish_species', 'x', 'y', 'w', 'h'.
    - image_width (int): Width of the image.
    - image_height (int): Height of the image.

    Returns:
    - None
    """
    # Group bounding boxes by frame_id
    frame_bboxes = {}
    for bbox in bboxes:
        frame_id = bbox["frame_id"]
        frame_bboxes.setdefault(frame_id, []).append(bbox)

    # Iterate over each frame and save annotations
    for frame_id, bboxes in frame_bboxes.items():
        # Create a unique file name for each frame
        frame_annotation_file = annotation_file_path / f"{name}_{frame_id:04d}.txt"

        # Open the annotation file in write mode
        with open(frame_annotation_file, "w") as file:
            # Iterate over each bounding box in the frame
            for fish in bboxes:
                fish_species = fish.get("fish_species", "").lower()
                x, y, width, height = (
                    fish.get("x", 0),
                    fish.get("y", 0),
                    fish.get("w", 0),
                    fish.get("h", 0),
                )

                # Normalize the coordinates
                x_center = (x + width / 2.0) / image_width
                y_center = (y + height / 2.0) / image_height
                width /= image_width
                height /= image_height

                # Determine the species index
                species_index = (
                    SPECIES_LIST.index(fish_species)
                    if fish_species in SPECIES_LIST
                    else UNKNOWN_LABEL
                )

                # Write the normalized coordinates to the file in YOLO format
                file.write(
                    f"{species_index} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
                )

In [None]:
def extract_ground_truth(video_path):
    """
    Extract ground truth annotations from an XML file corresponding to a video.

    Parameters:
    - video_path (Path): Path object representing the path to the video file.

    Returns:
    - list of dict: A list of dictionaries, where each dictionary contains the ground truth annotations for a frame.
      Each dictionary has the following keys:
        - frame_id (int): The ID of the frame.
        - fish_species (str): The species of the fish.
        - x (int): The x-coordinate of the bounding box.
        - y (int): The y-coordinate of the bounding box.
        - w (int): The width of the bounding box.
        - h (int): The height of the bounding box.
    """
    # Extract the file name without extension from the video path
    file_name_without_ext = video_path.stem

    # Construct the path to the ground truth XML file
    gt_xml_path = GT_DIR / f"{file_name_without_ext}.xml"

    # Check if the ground truth XML file exists
    if not gt_xml_path.exists():
        print(f"Ground truth XML not found: {gt_xml_path}")
        return []

    # Parse the XML file
    tree = ET.parse(gt_xml_path)
    root = tree.getroot()

    # Initialize an empty list to store ground truth annotations
    ground_truth = []

    # Iterate over each frame element in the XML
    for frame in root.findall("frame"):
        frame_id = int(frame.get("id"))

        # Iterate over each object element within the frame
        for obj in frame.findall("object"):
            # Append the ground truth annotation to the list
            ground_truth.append(
                {
                    "frame_id": frame_id,
                    "fish_species": obj.get("fish_species"),
                    "x": int(obj.get("x")),
                    "y": int(obj.get("y")),
                    "w": int(obj.get("w")),
                    "h": int(obj.get("h")),
                }
            )

    return ground_truth

In [None]:
def apply_gmm(frame, frame_idx, gmm_dir, foreground_detector):
    """
    Apply Gaussian Mixture Model (GMM) to a video frame to detect foreground objects and save the results.

    Parameters:
    - frame (numpy.ndarray): The input video frame.
    - frame_idx (int): The index of the current frame.
    - gmm_dir (Path): Path object representing the directory to save GMM results.
    - foreground_detector (cv2.BackgroundSubtractor): The foreground detector object.

    Returns:
    - numpy.ndarray: The processed foreground mask.
    """
    # Apply the foreground detector to the frame
    foreground = foreground_detector.apply(frame)

    # Apply morphological opening to remove noise
    filtered_foreground = cv2.morphologyEx(
        foreground, cv2.MORPH_OPEN, STRUCTURING_ELEMENT_OPEN
    )

    # Apply morphological closing to fill gaps
    filtered_foreground = cv2.morphologyEx(
        filtered_foreground, cv2.MORPH_CLOSE, STRUCTURING_ELEMENT_CLOSE
    )

    # Find contours in the filtered foreground mask
    contours, _ = cv2.findContours(
        filtered_foreground, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Filter contours based on minimum area and compute bounding boxes
    bboxes = [
        cv2.boundingRect(c)
        for c in contours
        if cv2.contourArea(c) >= BLOB_ANALYSIS_PARAMS["min_area"]
    ]

    # Save the filtered foreground mask as an image
    gmm_frame_path = gmm_dir / f"gmm_img_{frame_idx:04d}.png"
    cv2.imwrite(str(gmm_frame_path), filtered_foreground)

    # Save the bounding boxes as annotations
    gmm_annotation_path = gmm_dir / f"gmm_img_{frame_idx:04d}.txt"
    if bboxes:
        save_gmm_annotation(
            gmm_annotation_path, bboxes, FRAME_RESIZE[0], FRAME_RESIZE[1]
        )
    else:
        # Create an empty annotation file if no bounding boxes are found
        gmm_annotation_path.touch()

    return filtered_foreground

In [None]:
def apply_optical_flow(frame, frame_idx, prvs, hsv, flow_dir):
    """
    Apply Farneback optical flow to a video frame and save the results.

    Parameters:
    - frame (numpy.ndarray): The current video frame.
    - frame_idx (int): The index of the current frame.
    - prvs (numpy.ndarray): The previous grayscale frame.
    - hsv (numpy.ndarray): The HSV image used for visualizing the optical flow.
    - flow_dir (Path): Path object representing the directory to save optical flow results.

    Returns:
    - tuple: A tuple containing:
        - bgr_resized (numpy.ndarray): The resized BGR image representing the optical flow.
        - next_frame (numpy.ndarray): The next grayscale frame.
    """
    # Convert the current frame to grayscale
    next_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Calculate the optical flow using Farneback method
    flow = cv2.calcOpticalFlowFarneback(prvs, next_frame, None, **FARNEBACK_PARAMS)

    # Compute the magnitude and angle of the flow
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])

    # Set the hue of the HSV image based on the angle of the flow
    hsv[..., 0] = ang * 180 / np.pi / 2

    # Set the value of the HSV image based on the normalized magnitude of the flow
    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)

    # Convert the HSV image to BGR for visualization
    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

    # Resize the BGR image to match the desired frame size (640x640)
    bgr_resized = cv2.resize(bgr, FRAME_RESIZE)

    # Construct the path to save the optical flow frame
    flow_frame_path = flow_dir / f"flow_img_{frame_idx:04d}.png"

    # Save the resized BGR image to the specified directory
    cv2.imwrite(str(flow_frame_path), bgr_resized)

    return bgr_resized, next_frame

In [None]:
def apply_combination(
    frame, frame_idx, filtered_foreground, bgr_resized, gt_bboxes, combined_dir
):
    """
    Combine GMM and Optical Flow images, save the combined image, and save ground truth annotations.

    Parameters:
    - frame (numpy.ndarray): The original video frame.
    - frame_idx (int): The index of the current frame.
    - filtered_foreground (numpy.ndarray): The foreground mask obtained from GMM.
    - bgr_resized (numpy.ndarray): The resized BGR image obtained from optical flow.
    - gt_bboxes (list of dict): List of ground truth bounding boxes for the frame.
    - combined_dir (Path): Path object representing the directory to save combined results.

    Returns:
    - None
    """
    # Initialize a blank image with the same shape as the original frame
    combined_frame = np.zeros_like(frame)

    # Combine the filtered foreground mask into the green channel
    combined_frame[:, :, 1] = filtered_foreground

    # Combine the blue channel of the resized BGR image into the red channel
    combined_frame[:, :, 2] = bgr_resized[:, :, 0]  # Use resized bgr

    # Construct the path to save the combined image
    combined_frame_path = combined_dir / f"combined_img_{frame_idx:04d}.png"

    # Save the combined image to the specified directory
    cv2.imwrite(str(combined_frame_path), combined_frame)

    # Construct the path to save the ground truth annotations
    combined_annotation_path = combined_dir / f"combined_img_{frame_idx:04d}.txt"
    name = "combined_img"

    # Save the ground truth annotations if they exist
    if gt_bboxes:
        save_annotation(
            name,
            combined_dir,
            gt_bboxes,
            FRAME_RESIZE[0],
            FRAME_RESIZE[1],
        )
    else:
        # Create an empty annotation file if no ground truth bounding boxes are found
        combined_annotation_path.touch()

In [None]:
def process_frame(
    frame,
    frame1,
    frame_idx,
    gt_bboxes,
    foreground_detector,
    prvs,
    hsv,
    img_dir,
    gmm_dir,
    flow_dir,
    combined_dir,
):
    """
    Process a video frame by applying GMM and Optical Flow, and save the results.

    Parameters:
    - frame (numpy.ndarray): The current video frame.
    - frame1 (numpy.ndarray): The next video frame for optical flow calculation.
    - frame_idx (int): The index of the current frame.
    - gt_bboxes (list of dict): List of ground truth bounding boxes for the frame.
    - foreground_detector (cv2.BackgroundSubtractor): The foreground detector object.
    - prvs (numpy.ndarray): The previous grayscale frame for optical flow calculation.
    - hsv (numpy.ndarray): The HSV image used for visualizing the optical flow.
    - img_dir (Path): Path object representing the directory to save original frames.
    - gmm_dir (Path): Path object representing the directory to save GMM results.
    - flow_dir (Path): Path object representing the directory to save optical flow results.
    - combined_dir (Path): Path object representing the directory to save combined results.

    Returns:
    - numpy.ndarray: The next grayscale frame for optical flow calculation.
    """
    # Save the original frame to the img_dir
    img_frame_path = img_dir / f"img_{frame_idx:04d}.png"
    cv2.imwrite(str(img_frame_path), frame)

    # Save annotations for the original frame (train_img)
    img_annotation_path = img_dir / f"img_{frame_idx:04d}.txt"
    name = "img"
    if gt_bboxes:
        # Save ground truth annotations if they exist
        save_annotation(
            name,
            img_dir,
            gt_bboxes,
            FRAME_RESIZE[0],
            FRAME_RESIZE[1],
        )
    else:
        # Create an empty annotation file if no ground truth bounding boxes are found
        img_annotation_path.touch()

    # Apply GMM to the frame to detect foreground objects
    foreground = apply_gmm(frame, frame_idx, gmm_dir, foreground_detector)

    # Apply optical flow to the next frame
    bgr, next_frame = apply_optical_flow(frame1, frame_idx, prvs, hsv, flow_dir)

    # Combine GMM and optical flow results and save the combined image
    apply_combination(frame, frame_idx, foreground, bgr, gt_bboxes, combined_dir)

    return next_frame

In [None]:
def process_video(video_path):
    """
    Process a video to extract ground truth, apply GMM and optical flow, and save the results.

    Parameters:
    - video_path (Path): Path object representing the path to the video file.

    Returns:
    - None
    """
    # Extract the last 15 characters of the video file name (without extension) to use as a directory name
    video_name_short = video_path.stem[-15:]

    # Define directories for saving images, GMM results, optical flow results, and combined results
    img_dir = IMG_DIR / video_name_short
    gmm_dir = GMM_DIR / video_name_short
    flow_dir = OPTICAL_DIR / video_name_short
    combined_dir = GMM_OPTICAL_DIR / video_name_short

    # Create the directories if they do not exist
    for directory in [img_dir, gmm_dir, flow_dir, combined_dir]:
        os.makedirs(directory, exist_ok=True)

    # Extract ground truth bounding boxes from the corresponding XML file
    gt_bboxes = extract_ground_truth(video_path)

    # Open the video file
    cap = cv2.VideoCapture(str(video_path))

    # Get the total number of frames in the video
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Create a foreground detector using MOG2
    foreground_detector = cv2.createBackgroundSubtractorMOG2(
        **FOREGROUND_DETECTOR_PARAMS
    )

    # Read the first frame of the video
    ret, frame1 = cap.read()

    # Check if the video file was read successfully
    if not ret:
        print(f"Failed to read the video file: {video_path}")
        return

    # Convert the first frame to grayscale for optical flow calculation
    prvs = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)

    # Initialize an HSV image for visualizing optical flow
    hsv = np.zeros_like(frame1)
    hsv[..., 1] = 255

    # Initialize the frame index
    frame_idx = 0

    # Process each frame of the video
    with tqdm(total=total_frames, desc=f"Processing {video_name_short}") as video_pbar:
        while ret:
            # Resize the frame and adjust its gamma
            frame = cv2.resize(frame1, FRAME_RESIZE)
            frame = adjust_gamma(frame, FRAME_ADJUST_GAMMA)

            # Process the current frame
            next_frame = process_frame(
                frame,
                frame1,
                frame_idx,
                gt_bboxes,
                foreground_detector,
                prvs,
                hsv,
                img_dir,
                gmm_dir,
                flow_dir,
                combined_dir,
            )

            # Update the progress bar
            video_pbar.update(1)

            # Update the previous frame for optical flow calculation
            prvs = next_frame

            # Read the next frame of the video
            ret, frame1 = cap.read()

            # Increment the frame index
            frame_idx += 1

    # Release the video capture object
    cap.release()

In [None]:
"""
Main function to process all video files in the specified directory.

This function searches for video files with .flv and .avi extensions in the VIDEO_DIR,
and processes each video file using the process_video function.
"""
# Get a list of all .flv and .avi video files in the VIDEO_DIR
video_files = list(VIDEO_DIR.glob("*.flv")) + list(VIDEO_DIR.glob("*.avi"))

# Iterate over each video file and process it
for video in video_files:
    process_video(video)

---


## Create train.txt for YOLO


In [None]:
output_file = "train.txt"  # Name of the output file

with open(output_file, "w") as out_file:
    # Iterate over all files in the directory
    for root, dirs, files in os.walk(IMG_DIR):
        for file_name in files:
            # Check if the file has a .jpg extension
            if file_name.lower().endswith(".jpg"):
                file_path = os.path.join(root, file_name)
                out_file.write(f"{IMG_DIR}{file_name}\n")

---


# Classification using YOLO (TODO)


## Custom structures for YOLO detection


In [42]:
class BOX(Structure):
    _fields_ = [("x", c_float), ("y", c_float), ("w", c_float), ("h", c_float)]


class DETECTION(Structure):
    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]


class IMAGE(Structure):
    _fields_ = [("w", c_int), ("h", c_int), ("c", c_int), ("data", POINTER(c_float))]


class METADATA(Structure):
    _fields_ = [("classes", c_int), ("names", POINTER(c_char_p))]

NameError: name 'Structure' is not defined

## Load and set up darknet


In [43]:
# Load darknet library
lib = CDLL(Path(__file__).parent / "../darknet/libdarknet.so", RTLD_GLOBAL)

# Set up function arguments and return types
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA
lib.load_network.argtypes = [c_char_p, c_char_p, c_int]
lib.load_network.restype = c_void_p
lib.load_image_color.argtypes = [c_char_p, c_int, c_int]
lib.load_image_color.restype = IMAGE
lib.network_predict_image.argtypes = [c_void_p, IMAGE]
lib.network_predict_image.restype = POINTER(c_float)
lib.get_network_boxes.argtypes = [
    c_void_p,
    c_int,
    c_int,
    c_float,
    c_float,
    POINTER(c_int),
    c_int,
    POINTER(c_int),
]
lib.get_network_boxes.restype = POINTER(DETECTION)
lib.do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
lib.free_image.argtypes = [IMAGE]
lib.free_detections.argtypes = [POINTER(DETECTION), c_int]

NameError: name 'CDLL' is not defined

## Detection


In [None]:
def detect(net, meta, image, thresh=0.25, hier_thresh=0.5, nms=0.45):
    """
    Performs object detection on a given image using the YOLO model.

    Parameters:
    - net: YOLO network object.
    - meta: Metadata object containing class information.
    - image: Path to the image file.
    - thresh: Detection threshold.
    - hier_thresh: Hierarchical threshold.
    - nms: Non-max suppression threshold.

    Returns:
    - A sorted list of detection results, each containing the class name, probability, and bounding box coordinates.
    """
    im = lib.load_image_color(image.encode("utf-8"), 0, 0)
    num = c_int(0)
    pnum = pointer(num)
    lib.network_predict_image(net, im)
    dets = lib.get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum)
    num = pnum[0]

    if nms:
        lib.do_nms_obj(dets, num, meta.classes, nms)

    results = []
    for j in range(num):
        for i in range(meta.classes):
            if dets[j].prob[i] > 0:
                b = dets[j].bbox
                results.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h)))

    lib.free_image(im)
    lib.free_detections(dets, num)

    return sorted(results, key=lambda x: -x[1])

In [None]:
"""
Function to process images listed in the validation files and save YOLO detection results.

This function performs the following steps:
1. Initializes the YOLO network and metadata.
2. Reads validation image paths from 'val_from_test.txt' and 'val_from_train.txt'.
3. Processes each image in the validation set to perform object detection.
4. Saves the detection results in the specified directories.

The function uses the YOLO model to detect objects in the images and saves the detection results
as binary images where detected regions are highlighted.
"""

# Initialize YOLO network and metadata
net = lib.load_network(b"~/cfg/yolov3-fishclef.cfg", b"~/fishclef.weights", 0)
meta = lib.get_metadata(b"~/cfg/fishclef.data")

# Directories to save YOLO detection results
save_test_part = "~/Test_dataset/yolo_test_part"
save_train_part = "~/Test_dataset/yolo_train_part"

# Read validation image paths
with open("~/val_from_test.txt") as val_from_test, open(
    "~/val_from_train.txt"
) as val_from_train:
    val_test = [line.rstrip() for line in val_from_test]
    val_train = [line.rstrip() for line in val_from_train]

# Image dimensions
img_height, img_width = 640, 640
test_count = 0
detected_count = 0

# Process each image in the validation set
for img_name in val_test:
    test_count += 1
    print(f"Processing {test_count}/{len(val_test)}: {img_name}")
    video_file = os.path.basename(os.path.dirname(img_name))
    img_file = os.path.basename(img_name)

    # Create directory to save results if it doesn't exist
    save_path = join(save_test_part, video_file)
    os.makedirs(save_path, exist_ok=True)

    # Perform detection
    detections = detect(net, meta, img_name)
    detected_blob_img = np.zeros((img_height, img_width), dtype=np.uint8)

    if detections:
        detected_count += 1
        print(f"Detected in frame {detected_count}/{test_count}")
        for fish_info in detections:
            x, y, w, h = map(int, fish_info[2])
            xmin, ymin = max(0, x - w // 2), max(0, y - h // 2)
            xmax, ymax = min(img_width, x + w // 2), min(img_height, y + h // 2)

            # Only consider detections with area less than 25600
            if w * h < 25600:
                detected_blob_img[ymin:ymax, xmin:xmax] = int(fish_info[1] * 255)

    # Save the detection result image
    cv2.imwrite(join(save_path, img_file), detected_blob_img)