In [2]:
import csv
import math
import os
import random
import re
import shutil
from itertools import product
from typing import Dict, List, Tuple

import cv2
import numpy as np
from IPython.display import Image, display
from PIL import Image as PILImage
from pydantic import BaseModel
from ultralytics import YOLO

In [3]:
# datasets
folder_path = os.path.join(os.getcwd(), 'data')

# models
models = os.path.join(os.getcwd(), 'models')

# logs
logs = os.path.join(os.getcwd(), 'logs')

yolo_data = os.path.join(folder_path, 'tiled_data', 'data.yaml')

# Gamma correction code
These function will get executed during the image slicing.

In [3]:
def gamma_correction(image, gamma=1.0):
    # Ensure gamma is a positive number
    if gamma <= 0:
        raise ValueError("Gamma should be greater than 0")

    # Build a lookup table mapping pixel values [0, 255] to their gamma-corrected values
    inv_gamma = 1.0 / gamma
    table = np.array([
        ((i / 255.0) ** inv_gamma) * 255
        for i in range(256)
    ]).astype("uint8")

    # Apply gamma correction using the lookup table
    return cv2.LUT(image, table)


def auto_gamma(image, target_brightness=0.5):
    """
    Automatically adjusts gamma to normalize image brightness.

    Gamma > 1 brightens the image, Gamma < 1 darkens the image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    brightness = np.mean(gray) / 255.0

    # Prevent division by zero or extremely low brightness
    if brightness < 1e-3:
        gamma = 2.5  # Force strong brightening
    else:
        gamma = target_brightness / brightness

    gamma = np.clip(gamma, 0.3, 3.0)  # Clamp to reasonable range
    corrected = gamma_correction(image, gamma)
    return corrected, gamma

# Tiling

In [5]:
VARIANTS = ["train", "test", "val"]
TARGET_RESOLUTION = (1280, 1280)
OVERLAP = 0.0 # As a percentage
OUTPUT_FOLDER = "/home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/tiling_output"
INPUT_FOLDER = "/home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/yolo_data"

# Create the output folders
for variant in VARIANTS:
    if not os.path.exists(os.path.join(OUTPUT_FOLDER, variant)):
        os.makedirs(os.path.join(OUTPUT_FOLDER, variant, "images"), exist_ok=True)
        os.makedirs(os.path.join(OUTPUT_FOLDER, variant, "labels"), exist_ok=True)

files = []
for variant in VARIANTS:
    for file in os.listdir(os.path.join(INPUT_FOLDER, variant, "images")):
        files.append(os.path.join(INPUT_FOLDER, variant, "images", file))


In [6]:
tiles_per_image = {}

for file in files:
    img_tiles = []
    img = cv2.imread(file)

    h, w, channels = img.shape

    x_tile_step = TARGET_RESOLUTION[0] * (1-OVERLAP)
    y_tile_step = TARGET_RESOLUTION[1] * (1-OVERLAP)

    number_of_images_w = math.ceil(w / x_tile_step)
    number_of_images_h = math.ceil(h / y_tile_step)

    for y in range(number_of_images_h):
        for x in range(number_of_images_w):
            x_min = int(x * x_tile_step)
            x_max = int(min(x_min + TARGET_RESOLUTION[0], w))
            y_min = int(y * y_tile_step)
            y_max = int(min(y_min + TARGET_RESOLUTION[1], h))

            tile = img[y_min:y_max, x_min:x_max]
            tile_corrected, gamma = auto_gamma(tile)
            img_tiles.append({"w": w, "h": h, "x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max, "tile": tile_corrected})

    tiles_per_image[file] = img_tiles

In [13]:
class Label:
    """Denotes the bounding box in pixels, by having a minimum and maximum x & y."""
    def __init__(self, x_min, x_max, y_min, y_max):
        self.x_min = x_min
        self.x_max = x_max
        self.y_min = y_min
        self.y_max = y_max


def parse_labels(labels: List[str], original_w: int, original_h: int) -> List[Label]:
    parsed_labels = []
    for label in labels:
        coords = label.split(" ")[1:]

        width = float(coords[2])
        height = float(coords[3])

        x_min = float(coords[0]) - (width/2)
        y_min = float(coords[1]) - (height/2)
        x_max = x_min + width
        y_max = y_min + height

        parsed_labels.append(
            Label(
                x_min * original_w,
                x_max * original_w,
                y_min * original_h,
                y_max * original_h
            )
        )
    return parsed_labels

for original_path, tiles in tiles_per_image.items():
    label_file = original_path.replace("images", "labels")
    label_file = ".".join(label_file.split(".")[:-1]) + ".txt"

    with open(label_file, "r") as f:
        parsed_labels = parse_labels(f.readlines(), tiles[0]["w"], tiles[0]["h"])

    # Get the variant of the original image
    last_folder_name = INPUT_FOLDER.split("/")[-1].split("\\")[-1]
    search = re.search(rf"{last_folder_name}(/|\\)(.*?)(/|\\)images(/|\\)", original_path)
    variant = search.group(2)

    for i, tile in enumerate(tiles):
        # Place tile image
        tile_img_file_name = ".".join(original_path.split("/")[-1].split(".")[:-1]) + "_tile-" + str(i) + ".png"
        tile_img_path = os.path.join(OUTPUT_FOLDER, variant, "images", tile_img_file_name)

        cv2.imwrite(tile_img_path, tile["tile"])

        # Figure out which labels of the labels of the original image are in the tile, and adapt the coordinates accordingly
        tile_labels = []
        for label_i, label in enumerate(parsed_labels):
            if (tile["x_min"] < label.x_min and tile["x_max"] > label.x_max and
                tile["y_min"] < label.y_min and tile["y_max"] > label.y_max):

                x_min = label.x_min - tile["x_min"]
                y_min = label.y_min - tile["y_min"]
                x_max = label.x_max - tile["x_min"]
                y_max = label.y_max - tile["y_min"]

                tile_width = tile["x_max"] - tile["x_min"]
                tile_height = tile["y_max"] - tile["y_min"]

                label_relative_width = (x_max - x_min) / tile_width
                label_relative_height = (y_max - y_min) / tile_height

                tile_labels.append(f"0 {x_min / tile_width + (label_relative_width / 2)} {y_min / tile_height + (label_relative_height / 2)} "
                                   f"{label_relative_width} "
                                   f"{label_relative_height}")


        tile_label_file_name = tile_img_file_name.replace(".png", ".txt")
        tile_label_path = os.path.join(OUTPUT_FOLDER, variant, "labels", tile_label_file_name)
        with open(tile_label_path, "w") as f:
            f.write("\n".join(tile_labels))

# Bird generation

In [14]:
class ImageData(BaseModel):
    image_name: str
    image_paths: List[str] = list()
    label_text: str
    bird_class: int = None # 0 = crow, 1 = , 2 = , 3 = pigeon, 4 = other
    cleaned_file: str = ""

    def model_post_init(self, context):
        self.bird_class = int(self.label_text[0])

        return super().model_post_init(context)

    def get_random_image_path(self):
        random_image = random.choice(self.image_paths)
        random_image = self.image_paths[0]

        return self.image_name, random_image, self.label_text, self.bird_class

    def get_cleaned_image(self):
        display(Image(filename=self.cleaned_file))

    def get_cleaned_scaled_image(self, new_width, new_height):
        img = PILImage.open(self.cleaned_file)
        wpercent = (new_width / float(img.size[0]))
        hsize = int((float(img.size[1]) * float(wpercent)))
        img_resized = img.resize((new_width, hsize), PILImage.Resampling.LANCZOS)
        display(img_resized)

    def get_cropped_images(self, new_width):
        img = PILImage.open(self.cleaned_file)
        img_width, img_height = img.size

        bounding_boxes = self.label_text.split("\n")
        # You only want to take one of the bounding boxes to display because we only want to add one picture into another picture
        # So we take the largest one, which has the highest probability to be one that is the most complete bird
        sorted_bounding_boxes = sorted(bounding_boxes, reverse=True, key= lambda x: x[3])
        for largest_bounding_box in sorted_bounding_boxes:
            # when the data is in incorrect format
            if len(largest_bounding_box.split(" ")) != 5:
                return False
            bird_class, x_center_rel, y_center_rel, width_rel, height_rel = map(float, largest_bounding_box.split(" "))
            x_center = x_center_rel * img_width
            y_center = y_center_rel * img_height
            width = width_rel * img_width
            height = height_rel * img_height

            x_short = x_center - (0.5 * width)
            x_long = x_center + (0.5 * width)
            y_short = y_center - (0.5 * height)
            y_long = y_center + (0.5 * height)
            cropped_img = img.crop((x_short, y_short, x_long, y_long))

            # now we are scaling the cropped image to the correct size
            wpercent = (new_width / float(img_width))
            hsize = int((float(img_height) * wpercent))
            img_resized = cropped_img.resize((new_width, hsize), PILImage.Resampling.LANCZOS)
            if cropped_img.mode != "RGBA":
                print("image is in mode: ", cropped_img.mode, "converting to RGBA")
                cropped_img = cropped_img.convert("RGBA")
            # Extract alpha channel (opacity)
            alpha = img_resized.getchannel("A")

            # Convert to numpy array for efficient computation
            alpha_np = np.array(alpha, dtype=np.float32) / 255.0  # Normalize to [0,1]

            # Calculate average opacity
            avg_opacity = np.mean(alpha_np)

            # Skip image if average opacity is less than 0.05
            if avg_opacity < 0.05:
                print("the opacity is too little for the largest bounding box")
                continue

            return img_resized
        return False

In [15]:
class AllImages(BaseModel):
    images_dict: Dict[str, ImageData] = dict()

    def get_image_list_index(self, bird_classes: Tuple[int] = (0, 1, 2, 3, 4), cleaned_file=False):
        """gets a list of bird images that satisify the requirement of input"""
        if cleaned_file:
            found_image_dict = {index: image_name
                        for index, (image_name, image)
                          in enumerate(self.images_dict.items())
                          if image.bird_class in bird_classes and image.cleaned_file != ""}
        else:
            found_image_dict = {index: image_name
                            for index, (image_name, image)
                            in enumerate(self.images_dict.items())
                            if image.bird_class in bird_classes}
        return found_image_dict

    def get_random_instance(self, bird_classes: Tuple[int] = (0, 1, 2, 3, 4), cleaned_file=False):
        found_image_dict = self.get_image_list_index(bird_classes, cleaned_file)
        random_key = random.choice(list(found_image_dict.keys()))
        found_image = found_image_dict[random_key]
        return self.images_dict[found_image]

    def get_random_picture(self, bird_classes: Tuple[int] = (0, 1, 2, 3, 4)):

        found_image = self.get_random_instance(bird_classes)
        image_name, found_image_path, label_text, bird_class = found_image.get_random_image_path()
        display(Image(filename=found_image_path))
        return found_image_path

    def get_random_clean_image(self, new_width, new_height):
        image = self.get_random_instance((0, 3), True)
        print(image.image_name)
        print(image.cleaned_file)
        image.get_cleaned_scaled_image(new_width, new_height)

    def get_random_cropped_images(self, new_width):

        # Sometimes the cropped image is in the wrong format. So we recursively call this function to retry another one
        for i in range(10):
            image = self.get_random_instance((0, 3), True)
            cropped_image = image.get_cropped_images(new_width)
            if cropped_image:
                print("found image= ", image.image_name)
                return cropped_image

        print("No valid cropped image found after 10 attempts.")
        return None

    def get_list_of_paths_crows_pigeons(self):
        """returns all of the information of the files as a list of lists.
        only includes pigeons and crows"""
        found_images_objects = self.get_image_list_index((0, 3))
        image_paths = [self.images_dict[image].get_random_image_path() for image in list(found_images_objects.values())]
        return image_paths

    def copy_crows_pigeons(self, destination_folder: str):
        crows_pigeon_paths = self.get_list_of_paths_crows_pigeons()
        crows_path = f"{destination_folder}/crows"
        pigeons_path = f"{destination_folder}/pigeons"
        if not os.path.exists(destination_folder):
            os.mkdir(destination_folder)
            os.mkdir(crows_path)
            os.mkdir(f"{crows_path}/labels")
            os.mkdir(pigeons_path)
            os.mkdir(f"{pigeons_path}/labels")
        else:
            raise Exception("folder already exists")


        for index, (image_name, image_path, label_text, bird_class) in enumerate(crows_pigeon_paths):
            bird_cat = "c" if bird_class == 0 else "p"
            image_name = f"{bird_cat}_{index}"
            if bird_class == 0:
                #shutil.copy(image_path, f"{crows_path}/{image_name}.jpg")
                shutil.copy(image_path, crows_path)
                with open(f"{crows_path}/labels/{image_name}.txt", "w") as f:
                    f.write(label_text)

            elif bird_class == 3:
                #shutil.copy(image_path, f"{pigeons_path}/{image_name}.jpg")
                shutil.copy(image_path, pigeons_path)

                with open(f"{pigeons_path}/labels/{image_name}.txt", "w") as f:
                    f.write(label_text)

    def load_removed_background_pictures(self, path: str):
        """give the folder of where the pictures are that have removed the background
        the path folder should contain two folders "pigeons" and "crows"
        """
        folders = os.listdir(path)
        if not ("pigeons" in folders and "crows" in folders):
            raise Exception("pigeons and crows doesn't exist in folder")

        for file in os.listdir(f"{path}/pigeons"):
            if ".DS_Store" in file:
                continue
            first_file_name = file.split(".")[0]
            self.images_dict[first_file_name].cleaned_file = f"{path}/pigeons/{file}"

        for file in os.listdir(f"{path}/crows"):
            if ".DS_Store" in file:
                continue
            first_file_name = file.split(".")[0]
            self.images_dict[first_file_name].cleaned_file = f"{path}/crows/{file}"

    def get_files_in_data_folder(self, path: str):
        images_paths = [f"{path}/images/{file_path}" for file_path in os.listdir(f"{path}/images")]
        label_file_names = [file_path for file_path in os.listdir(f"{path}/labels")]

        for label_file in label_file_names:
            if ".DS_Store" in label_file:
                continue
            # the same picture has the first part the same but might have had different augmentation
            first_file_name = label_file.split(".")[0]
            if first_file_name in self.images_dict:
                image = self.images_dict[first_file_name]
            else:
                with open(f"{path}/labels/{label_file}") as f:
                    label_text = f.read()
                image = ImageData(image_name=first_file_name,
                                  label_text=label_text)
            label_file_no_ext = os.path.splitext(label_file)[0]
            found_image = [file_name for file_name in images_paths if label_file_no_ext in file_name][0]
            image.image_paths.append(found_image)
            self.images_dict[first_file_name] = image

In [16]:
def get_all_images_objects():
    all_images = AllImages()
    repo_path = "/notebooks/DL---detection-of-birds-in-drone-images"
    all_images.load_removed_background_pictures(f"{repo_path}/data/Subject images crows/Subjects not pixelated")
    return all_images

all_images_objects = get_all_images_objects()


def get_average_bounding_box(label_path: str):
    """label_path is the location to the yoloflow.txt file of the image. takes all the bounding boxes of the image and calculate the average"""
    with open(label_path) as f:
        bounding_boxes = f.readlines()

    bounding_boxes = [bounding_box.strip().split(" ") for bounding_box in bounding_boxes]
    #bird_class, x_center_rel, y_center_rel, width_rel, height_rel = map(float, largest_bounding_box.split(" "))

    try:
        average_rel_width = sum([float(bounding_box[3]) for bounding_box in bounding_boxes])/len(bounding_boxes)
        average_rel_height = sum([float(bounding_box[4]) for bounding_box in bounding_boxes])/len(bounding_boxes)
    except ZeroDivisionError:
        average_rel_width = 0
        average_rel_height = 0
    return average_rel_width, average_rel_height


def add_picture_to_picture(image_path: str, label_path: str, average_rel_width, average_rel_height):
    """file_name is the name of the file to be augmented onto
    folder_path is where the filename is in
    average_rel_width and average_rel_height should come from the function get_average_bounding_box(label_path)
    """
    img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)

    h, w, channels = img.shape
    height_pixels = int(h*average_rel_height)
    width_pixels = int(w*average_rel_width)

    cropped_pil = all_images_objects.get_random_cropped_images(width_pixels)
    if cropped_pil is None:
        print("cropped image above is None")
        cropped_pil = all_images_objects.get_random_cropped_images(width_pixels)
        if cropped_pil is None:
            print("it is still None, abort")
            return

    # Convert PIL to OpenCV format
    cropped_np = np.array(cropped_pil.convert("RGBA"))
    cropped_cv = cv2.cvtColor(cropped_np, cv2.COLOR_RGBA2BGRA)  # Preserve alpha channel

    # Choose a position to paste. At least 3 pixels from the border and half of the picture size to be added
    x_offset = random.randint(
        int(3 + width_pixels * 0.5),
        int(w - 3 - width_pixels * 0.5)
    )

    y_offset = random.randint(
        int(3 + height_pixels * 0.5),
        int(h - 3 - height_pixels * 0.5)
    )

    # Get overlay dimensions
    overlay_h, overlay_w = cropped_cv.shape[:2]

    # Make sure the overlay fits within the image bounds
    if y_offset + overlay_h > h:
        overlay_h = h - y_offset
        cropped_cv = cropped_cv[:overlay_h, :, :]

    if x_offset + overlay_w > w:
        overlay_w = w - x_offset
        cropped_cv = cropped_cv[:, :overlay_w, :]

    # Get the ROI from the original image
    roi = img[y_offset:y_offset+overlay_h, x_offset:x_offset+overlay_w]

    # Check channels
    ch = cropped_cv.shape[2]

    # Proper alpha blending
    if ch == 4:  # If we have an alpha channel
        # Extract the alpha channel and normalize to [0, 1]
        alpha = cropped_cv[:, :, 3] / 255.0

        # Create a 3-channel alpha mask
        alpha_3d = np.dstack((alpha, alpha, alpha))

        # Extract BGR channels from overlay
        overlay_bgr = cropped_cv[:, :, :3]

        # Calculate blended image
        blended = (1.0 - alpha_3d) * roi + alpha_3d * overlay_bgr

        # Replace the ROI with the blended image
        img[y_offset:y_offset+overlay_h, x_offset:x_offset+overlay_w] = blended.astype(np.uint8)
    else:
        # Just copy if no alpha
        img[y_offset:y_offset+overlay_h, x_offset:x_offset+overlay_w] = cropped_cv

    # below we are adding the newly generated image that is augmented with one extra bird
    # we add the label as well to the dataset
    x_offset_rel = x_offset/w
    y_offset_rel = y_offset/h
    x_rel = overlay_w/w
    y_rel = overlay_h/h
    new_yolo_label_str = f"0 {x_offset_rel+0.5*x_rel} {y_offset_rel+0.5*y_rel} {x_rel} {y_rel}"
    with open(label_path, "a") as f:
        f.write(f"\n{new_yolo_label_str}")
    cv2.imwrite(image_path, img)

FileNotFoundError: [WinError 3] The system cannot find the path specified: '/notebooks/DL---detection-of-birds-in-drone-images/data/Harmful Birds Detection.v1i.yolov11/test/images'

In [None]:
# tiles_per_image is a variable that was created in the image slicing phase
for original_path, tiles in tiles_per_image.items():
    if "train" not in original_path:
        continue

    label_file = original_path.replace("/images", "/labels")
    label_file = ".".join(label_file.split(".")[:-1]) + ".txt"

    avg_bb_width, avg_bb_height = get_average_bounding_box(label_file)
    if avg_bb_width == 0 or avg_bb_height == 0:
        continue

    # Here we scale the avg bounding boxes of the original image to a same sized bounding box in a tile/slice.
    target_width = avg_bb_width * tiles[0]["w"] / TARGET_RESOLUTION[0]
    target_height = avg_bb_height * tiles[0]["h"] / TARGET_RESOLUTION[1]

    for i in range(len(tiles)):
        tile_img_file_name = ".".join(original_path.split("\\")[-1].split(".")[:-1]) + "_tile-" + str(i) + ".png"
        tile_img_path = os.path.join(OUTPUT_FOLDER, "train", "images", tile_img_file_name)

        tile_label_file_name = tile_img_file_name.replace(".png", ".txt")
        tile_label_path = os.path.join(OUTPUT_FOLDER, "train", "labels", tile_label_file_name)

        add_picture_to_picture(tile_img_path, tile_label_path, target_width, target_height)


# Hyperparameter tuning

In [11]:
# Hyperparameter options
learning_rates = [0.001, 0.005, 0.01]
mosaic_values = [0.0, 0.5, 1.0]
scale_values = [0.3, 0.5]

# Prepare CSV to store results
results_file = "grid_search_results.csv"
with open(results_file, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Run", "lr0", "mosaic", "scale", "mAP50", "mAP50-95"])

# Run grid search
runs = list(product(learning_rates, mosaic_values, scale_values))
for i, (lr0, mosaic, scale) in enumerate(runs, start=1):
    print(f"\n🔁 Running configuration {i}/12: lr0={lr0}, mosaic={mosaic}, scale={scale}")

    model = YOLO("yolov8m.pt")  # Change to yolov8s.pt or other if needed
    if f"run_{i}_lr{lr0}_mos{mosaic}_sc{scale}" in os.listdir("grid_search_yolo"):
        print("already process this parameter")
        continue
    # Train
    results = model.train(
        data=yolo_data,
        epochs=50,
        imgsz=1280,
        batch=-1,
        lr0=lr0,
        mosaic=mosaic,
        scale=scale,
        patience=10,
        project="grid_search_yolo",
        name=f"run_{i}_lr{lr0}_mos{mosaic}_sc{scale}",
        exist_ok=True,
        seed=42,
        verbose=False,
    )

    # Get metrics
    metrics = model.val()
    mAP50 = metrics.box.map50
    mAP50_95 = metrics.box.map

    # Save results
    with open(results_file, "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([i, lr0, mosaic, scale, mAP50, mAP50_95])

print("\n✅ Grid search complete. Results saved to:", results_file)


🔁 Running configuration 1/12: lr0=0.001, mosaic=0.0, scale=0.3
already process this parameter

🔁 Running configuration 2/12: lr0=0.001, mosaic=0.0, scale=0.5
already process this parameter

🔁 Running configuration 3/12: lr0=0.001, mosaic=0.5, scale=0.3
already process this parameter

🔁 Running configuration 4/12: lr0=0.001, mosaic=0.5, scale=0.5
already process this parameter

🔁 Running configuration 5/12: lr0=0.001, mosaic=1.0, scale=0.3
already process this parameter

🔁 Running configuration 6/12: lr0=0.001, mosaic=1.0, scale=0.5
already process this parameter

🔁 Running configuration 7/12: lr0=0.005, mosaic=0.0, scale=0.3
already process this parameter

🔁 Running configuration 8/12: lr0=0.005, mosaic=0.0, scale=0.5
already process this parameter

🔁 Running configuration 9/12: lr0=0.005, mosaic=0.5, scale=0.3
already process this parameter

🔁 Running configuration 10/12: lr0=0.005, mosaic=0.5, scale=0.5
already process this parameter

🔁 Running configuration 11/12: lr0=0.005, mosaic

[34m[1mtrain: [0mScanning C:\studie\Semester 2\content\Deep Learning\group_assignment\DL---detection-of-birds-in-drone-images\data\tiled_data\train\labels... 1502 images, 1035 backgrounds, 0 corrupt: 100%|██████████| 1502/1502 [00:07<00:00, 195.76it/s]


[34m[1mtrain: [0mNew cache created: C:\studie\Semester 2\content\Deep Learning\group_assignment\DL---detection-of-birds-in-drone-images\data\tiled_data\train\labels.cache
[34m[1mAutoBatch: [0mComputing optimal batch size for imgsz=1280 at 60.0% CUDA memory utilization.
[34m[1mtrain: [0mFast image access  (ping: 0.10.1 ms, read: 1424.3489.4 MB/s, size: 2657.5 KB)


[34m[1mtrain: [0mScanning C:\studie\Semester 2\content\Deep Learning\group_assignment\DL---detection-of-birds-in-drone-images\data\tiled_data\train\labels.cache... 1502 images, 1035 backgrounds, 0 corrupt: 100%|██████████| 1502/1502 [00:00<?, ?it/s]

[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 795.2132.0 MB/s, size: 2841.7 KB)



[34m[1mval: [0mScanning C:\studie\Semester 2\content\Deep Learning\group_assignment\DL---detection-of-birds-in-drone-images\data\tiled_data\val\labels... 178 images, 129 backgrounds, 0 corrupt: 100%|██████████| 178/178 [00:00<00:00, 192.67it/s]

[34m[1mval: [0mC:\studie\Semester 2\content\Deep Learning\group_assignment\DL---detection-of-birds-in-drone-images\data\tiled_data\val\images\DJI_0319_tile-0.png: 1 duplicate labels removed
[34m[1mval: [0mNew cache created: C:\studie\Semester 2\content\Deep Learning\group_assignment\DL---detection-of-birds-in-drone-images\data\tiled_data\val\labels.cache





Plotting labels to grid_search_yolo\run_15_lr0.01_mos0.5_sc0.3\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 1280 train, 1280 val
Using 0 dataloader workers
Logging results to [1mgrid_search_yolo\run_15_lr0.01_mos0.5_sc0.3[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/94 [00:21<?, ?it/s]


KeyboardInterrupt: 

# Best parameters

The best parameters we found were:
- Learning rate:
- Mosiac value:
- Scale:

In [12]:
model = YOLO("yolov8m.pt")
results = model.train(
    data=yolo_data,
    epochs=50,
    imgsz=1280,
    batch=-1,
    # lr0=lr0,
    # mosaic=mosaic,
    # scale=scale,
    patience=10,
    project="grid_search_yolo",
    name=f"Best model",
    exist_ok=True,
    seed=42,
    verbose=False,
)
model.save("best_model.pt")

Ultralytics 8.3.132 🚀 Python-3.12.10 torch-2.7.0+cu126 CPU (Intel Core(TM) i7-9750H 2.60GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/tiled_data/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1280, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=Best model, nbs=64, nms=Fa

[34m[1mtrain: [0mScanning /home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/tiled_data/train/labels... 1502 images, 1035 backgrounds, 0 corrupt: 100%|██████████| 1502/1502 [00:15<00:00, 98.87it/s]


[34m[1mtrain: [0mNew cache created: /home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/tiled_data/train/labels.cache
[34m[1mAutoBatch: [0mComputing optimal batch size for imgsz=1280 at 60.0% CUDA memory utilization.
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 3463.9±2090.7 MB/s, size: 2657.5 KB)


[34m[1mtrain: [0mScanning /home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/tiled_data/train/labels.cache... 1502 images, 1035 backgrounds, 0 corrupt: 100%|██████████| 1502/1502 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 790.9±158.9 MB/s, size: 2841.7 KB)


[34m[1mval: [0mScanning /home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/tiled_data/val/labels... 178 images, 129 backgrounds, 0 corrupt: 100%|██████████| 178/178 [00:02<00:00, 84.58it/s]

[34m[1mval: [0m/home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/tiled_data/val/images/DJI_0319_tile-0.png: 1 duplicate labels removed
[34m[1mval: [0mNew cache created: /home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/tiled_data/val/labels.cache





Plotting labels to grid_search_yolo/Best model/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 1280 train, 1280 val
Using 0 dataloader workers
Logging results to [1mgrid_search_yolo/Best model[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/94 [00:00<?, ?it/s]

: 

# Inference


In [4]:
to_infer_folder = "/home/mathijs/studie/Semester 2/content/deep_learning_group_project/DL---detection-of-birds-in-drone-images/data/tiled_data/test/images"
to_infer = [os.path.join(to_infer_folder, file) for file in os.listdir(to_infer_folder)]

model = YOLO("best.pt")
results = model(to_infer, verbose=True)

  return torch._C._cuda_getDeviceCount() > 0





: 