In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

This cell copies the dataset into the `Kaggle Working Directory` for data manipulaiton

In [10]:
import os
import shutil

# Define source and destination directories
src_dir = "/kaggle/input/dlp-object-detection-week-10/final_dlp_data"
dest_dir = "/kaggle/working"

# Walk through the source directory
for root, dirs, files in os.walk(src_dir):
    # Compute the relative path from the source directory
    rel_path = os.path.relpath(root, src_dir)
    
    # Compute the corresponding destination path
    dest_path = os.path.join(dest_dir, rel_path)
    
    # Create the destination directory if it does not exist
    os.makedirs(dest_path, exist_ok=True)
    
    # Copy each file to the destination while maintaining structure
    for file in files:
        src_file = os.path.join(root, file)
        dest_file = os.path.join(dest_path, file)
        shutil.copy2(src_file, dest_file)  # Copy while preserving metadata

print("Files copied successfully while maintaining directory structure.")

Files copied successfully while maintaining directory structure.



* This cell creates a validation dataset from the given train dataset
* 20% of the training data is kept for validation


In [11]:
import os
import random
import shutil

# Define paths
base_path = "/kaggle/working/final_dlp_data/"
train_images_path = os.path.join(base_path, "train", "images")
train_labels_path = os.path.join(base_path, "train", "labels")
val_images_path = os.path.join(base_path, "val", "images")
val_labels_path = os.path.join(base_path, "val", "labels")

# Create val directories if they don't exist
os.makedirs(val_images_path, exist_ok=True)
os.makedirs(val_labels_path, exist_ok=True)

# List all images and corresponding labels
image_files = sorted([f for f in os.listdir(train_images_path) if f.endswith((".jpeg", ".png"))])
label_files = sorted([f for f in os.listdir(train_labels_path) if f.endswith(".txt")])

# Ensure image-label pairs match
assert len(image_files) == len(label_files), "Mismatch between images and labels"

# Define split percentage
val_ratio = 0.2  # 20% for validation
val_size = int(len(image_files) * val_ratio)

# Select random samples for validation
val_indices = random.sample(range(len(image_files)), val_size)

# Move selected files to val folder
for idx in val_indices:
    img_file = image_files[idx]
    label_file = label_files[idx]

    shutil.move(os.path.join(train_images_path, img_file), os.path.join(val_images_path, img_file))
    shutil.move(os.path.join(train_labels_path, label_file), os.path.join(val_labels_path, label_file))

print(f"Moved {val_size} images and labels to validation set.")


Moved 1500 images and labels to validation set.


**Yolo Model Training**

In [16]:
yaml_content = """
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]

path: /kaggle/working/final_dlp_data # dataset root dir
train: train/images # train images (relative to 'path')
val: val/images # validation images (relative to 'path')
test: test/images # test images (relative to 'path')

names:
    0: aegypti
    1: albopictus
    2: anopheles
    3: culex
    4: culiseta
    5: japonicus/koreicus
"""

# Save to a file in Kaggle
file_path = "/kaggle/working/dataset.yaml"
with open(file_path, "w") as f:
    f.write(yaml_content)

print(f"YAML file saved at: {file_path}")


YAML file saved at: /kaggle/working/dataset.yaml


In [14]:
!pip install -q ultralytics

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m949.8/949.8 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h

**Training the YOLO model:**

In [17]:
from ultralytics import YOLO


model = YOLO("yolov9c.pt")

results = model.train(data="/kaggle/working/dataset.yaml", epochs=5, imgsz=640)



Ultralytics 8.3.96 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov9c.pt, data=/kaggle/working/dataset.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True,

[34m[1mtrain: [0mScanning /kaggle/working/final_dlp_data/train/labels... 6000 images, 0 backgrounds, 1 corrupt: 100%|██████████| 6000/6000 [00:04<00:00, 1245.90it/s]






[34m[1mtrain: [0mNew cache created: /kaggle/working/final_dlp_data/train/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
[34m[1mval: [0mScanning /kaggle/working/final_dlp_data/val/labels... 1500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1500/1500 [00:01<00:00, 1145.25it/s]


[34m[1mval: [0mNew cache created: /kaggle/working/final_dlp_data/val/labels.cache
Plotting labels to runs/detect/train2/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.9) with parameter groups 154 weight(decay=0.0), 161 weight(decay=0.0005), 160 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mruns/detect/train2[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5      11.2G      1.343      1.805      1.522         30        640: 100%|██████████| 375/375 [05:44<00:00,  1.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:28<00:00,  1.67it/s]


                   all       1500       1500      0.908      0.246      0.271      0.166

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5      10.9G       1.38      1.492      1.548         35        640: 100%|██████████| 375/375 [05:43<00:00,  1.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:27<00:00,  1.70it/s]

                   all       1500       1500      0.526      0.325      0.268      0.174






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5      10.9G      1.314      1.374      1.497         33        640: 100%|██████████| 375/375 [05:36<00:00,  1.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:24<00:00,  1.93it/s]

                   all       1500       1500      0.913      0.266      0.306      0.206






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5      10.9G      1.237      1.254      1.437         24        640: 100%|██████████| 375/375 [05:36<00:00,  1.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:24<00:00,  1.92it/s]

                   all       1500       1500      0.585      0.403      0.341      0.247






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5      10.9G      1.138      1.096      1.367         26        640: 100%|██████████| 375/375 [05:35<00:00,  1.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:24<00:00,  1.94it/s]

                   all       1500       1500      0.597      0.411      0.365      0.269






5 epochs completed in 0.513 hours.
Optimizer stripped from runs/detect/train2/weights/last.pt, 51.6MB
Optimizer stripped from runs/detect/train2/weights/best.pt, 51.6MB

Validating runs/detect/train2/weights/best.pt...
Ultralytics 8.3.96 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
YOLOv9c summary (fused): 156 layers, 25,323,874 parameters, 0 gradients, 102.3 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:25<00:00,  1.82it/s]


                   all       1500       1500      0.597      0.412      0.365      0.269
               aegypti          9          9          1          0     0.0791     0.0422
            albopictus        662        662      0.702      0.962      0.903      0.663
             anopheles         15         15          1          0     0.0404     0.0336
                 culex        657        657      0.555      0.954      0.863      0.636
              culiseta         88         88      0.169      0.466      0.175      0.141
    japonicus/koreicus         69         69      0.152      0.087      0.127     0.0958


  xa[xa < 0] = -1
  xa[xa < 0] = -1


Speed: 0.1ms preprocess, 11.6ms inference, 0.0ms loss, 1.4ms postprocess per image
Results saved to [1mruns/detect/train2[0m


**RT-DETR Model**

In [18]:
from ultralytics import RTDETR

model = RTDETR("rtdetr-l.pt")

results = model.train(data="/kaggle/working/dataset.yaml", epochs=5, imgsz=640)



Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/rtdetr-l.pt to 'rtdetr-l.pt'...


100%|██████████| 63.4M/63.4M [00:00<00:00, 306MB/s]


Ultralytics 8.3.96 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=rtdetr-l.pt, data=/kaggle/working/dataset.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True

[34m[1mtrain: [0mScanning /kaggle/working/final_dlp_data/train/labels.cache... 6000 images, 0 backgrounds, 1 corrupt: 100%|██████████| 6000/6000 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /kaggle/working/final_dlp_data/val/labels.cache... 1500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1500/1500 [00:00<?, ?it/s]


Plotting labels to runs/detect/train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.9) with parameter groups 143 weight(decay=0.0), 206 weight(decay=0.0005), 226 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mruns/detect/train3[0m
Starting training for 5 epochs...

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
        1/5      13.4G     0.5619      6.747     0.4453         29        640: 100%|██████████| 375/375 [08:27<00:00,  1.35s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:31<00:00,  1.47it/s]


                   all       1500       1500      0.843       0.31      0.305      0.219

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
        2/5      13.4G     0.4052     0.8016     0.2997         33        640: 100%|██████████| 375/375 [08:21<00:00,  1.34s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:31<00:00,  1.48it/s]


                   all       1500       1500      0.817      0.325      0.295      0.211

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
        3/5      13.4G     0.3801     0.7709     0.2832         31        640: 100%|██████████| 375/375 [08:19<00:00,  1.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:32<00:00,  1.47it/s]


                   all       1500       1500      0.516      0.385      0.325      0.231

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
        4/5      13.4G     0.3696     0.7398     0.2769         24        640: 100%|██████████| 375/375 [08:19<00:00,  1.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:31<00:00,  1.48it/s]


                   all       1500       1500      0.738      0.358      0.355      0.262

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
        5/5      13.4G     0.3528     0.6987     0.2575         23        640: 100%|██████████| 375/375 [08:18<00:00,  1.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:31<00:00,  1.47it/s]


                   all       1500       1500      0.874      0.414      0.386      0.292

5 epochs completed in 0.745 hours.
Optimizer stripped from runs/detect/train3/weights/last.pt, 66.2MB
Optimizer stripped from runs/detect/train3/weights/best.pt, 66.2MB

Validating runs/detect/train3/weights/best.pt...
Ultralytics 8.3.96 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
rt-detr-l summary: 302 layers, 31,996,070 parameters, 0 gradients, 103.5 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:32<00:00,  1.44it/s]


                   all       1500       1500      0.873      0.415      0.386      0.293
               aegypti          9          9          1          0          0          0
            albopictus        662        662      0.899      0.817       0.83      0.608
             anopheles         15         15          1          0          0          0
                 culex        657        657      0.904      0.888      0.894      0.662
              culiseta         88         88      0.436      0.784      0.594      0.485
    japonicus/koreicus         69         69          1          0          0          0


  xa[xa < 0] = -1
  xa[xa < 0] = -1


Speed: 0.2ms preprocess, 16.9ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1mruns/detect/train3[0m


# Submission 

In [20]:
import pandas as pd
import glob
import os
from ultralytics import YOLO, RTDETR

#Loading the sample submission file
submission_path = "/kaggle/input/dlp-object-detection-week-10/sample_submission.csv"
submission_df = pd.read_csv(submission_path)

#Loading all the trained models: RT-DETR & YOLO
yolo_model = YOLO("/kaggle/working/runs/detect/train2/weights/best.pt")
rtdetr_model = RTDETR("/kaggle/working/runs/detect/train3/weights/best.pt")  

#Path to test images
test_images = glob.glob("/kaggle/working/final_dlp_data/test/images/*.jpeg")

#Class names mapping
class_names = {0: "aegypti", 1: "albopictus", 2: "anopheles", 3: "culex", 4: "culiseta", 5: "japonicus/koreicus"}

#Dictionary to store final results
final_results = {}

# Run inference on test images using both models
for img_path in test_images:
    image_id = os.path.basename(img_path)

    # YOLO inference
    yolo_results = yolo_model(img_path)
    yolo_best_prediction = None

    for result in yolo_results:
        for box in result.boxes:
            conf = box.conf[0].item()
            if yolo_best_prediction is None or conf > yolo_best_prediction["Conf"]:
                x_min, y_min, x_max, y_max = box.xyxy[0].tolist()
                xcenter = (x_min + x_max) / (2 * result.orig_shape[1])
                ycenter = (y_min + y_max) / (2 * result.orig_shape[0])
                width = (x_max - x_min) / result.orig_shape[1]
                height = (y_max - y_min) / result.orig_shape[0]
                cls = int(box.cls[0].item())

                yolo_best_prediction = {
                    "LabelName": class_names[cls],
                    "Conf": conf,
                    "xcenter": xcenter,
                    "ycenter": ycenter,
                    "bbx_width": width,
                    "bbx_height": height
                }

    # RT-DETR inference
    rtdetr_results = rtdetr_model(img_path)
    rtdetr_best_prediction = None

    for result in rtdetr_results:
        for box in result.boxes:
            conf = box.conf[0].item()
            if rtdetr_best_prediction is None or conf > rtdetr_best_prediction["Conf"]:
                x_min, y_min, x_max, y_max = box.xyxy[0].tolist()
                xcenter = (x_min + x_max) / (2 * result.orig_shape[1])
                ycenter = (y_min + y_max) / (2 * result.orig_shape[0])
                width = (x_max - x_min) / result.orig_shape[1]
                height = (y_max - y_min) / result.orig_shape[0]
                cls = int(box.cls[0].item())

                rtdetr_best_prediction = {
                    "LabelName": class_names[cls],
                    "Conf": conf,
                    "xcenter": xcenter,
                    "ycenter": ycenter,
                    "bbx_width": width,
                    "bbx_height": height
                }

    # Select the best prediction between YOLO and RT-DETR
    if yolo_best_prediction and rtdetr_best_prediction:
        final_prediction = yolo_best_prediction if yolo_best_prediction["Conf"] > rtdetr_best_prediction["Conf"] else rtdetr_best_prediction
    elif yolo_best_prediction:
        final_prediction = yolo_best_prediction
    elif rtdetr_best_prediction:
        final_prediction = rtdetr_best_prediction
    else:
        # this is the edge case where I'm assuming both the models didn't provide any predicitons, default predicitons are used
        final_prediction = {
            "LabelName": "aegypti",
            "Conf": 0,
            "xcenter": 0,
            "ycenter": 0,
            "bbx_width": 0,
            "bbx_height": 0
        }

    final_results[image_id] = final_prediction

#Updating submission dataframe with final results
for index, row in submission_df.iterrows():
    image_id = row["ImageID"]
    if image_id in final_results:
        submission_df.at[index, "LabelName"] = final_results[image_id]["LabelName"]
        submission_df.at[index, "Conf"] = final_results[image_id]["Conf"]
        submission_df.at[index, "xcenter"] = final_results[image_id]["xcenter"]
        submission_df.at[index, "ycenter"] = final_results[image_id]["ycenter"]
        submission_df.at[index, "bbx_width"] = final_results[image_id]["bbx_width"]
        submission_df.at[index, "bbx_height"] = final_results[image_id]["bbx_height"]

output_path = "/kaggle/working/dual_submission_final.csv"
submission_df.to_csv(output_path, index=False)

print(f"Submission file saved at: {output_path}")



image 1/1 /kaggle/working/final_dlp_data/test/images/d6a596ab-07a1-4284-88d5-7cc3a08232a1.jpeg: 640x480 1 culex, 57.0ms
Speed: 3.2ms preprocess, 57.0ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 /kaggle/working/final_dlp_data/test/images/d6a596ab-07a1-4284-88d5-7cc3a08232a1.jpeg: 640x640 1 culiseta, 45.4ms
Speed: 3.5ms preprocess, 45.4ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/working/final_dlp_data/test/images/0d7fda56-9014-4d0e-9802-993925febec0.jpeg: 640x480 1 culex, 18.9ms
Speed: 2.7ms preprocess, 18.9ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 /kaggle/working/final_dlp_data/test/images/0d7fda56-9014-4d0e-9802-993925febec0.jpeg: 640x640 1 albopictus, 45.5ms
Speed: 2.8ms preprocess, 45.5ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/working/final_dlp_data/test/images/abcd1135-de3d-4374-bc38-112b4765fa07.jpeg: 640x480 1 albopictus, 1