**Training Megadetector on the COD10K Dataset**

Cloning the CameraTraps Repository

In [1]:
!git clone https://github.com/Rider9797/Cameratraps.git

Cloning into 'Cameratraps'...
remote: Enumerating objects: 14, done.[K
remote: Counting objects: 100% (14/14), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 14 (delta 1), reused 13 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (14/14), 6.91 KiB | 1.38 MiB/s, done.
Resolving deltas: 100% (1/1), done.


In [11]:
!ls /kaggle/working

CameraTraps


In [2]:
import os
old_name = "/kaggle/working/Cameratraps"
new_name = "/kaggle/working/CameraTraps"

# Rename the folder
os.rename(old_name, new_name)

**Moving the uploaded dataset on kaggle to the project directory**

In [3]:
import shutil

shutil.copytree(
    src="/kaggle/input/cod-10k-data/COD10K",
    dst="/kaggle/working/CameraTraps/PW_FT_detection/data/COD10K",
    dirs_exist_ok=True  
)

'/kaggle/working/CameraTraps/PW_FT_detection/data/COD10K'

Labeling all humans as 1 in the test directory

In [3]:
import os

# Set your labels directory path
label_dir = "/kaggle/working/Cameratraps/PW_FT_detection/data/COD10K/labels/test"  # e.g., "/kaggle/working/CameraTraps/PW_FT_detection/data/labels"

# Process each file
for filename in os.listdir(label_dir):
    if filename.endswith(".txt") and "Human" in filename:
        filepath = os.path.join(label_dir, filename)
        
        # Read and modify lines
        with open(filepath, 'r') as f:
            lines = f.readlines()
        
        new_lines = []
        for line in lines:
            parts = line.strip().split()
            if parts:  # Skip empty lines
                if parts[0] == '0':  # Change class 0 to 1
                    parts[0] = '1'
                new_lines.append(" ".join(parts) + "\n")
        
        # Write back changes
        with open(filepath, 'w') as f:
            f.writelines(new_lines)

print(f"Processed {len(os.listdir(label_dir))} files. Human labels updated from 0 to 1.")

Processed 1776 files. Human labels updated from 0 to 1.


Labelling all human training images 1

In [4]:
import os

# Set your labels directory path
label_dir = "/kaggle/working/Cameratraps/PW_FT_detection/data/COD10K/labels/train"  # e.g., "/kaggle/working/CameraTraps/PW_FT_detection/data/labels"

# Process each file
for filename in os.listdir(label_dir):
    if filename.endswith(".txt") and "Human" in filename:
        filepath = os.path.join(label_dir, filename)
        
        # Read and modify lines
        with open(filepath, 'r') as f:
            lines = f.readlines()
        
        new_lines = []
        for line in lines:
            parts = line.strip().split()
            if parts:  # Skip empty lines
                if parts[0] == '0':  # Change class 0 to 1
                    parts[0] = '1'
                new_lines.append(" ".join(parts) + "\n")
        
        # Write back changes
        with open(filepath, 'w') as f:
            f.writelines(new_lines)

print(f"Processed {len(os.listdir(label_dir))} files. Human labels updated from 0 to 1.")

Processed 2583 files. Human labels updated from 0 to 1.


Labelling all human validation images as 1

In [5]:
import os

# Set your labels directory path
label_dir = "/kaggle/working/Cameratraps/PW_FT_detection/data/COD10K/labels/val"  # e.g., "/kaggle/working/CameraTraps/PW_FT_detection/data/labels"

# Process each file
for filename in os.listdir(label_dir):
    if filename.endswith(".txt") and "Human" in filename:
        filepath = os.path.join(label_dir, filename)
        
        # Read and modify lines
        with open(filepath, 'r') as f:
            lines = f.readlines()
        
        new_lines = []
        for line in lines:
            parts = line.strip().split()
            if parts:  # Skip empty lines
                if parts[0] == '0':  # Change class 0 to 1
                    parts[0] = '1'
                new_lines.append(" ".join(parts) + "\n")
        
        # Write back changes
        with open(filepath, 'w') as f:
            f.writelines(new_lines)

print(f"Processed {len(os.listdir(label_dir))} files. Human labels updated from 0 to 1.")

Processed 318 files. Human labels updated from 0 to 1.


Dataset Preparation: Get it into a format that the megadetector wants

In [None]:
import os, shutil, json
from PIL import Image

BASE = '/content/drive/MyDrive/Project/cod10k/COD10K-v3'


TRAIN_JSON = os.path.join(BASE, 'Train/CAM_Instance_Train.json')
TEST_JSON  = os.path.join(BASE, 'Test/CAM_Instance_Test.json')
TRAIN_IMG  = os.path.join(BASE, 'Train/Image')
TEST_IMG   = os.path.join(BASE, 'Test/Image')

DATASET_ROOT = '/content/drive/MyDrive/dataset/CameraTraps/PW_FT_detection/data/COD10K'
for sub in ('images/train','labels/train','images/test','labels/test'):
    os.makedirs(os.path.join(DATASET_ROOT, sub), exist_ok=True)


In [None]:
!ls /content/drive/MyDrive/dataset/cod10k/COD10K-v3/Train

CAM_Instance_Train.json        GT_Edge	    GT_Object
CAM-NonCAM_Instance_Train.txt  GT_Instance  Image


Making Labels files in YOLO Format out of the COD10K annotations

In [None]:
# 1) Copy all training .jpgs
for fn in os.listdir(TRAIN_IMG):
    if fn.lower().endswith('.jpg'):
        shutil.copy(os.path.join(TRAIN_IMG,fn),
                    os.path.join(DATASET_ROOT,'images/train',fn))

# 2) Load COCO JSON
with open(TRAIN_JSON) as f:
    coco = json.load(f)

# 3) Build lookup and group bboxes
img_meta  = {img['id']:(img['file_name'],img['width'],img['height']) for img in coco['images']}
ann_by_id = {}
for ann in coco['annotations']:
    ann_by_id.setdefault(ann['image_id'],[]).append(ann['bbox'])

# 4) Write YOLO .txt labels
for img_id, bbs in ann_by_id.items():
    fn, W, H = img_meta[img_id]
    lines = []
    # Determine class (1 for Human, 0 otherwise)
    class_id = 1 if 'Human' in fn else 0
    for x,y,w,h in bbs:
        xc = (x + w/2)/W; yc = (y + h/2)/H
        wn = w/W;      hn = h/H
        lines.append(f"{class_id} {xc:.6f} {yc:.6f} {wn:.6f} {hn:.6f}")
    out = os.path.join(DATASET_ROOT,'labels/train',fn.replace('.jpg','.txt'))
    with open(out,'w') as f: f.write("\n".join(lines))

In [None]:
# 1) Copy all test .jpgs
for fn in os.listdir(TEST_IMG):
    if fn.lower().endswith('.jpg'):
        shutil.copy(os.path.join(TEST_IMG,fn),
                    os.path.join(DATASET_ROOT,'images/test',fn))

# 2) Load COCO JSON
with open(TEST_JSON) as f:
    coco = json.load(f)

# 3) Build lookup
img_meta  = {img['id']:(img['file_name'],img['width'],img['height']) for img in coco['images']}
ann_by_id = {}
for ann in coco['annotations']:
    ann_by_id.setdefault(ann['image_id'],[]).append(ann['bbox'])

# 4) Write YOLO .txt labels
for img_id, bbs in ann_by_id.items():
    fn, W, H = img_meta[img_id]
    lines = []
    class_id = 1 if 'Human' in fn else 0
    for x,y,w,h in bbs:
        xc = (x + w/2)/W; yc = (y + h/2)/H
        wn = w/W;      hn = h/H
        lines.append(f"{class_id} {xc:.6f} {yc:.6f} {wn:.6f} {hn:.6f}")
    out = os.path.join(DATASET_ROOT,'labels/test',fn.replace('.jpg','.txt'))
    with open(out,'w') as f: f.write("\n".join(lines))


Empty labels for NonCAM Images

In [None]:
def process_images(image_dir, json_path, output_img_dir, output_label_dir):
    for fn in os.listdir(image_dir):
        if fn.lower().endswith('.jpg')  and 'NonCAM' in fn:
            shutil.copy(os.path.join(image_dir, fn),
                      os.path.join(output_img_dir, fn))
            # Create empty label file
            out = os.path.join(output_label_dir, fn.replace('.jpg','.txt'))
            open(out, 'w').close()
            print(f"Created empty label for NonCAM image: {fn}")

print("Processing training data...")
process_images(TRAIN_IMG, TRAIN_JSON,
              os.path.join(DATASET_ROOT, 'images/train'),
              os.path.join(DATASET_ROOT, 'labels/train'))

print("\nProcessing test data...")
process_images(TEST_IMG, TEST_JSON,
              os.path.join(DATASET_ROOT, 'images/test'),
              os.path.join(DATASET_ROOT, 'labels/test'))

Processing training data...
Created empty label for NonCAM image: COD10K-NonCAM-4-Terrestial-2678.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-2-Ocean-3405.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-4-Sky-4291.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-2-Ocean-3156.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-3-Sand-3956.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-1-Indoor-3102.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-3-Sand-4031.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-2-Ocean-3556.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-4-Sky-4274.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-2-Ocean-3503.jpg
Created empty label for NonCAM image: COD10K-NonCAM-5-Background-2-Ocean-3469.jpg
Created empty label for NonCAM image: COD10K-NonCAM-4-Terrestial-2773.jpg
Created e

Just checking if the labels file we made exist

In [None]:
lbl_path = os.path.join(DATASET_ROOT, 'labels/test','COD10K-NonCAM-2-Aquatic-334.txt')
if os.path.exists(lbl_path):
  print("EXISTS!!")

else:
  print("Nope!")

EXISTS!!


Installing all requirements required by the Megadetector

In [5]:
%cd /kaggle/working/CameraTraps/PW_FT_detection
!pip install -r requirements.txt


/kaggle/working/CameraTraps/PW_FT_detection
Collecting PytorchWildlife (from -r requirements.txt (line 1))
  Downloading pytorchwildlife-1.2.2.tar.gz (72 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.1/72.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ultralytics (from -r requirements.txt (line 2))
  Downloading ultralytics-8.3.127-py3-none-any.whl.metadata (37 kB)
Collecting munch (from -r requirements.txt (line 3))
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Collecting wget (from -r requirements.txt (line 4))
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting supervision==0.23.0 (from PytorchWildlife->-r requirements.txt (line 1))
  Downloading supervision-0.23.0-py3-none-any.whl.metadata (14 kB)
Collecting gradio (from PytorchWildlife->-r requirements.txt (line 1))
  Downloading gradio-5.29.0-py3-none-any.whl.metadata (1

Loading the pretrained weights

In [None]:
import sys
sys.path.insert(0, '/content/drive/MyDrive/Project_Rayed/CameraTraps/PW_FT_detection')
from utils import get_model_path
model_path = get_model_path("MDV6-yolov9-e")


In [None]:
import os
print("Current working directory:", os.getcwd())
!ls -l  # List files to verify config.yaml exists

Current working directory: /content/drive/MyDrive/Project_Rayed/CameraTraps/PW_FT_detection
total 25824
-rw------- 1 root root      521 Apr 30 06:33 config.yaml
drwx------ 4 root root     4096 Apr 30 14:10 data
-rw------- 1 root root     4322 Apr 30 06:33 environment.yaml
-rw------- 1 root root     2219 Apr 30 06:33 main.py
-rw------- 1 root root 20799488 Apr 30 06:33 MDV6-yolov9e.ptj5thvpjz.tmp
drwx------ 2 root root     4096 Apr 30 14:00 __pycache__
-rw------- 1 root root     7015 Apr 30 06:33 README.md
-rw------- 1 root root       38 Apr 30 14:23 requirements.txt
drwx------ 2 root root     4096 Apr 30 06:30 runs
-rw------- 1 root root     1412 Apr 30 06:33 utils.py
-rw------- 1 root root  5613764 Apr 30 06:33 yolo11n.pt


In [None]:
from munch import Munch
import yaml

with open("./config.yaml") as f:
        cfg = Munch(yaml.load(f, Loader=yaml.FullLoader))
cfg.weights = model_path

with open("config.yaml", "w") as f:
    yaml.dump(dict(cfg), f)

Making the validation folders

In [None]:
import os
import random
import shutil

# Set your paths (modify these as needed)
data_root = '/content/drive/MyDrive/Project_Rayed/CameraTraps/PW_FT_detection/data/COD10K'
train_img_dir = os.path.join(data_root, 'images/train')
train_label_dir = os.path.join(data_root, 'labels/train')

# Create validation directories
val_img_dir = os.path.join(data_root, 'images/val')
val_label_dir = os.path.join(data_root, 'labels/val')
os.makedirs(val_img_dir, exist_ok=True)
os.makedirs(val_label_dir, exist_ok=True)

# Get all image files (assuming .jpg format)
image_files = [f for f in os.listdir(train_img_dir) if f.endswith('.jpg')]
random.shuffle(image_files)  # Shuffle randomly

# Calculate split index (90:10 ratio)
split_idx = int(0.9 * len(image_files))
train_files = image_files[:split_idx]
val_files = image_files[split_idx:]

# Move validation files to their new directories
for img_file in val_files:
    # Move image
    src_img = os.path.join(train_img_dir, img_file)
    dst_img = os.path.join(val_img_dir, img_file)
    shutil.move(src_img, dst_img)

    # Move corresponding label file
    label_file = img_file.replace('.jpg', '.txt')
    src_label = os.path.join(train_label_dir, label_file)
    dst_label = os.path.join(val_label_dir, label_file)
    if os.path.exists(src_label):  # Check if label exists
        shutil.move(src_label, dst_label)

print(f"Split complete: {len(train_files)} train, {len(val_files)} validation")
print(f"Validation images moved to: {val_img_dir}")
print(f"Validation labels moved to: {val_label_dir}")

Split complete: 5400 train, 600 validation
Validation images moved to: /content/drive/MyDrive/Project_Rayed/CameraTraps/PW_FT_detection/data/COD10K/images/val
Validation labels moved to: /content/drive/MyDrive/Project_Rayed/CameraTraps/PW_FT_detection/data/COD10K/labels/val


First approach to loading weights(run inference and the weights load automatically)

In [None]:
#this one is to run it first so we get the weights
%cd /content/drive/MyDrive/dataset/CameraTraps/PW_FT_detection
!python main.py --task validation --config config.yaml

/content/drive/MyDrive/dataset/CameraTraps/PW_FT_detection
YOLOv9e summary: 721 layers, 58,147,225 parameters, 0 gradients, 192.7 GFLOPs
Ultralytics 8.3.119 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLOv9e summary (fused): 279 layers, 57,378,713 parameters, 0 gradients, 189.1 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.6±0.2 ms, read: 0.1±0.0 MB/s, size: 92.0 KB)
[34m[1mval: [0mScanning /content/drive/MyDrive/dataset/CameraTraps/PW_FT_detection/data/data_example/labels/train... 10 images, 0 backgrounds, 0 corrupt: 100% 10/10 [00:13<00:00,  1.34s/it]
[34m[1mval: [0mNew cache created: /content/drive/MyDrive/dataset/CameraTraps/PW_FT_detection/data/data_example/labels/train.cache
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% 1/1 [00:00<00:00,  1.04it/s]
                   all         10         10      0.993          1      0.995      0.754
                animal         10         10      0.993        

this is my config.yaml for the run above:
# General
model: YOLO #YOLO or RTDETR
model_name: MDV6-yolov9-e
data: /content/drive/MyDrive/dataset/CameraTraps/PW_FT_detection/data/data_example.yaml
test_data: /content/drive/MyDrive/dataset/CameraTraps/PW_FT_detection/data/data_example/images/val
task: validation
exp_name: MDV6-yolov9-e

# Train
epochs: 20
batch_size_train: 16
imgsz: 640
device_train: 0
workers: 8
optimizer: auto
lr0: 0.01
patience: 5
save_period: 1
val: True
resume: False
weights: None # Path to weight to resume training

# Validation
save_json: True
plots: True
device_val: 0
batch_size_val: 12

and my data_example.yaml:
names:
  0: animal
  1: person
  2: vehicle
path: /content/drive/MyDrive/dataset/CameraTraps/PW_FT_detection/data/data_example
test: images/val
train: images/train
val:   images/val

More Training Prep

In [None]:
# === General ===
model: YOLO
model_name: MDV6-yolov9-c
data: ./data/COD10K.yaml
task: train
exp_name: COD10K_finetune

# === Training ===
epochs: 20
batch_size_train: 16
imgsz: 640
device_train: 0
workers: 8
optimizer: auto
lr0: 0.01
patience: 5       # <-- already present
save_period: 1    # <-- make sure this line is here
val: True
resume: False
weights: None

# === Validation (during training) ===
save_json: True
plot: True
device_val: 0
batch_size_val: 12

current config for all the stuf going on

SyntaxError: invalid syntax (<ipython-input-10-70dca45d6676>, line 5)

Assigning the last model for more epochs

In [10]:
import yaml

# 1. Load the config file
config_path = "/kaggle/working/CameraTraps/PW_FT_detection/config.yaml"
with open(config_path, 'r') as f:
    config = yaml.safe_load(f)

# 2. Update the weights path (replace with your new path)
new_weights_path = "/kaggle/working/CameraTraps/PW_FT_detection/runs/train_COD10K_finetune/exp/weights/last.pt"
config['weights'] = "/root/.cache/torch/hub/checkpoints/MDV6-yolov9e.pt"
config["epochs"] = 40
config["resume"] = False
config["patience"] = 10

# 3. Save the modified config
with open(config_path, 'w') as f:
    yaml.dump(config, f, sort_keys=False)  # sort_keys=False preserves order

print(f"Weights path updated to: {new_weights_path}")

Weights path updated to: /kaggle/working/CameraTraps/PW_FT_detection/runs/train_COD10K_finetune/exp/weights/last.pt


Finally running main.py to start training

In [11]:
!python main.py

YOLOv9c summary: 358 layers, 25,531,545 parameters, 0 gradients, 103.7 GFLOPs
Ultralytics 8.3.127 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/working/CameraTraps/PW_FT_detection/data/COD10K.yaml, degrees=0.0, deterministic=True, device=cuda:0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=40, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=/root/.cache/torch/hub/checkpoints/MDV6b-yolov9c.pt, momentum=0.937, mosaic=1.0, multi_scale