In [1]:
import json
import os
import subprocess
from tqdm import tqdm
from collections import Counter

In [2]:
# Exported through the process environment so that commands launched later from
# this notebook inherit it.
# NOTE(review): per the PyTorch `torch.load` docs this is understood to make
# `torch.load` fall back to weights_only=False (full unpickling) — confirm, and
# only load checkpoints you trust.
os.environ["TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD"] = "1"

In [3]:
# Root directory that the dataset paths in this notebook are built from.
# The value is a placeholder: replace it with your own project directory.
HOME = 'your_main_directory'

In [4]:
# Training split: COCO annotation file, image folder, YOLO label output folder.
coco_json_path_train, image_dir_train, output_dir_train = (
    f"{HOME}/dataset_train/annotations/instances_default.json",
    f"{HOME}/dataset_train/images/train",
    f"{HOME}/dataset_train/labels/train",
)

# Validation split: same three locations under dataset_val.
coco_json_path_val, image_dir_val, output_dir_val = (
    f"{HOME}/dataset_val/annotations/instances_default.json",
    f"{HOME}/dataset_val/images/val",
    f"{HOME}/dataset_val/labels/val",
)

In [10]:
# COCO category_id -> YOLO class index.
# Only smoke and fire are kept; every category id that is not a key here is ignored.
category_map = {
    4: 0,  # smoke -> class 0
    5: 1,  # fire  -> class 1
}

In [6]:
def coco_to_yolo(coco_json_path, image_dir, output_dir, class_map=None):
    """Convert COCO bounding boxes into one YOLO-format label file per image.

    For every image listed in the COCO JSON whose file exists in ``image_dir``,
    a ``<image stem>.txt`` file is written to ``output_dir``. Each line is
    ``class x_center y_center width height``, with the COCO ``bbox``
    (x_min, y_min, width, height) converted to a centre point and divided by
    the image width/height recorded in the JSON. Images that have no kept
    annotation still get an (empty) label file. A summary is printed at the end.

    Args:
        coco_json_path: Path to the COCO annotation JSON file.
        image_dir: Directory expected to contain the images; only the base
            name of each ``file_name`` entry is looked up here.
        output_dir: Directory for the label files; created if it is missing.
        class_map: Optional dict mapping COCO ``category_id`` to the YOLO
            class index. Annotations whose category is not a key are skipped
            and counted. Defaults to the notebook-level ``category_map``.

    Returns:
        None. Results are the written label files and the printed statistics.
    """
    # Resolve the mapping once; the global is only touched when no explicit
    # mapping is supplied, so the function does not depend on cell order.
    mapping = category_map if class_map is None else class_map

    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(coco_json_path, encoding="utf-8") as f:
        data = json.load(f)

    os.makedirs(output_dir, exist_ok=True)

    # Index annotations by image in a single pass instead of re-scanning the
    # whole annotation list for every image. Per-image order is preserved.
    anns_by_image = {}
    for ann in data.get("annotations", []):
        anns_by_image.setdefault(ann["image_id"], []).append(ann)

    # Statistics
    class_counts = Counter()
    skipped_annotations = 0
    processed_images = 0
    missing_images = 0

    for img in tqdm(data["images"], desc="Processing images"):
        img_id = img["id"]
        img_width = img["width"]
        img_height = img["height"]
        # file_name may carry a sub-directory prefix; only the base name is used.
        image_name = os.path.basename(img["file_name"])

        # Images that are not on disk get no label file at all.
        image_path = os.path.join(image_dir, image_name)
        if not os.path.exists(image_path):
            print(f"\nMissing image: {image_path}")
            missing_images += 1
            continue

        label_name = os.path.splitext(image_name)[0] + ".txt"
        label_path = os.path.join(output_dir, label_name)

        label_lines = []
        for ann in anns_by_image.get(img_id, ()):
            # Count (rather than silently drop) annotations of unmapped classes.
            if ann["category_id"] not in mapping:
                skipped_annotations += 1
                continue

            # COCO bbox is top-left corner + size in pixels; YOLO wants the
            # box centre + size, all normalised by the image dimensions.
            x_min, y_min, w, h = ann["bbox"]
            x_center = (x_min + w / 2) / img_width
            y_center = (y_min + h / 2) / img_height
            w_norm = w / img_width
            h_norm = h / img_height

            class_id = mapping[ann["category_id"]]
            class_counts[class_id] += 1

            label_lines.append(
                f"{class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}\n"
            )

        # Always (re)write the file so images without kept boxes get an empty label.
        with open(label_path, "w") as f_out:
            f_out.writelines(label_lines)
        processed_images += 1

    # Print conversion statistics
    print("\nConversion Statistics:")
    print(f"- Total images processed: {processed_images}")
    if missing_images:
        print(f"- Images skipped (file not found): {missing_images}")
    print(f"- Fire/Smoke annotations kept: {sum(class_counts.values())}")
    print(f"- Other annotations skipped: {skipped_annotations}")
    print("Class distribution:")
    for class_id, count in class_counts.most_common():
        print(f"  Class {class_id}: {count} annotations")



In [11]:
# Convert the training split: COCO JSON -> YOLO label files.
print("Processing TRAINING set:")
coco_to_yolo(coco_json_path_train, image_dir_train, output_dir_train)

Processing TRAINING set:


Processing images: 100%|█████████████████████████████████████████████████████████| 1142/1142 [00:00<00:00, 3138.41it/s]


Conversion Statistics:
- Total images processed: 1142
- Fire/Smoke annotations kept: 4441
- Other annotations skipped: 0
Class distribution:
  Class 1: 2914 annotations
  Class 0: 1527 annotations





In [12]:
# Convert the validation split: COCO JSON -> YOLO label files.
print("\nProcessing VALIDATION set:")
coco_to_yolo(coco_json_path_val, image_dir_val, output_dir_val)


Processing VALIDATION set:


Processing images: 100%|███████████████████████████████████████████████████████████| 308/308 [00:00<00:00, 5506.54it/s]


Conversion Statistics:
- Total images processed: 308
- Fire/Smoke annotations kept: 1285
- Other annotations skipped: 0
Class distribution:
  Class 1: 761 annotations
  Class 0: 524 annotations





In [None]:
# Launch training by running train.py as a shell command with the options below.
# train.py is given as a relative path, so the notebook's working directory
# must be the folder that contains it.
# NOTE(review): --data "/train.yaml" points at the filesystem root — this looks
# like a placeholder; confirm the dataset YAML path before running.
# NOTE(review): --weights "" presumably means training from scratch (no
# pretrained checkpoint) — confirm against train.py's argument handling.
!python train.py \
--data "/train.yaml" \
--weights "" \
--cfg models/detect/gelan-c.yaml \
--imgsz 640 \
--batch-size 12 \
--epochs 300 \
--optimizer AdamW \
--device 0 \
--hyp data/hyps/hyp.scratch-high.yaml \
--workers 1 \
--cache ram \
--name gelan_c_300ep_smoke_opt