In [1]:
import os
import json
from PIL import Image
from tqdm import tqdm

def convert_yolo_to_coco(dataset_path, output_path):
    """
    Converts a dataset in YOLO format to COCO format.

    For each of the 'train' and 'valid' splits, images are read from
    ``<dataset_path>/images/<split>`` and label files from
    ``<dataset_path>/labels/<split>``. One ``<split>.json`` file per split is
    written to ``<output_path>/annotations``. A split whose image directory
    does not exist is skipped with a warning. An image without a matching
    ``.txt`` label file is still listed, with no annotations.

    :param dataset_path: Path to the root of the YOLO dataset (e.g., 'Dataset/')
    :param output_path: Path where the output 'annotations' folder will be created.
    :raises ValueError: If a non-blank label line does not contain exactly five
        numeric fields (class, x_center, y_center, width, height).
    """
    class_names = ['wake'] # Your single class name

    # Create the output directory if it doesn't exist
    annotations_dir = os.path.join(output_path, 'annotations')
    os.makedirs(annotations_dir, exist_ok=True)

    for split in ['train', 'valid']:
        print(f"Processing split: {split}")

        coco_output = {
            "info": {},
            "licenses": [],
            # Category ids are the index of the class name, matching the YOLO class index.
            "categories": [
                {"id": i, "name": class_name, "supercategory": "none"}
                for i, class_name in enumerate(class_names)
            ],
            "images": [],
            "annotations": []
        }

        # Ids restart at 0 for every split because each split gets its own JSON file.
        image_id_counter = 0
        annotation_id_counter = 0

        image_dir = os.path.join(dataset_path, 'images', split)
        label_dir = os.path.join(dataset_path, 'labels', split)

        if not os.path.exists(image_dir):
            print(f"Warning: Image directory not found for split '{split}': {image_dir}")
            continue

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # generated image/annotation ids stable from one run to the next.
        image_files = sorted(
            f for f in os.listdir(image_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        for image_filename in tqdm(image_files, desc=f"Converting {split} set"):
            # Get image dimensions
            image_path = os.path.join(image_dir, image_filename)
            with Image.open(image_path) as img:
                width, height = img.size

            # Add image info
            coco_output['images'].append({
                "id": image_id_counter,
                "file_name": image_filename,
                "width": width,
                "height": height
            })

            # Add annotation info
            label_filename = os.path.splitext(image_filename)[0] + '.txt'
            label_path = os.path.join(label_dir, label_filename)

            if os.path.exists(label_path):
                with open(label_path, 'r') as f:
                    for line_number, line in enumerate(f, start=1):
                        parts = line.split()
                        if not parts:
                            # Blank lines (e.g. a trailing newline) carry no box.
                            continue
                        if len(parts) != 5:
                            raise ValueError(
                                f"Expected 5 fields but found {len(parts)} "
                                f"in {label_path}, line {line_number}"
                            )
                        class_id, x_center, y_center, w, h = map(float, parts)

                        # Convert YOLO format (normalized) to COCO format (absolute pixels)
                        x_min = (x_center - w / 2) * width
                        y_min = (y_center - h / 2) * height
                        box_width = w * width
                        box_height = h * height

                        coco_output['annotations'].append({
                            "id": annotation_id_counter,
                            "image_id": image_id_counter,
                            "category_id": int(class_id),
                            "bbox": [x_min, y_min, box_width, box_height],
                            "area": box_width * box_height,
                            "iscrowd": 0
                        })
                        annotation_id_counter += 1

            image_id_counter += 1

        # Save the JSON file
        output_json_path = os.path.join(annotations_dir, f'{split}.json')
        with open(output_json_path, 'w') as f:
            json.dump(coco_output, f, indent=4)
        print(f"Successfully created {output_json_path}")


if __name__ == '__main__':
    # 'Dataset' is a relative path, so it is resolved against the current
    # working directory; edit it if the dataset lives somewhere else.
    # The same folder is used as the output root, which places the generated
    # 'annotations' folder inside 'Dataset'.
    yolo_dataset_path = output_path = 'Dataset'
    convert_yolo_to_coco(dataset_path=yolo_dataset_path, output_path=output_path)

Processing split: train


Converting train set: 100%|██████████| 9997/9997 [02:14<00:00, 74.06it/s]  


Successfully created Dataset/annotations/train.json
Processing split: valid


Converting valid set: 100%|██████████| 3443/3443 [00:43<00:00, 78.64it/s]  

Successfully created Dataset/annotations/valid.json





In [None]:
# This creates an `annotations` folder inside Dataset; I rename it to annotation_YOLOX manually.

In [None]:
# Apparently it has a conflict with the valid split, so I run the next cell instead.

In [2]:
import os
import json
from PIL import Image
from tqdm import tqdm
import datetime

def convert_yolo_to_coco(base_path, output_path, splits=('test',)):
    """
    Converts YOLO labels to COCO JSON for images stored in ``<split>2017`` folders.

    For each requested split, images are read from ``<base_path>/<split>2017``
    and the original YOLO label files from ``<base_path>/../labels/<split>``.
    One ``<split>.json`` file per split is written to
    ``<output_path>/annotations``. Images that cannot be opened are skipped
    with a warning. An image without a matching ``.txt`` label file is still
    listed, with no annotations.

    If the image or label directory of a split is missing, a FATAL message is
    printed and the function returns immediately without processing any
    remaining splits.

    :param base_path: Folder that contains the ``<split>2017`` image directories.
    :param output_path: Path where the output 'annotations' folder will be created.
    :param splits: Iterable of split names to convert. Defaults to ``('test',)``.
    :raises ValueError: If a non-blank label line does not contain exactly five
        numeric fields (class, x_center, y_center, width, height).
    """
    # The original YOLO labels live next to base_path, not inside it.
    original_labels_base_path = os.path.join(base_path, '..', 'labels')
    print(f"Looking for original YOLO labels in: {original_labels_base_path}")

    class_names = ['wake']
    annotations_dir = os.path.join(output_path, 'annotations')
    os.makedirs(annotations_dir, exist_ok=True)

    for split in splits:
        print(f"Processing split: {split}")

        # datetime.utcnow() is deprecated since Python 3.12. Taking an aware UTC
        # "now" and dropping tzinfo yields the same naive timestamp string.
        created_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

        coco_output = {
            "info": {
                "description": "Vessel Wakes Dataset",
                "version": "1.0",
                "year": datetime.date.today().year,
                "date_created": created_at.isoformat(' ')
            },
            "licenses": [],
            # Category ids are the index of the class name, matching the YOLO class index.
            "categories": [
                {"id": i, "name": class_name, "supercategory": "none"}
                for i, class_name in enumerate(class_names)
            ],
            "images": [],
            "annotations": []
        }

        # Ids restart at 0 for every split because each split gets its own JSON file.
        image_id_counter = 0
        annotation_id_counter = 0

        image_dir = os.path.join(base_path, f'{split}2017')
        label_dir = os.path.join(original_labels_base_path, split)

        if not os.path.exists(image_dir):
            print(f"FATAL: Image directory not found: {image_dir}")
            return
        if not os.path.exists(label_dir):
            print(f"FATAL: Original labels directory not found: {label_dir}")
            return

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # generated image/annotation ids stable from one run to the next.
        image_files = sorted(
            f for f in os.listdir(image_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        for image_filename in tqdm(image_files, desc=f"Converting {split} set"):
            image_path = os.path.join(image_dir, image_filename)
            try:
                with Image.open(image_path) as img:
                    width, height = img.size
            except IOError:
                # Skipped images consume no image id, so ids stay contiguous.
                print(f"Warning: Skipping corrupted or unreadable image: {image_path}")
                continue

            coco_output['images'].append({
                "id": image_id_counter,
                "file_name": image_filename,
                "width": width,
                "height": height
            })

            label_filename = os.path.splitext(image_filename)[0] + '.txt'
            label_path = os.path.join(label_dir, label_filename)

            if os.path.exists(label_path):
                with open(label_path, 'r') as f:
                    for line_number, line in enumerate(f, start=1):
                        parts = line.split()
                        if not parts:
                            # Blank lines (e.g. a trailing newline) carry no box.
                            continue
                        if len(parts) != 5:
                            raise ValueError(
                                f"Expected 5 fields but found {len(parts)} "
                                f"in {label_path}, line {line_number}"
                            )
                        class_id, x_center, y_center, w, h = map(float, parts)

                        # Normalized centre/size -> absolute top-left corner and size in pixels.
                        x_min = (x_center - w / 2) * width
                        y_min = (y_center - h / 2) * height
                        box_width = w * width
                        box_height = h * height

                        coco_output['annotations'].append({
                            "id": annotation_id_counter,
                            "image_id": image_id_counter,
                            "category_id": int(class_id),
                            "bbox": [x_min, y_min, box_width, box_height],
                            "area": box_width * box_height,
                            "iscrowd": 0
                        })
                        annotation_id_counter += 1
            image_id_counter += 1

        output_json_path = os.path.join(annotations_dir, f'{split}.json')
        with open(output_json_path, 'w') as f:
            json.dump(coco_output, f, indent=4)
        print(f"Successfully created {output_json_path}")


if __name__ == '__main__':
    # The YOLOXannotations folder serves as both the input base directory and
    # the root under which the 'annotations' output folder is created.
    yolox_annotations_path = 'Dataset/YOLOXannotations'
    convert_yolo_to_coco(
        base_path=yolox_annotations_path,
        output_path=yolox_annotations_path,
    )

Looking for original YOLO labels in: Dataset/YOLOXannotations/../labels
Processing split: test


Converting test set: 100%|██████████| 6165/6165 [01:56<00:00, 52.79it/s]  


Successfully created Dataset/YOLOXannotations/annotations/test.json


In [4]:
# Run with the image folder named valid2017 (rename it to valid2017 first); after the run, rename it back to val2017.
# There may be a better way to do this, but I'm tired now and don't want to think about it.