In [6]:
import json
import os
from PIL import Image
import yaml

In [3]:
# PARAMETERS
# Name of the COCO annotations file inside each split folder
train_annotations_json = "annotations.json"
val_annotations_json = "annotations.json"
test_annotations_json = "annotations.json"

# Folder holding the train, val and test sets. It is built with os.path.join so
# the separator matches the running OS (a hard-coded ".\\CustomImages\\" only
# resolves on Windows); the trailing "" keeps the trailing separator.
path_split = os.path.join(".", "CustomImages", "")
folder_train_split = "train"
folder_val_split = "val"
folder_test_split = "test"
path_train_folder_split = os.path.join(path_split, folder_train_split)
path_val_folder_split = os.path.join(path_split, folder_val_split)
path_test_folder_split = os.path.join(path_split, folder_test_split)

# Sub-folder (inside a split folder) that receives the YOLO label files
folder_yolo_train = "labels"

In [4]:


def validate_coco_dataset(coco_annotation_path, images_folder_path):
    """
    Validates a COCO dataset against the images stored on disk.

    The following checks are performed, stopping at the first failure (a message
    describing the problem is printed):
      * every image ID is unique;
      * every referenced image file exists in ``images_folder_path``;
      * the width/height recorded in the JSON match the actual image file;
      * every annotation references an image ID declared in ``images``.

    Parameters:
    coco_annotation_path (str): Path to the COCO annotations JSON file.
    images_folder_path (str): Path to the folder containing images.

    Returns:
    bool: True if the dataset is valid, False otherwise.
    """
    # Load the COCO annotations. The encoding is explicit so that non-ASCII file
    # names are decoded the same way on every platform (the default is locale
    # dependent, e.g. cp1252 on Windows).
    with open(coco_annotation_path, 'r', encoding='utf-8') as file:
        coco_data = json.load(file)

    # IDs of the images seen so far; used for the uniqueness check and later
    # to resolve annotation -> image references
    image_ids = set()

    # Validate images
    for image_info in coco_data['images']:
        image_id = image_info['id']
        file_name = image_info['file_name']
        width = image_info['width']
        height = image_info['height']

        # Check if image ID is unique
        if image_id in image_ids:
            print(f"Duplicate image ID found: {image_id}")
            return False
        image_ids.add(image_id)

        # Check if the image file exists. isfile() rather than exists(): a
        # directory with that name is not a usable image and would otherwise
        # make Image.open raise instead of reporting an invalid dataset.
        image_path = os.path.join(images_folder_path, file_name)
        if not os.path.isfile(image_path):
            print(f"Image file not found: {file_name}")
            return False

        # Check if the image dimensions match
        with Image.open(image_path) as img:
            if img.width != width or img.height != height:
                print(f"Image dimensions do not match for {file_name}: "
                      f"expected ({width}, {height}), got ({img.width}, {img.height})")
                return False

    # Validate annotations: each one must point at a declared image
    for annotation in coco_data['annotations']:
        image_id = annotation['image_id']
        if image_id not in image_ids:
            print(f"Annotation references non-existent image ID: {image_id}")
            return False

    print("COCO dataset validation completed successfully.")
    return True




In [5]:
# Validate every split. Each split folder is passed both as the location of the
# annotations file and as the folder that contains the images.
for check_message, split_folder, annotations_name in (
    ("Check that annotation for training is correct", path_train_folder_split, train_annotations_json),
    ("Check that annotation for validation is correct", path_val_folder_split, val_annotations_json),
    ("Check that annotation for test is correct", path_test_folder_split, test_annotations_json),
):
    print(check_message)
    print(validate_coco_dataset(os.path.join(split_folder, annotations_name), split_folder))

Check that annotation for training is correct
COCO dataset validation completed successfully.
True
Check that annotation for validation is correct
COCO dataset validation completed successfully.
True
Check that annotation for test is correct
COCO dataset validation completed successfully.
True


In [6]:
def convert_coco_to_yolo_segmentation(coco_json_path, output_dir):
    """
    Converts COCO segmentation annotations to YOLO format.

    One ``<image name>.txt`` file is written to ``output_dir`` for every image
    listed in the COCO file (an empty file when the image has no polygon
    annotations). Each polygon becomes one line::

        <class index> <x1> <y1> <x2> <y2> ...

    with coordinates divided by the image width/height recorded in the JSON.
    Annotations whose ``segmentation`` is not a list of polygons (e.g. an RLE
    dict) are skipped.

    Class indices are the rank of the category ID in ascending order, so they
    always form the range 0..N-1. For category IDs 1..N this is exactly
    ``id - 1``.

    Args:
    coco_json_path (str): Path to the COCO annotations JSON file.
    output_dir (str): Path to the folder to save YOLO annotations.
    """
    # Load COCO JSON (explicit encoding: the platform default is locale dependent)
    with open(coco_json_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # Create the directory for YOLO annotations; exist_ok avoids the
    # check-then-create race and makes re-runs harmless
    os.makedirs(output_dir, exist_ok=True)

    # Map COCO category IDs to contiguous zero-based YOLO class indices.
    # Ranking the sorted IDs (instead of computing id - 1) keeps the indices
    # valid when the IDs start at 0 or have gaps.
    sorted_category_ids = sorted({category['id'] for category in coco_data['categories']})
    category_mapping = {category_id: index for index, category_id in enumerate(sorted_category_ids)}

    # Group the annotations by image once, preserving their original order, so
    # the per-image loop below does not rescan the whole annotation list
    annotations_by_image = {}
    for annotation in coco_data['annotations']:
        annotations_by_image.setdefault(annotation['image_id'], []).append(annotation)

    # Iterate over each image in the COCO dataset
    for image_info in coco_data['images']:
        image_width = image_info['width']
        image_height = image_info['height']

        # Label file shares the image's base name, with the extension replaced by .txt
        yolo_annotation_filename = os.path.splitext(image_info['file_name'])[0] + ".txt"
        yolo_annotation_path = os.path.join(output_dir, yolo_annotation_filename)

        with open(yolo_annotation_path, 'w') as yolo_file:
            for annotation in annotations_by_image.get(image_info['id'], []):
                segmentation = annotation.get('segmentation')

                # Only polygon segmentation (list of flat [x1, y1, x2, y2, ...] lists) is converted
                if not isinstance(segmentation, list):
                    continue

                for polygon in segmentation:
                    # x values sit at even indices, y values at odd indices;
                    # normalize each by the corresponding image dimension
                    normalized_coordinates = [
                        f"{x / image_width} {y / image_height}"
                        for x, y in zip(polygon[0::2], polygon[1::2])
                    ]
                    yolo_class_id = category_mapping[annotation['category_id']]

                    # Write the annotation in YOLO format
                    yolo_file.write(f"{yolo_class_id} {' '.join(normalized_coordinates)}\n")

    print("Conversion completed successfully.")




In [16]:
# Convert the training split: read its annotations file and write the YOLO
# label files into the "labels" sub-folder of the train folder
convert_coco_to_yolo_segmentation(
    coco_json_path=os.path.join(path_train_folder_split, train_annotations_json),
    output_dir=os.path.join(path_train_folder_split, folder_yolo_train),
)


Conversion completed successfully.


In [27]:
def create_yaml_file(file_path, train_path, val_path, nc, names):
    """
    Write the dataset description YAML for YOLO.

    The file contains four top-level keys: ``train``, ``val``, ``nc`` and
    ``names``, written in block (non-flow) style.

        Args:
            file_path: destination path of the YAML file (overwritten if present)
            train_path: path of the train set
            val_path: path of the val set
            nc: number of categories
            names: names of the categories
    """
    dataset_config = dict(train=train_path, val=val_path, nc=nc, names=names)

    with open(file_path, 'w') as yaml_out:
        yaml.dump(dataset_config, yaml_out, default_flow_style=False)

In [20]:
# Read the category list from the training annotations
with open(os.path.join(path_train_folder_split, train_annotations_json), 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

# Class names come from the dataset itself and are ordered by category id, the
# same order in which the label conversion numbers the classes. They must not be
# replaced by a hand-written list: nc/names would then disagree with the class
# indices stored in the label files.
names = [
    category['name']
    for category in sorted(coco_data["categories"], key=lambda category: category['id'])
]
nc = len(names)

# Specify the paths and information
# NOTE(review): earlier cells read the images directly from CustomImages/<split>
# and write labels to CustomImages/train/labels - confirm the images are moved
# into an "images" sub-folder before training.
train_path = 'CustomImages/train/images'
val_path = 'CustomImages/val/images'
file_path = 'data.yaml'

# Create the YAML file
create_yaml_file(file_path, train_path, val_path, nc, names)


In [26]:
# Reload the training annotations and count the categories they declare
with open(
    os.path.join(path_train_folder_split, train_annotations_json),
    'r',
) as f:
    coco_data = json.load(f)

names_categories = list(map(lambda category: category['name'], coco_data["categories"]))
len(names_categories)

9