# Description of the Notebook

In this notebook, we write a function to convert data from COCO format to YOLO format. We then convert the SODA 10M dataset into YOLO format.

In [1]:
import json
import os

In [2]:
def convert_coco_format_to_yolo(coco_json_path, output_directory, category_id_map=None):
    """Convert a COCO annotation file into one YOLO label file per image.

    For every entry in the COCO ``images`` list a ``<file_name stem>.txt``
    file is written below ``output_directory``. Images without annotations
    still get an (empty) label file. Each line has the form
    ``class x_center y_center width height`` where the four box values are
    divided by the image width/height.

    Args:
        coco_json_path: Path to the COCO-format JSON file. Each ``images``
            entry must provide ``id``, ``file_name``, ``width`` and
            ``height``; each ``annotations`` entry must provide ``image_id``,
            ``category_id`` and ``bbox`` as ``[x_min, y_min, width, height]``.
            A missing ``annotations`` key is treated as "no annotations".
        output_directory: Directory that receives the label files. It is
            created if it does not exist, as are sub-directories implied by
            an image's ``file_name``.
        category_id_map: Optional mapping from COCO ``category_id`` to the
            class index to write. With the default ``None`` the COCO
            ``category_id`` is written unchanged.
            NOTE(review): YOLO label files conventionally use zero-based
            class indices -- confirm what the downstream trainer expects and
            pass a map if the dataset's ids do not already match.

    Raises:
        KeyError: If a required field is missing, or if ``category_id_map``
            is given and does not contain an annotation's ``category_id``.
        ZeroDivisionError: If an annotated image has a width or height of 0.
    """
    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(coco_json_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)

    # Group annotations by image once, instead of rescanning the whole
    # annotation list for every image. Insertion order is preserved, so the
    # lines in each label file keep the order of the COCO file.
    annotations_by_image = {}
    for annotation in coco_data.get('annotations', []):
        annotations_by_image.setdefault(annotation['image_id'], []).append(annotation)

    for image in coco_data['images']:
        image_id = image['id']
        label_name = os.path.splitext(image['file_name'])[0] + '.txt'
        label_path = os.path.join(output_directory, label_name)

        img_width = image['width']
        img_height = image['height']

        # file_name may contain a directory part (e.g. "val/0001.jpg");
        # make sure the matching folder exists before opening the file.
        if os.path.dirname(label_name):
            os.makedirs(os.path.dirname(label_path), exist_ok=True)

        with open(label_path, 'w') as label_file:
            for annotation in annotations_by_image.get(image_id, []):
                # COCO bbox format: [x_min, y_min, width, height]
                x_min, y_min, width, height = annotation['bbox']

                # YOLO wants the box centre, normalised by the image size.
                x_center = (x_min + width / 2) / img_width
                y_center = (y_min + height / 2) / img_height
                width = width / img_width
                height = height / img_height

                class_id = annotation['category_id']
                if category_id_map is not None:
                    class_id = category_id_map[class_id]

                label_file.write(f"{class_id} {x_center} {y_center} {width} {height}\n")



In [3]:
# COCO annotation JSON to convert -- machine-specific, edit before running.
coco_json_path = '/Users/mautushid/github/AI_project/labeled/annotations/instance_val.json'
# Folder that receives the generated YOLO label files -- machine-specific, edit before running.
output_directory = '/Users/mautushid/github/AI_project/yoloval'

# Run the conversion; keyword arguments make the role of each path explicit.
convert_coco_format_to_yolo(
    coco_json_path=coco_json_path,
    output_directory=output_directory,
)