In [10]:
import os
from utils import convert_coco_to_yolo_polygons 
from datasets import load_dataset

In [11]:
def convert_coco_to_yolo_polygons(coco_polygons, image_width, image_height):
    """
    Converts COCO style polygons to a normalized YOLO style format, outputting a single list
    of coordinates.

    Parameters:
    - coco_polygons: List of polygons, each represented as a flat list of points
      [x0, y0, x1, y1, ...]. `None` is treated the same as an empty list.
    - image_width: The width of the original image (divisor for every x value).
    - image_height: The height of the original image (divisor for every y value).

    Returns:
    - List of coordinates in YOLO format, normalized by the image dimensions and flattened
      into a single list. All polygons are concatenated in input order.

    Raises:
    - IndexError: if a polygon has an odd number of values (an x without a matching y).
    - ZeroDivisionError: if `image_width` or `image_height` is zero.
    """
    # NOTE(review): the import cell also imports a function with this name from `utils`;
    # this definition rebinds the name, so the imported one is unused once this cell runs.

    # A record without polygon data yields no coordinates instead of a TypeError.
    if coco_polygons is None:
        return []

    yolo_coordinates = []
    for polygon in coco_polygons:
        # Walk the flat list two values at a time: even index = x, odd index = y.
        for i in range(0, len(polygon), 2):
            x_normalized = polygon[i] / image_width
            y_normalized = polygon[i + 1] / image_height
            yolo_coordinates.extend([x_normalized, y_normalized])

    return yolo_coordinates

In [12]:
# Make the train and val directories for images and labels 
def make_yolo_dirs(parent_dir):
    """
    Creates the images/labels x train/val directory tree under `parent_dir`.

    Parameters:
    - parent_dir: Root directory that will contain the "images" and "labels" folders.

    Returns:
    - Tuple (train image directory, val image directory). The label directories are
      created as well but are not returned.
    """
    # Insertion order fixes the creation order: images/train, images/val,
    # labels/train, labels/val.
    layout = {
        (kind, split): os.path.join(parent_dir, kind, split)
        for kind in ("images", "labels")
        for split in ("train", "val")
    }

    for directory in layout.values():
        # exist_ok=True makes re-running the cell harmless.
        os.makedirs(directory, exist_ok=True)

    return layout[("images", "train")], layout[("images", "val")]

In [13]:
def normalize_box(bbox, image_width, image_height):
    """
    Scales a corner-format bounding box by the image dimensions.

    Parameters:
    - bbox: Four values in the order [x_min, y_min, x_max, y_max].
    - image_width: The width of the image; divides both x values.
    - image_height: The height of the image; divides both y values.

    Returns:
    - List [x_min, y_min, x_max, y_max] with each value divided by the matching dimension.
    """
    # Unpacking enforces that exactly four values were supplied.
    left, top, right, bottom = bbox
    return [
        left / image_width,
        top / image_height,
        right / image_width,
        bottom / image_height,
    ]

In [14]:
# Build the YOLO-formatted label line (label id, normalized box, polygons) for each detection in the image
def get_lines(md, image_width, image_height):
    """
    Builds one label line per detection record in `md`.

    Each line has the form "<label_id> <box values> <polygon values>", where the box
    and polygon values are normalized by the image dimensions and every value is
    separated by a single space.

    Parameters:
    - md: Iterable of detection records exposing `.get` for the keys "label_id",
      "box" and "polygons".
    - image_width: The width of the image the detections belong to.
    - image_height: The height of the image the detections belong to.

    Returns:
    - List of label-line strings, one per record, in input order.
    """
    # NOTE(review): the four box values are written between the class id and the
    # polygon points. Confirm the training code expects that layout; the Ultralytics
    # segmentation label format, for example, is the class index followed only by
    # polygon points.
    lines = []

    for row in md:
        label_id = row.get("label_id")

        box = normalize_box(row.get("box"), image_width, image_height)
        box_str = " ".join(map(str, box))

        yolo_polygons = convert_coco_to_yolo_polygons(
            row.get("polygons"), image_width, image_height
        )
        yolo_polygons_str = " ".join(map(str, yolo_polygons))

        lines.append(f"{label_id} {box_str} {yolo_polygons_str}")
    return lines

In [15]:
# Save the image and the text file

def write_image_and_text_file(image, image_name, lines, output_dir):
    """
    Saves an image and its label text file into parallel images/labels directories.

    The image is written to `output_dir/image_name`. The label file is named after the
    image with its final extension replaced by ".txt", and is written to the directory
    obtained by swapping the last "images" in `output_dir` for "labels". That label
    directory must already exist.

    Parameters:
    - image: Image object exposing `mode`, `convert(mode)` and `save(path)`
      (presumably a PIL image; confirm against the dataset's image feature).
    - image_name: File name for the saved image, including its extension.
    - lines: List of label-line strings; written joined by newlines.
    - output_dir: Image directory for the split, e.g. ".../images/train".
    """
    image_path = os.path.join(output_dir, image_name)

    # Drop only the final extension so "a.b.jpg" pairs with "a.b.txt" rather than "a.txt".
    image_stem, _extension = os.path.splitext(image_name)
    text_name = f"{image_stem}.txt"

    # Swap only the last "images" so a parent folder that happens to contain the
    # word (e.g. "raw_images/images/train") is not rewritten as well.
    text_output_dir = "labels".join(output_dir.rsplit("images", 1))
    text_path = os.path.join(text_output_dir, text_name)

    # Non-RGB modes are converted before saving.
    if image.mode != "RGB":
        image = image.convert("RGB")

    image.save(image_path)

    text_file = "\n".join(lines)
    with open(text_path, "w") as f:
        f.write(text_file)

In [16]:
def format_and_write(row, output_dir):
    """
    Exports one dataset row as an image file plus its label text file.

    Rows whose 'mask_metadata' is missing or empty are skipped and nothing is written.

    Parameters:
    - row: Record exposing `.get` for 'image', 'width', 'height', 'mask_metadata'
      and 'image_id'.
    - output_dir: Image directory the row's image is saved into.
    """
    image, width, height, mask_metadata = (
        row.get(key) for key in ('image', 'width', 'height', 'mask_metadata')
    )

    # Nothing to label, so skip the row entirely.
    if not mask_metadata:
        return

    # The row's 'image_id' is used as the output image file name.
    image_name = row.get('image_id')
    label_lines = get_lines(mask_metadata, width, height)
    write_image_and_text_file(image, image_name, label_lines, output_dir)


In [17]:
repo_id = "jordandavis/fashion_test"
workers = os.cpu_count()
# NOTE(review): trust_remote_code=True allows the dataset repository's own loading
# script to run on this machine; keep it only while the repository is trusted.
ds = load_dataset(repo_id, split='train', trust_remote_code=True, num_proc=workers)

# Fixed seed so Restart & Run All reproduces the same train/val membership.
SPLIT_SEED = 42
ds = ds.train_test_split(train_size=0.8, seed=SPLIT_SEED)
train = ds["train"]
val = ds["test"]

In [18]:
parent_dir = "datasets/fashion_five"

# Make directories
train_dir, val_dir = make_yolo_dirs(parent_dir)

# Export every row of each split into that split's image directory; the matching
# label file is written by format_and_write. A plain loop replaces the previous
# map() call, which was used only for its side effects.
for dataset, output_dir in zip([train, val], [train_dir, val_dir]):
    for row in dataset:
        format_and_write(row, output_dir)
