In [13]:
import os
import pandas as pd
from shutil import copy2
from PIL import Image
import ast

In [8]:
# Directory holding the DeepFashion2 per-split annotation tables (relative to
# the notebook's parent directory).
_IMG_INFO_DIR = os.path.join(os.pardir, "data", "raw", "DeepFashion2", "img_info_dataframes")

# One CSV per dataset split.
CSV_TRAIN = os.path.join(_IMG_INFO_DIR, "train.csv")
CSV_VAL = os.path.join(_IMG_INFO_DIR, "validation.csv")

In [9]:
# Root of the YOLO-formatted dataset (relative to the notebook's parent directory).
_YOLO_ROOT = os.path.join(os.pardir, "yolo")

# Destination directories for the generated label files, one per split.
YOLO_TRAIN_LABEL_DIR = os.path.join(_YOLO_ROOT, "train", "labels")
YOLO_VAL_LABEL_DIR = os.path.join(_YOLO_ROOT, "val", "labels")

In [10]:
def convert_bbox_to_yolo(img_w, img_h, x_min, y_min, width, height):
    """Convert a top-left/size bounding box to normalized center/size form.

    Args:
        img_w: Image width, in the same units as the box coordinates.
        img_h: Image height, in the same units as the box coordinates.
        x_min: X coordinate of the box's left edge.
        y_min: Y coordinate of the box's top edge.
        width: Box width.
        height: Box height.

    Returns:
        A tuple ``(x_center, y_center, w_norm, h_norm)`` where the horizontal
        values are divided by ``img_w`` and the vertical ones by ``img_h``.
    """
    # Offset from the top-left corner to the box center.
    half_width = width / 2
    half_height = height / 2

    center = ((x_min + half_width) / img_w, (y_min + half_height) / img_h)
    size = (width / img_w, height / img_h)
    return center + size

In [14]:
def process_csv(csv_path, label_dest_dir):
    """Write YOLO label files for every annotated box listed in a CSV.

    The CSV must provide the columns ``path``, ``b_box``, ``img_width``,
    ``img_height`` and ``category_id``. ``b_box`` is a string holding a Python
    literal with four values ``(x_min, y_min, width, height)``.

    For each image, a ``<image basename>.txt`` file is written into
    ``label_dest_dir`` with one line per box:
    ``class_id x_center y_center width height`` (coordinates normalized by the
    image size, six decimals). Rows whose ``b_box`` cannot be parsed into four
    values are reported on stdout and skipped.

    Label files are written fresh on every call, so re-running the notebook
    does not duplicate boxes in files produced by a previous run.

    Args:
        csv_path: Path to the annotation CSV.
        label_dest_dir: Directory receiving the label files; created if missing.
    """
    df = pd.read_csv(csv_path)

    # The output directory may not exist yet (e.g. on a fresh checkout).
    os.makedirs(label_dest_dir, exist_ok=True)

    # Label file path -> its label lines, in CSV order. Collecting first lets
    # each file be opened exactly once, in overwrite mode.
    lines_by_label_path = {}

    # Column-wise iteration avoids the per-row Series construction of iterrows().
    columns = (df["path"], df["b_box"], df["img_width"], df["img_height"], df["category_id"])
    for path, raw_bbox, img_w, img_h, category_id in zip(*columns):
        # Parse the bbox string; a value that parses but does not hold exactly
        # four items is treated as malformed as well.
        try:
            x_min, y_min, width, height = ast.literal_eval(raw_bbox)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            print(f"Skipping row due to bbox parse error: {path}")
            continue

        x_center, y_center, w, h = convert_bbox_to_yolo(img_w, img_h, x_min, y_min, width, height)

        class_id = int(category_id) - 1  # zero-based class index for YOLO

        label_filename = os.path.splitext(os.path.basename(path))[0] + ".txt"
        label_path = os.path.join(label_dest_dir, label_filename)

        # Several rows may refer to the same image (multiple boxes per image).
        lines_by_label_path.setdefault(label_path, []).append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n"
        )

    for label_path, label_lines in lines_by_label_path.items():
        with open(label_path, "w") as f:
            f.writelines(label_lines)

In [15]:
# Generate the label files for each dataset split (train first, then validation).
for split_csv, split_label_dir in (
    (CSV_TRAIN, YOLO_TRAIN_LABEL_DIR),
    (CSV_VAL, YOLO_VAL_LABEL_DIR),
):
    process_csv(split_csv, split_label_dir)

print("✅ YOLO-format dataset created!")

✅ YOLO-format dataset created!
