# In [None]:
import os
import json
import pandas as pd

# Per-split image-info CSV files, keyed by dataset split name.
csv_paths = dict(
    train="C:/Users/PMLS/Desktop/xloop_project/deepfashion2/img_info_dataframes/train.csv",
    validation="C:/Users/PMLS/Desktop/xloop_project/deepfashion2/img_info_dataframes/validation.csv",
    test="C:/Users/PMLS/Desktop/xloop_project/deepfashion2/img_info_dataframes/test.csv",
)

# Read every CSV once, up front, into a {split name: DataFrame} mapping.
csv_data = dict(zip(csv_paths.keys(), map(pd.read_csv, csv_paths.values())))

# Names of the CSV columns used for the image-size lookup.
image_column = "path"  # must be present in each CSV
width_column = "img_width"
height_column = "img_height"

def get_image_size(image_name, dataset_type="train"):
    """Fetch an image's width and height from the preloaded CSV data.

    The image is located by its file name only: any directory part of
    ``image_name`` is discarded, and a CSV row matches when the last
    component of its path column equals that file name (case-insensitive,
    with either ``/`` or ``\\`` as the path separator).

    Args:
        image_name: File name or path of the image to look up.
        dataset_type: Key into ``csv_data`` selecting which CSV to search.

    Returns:
        ``(width, height)`` as ints taken from the first matching row, or
        ``(None, None)`` when the dataset type, a required column, or the
        image itself cannot be found (a message is printed in each case).
    """
    df = csv_data.get(dataset_type)
    if df is None:
        print(f"Error: No data found for dataset type '{dataset_type}'.")
        return None, None

    # All three columns are needed; report a missing one instead of raising.
    for column in (image_column, width_column, height_column):
        if column not in df.columns:
            print(f"Error: Column '{column}' not found in {dataset_type}.csv")
            return None, None

    image_name = os.path.basename(image_name)  # Ensure only filename is used
    target = image_name.replace("\\", "/").rsplit("/", 1)[-1].lower()

    # Compare whole file names rather than doing a regex substring search:
    # a substring search lets "000001.jpg" match "1000001.jpg", and the "."
    # in the name would match any character.
    paths = df[image_column].str.replace("\\", "/", regex=False).str.lower()
    matches = (paths == target) | paths.str.endswith("/" + target, na=False)
    row = df[matches]

    if row.empty:
        print(f"Warning: Image {image_name} not found in {dataset_type}.csv")
        return None, None

    return int(row[width_column].iloc[0]), int(row[height_column].iloc[0])

def convert_annotation(ann_file_path, dataset_type="train"):
    """Convert one annotation JSON file into a YOLO-style label file.

    The label file is written next to the annotation file, with the same
    name but a ``.txt`` extension. Each top-level JSON value that is a dict
    containing a ``"bounding_box"`` key produces one line:
    ``<category_id> <x_center> <y_center> <width> <height>``, where the box
    values are divided by the image width/height returned by
    ``get_image_size``.

    Args:
        ann_file_path: Path to the annotation ``.json`` file.
        dataset_type: Dataset split passed on to ``get_image_size``.

    Returns:
        None. If the image size cannot be found, a message is printed and
        no label file is written.
    """
    with open(ann_file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    # splitext only strips the final extension. A plain str.replace would
    # also rewrite ".json" inside directory names, and would leave the path
    # unchanged (overwriting the annotation) when the extension is absent.
    path_stem = os.path.splitext(ann_file_path)[0]

    # The image is assumed to share the annotation's base name.
    image_name = os.path.basename(path_stem) + ".jpg"

    # Get image width & height
    img_w, img_h = get_image_size(image_name, dataset_type)

    if img_w is None or img_h is None:
        print(f"Skipping {image_name}: Could not find image size.")
        return

    label_file_path = path_stem + ".txt"

    with open(label_file_path, "w") as label_file:
        for item in data.values():
            # Skip non-item entries (anything without a bounding box).
            if not (isinstance(item, dict) and "bounding_box" in item):
                continue

            bbox = item["bounding_box"]
            # NOTE(review): category_id is written unchanged; YOLO-style
            # trainers usually expect zero-based class indices -- confirm
            # whether these ids need shifting.
            category_id = item["category_id"]

            # bbox is used as [x1, y1, x2, y2]; convert to a centre/size
            # box normalised by the image dimensions.
            x_center = (bbox[0] + bbox[2]) / 2 / img_w
            y_center = (bbox[1] + bbox[3]) / 2 / img_h
            width = (bbox[2] - bbox[0]) / img_w
            height = (bbox[3] - bbox[1]) / img_h

            label_file.write(
                f"{category_id} {x_center} {y_center} {width} {height}\n"
            )

    # Report only after the file has been closed and flushed.
    print(f"Saved: {label_file_path}")


# Example usage: convert every annotation JSON found in the training split.
ann_dir = "C:/Users/PMLS/Desktop/xloop_project/deepfashion2/train/annos/"
for ann_file in os.listdir(ann_dir):
    # Only annotation files are converted; other directory entries are ignored.
    if not ann_file.endswith(".json"):
        continue
    convert_annotation(os.path.join(ann_dir, ann_file), dataset_type="train")
