In [None]:
import os
import shutil
import random
import cv2
import scipy.io
import yaml

# === CONFIGURATION ===
# Input root; must contain 'images/<class>' and 'labels/<class>' folders.
SRC_DIR = "data"
# Output root for the generated YOLO dataset.
DST_DIR = "yolo_data"
# Position in this list becomes the numeric class id written to label files.
CLASSES = ['airplane', 'car_side', 'chair', 'cup']
# Fractions of the data assigned to (train, val, test).
SPLIT_RATIOS = (0.7, 0.15, 0.15)
# Fixed seed so the later shuffle is repeatable for a given input order.
random.seed(42)

# === STEP 1: Create YOLO folder structure ===
# Every split gets an 'images' folder and a matching 'labels' folder.
for split in ('train', 'val', 'test'):
    for subdir in ('images', 'labels'):
        os.makedirs(f"{DST_DIR}/{split}/{subdir}", exist_ok=True)

# === STEP 2: Collect all annotated images ===
# class name -> numeric class id (its position in CLASSES).
class_map = {cls: idx for idx, cls in enumerate(CLASSES)}
# all_images holds (class, image filename, annotation path) triples;
# skipped_images holds "class/filename" for images with no annotation file.
all_images, skipped_images = [], []

for cls in CLASSES:
    img_dir = f"{SRC_DIR}/images/{cls}"
    label_dir = f"{SRC_DIR}/labels/{cls}"

    if not os.path.exists(img_dir) or not os.path.exists(label_dir):
        print(f"‚ö†Ô∏è Missing: {img_dir} or {label_dir}")
        continue

    # sorted(): os.listdir() returns entries in arbitrary order, which would
    # make the seeded shuffle below non-reproducible across machines.
    # lower(): accept upper-case extensions such as '.JPG' as well.
    images = sorted(
        img for img in os.listdir(img_dir)
        if img.lower().endswith(('.jpg', '.png', '.jpeg'))
    )
    for img in images:
        base_name = os.path.splitext(img)[0]
        # Two naming schemes are accepted, tried in this order:
        #   <image stem>.mat
        #   annotation_<text after the last '_' of the stem>.mat
        candidates = (
            os.path.join(label_dir, f"{base_name}.mat"),
            os.path.join(label_dir, f"annotation_{base_name.split('_')[-1]}.mat"),
        )
        mat_file = next((path for path in candidates if os.path.exists(path)), None)

        if mat_file is None:
            skipped_images.append(f"{cls}/{img}")
        else:
            all_images.append((cls, img, mat_file))

if skipped_images:
    print(f"‚ö†Ô∏è Skipped {len(skipped_images)} images without annotation.")

print(f"‚úÖ Found {len(all_images)} annotated images.\n")

# === STEP 3: Split into train, val, test ===
# Shuffle in place, then cut the list at two boundaries. Both boundaries are
# truncated with int(), so any rounding remainder ends up in the test split.
random.shuffle(all_images)
total = len(all_images)
train_end = int(total * SPLIT_RATIOS[0])
val_end = int(total * SPLIT_RATIOS[1]) + train_end

splits = dict(
    train=all_images[:train_end],
    val=all_images[train_end:val_end],
    test=all_images[val_end:],
)

# === STEP 4: Fix Caltech-101 Bounding Box Conversion ===
def convert_to_yolo(mat_file, img_shape):
    """Convert a Caltech-101 ``box_coord`` annotation into a YOLO box string.

    Args:
        mat_file: Path to a MATLAB ``.mat`` file containing a ``box_coord``
            entry with four pixel values ordered ``[top, bottom, left, right]``.
            Both 1x4 and 4x1 layouts are accepted.
        img_shape: Shape of the matching image; only the first two entries,
            ``(height, width)``, are used.

    Returns:
        ``"x_center y_center width height"``, each normalised to ``[0, 1]`` and
        formatted with six decimals. The box is clipped to the image first, so
        the result always describes a region inside the image. When the
        annotation is missing, malformed or unreadable, or the image has a
        non-positive dimension, a diagnostic is printed and the placeholder
        ``"0.5 0.5 0.5 0.5"`` is returned instead.
    """
    fallback = "0.5 0.5 0.5 0.5"
    try:
        mat = scipy.io.loadmat(mat_file)
        if 'box_coord' not in mat:
            print(f"‚ö†Ô∏è No 'box_coord' in {mat_file}")
            return fallback

        # Convert to plain Python floats before any arithmetic: the stored
        # dtype may be an unsigned integer, where a subtraction such as
        # right - left silently wraps around instead of going negative.
        box = mat['box_coord'].astype(float).ravel().tolist()
        if len(box) != 4:
            print(f"‚ö†Ô∏è Invalid box length in {mat_file}")
            return fallback

        h, w = img_shape[:2]
        if h <= 0 or w <= 0:
            print(f"‚ö†Ô∏è Invalid image size {w}x{h} for {mat_file}")
            return fallback

        top, bottom, left, right = box

        # Clip the corners to the image *before* deriving centre and size.
        # Clamping centre and size independently would not yield the
        # intersection of the box with the image. sorted() also repairs
        # annotations whose two edges are stored in reverse order.
        x_min, x_max = sorted(min(max(v, 0.0), w) for v in (left, right))
        y_min, y_max = sorted(min(max(v, 0.0), h) for v in (top, bottom))

        x_center = ((x_min + x_max) / 2) / w
        y_center = ((y_min + y_max) / 2) / h
        width = (x_max - x_min) / w
        height = (y_max - y_min) / h

        return f"{x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"

    except Exception as e:
        # Deliberate best-effort: one bad annotation must not abort the whole
        # conversion run, so report it and fall back to the placeholder box.
        print(f"‚ùå Error reading {mat_file}: {e}")
        return fallback

# === STEP 5: Convert and Copy ===
# For every image of every split: check it is readable, copy it into the
# split's 'images' folder and write a one-line YOLO label file next to it in
# the split's 'labels' folder ("<class id> <x_center> <y_center> <w> <h>").
for split, images in splits.items():
    for cls, img_name, mat_file in images:
        src_img = os.path.join(SRC_DIR, "images", cls, img_name)

        # Read first, copy afterwards: an unreadable file must not end up in
        # the dataset as an image without a label.
        img = cv2.imread(src_img)
        if img is None:
            print(f"‚ö†Ô∏è Cannot read image {src_img}")
            continue

        # Prefix the output name with the class. Different classes can hold
        # identically named files (Caltech-101 uses image_NNNN.jpg in every
        # category); without the prefix they would overwrite each other's
        # image and label inside the shared split folder.
        dst_name = f"{cls}_{img_name}"
        dst_img = os.path.join(DST_DIR, split, "images", dst_name)
        shutil.copy(src_img, dst_img)

        yolo_box = convert_to_yolo(mat_file, img.shape)
        label_path = os.path.join(DST_DIR, split, "labels", os.path.splitext(dst_name)[0] + ".txt")
        with open(label_path, 'w') as f:
            f.write(f"{class_map[cls]} {yolo_box}\n")

print("‚úÖ All annotations converted to YOLO format.\n")

# === STEP 6: Generate data.yaml ===
# Absolute dataset root, written with forward slashes on every platform.
abs_dst = os.path.abspath(DST_DIR).replace("\\", "/")

# Dataset description: root path, one image folder per split, class count
# and class names.
yaml_dict = {'path': abs_dst}
yaml_dict.update(
    {part: f"{abs_dst}/{part}/images" for part in ('train', 'val', 'test')}
)
yaml_dict['nc'] = len(CLASSES)
yaml_dict['names'] = CLASSES

yaml_path = os.path.join(DST_DIR, "data.yaml")
with open(yaml_path, 'w') as f:
    yaml.dump(yaml_dict, f)

# Echo the generated configuration so it can be checked in the console.
print("‚úÖ data.yaml created successfully at:", yaml_path)
print("--------------------------------------")
print(yaml.dump(yaml_dict))
print("--------------------------------------")
print("üéØ YOLO dataset ready for training in:", abs_dst)
