In [6]:
from google.colab import drive
# Mount Google Drive at /content/drive so the /content/drive/MyDrive/... paths
# used by the cells in this notebook resolve inside the Colab runtime.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Build zero-based class map excluding 03 and 07
# `os` is imported here because this cell runs before the notebook's main
# import cell; without it a fresh "Restart & Run All" fails with NameError.
import os

source_base = "/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/raw/data"

# Object folders that must not receive a class ID.
excluded_folders = {"03", "07"}

# Sorted folder names give a deterministic folder -> ID assignment.
available_folders = sorted(
    f for f in os.listdir(source_base)
    if os.path.isdir(os.path.join(source_base, f)) and f not in excluded_folders
)

# Contiguous IDs starting at 0, in sorted folder order.
class_map = {folder: idx for idx, folder in enumerate(available_folders)}

print("✅ Class mapping:")
for f, i in class_map.items():
    print(f"Class folder {f} → Class ID {i}")

✅ Class mapping:
Class folder 01 → Class ID 0
Class folder 02 → Class ID 1
Class folder 04 → Class ID 2
Class folder 05 → Class ID 3
Class folder 06 → Class ID 4
Class folder 08 → Class ID 5
Class folder 09 → Class ID 6
Class folder 10 → Class ID 7
Class folder 11 → Class ID 8
Class folder 12 → Class ID 9
Class folder 13 → Class ID 10
Class folder 14 → Class ID 11
Class folder 15 → Class ID 12


In [None]:
import os
import json
import shutil
from pathlib import Path
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

# Dataset locations on the mounted Drive
base_dir = "/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data"
train_dir = os.path.join(base_dir, "full_data/train")
output_dir = os.path.join(base_dir, "yolo_data")  # destination of the YOLO-formatted copy

# Read the ground-truth annotations stored next to the training images
gt_path = os.path.join(train_dir, "gt.json")
with open(gt_path) as gt_file:
    gt_data = json.load(gt_file)

# Peek at the first two keys and at the annotation stored under the first key
sample_keys = list(gt_data.keys())[:2]
print("🔍 Sample keys from gt.json:", sample_keys)
print("📄 Example annotation:", gt_data[sample_keys[0]])

🔍 Sample keys from gt.json: ['01_0000', '01_0001']
📄 Example annotation: [[0.0963063, 0.99404401, 0.0510079, 0.57332098, -0.0135081, -0.81922001, -0.81365103, 0.10814, -0.57120699], [-105.3577515, -117.52119142, 1014.8770132], [244, 150, 44, 58]]


In [None]:
# Make sure images/{train,val} and labels/{train,val} exist under output_dir
for kind in ("images", "labels"):
    for subset in ("train", "val"):
        os.makedirs(os.path.join(output_dir, f"{kind}/{subset}"), exist_ok=True)

images_dir = "/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/full_data/train/images"

# Collect every .png filename in a stable (sorted) order
image_names = [f for f in os.listdir(images_dir) if f.endswith(".png")]
image_names.sort()

# Keep only the images whose name (without .png) is a key of gt.json
available = []
for png_name in image_names:
    if png_name.replace(".png", "") in gt_data:
        available.append(png_name)
print(f"✅ Found {len(available)} annotated images out of {len(image_names)}")

# 80% train / 20% val, with a fixed seed so the split is repeatable
split_parts = train_test_split(available, test_size=0.2, random_state=8)
train_imgs, val_imgs = split_parts
print(f"📊 Train: {len(train_imgs)} images — Val: {len(val_imgs)} images")

✅ Found 14220 annotated images out of 14220
📊 Train: 11376 images — Val: 2844 images


In [None]:
import os
import shutil
from tqdm import tqdm
from PIL import Image
from sklearn.model_selection import train_test_split
import cv2

# === Paths ===
# NOTE: from this cell on, base_dir points at the YOLO output folder.
base_dir = "/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/yolo_data"
for subdir in ("images/train", "images/val", "labels/train", "labels/val"):
    os.makedirs(os.path.join(base_dir, subdir), exist_ok=True)

# === Train/Val split ===
# Same parameters and seed on every run, so the partition is reproducible.
split_parts = train_test_split(available, test_size=0.2, random_state=8)
train_imgs, val_imgs = split_parts
print(f"📊 Train: {len(train_imgs)} images — Val: {len(val_imgs)} images")

def convert_bbox(bbox, img_w, img_h):
    """Convert a pixel-space box into a normalized YOLO box.

    The box is clipped to the image rectangle before normalization, so every
    returned value lies in [0, 1] even when the annotated box reaches past an
    image border. Boxes that are fully inside the image give exactly the same
    result as the plain ``(x + w / 2) / img_w`` style conversion.

    Args:
        bbox: Sequence ``(x, y, w, h)`` in pixels, where ``(x, y)`` is the
            top-left corner and ``(w, h)`` the box size.
        img_w: Image width in pixels; must be positive.
        img_h: Image height in pixels; must be positive.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        matching image dimension.

    Raises:
        ValueError: If ``img_w`` or ``img_h`` is not positive.
    """
    if img_w <= 0 or img_h <= 0:
        raise ValueError(f"Image dimensions must be positive, got {img_w}x{img_h}")

    x, y, w, h = bbox

    # Clamp both corners into the image so the box cannot extend outside it.
    x_min = min(max(x, 0), img_w)
    y_min = min(max(y, 0), img_h)
    x_max = min(max(x + w, 0), img_w)
    y_max = min(max(y + h, 0), img_h)

    # Size of the clipped box; floored at 0 for boxes with no visible area.
    w = max(x_max - x_min, 0)
    h = max(y_max - y_min, 0)

    x_center = (x_min + w / 2) / img_w
    y_center = (y_min + h / 2) / img_h
    return x_center, y_center, w / img_w, h / img_h

# === Generate YOLO labels and copy files ===
# For each split: look up the annotation, write a one-line YOLO label file,
# then copy the image next to it. Problem samples are reported and skipped.
for split, image_list in (("train", train_imgs), ("val", val_imgs)):
    print(f"📁 Processing {split} set: {len(image_list)} images")
    for name in tqdm(image_list, desc=f"🔄 {split}"):
        # gt.json keys are the image filenames without the .png extension
        key = name.replace(".png", "")
        if key not in gt_data:
            print(f"⚠️ Missing key: {key}")
            continue

        # Third element of the annotation is the bounding box
        bbox = gt_data[key][2]

        # The part of the key before the first underscore is the class folder
        folder = key.split("_")[0]
        if folder in class_map:
            class_id = class_map[folder]
        else:
            print(f"⚠️ Skipping {key} — unknown class {folder}")
            continue

        # Decode the image: confirms it is readable and gives its size
        img_path = os.path.join(images_dir, name)
        img = cv2.imread(img_path)
        if img is None:
            print(f"⚠️ Unreadable image: {img_path}")
            continue
        img_h, img_w = img.shape[:2]

        # Write the label: "<class> <x_center> <y_center> <width> <height>"
        x_center, y_center, norm_w, norm_h = convert_bbox(bbox, img_w, img_h)
        label_line = f"{class_id} {x_center:.6f} {y_center:.6f} {norm_w:.6f} {norm_h:.6f}\n"
        label_path = os.path.join(base_dir, f"labels/{split}/{name.replace('.png', '.txt')}")
        with open(label_path, "w") as label_file:
            label_file.write(label_line)

        # Copy the image into the matching images/<split> folder
        dst_img_path = os.path.join(base_dir, f"images/{split}/{name}")
        shutil.copyfile(img_path, dst_img_path)

📊 Train: 11376 images — Val: 2844 images
📁 Processing train set: 11376 images


🔄 train: 100%|██████████| 11376/11376 [05:44<00:00, 33.02it/s]


📁 Processing val set: 2844 images


🔄 val: 100%|██████████| 2844/2844 [01:24<00:00, 33.59it/s]


In [None]:
import yaml

yaml_path = "/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/yolo_data/data.yaml"

# Class names; the position in this list is the class index in the dataset config,
# so the order has to agree with the class IDs written to the label files.
class_names = [
    "ape", "benchvise", "camera", "can", "cat", "driller", "duck",
    "eggbox", "glue", "holepuncher", "iron", "lamp", "phone"
]

# Dataset description: image folders, class count and class names
data_yaml = dict(
    train="/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/yolo_data/images/train",
    val="/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/yolo_data/images/val",
    nc=len(class_names),  # 13 classes
    names=class_names,
)

with open(yaml_path, "w") as yaml_file:
    yaml.dump(data_yaml, yaml_file)

print(f"✅ data.yaml created at {yaml_path}")

✅ data.yaml created at /content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/yolo_data/data.yaml


In [7]:
import os

# Root folders of the generated YOLO dataset. validate_split() joins the split
# name ("train" / "val") onto each of these to find the files to compare.
image_dir = "/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/yolo_data/images"
label_dir = "/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/yolo_data/labels"

def validate_split(split, image_root=None, label_root=None):
    """Report whether every image in a split has a label file and vice versa.

    Files are matched by name without extension: ``<root>/<split>/*.png`` on
    the image side against ``<root>/<split>/*.txt`` on the label side. The
    result is printed; nothing is returned.

    Args:
        split: Name of the split sub-folder, e.g. ``"train"`` or ``"val"``.
        image_root: Folder containing the per-split image folders. Defaults to
            the notebook-level ``image_dir``.
        label_root: Folder containing the per-split label folders. Defaults to
            the notebook-level ``label_dir``.

    Raises:
        FileNotFoundError: If either split folder does not exist
            (raised by ``os.listdir``).
    """
    print(f"\n🔍 Checking {split.upper()} split:")

    # Only fall back to the notebook globals when no explicit root is passed,
    # so the check can also be pointed at any other dataset folder.
    img_path = os.path.join(image_dir if image_root is None else image_root, split)
    lbl_path = os.path.join(label_dir if label_root is None else label_root, split)

    # Strip the 4-character extension (".png" / ".txt") to get comparable stems.
    image_files = sorted(f[:-4] for f in os.listdir(img_path) if f.endswith(".png"))
    label_files = sorted(f[:-4] for f in os.listdir(lbl_path) if f.endswith(".txt"))

    image_stems = set(image_files)
    label_stems = set(label_files)
    missing_labels = sorted(image_stems - label_stems)
    missing_images = sorted(label_stems - image_stems)

    print(f"✅ Total images: {len(image_files)}")
    print(f"✅ Total labels: {len(label_files)}")

    # Show at most five offending names, with an ellipsis marker when truncated.
    if missing_labels:
        print(f"⚠️ {len(missing_labels)} image(s) missing labels:")
        print(missing_labels[:5], "..." if len(missing_labels) > 5 else "")
    else:
        print("✅ All images have labels.")

    if missing_images:
        print(f"⚠️ {len(missing_images)} label(s) missing images:")
        print(missing_images[:5], "..." if len(missing_images) > 5 else "")
    else:
        print("✅ All labels have images.")

# Check the train and val splits in turn
for split_name in ("train", "val"):
    validate_split(split_name)


🔍 Checking TRAIN split:
✅ Total images: 11376
✅ Total labels: 11376
✅ All images have labels.
✅ All labels have images.

🔍 Checking VAL split:
✅ Total images: 2844
✅ Total labels: 2844
✅ All images have labels.
✅ All labels have images.
