In [1]:
import os
import shutil
import random

# Seed the RNG so the shuffle below yields the same permutation on every run.
random.seed(2025)

# Define source and destination directories
source_root = "dataset/our-data/v1.3"  # Original dataset path
train_root = f"{source_root}/train"
test_root = f"{source_root}/test"

# Create train and test directories (each with depth/ and rgb/ subfolders).
# NOTE: files copied by an earlier run are never removed. If a previous run
# used a different ordering or ratio, clear train/ and test/ first, otherwise
# the same image can end up in both splits.
for split in ["train", "test"]:
    for subfolder in ["depth", "rgb"]:
        os.makedirs(os.path.join(f"{source_root}/{split}", subfolder), exist_ok=True)

# Collect (rgb_path, depth_path, scene) triples for every depth image that has
# a matching RGB image.
image_pairs = []
depth_root = os.path.join(source_root, "depth")
rgb_root = os.path.join(source_root, "rgb")

# os.listdir() returns entries in arbitrary, filesystem-dependent order, so
# the listings are sorted: a seeded shuffle is only reproducible when the
# list being shuffled starts in the same order every time.
for scene in sorted(os.listdir(depth_root)):
    depth_scene_path = os.path.join(depth_root, scene)
    rgb_scene_path = os.path.join(rgb_root, scene)

    # Only scenes present as directories on both the depth and RGB side count.
    if os.path.isdir(depth_scene_path) and os.path.isdir(rgb_scene_path):
        for img_name in sorted(os.listdir(depth_scene_path)):
            depth_img_path = os.path.join(depth_scene_path, img_name)
            # The RGB counterpart is looked up by replacing every "depth"
            # substring of the file name with "rgb".
            rgb_img_path = os.path.join(rgb_scene_path, img_name.replace("depth", "rgb"))

            if os.path.exists(rgb_img_path):  # Ensure corresponding RGB image exists
                image_pairs.append((rgb_img_path, depth_img_path, scene))

# Shuffle and split
random.shuffle(image_pairs)
split_ratio = 0.8  # 80% train, 20% test
split_idx = int(len(image_pairs) * split_ratio)  # truncates toward zero

train_pairs = image_pairs[:split_idx]
test_pairs = image_pairs[split_idx:]

# Function to move files
def move_files(pairs, destination):
    """Move RGB/depth image pairs into per-scene folders under ``destination``.

    WARNING: this is destructive. Each source file is relocated with
    ``shutil.move``, so it no longer exists at its original path afterwards.
    Use ``copy_files`` to keep the source dataset intact.

    Args:
        pairs: iterable of ``(rgb_path, depth_path, scene)`` tuples.
        destination: root folder of the split; files land in
            ``<destination>/rgb/<scene>/`` and ``<destination>/depth/<scene>/``
            under their original base names.
    """
    for rgb_path, depth_path, scene in pairs:
        # Create subfolders in train/test based on scene name
        os.makedirs(os.path.join(destination, "rgb", scene), exist_ok=True)
        os.makedirs(os.path.join(destination, "depth", scene), exist_ok=True)

        # Actually move the files (previously this copied them, contradicting
        # the function's name and making it a duplicate of copy_files).
        shutil.move(rgb_path, os.path.join(destination, "rgb", scene, os.path.basename(rgb_path)))
        shutil.move(depth_path, os.path.join(destination, "depth", scene, os.path.basename(depth_path)))

# Move train and test data
# move_files(train_pairs, train_root)
# move_files(test_pairs, test_root)

def copy_files(pairs, destination):
    """Copy RGB/depth image pairs into per-scene folders under ``destination``.

    The source files are left in place.

    Args:
        pairs: iterable of ``(rgb_path, depth_path, scene)`` tuples.
        destination: root folder of the split; files are written to
            ``<destination>/rgb/<scene>/`` and ``<destination>/depth/<scene>/``
            under their original base names.
    """
    for rgb_path, depth_path, scene in pairs:
        # (modality folder, source file) in the order they are processed.
        modalities = (("rgb", rgb_path), ("depth", depth_path))

        # Make sure both scene folders exist before anything is copied.
        for folder, _ in modalities:
            os.makedirs(os.path.join(destination, folder, scene), exist_ok=True)

        # Copy each file, keeping its base name.
        for folder, src in modalities:
            target = os.path.join(destination, folder, scene, os.path.basename(src))
            shutil.copy(src, target)

# Populate each split folder from its list of pairs, train first, then test.
for split_pairs, split_dir in ((train_pairs, train_root), (test_pairs, test_root)):
    copy_files(split_pairs, split_dir)

# Report how many pairs went into each split.
print(f"Train set: {len(train_pairs)} pairs, Test set: {len(test_pairs)} pairs")
print("Dataset split complete!")


Train set: 960 pairs, Test set: 240 pairs
Dataset split complete!
