In [1]:
import os
import sys
import shutil
from pathlib import Path

def _highest_numeric_stem(directory: Path, pattern: str) -> int:
    """Return the largest all-digit file stem matching ``pattern`` in ``directory`` (0 if none)."""
    highest = 0
    for entry in directory.glob(pattern):
        stem = entry.stem
        # Only plain ASCII digit stems ("000123") take part in the numbering;
        # anything else (e.g. "classes.txt") is ignored.
        if stem.isascii() and stem.isdigit():
            highest = max(highest, int(stem))
    return highest


def merge_test_folder(dataset_base_path: str, test_folder_name: str):
    """
    Merges a temporary test folder into the main dataset, renaming files sequentially
    for 1-indexed file systems.

    For each of the 'train' and 'valid' sets, every '*.jpg' in
    ``<set>/images/<test_folder_name>`` that has a same-stem '.txt' in
    ``<set>/labels/<test_folder_name>`` is moved into ``<set>/images`` and
    ``<set>/labels`` under a new zero-padded six-digit name. Images without a
    label are left in place with a warning. Source folders are removed
    afterwards if they ended up empty.

    Numbering continues after the highest number already used in either
    destination folder (never lower than the number of existing images), and a
    name that already exists is never reused, so existing files cannot be
    overwritten even when the existing numbering has gaps.

    Args:
        dataset_base_path: Root of the dataset containing the 'train' and 'valid' folders.
        test_folder_name: Name of the sub-folder (inside both 'images' and 'labels')
            whose contents are merged into the parent folders.

    Returns:
        None. Progress and problems are reported via ``print``.
    """
    base_path = Path(dataset_base_path)
    if not base_path.is_dir():
        print(f"Error: Dataset base path '{base_path}' does not exist.")
        return

    print(f"Starting merge process for test folder: '{test_folder_name}'")
    print("-" * 50)

    for set_type in ['train', 'valid']:
        print(f"\nProcessing '{set_type}' set...")

        dest_img_dir = base_path / set_type / 'images'
        dest_lbl_dir = base_path / set_type / 'labels'
        src_img_dir = dest_img_dir / test_folder_name
        src_lbl_dir = dest_lbl_dir / test_folder_name

        if not src_img_dir.is_dir():
            print(f"Source image folder not found, skipping: {src_img_dir}")
            continue
        if not src_lbl_dir.is_dir():
            print(f"Source label folder not found, skipping: {src_lbl_dir}")
            continue

        existing_count = len(list(dest_img_dir.glob('*.jpg')))
        # A plain file count is only a safe starting point when the existing names
        # are exactly 1..N. Use the highest number actually in use (images and
        # labels) so a gap in the numbering can never cause an overwrite.
        last_used = max(
            existing_count,
            _highest_numeric_stem(dest_img_dir, '*.jpg'),
            _highest_numeric_stem(dest_lbl_dir, '*.txt'),
        )
        next_index = last_used + 1
        print(f"'{dest_img_dir.name}' contains {existing_count} images. New files will start from index {next_index}.")

        source_images = sorted(src_img_dir.glob('*.jpg'))

        if not source_images:
            print("No images found in the source directory.")

        for src_img_path in source_images:
            src_lbl_path = src_lbl_dir / f"{src_img_path.stem}.txt"

            if not src_lbl_path.exists():
                print(f"  Warning: Label file not found for {src_img_path.name}. Skipping this image.")
                # No index is consumed for a skipped image, so numbering stays contiguous.
                continue

            # Last line of defence: shutil.move silently replaces an existing file
            # on POSIX, so step past any name that is already taken.
            while True:
                new_name_base = f"{next_index:06d}"
                dest_img_path = dest_img_dir / f"{new_name_base}.jpg"
                dest_lbl_path = dest_lbl_dir / f"{new_name_base}.txt"
                if not (dest_img_path.exists() or dest_lbl_path.exists()):
                    break
                next_index += 1

            print(f"  Moving {src_img_path.name} -> {dest_img_path.name}")
            shutil.move(src_img_path, dest_img_path)
            shutil.move(src_lbl_path, dest_lbl_path)
            next_index += 1

        try:
            if not any(src_img_dir.iterdir()):
                print(f"Cleaning up empty source folder: {src_img_dir}")
                src_img_dir.rmdir()
            if not any(src_lbl_dir.iterdir()):
                print(f"Cleaning up empty source folder: {src_lbl_dir}")
                src_lbl_dir.rmdir()
        except OSError as e:
            print(f"Error removing source directory: {e}. It might not be empty.")

    print("\n" + "-" * 50)
    print("Merge process completed successfully.")




In [None]:

dataset_path = "/fs/nexus-scratch/hwahed/dlcDatasetMaker/dataset"
folder_name = "arabeql1"

# --- SAFETY CHECK ---
# Show every source folder that the merge below will read from, one per line,
# so the targets can be checked before files are moved on disk.
print("\nThis script will move and rename files from:")
print("\n".join(
    f"  - {Path(dataset_path) / split / kind / folder_name}"
    for split in ("train", "valid")
    for kind in ("images", "labels")
))
print("\nInto the parent directories and will remove the source folders if empty.")

# The interactive confirmation is currently disabled; uncomment the lines
# below to require an explicit "y" before the merge runs.
# confirmation = input("Are you sure you want to proceed? (y/n): ")
# if confirmation.lower() != 'y':
#     print("Operation cancelled.")
#     sys.exit(0)

merge_test_folder(dataset_path, folder_name)


This script will move and rename files from:
  - /fs/nexus-scratch/hwahed/dlcDatasetMaker/dataset/train/images/abeq2
  - /fs/nexus-scratch/hwahed/dlcDatasetMaker/dataset/train/labels/abeq2
  - /fs/nexus-scratch/hwahed/dlcDatasetMaker/dataset/valid/images/abeq2
  - /fs/nexus-scratch/hwahed/dlcDatasetMaker/dataset/valid/labels/abeq2

Into the parent directories and will remove the source folders if empty.
Starting merge process for test folder: 'abeq2'
--------------------------------------------------

Processing 'train' set...
'images' contains 264 images. New files will start from index 265.
  Moving 000001.jpg -> 000265.jpg
  Moving 000002.jpg -> 000266.jpg
  Moving 000003.jpg -> 000267.jpg
  Moving 000004.jpg -> 000268.jpg
  Moving 000005.jpg -> 000269.jpg
  Moving 000006.jpg -> 000270.jpg
  Moving 000007.jpg -> 000271.jpg
  Moving 000008.jpg -> 000272.jpg
  Moving 000009.jpg -> 000273.jpg
  Moving 000010.jpg -> 000274.jpg
  Moving 000011.jpg -> 000275.jpg
  Moving 000012.jpg -> 00