In [1]:
import os
import shutil
from pathlib import Path
import glob

def organize_images_by_dataset_structure(dataset_path, images_path, output_path):
    """
    Copy PNG images into an output tree that mirrors the subsets of a NIfTI dataset.

    Every sub-directory of ``dataset_path`` is treated as a subset. For each
    ``<patient>.nii.gz`` file located directly inside a subset, every file named
    ``<patient>_*.png`` anywhere below ``images_path`` is copied with
    ``shutil.copy2`` into ``output_path/<subset>/``. The copies are flat: images
    that share a file name overwrite each other in the destination.

    Args:
        dataset_path (str): Path to the original dataset with .nii.gz files
        images_path (str): Path to the folder containing PNG images
        output_path (str): Path where the new organized dataset will be created

    Returns:
        dict: ``{subset_name: {patient_name: copied_count}}`` so callers can
        assert on the result instead of reading the printed log.

    Raises:
        FileNotFoundError: If ``dataset_path`` or ``images_path`` does not exist.
        NotADirectoryError: If either of them exists but is not a directory.
    """
    # Convert to Path objects for easier manipulation
    dataset_path = Path(dataset_path)
    images_path = Path(images_path)
    output_path = Path(output_path)

    # Fail fast on bad inputs: a missing images folder would otherwise just
    # produce an empty result that still reports "complete".
    for label, folder in (("dataset_path", dataset_path), ("images_path", images_path)):
        if not folder.exists():
            raise FileNotFoundError(f"{label} does not exist: {folder}")
        if not folder.is_dir():
            raise NotADirectoryError(f"{label} is not a directory: {folder}")

    # Create output directory if it doesn't exist
    output_path.mkdir(parents=True, exist_ok=True)
    output_root = output_path.resolve()

    nifti_suffix = ".nii.gz"
    summary = {}

    # Sorted so the log and the returned summary are deterministic.
    for subset_dir in sorted(dataset_path.iterdir()):
        if not subset_dir.is_dir():
            continue

        subset_name = subset_dir.name
        print(f"Processing {subset_name} subset...")

        # Create corresponding directory in output
        output_subset_dir = output_path / subset_name
        output_subset_dir.mkdir(parents=True, exist_ok=True)

        subset_counts = summary.setdefault(subset_name, {})

        nii_files = sorted(subset_dir.glob(f"*{nifti_suffix}"))
        if not nii_files:
            # Make the "nothing to do" case visible instead of silently skipping it.
            print(f"  Warning: no {nifti_suffix} files found directly inside {subset_dir}")
            continue

        for nii_file in nii_files:
            # Strip only the trailing extension (by length, so the match made by
            # glob is trusted); a '.nii' inside the name must be kept.
            patient_name = nii_file.name[: -len(nifti_suffix)]
            if not patient_name:
                continue
            print(f"  Looking for images for {patient_name}...")

            # Escape glob metacharacters so names such as "p[1]" match literally.
            pattern = f"{glob.escape(patient_name)}_*.png"

            # Materialise the matches before copying so files created by this
            # run can never be picked up while the tree is still being walked.
            matching_images = sorted(images_path.rglob(pattern))

            copied_count = 0
            for image_file in matching_images:
                if not image_file.is_file():
                    continue
                # Skip copies from an earlier run when the output folder lives
                # inside the images folder.
                if output_root in image_file.resolve().parents:
                    continue

                destination = output_subset_dir / image_file.name
                try:
                    shutil.copy2(image_file, destination)
                except OSError as e:
                    # Best effort: report the failure and continue with the rest.
                    print(f"    Error copying {image_file.name}: {e}")
                else:
                    copied_count += 1
                    print(f"    Copied: {image_file.name}")

            print(f"    Found and copied {copied_count} images for {patient_name}")
            subset_counts[patient_name] = copied_count

    print(f"\nDataset organization complete! New dataset created at: {output_path}")
    return summary

def verify_organization(output_path):
    """
    Print a tree-style summary of an organized dataset folder.

    For every sub-directory of ``output_path`` (in sorted order) the number of
    ``*.png`` files directly inside it is printed, followed by up to three
    example file names and, when there are more, a line stating how many
    further images exist.

    Args:
        output_path (str): Root folder of the organized dataset.

    Returns:
        None. The summary is written to standard output.
    """
    output_path = Path(output_path)
    print("\nNew dataset structure:")
    print(f"{output_path.name}/")

    max_examples = 3

    for subset_dir in sorted(output_path.iterdir()):
        if not subset_dir.is_dir():
            continue

        # Glob once and reuse the list for both the count and the examples.
        images = sorted(subset_dir.glob("*.png"))
        print(f"├── {subset_dir.name}/ ({len(images)} images)")

        examples = images[:max_examples]
        remaining = len(images) - len(examples)

        for index, img in enumerate(examples):
            # Only the final line of a subset may use the closing connector;
            # when a "... and N more" line follows, the examples all stay open.
            closes_branch = remaining == 0 and index == len(examples) - 1
            connector = "│   └──" if closes_branch else "│   ├──"
            print(f"{connector} {img.name}")

        if remaining > 0:
            print(f"│   └── ... and {remaining} more images")

In [2]:
# Original 3D dataset: each sub-folder is treated as a subset and is searched
# for .nii.gz files whose names identify the patients.
dataset_path = r"d:\Kananat\Data\training_dataset_3D\training_dataset_genSclerosis"
# Folder that is searched recursively for "<patient>_*.png" images.
# NOTE(review): this points at the osteophyte 2D dataset while the split comes
# from genSclerosis - presumably the same slices are being reused; confirm.
images_path = r"D:\Kananat\Data\training_dataset_2D\training_dataset_osteophyte"
# Destination root; one sub-folder per subset is created beneath it.
output_path = r"D:\Kananat\Data\training_dataset_2D\training_dataset_genSclerosis"

# Organize the images
organize_images_by_dataset_structure(dataset_path, images_path, output_path)

# Verify the organization
# If every subset reports 0 images, check that the .nii.gz files sit directly
# inside each subset folder and that PNG names start with "<patient>_".
verify_organization(output_path)

Processing test subset...
Processing train subset...
Processing val subset...

Dataset organization complete! New dataset created at: D:\Kananat\Data\training_dataset_2D\training_dataset_genSclerosis

New dataset structure:
training_dataset_genSclerosis/
├── test/ (0 images)
├── train/ (0 images)
├── val/ (0 images)
