In [3]:
import os
import pandas as pd
import shutil

def organize_images_by_erosion(csv_path, image_folder, output_base_folder):
    """
    Move '.nii.gz' files into per-label subfolders based on a CSV lookup.

    The CSV must contain an 'ID' column and a 'c_erosion' column. Each
    '.nii.gz' file directly inside image_folder is matched to an ID by its
    name with the '.nii.gz' suffix removed. Matched files are moved to
    '<output_base_folder>/erosion_0' when the label equals 0 and to
    '<output_base_folder>/erosion_1' for any other label. Files without a
    matching ID, or whose label is missing in the CSV, are reported and
    left where they are.

    Args:
        csv_path: Path of the CSV file with the 'ID' and 'c_erosion' columns.
        image_folder: Folder scanned (non-recursively) for '.nii.gz' files.
        output_base_folder: Folder under which 'erosion_0' and 'erosion_1'
            are created if they do not exist yet.
    """
    nifti_suffix = '.nii.gz'

    # Read the CSV file
    df = pd.read_csv(csv_path)

    # Create output folders
    erosion_0_folder = os.path.join(output_base_folder, 'erosion_0')
    erosion_1_folder = os.path.join(output_base_folder, 'erosion_1')
    os.makedirs(erosion_0_folder, exist_ok=True)
    os.makedirs(erosion_1_folder, exist_ok=True)

    # Build the lookup with IDs as stripped strings: file stems are always
    # strings, so IDs that pandas parsed as numbers (or that carry stray
    # whitespace) would otherwise never match.
    ids = df['ID'].astype(str).str.strip()
    erosion_dict = dict(zip(ids, df['c_erosion']))

    # Process image files
    for filename in os.listdir(image_folder):
        if not filename.lower().endswith(nifti_suffix):
            continue

        # Remove only the '.nii.gz' suffix so IDs containing dots stay intact
        base_name = filename[:-len(nifti_suffix)].strip()
        if base_name not in erosion_dict:
            # Fall back to the earlier rule (text before the first dot) so
            # files that matched under it keep matching.
            base_name = filename.split('.')[0].strip()
        if base_name not in erosion_dict:
            print(f"No matching entry found for {filename}")
            continue

        label = erosion_dict[base_name]
        if pd.isna(label):
            # A missing label compares unequal to 0 and would otherwise be
            # filed under erosion_1 by accident.
            print(f"Missing c_erosion label for {filename}; file left in place")
            continue

        if label == 0:
            folder_name, dest_folder = 'erosion_0', erosion_0_folder
        else:
            folder_name, dest_folder = 'erosion_1', erosion_1_folder

        # Move the file
        shutil.move(
            os.path.join(image_folder, filename),
            os.path.join(dest_folder, filename),
        )
        print(f"Moved {filename} to {folder_name}")

    print("File organization complete.")

# Usage: sort the training volumes into the erosion_0 / erosion_1 folders
csv_path = r"D:\Kananat\Classification_1.csv"
image_folder = r"D:\Kananat\_dataset\train"
output_base_folder = r"D:\Kananat\_2d_dataset\train"

organize_images_by_erosion(
    csv_path=csv_path,
    image_folder=image_folder,
    output_base_folder=output_base_folder,
)

Moved 47-16872 L.nii.gz to erosion_1
Moved 47-16872 R.nii.gz to erosion_1
Moved 47-22136 L.nii.gz to erosion_1
Moved 47-4881 L 2014.nii.gz to erosion_1
Moved 47-4881 L 2018.nii.gz to erosion_1
Moved 47-4881 R 2014.nii.gz to erosion_1
Moved 47-4881 R 2018.nii.gz to erosion_1
Moved 48-26453 L.nii.gz to erosion_0
Moved 48-5955 L.nii.gz to erosion_1
Moved 48-5955 R.nii.gz to erosion_1
Moved 49-18165 L.nii.gz to erosion_1
Moved 49-3614 L.nii.gz to erosion_1
Moved 49-3614 R.nii.gz to erosion_1
Moved 50-30909 R.nii.gz to erosion_0
Moved 51-26987 L.nii.gz to erosion_1
Moved 51-26987 R.nii.gz to erosion_1
Moved 51-28114 R.nii.gz to erosion_0
Moved 51-3282 R.nii.gz to erosion_0
Moved 52-11116 L.nii.gz to erosion_0
Moved 52-11116 R.nii.gz to erosion_0
Moved 52-15073 R.nii.gz to erosion_1
Moved 52-15242 L.nii.gz to erosion_1
Moved 52-18852 L.nii.gz to erosion_1
Moved 52-18852 R.nii.gz to erosion_0
Moved 52-27708 R.nii.gz to erosion_1
Moved 52-37193 R.nii.gz to erosion_1
Moved 53-12918 L.nii.gz to 

In [9]:
import os
import cv2
import albumentations as A
import numpy as np
from tqdm import tqdm

def create_medical_augmentation():
    """
    Build the albumentations pipeline used to augment medical images.

    Returns:
        An A.Compose object chaining three stages, in this order:
        a shift/scale/rotate transform (p=0.5), one of Gaussian noise or
        Gaussian blur (p=0.3), and one of random brightness/contrast or
        CLAHE (p=0.3).
    """
    # Stage 1: spatial transform
    spatial_stage = A.ShiftScaleRotate(
        shift_limit=0.0625,
        scale_limit=0.1,
        rotate_limit=45,
        border_mode=cv2.BORDER_CONSTANT,
        p=0.5,
    )

    # Stage 2: exactly one of noise / blur when the stage fires
    noise_or_blur_stage = A.OneOf(
        [
            A.GaussNoise(var_limit=(10.0, 50.0), p=1.0),
            A.GaussianBlur(blur_limit=(3, 7), p=1.0),
        ],
        p=0.3,
    )

    # Stage 3: exactly one of brightness-contrast / CLAHE when the stage fires
    intensity_stage = A.OneOf(
        [
            A.RandomBrightnessContrast(
                brightness_limit=0.2,
                contrast_limit=0.2,
                p=1.0,
            ),
            A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8), p=1.0),
        ],
        p=0.3,
    )

    return A.Compose([spatial_stage, noise_or_blur_stage, intensity_stage])

def process_folder(input_folder, output_folder, num_augmentations=3):
    """
    Write augmented copies of every JPEG image found in a folder.

    Each '.jpg' / '.jpeg' file (case-insensitive) directly inside
    input_folder is read with OpenCV, converted from BGR to RGB, passed
    through the pipeline from create_medical_augmentation()
    num_augmentations times, converted back to BGR and saved as
    '<original stem>_aug_<k>.jpg' (k starting at 1) in output_folder.
    Images that cannot be read, and outputs that cannot be written, are
    reported and skipped.

    Args:
        input_folder: Folder containing the source images.
        output_folder: Folder receiving the augmented images; created if
            it does not exist.
        num_augmentations: Number of augmented versions to write per image.
    """
    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Collect the JPEG files; sorted so the processing order is stable
    # across runs and platforms (os.listdir order is arbitrary).
    image_files = sorted(
        f for f in os.listdir(input_folder)
        if f.lower().endswith(('.jpg', '.jpeg'))
    )

    # Create augmentation pipeline
    transform = create_medical_augmentation()

    # Process each image
    for image_file in tqdm(image_files, desc="Processing images"):
        # Read image
        image_path = os.path.join(input_folder, image_file)
        image = cv2.imread(image_path)
        if image is None:
            # tqdm.write keeps the message from breaking the progress bar
            tqdm.write(f"Failed to read image: {image_file}")
            continue

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Get filename without extension
        filename = os.path.splitext(image_file)[0]

        # Create augmented versions
        for i in range(num_augmentations):
            # Apply augmentation
            augmented = transform(image=image)['image']

            # Convert back to BGR for saving
            augmented_bgr = cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR)

            # Save augmented image
            output_path = os.path.join(
                output_folder,
                f"{filename}_aug_{i+1}.jpg"
            )
            # cv2.imwrite signals failure through its return value rather
            # than an exception, so check it instead of losing files silently.
            if not cv2.imwrite(output_path, augmented_bgr):
                tqdm.write(f"Failed to write image: {output_path}")

In [11]:
# Example: augment the erosion_1 training images, five variants per source image
input_folder = r"D:\Kananat\_dataset_2d\train\erosion_1"  # folder the images are read from
output_folder = r"D:\Kananat\_dataset_2d\train_augmented\erosion_1"  # folder the results are written to

process_folder(input_folder, output_folder, num_augmentations=5)

Processing images: 100%|██████████| 1519/1519 [00:22<00:00, 68.79it/s]
