In [8]:
import os
import shutil
from sklearn.model_selection import train_test_split


In [None]:

# Define paths
synthetic_images_folder = "/home/sala/data/synthetic_images"
real_images_folder = "/home/sala/data/soil_images"
output_folder = "/home/sala/data/organized_data"

# Create output directories (exist_ok keeps this safe to run more than once)
os.makedirs(os.path.join(output_folder, "train"), exist_ok=True)
os.makedirs(os.path.join(output_folder, "val"), exist_ok=True)
os.makedirs(os.path.join(output_folder, "test"), exist_ok=True)

# Split synthetic images
# os.listdir returns names in arbitrary order, and train_test_split shuffles
# relative to the order it is given. Sorting first makes the fixed
# random_state yield the same split on every machine and every run.
synthetic_images = sorted(
    f for f in os.listdir(synthetic_images_folder) if f.endswith(('.jpg', '.png'))
)
# 80% train; the remaining 20% is halved into validation and test.
train_synthetic, temp_synthetic = train_test_split(synthetic_images, test_size=0.2, random_state=42)
val_synthetic, test_synthetic = train_test_split(temp_synthetic, test_size=0.5, random_state=42)

# Move synthetic images to respective folders
def move_images(image_list, source_folder, target_folder):
    """
    Move the named files from one folder into another.

    Parameters:
        image_list (list): File names (not full paths) located in source_folder.
        source_folder (str): Folder that currently holds the files.
        target_folder (str): Folder to move the files into; created if missing.
    """
    # shutil.move cannot place a file inside a directory that does not exist,
    # so create the destination here rather than relying on the caller.
    os.makedirs(target_folder, exist_ok=True)

    for img in image_list:
        src_path = os.path.join(source_folder, img)
        dst_path = os.path.join(target_folder, img)
        shutil.move(src_path, dst_path)

# Send each synthetic split to the output sub-folder that carries its name
for split_name, split_files in (
    ("train", train_synthetic),
    ("val", val_synthetic),
    ("test", test_synthetic),
):
    move_images(split_files, synthetic_images_folder, os.path.join(output_folder, split_name))

# All real-world images are held out and go to the test folder as well.
# NOTE(review): real and synthetic test images end up in the same folder; a
# real image whose file name equals a synthetic test image's name would map to
# the same destination path - confirm the two sets cannot share names.
real_images = [name for name in os.listdir(real_images_folder) if name.endswith(('.jpg', '.png'))]
move_images(real_images, real_images_folder, os.path.join(output_folder, "test"))

In [1]:
import cv2

def resize_image(image_path, output_size=(256, 256)):
    """
    Load an image from disk and resize it to a standard size.

    Parameters:
        image_path (str): Path to the input image.
        output_size (tuple): Target size (width, height).

    Returns:
        np.ndarray: Resized image.

    Raises:
        OSError: If the image at image_path cannot be read.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; without this check the failure would only surface as
    # an opaque error inside cv2.resize that does not name the file.
    if image is None:
        raise OSError("Could not read image: {!r}".format(image_path))
    return cv2.resize(image, output_size)

In [2]:
def convert_to_lab(image):
    """
    Re-express a BGR image in the LAB color space.

    Parameters:
        image (np.ndarray): Source image with channels in BGR order.

    Returns:
        np.ndarray: The same image converted to LAB.
    """
    return cv2.cvtColor(image, cv2.COLOR_BGR2LAB)

In [3]:
def normalize_lighting(image, clip_limit=3.0, tile_grid_size=(8, 8)):
    """
    Normalize lighting using CLAHE (Contrast-Limited Adaptive Histogram Equalization).

    Only the first (L) channel is equalized; the a and b channels are merged
    back unchanged.

    Parameters:
        image (np.ndarray): Input image in LAB color space.
        clip_limit (float): Contrast clip limit passed to cv2.createCLAHE.
        tile_grid_size (tuple): Tile grid size passed to cv2.createCLAHE.

    Returns:
        np.ndarray: Image with normalized lighting, still in LAB.
    """
    # Split LAB channels
    lightness, a, b = cv2.split(image)

    # Apply CLAHE to the L channel only
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    lightness_normalized = clahe.apply(lightness)

    # Merge channels back in the original L, a, b order
    return cv2.merge((lightness_normalized, a, b))

In [4]:
def denoise_image(image, kernel_size=(5, 5), sigma=0):
    """
    Denoise an image using Gaussian blur.

    Parameters:
        image (np.ndarray): Input image in BGR or LAB format.
        kernel_size (tuple): Gaussian kernel size passed to cv2.GaussianBlur.
        sigma (float): Gaussian standard deviation (sigmaX) passed to
            cv2.GaussianBlur; the default of 0 matches the original call.

    Returns:
        np.ndarray: Denoised image.
    """
    return cv2.GaussianBlur(image, kernel_size, sigma)

In [6]:
def preprocess_image(image_path, output_size=(256, 256)):
    """
    Run the full preprocessing chain on a single image file.

    The image is loaded and resized, converted to LAB, lighting-normalized,
    and finally denoised.

    Parameters:
        image_path (str): Path to the input image.
        output_size (tuple): Target size (width, height).

    Returns:
        np.ndarray: Preprocessed image in LAB color space.
    """
    # Loading and resizing is the only stage that needs the path and the size
    current = resize_image(image_path, output_size)

    # Every later stage maps image -> image, so apply them in order
    for stage in (convert_to_lab, normalize_lighting, denoise_image):
        current = stage(current)

    return current

In [11]:
def preprocess_images_in_folder(input_folder, output_folder, output_size=(256, 256)):
    """
    Preprocess all images in a folder and save them to an output folder.

    Files whose names end in '.jpg' or '.png' are processed in sorted name
    order and written to output_folder under the same file name.

    Parameters:
        input_folder (str): Path to the folder containing input images.
        output_folder (str): Path to the folder to save preprocessed images;
            created if it does not exist.
        output_size (tuple): Target size (width, height).

    Raises:
        OSError: If cv2.imwrite reports that an output file was not written.
    """
    os.makedirs(output_folder, exist_ok=True)
    # os.listdir order is arbitrary; sorting makes the processing order stable.
    image_files = sorted(f for f in os.listdir(input_folder) if f.endswith(('.jpg', '.png')))

    for image_file in image_files:
        input_path = os.path.join(input_folder, image_file)
        output_path = os.path.join(output_folder, image_file)

        # Preprocess the image
        preprocessed_image = preprocess_image(input_path, output_size)

        # Save the preprocessed image. cv2.imwrite signals failure through a
        # False return value, so check it instead of silently losing the file.
        # NOTE(review): preprocess_image is documented to return LAB data,
        # while cv2.imwrite treats 3-channel arrays as BGR - confirm that
        # whatever reads these files expects LAB values.
        if not cv2.imwrite(output_path, preprocessed_image):
            raise OSError("Could not write image: {!r}".format(output_path))

# Example usage: preprocess the validation split and write the results to a
# separate "preprocessed_val" folder under organized_data.
# NOTE: this rebinds the module-level name `output_folder`, which the
# data-organisation cell sets to the organized_data root. Re-run that cell
# before using `output_folder` as the dataset root again.
input_folder = "/home/sala/data/organized_data/val"
output_folder = "/home/sala/data/organized_data/preprocessed_val"
preprocess_images_in_folder(input_folder, output_folder)