In [62]:

import os
import cv2
import numpy as np
from rembg import remove

In [63]:
# Dataset locations used by the driver cell at the end of the notebook.
# NOTE(review): these are absolute, machine-specific Windows paths; adjust
# them before running the notebook on another machine.

# Input: source images, one sub-folder per class.
original_dataset_path = r"D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset"
# Input: augmented images (presumably produced by a separate augmentation
# step that is not part of this notebook — confirm).
augmented_dataset_path = r"D:\MED_LEAF_ID-1\data\augmented"
# Output: preprocessed copies of the original images.
preprocessed_original_path = r"D:\MED_LEAF_ID-1\data\cnn\original"
# Output: preprocessed copies of the augmented images.
preprocessed_augmented_path = r"D:\MED_LEAF_ID-1\data\cnn\augmented"

In [64]:
def create_directory_structure(dataset_path, preprocessed_path):
    """Mirror the class sub-folders of a dataset under an output root.

    The output root ``preprocessed_path`` is created first (if missing). Then
    every immediate sub-directory of ``dataset_path`` gets an equally named
    directory below ``preprocessed_path``. Plain files directly inside
    ``dataset_path`` are ignored; directories that already exist are left
    untouched.

    Args:
        dataset_path (str): Root folder whose sub-folders are mirrored.
        preprocessed_path (str): Root folder that receives the mirrored
            sub-folders.
    """
    os.makedirs(preprocessed_path, exist_ok=True)

    # Collect the names of all immediate sub-directories first ...
    class_names = [
        name
        for name in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, name))
    ]

    # ... then create the matching output directory for each of them.
    for name in class_names:
        os.makedirs(os.path.join(preprocessed_path, name), exist_ok=True)

In [65]:
# Lazily created rembg session, shared by every call in this notebook so the
# segmentation model is loaded once instead of once per image.
_REMBG_SESSION = None


def _get_rembg_session():
    """Return the shared rembg session, creating it on first use."""
    global _REMBG_SESSION
    if _REMBG_SESSION is None:
        # Local import: the notebook's import cell only brings in `remove`.
        from rembg import new_session
        _REMBG_SESSION = new_session()
    return _REMBG_SESSION


def preprocess_image_for_cnn(image_path, target_size=(224, 224)):
    """
    Load one image and run the full enhancement pipeline for CNN input.

    Pipeline, in order: background removal (rembg), CLAHE contrast
    enhancement on the L channel of LAB, unsharp-mask sharpening, Lanczos
    resize, Otsu-based background whitening, non-local-means denoising and
    scaling to [0, 1].

    Args:
        image_path (str): Path to the input image.
        target_size (tuple): Output size passed to ``cv2.resize`` as
            ``(width, height)`` (default 224x224).

    Returns:
        numpy.ndarray | None: float32 BGR image with values in [0, 1], or
        ``None`` (after printing an error message) when the file cannot be
        read by ``cv2.imread``.
    """
    # Read image (OpenCV returns None instead of raising on failure)
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_path}")
        return None

    # Convert to RGB for background removal
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Remove background, reusing one session for all images; without an
    # explicit session rembg sets up a new one on every call.
    output_image = remove(rgb_image, session=_get_rembg_session())
    # NOTE(review): rembg is expected to return the cut-out with an alpha
    # channel; this conversion keeps only the three colour channels — confirm.
    output_np = cv2.cvtColor(output_image, cv2.COLOR_RGB2BGR)

    # Contrast enhancement: apply CLAHE to the lightness channel only so the
    # colour channels (a, b) stay untouched.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    lab = cv2.cvtColor(output_np, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    l_clahe = clahe.apply(l)

    # Merge the CLAHE-enhanced L channel with the original a and b channels
    enhanced_lab = cv2.merge((l_clahe, a, b))
    enhanced_image = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)

    # Sharpening using unsharp masking: 1.5 * image - 0.5 * blurred image.
    # A kernel size of (0, 0) lets OpenCV derive it from sigma = 2.0.
    gaussian_3 = cv2.GaussianBlur(enhanced_image, (0, 0), 2.0)
    sharpened_image = cv2.addWeighted(enhanced_image, 1.5, gaussian_3, -0.5, 0)

    # Resize with Lanczos interpolation
    resized_image = cv2.resize(sharpened_image, target_size, interpolation=cv2.INTER_LANCZOS4)

    # Build a binary foreground mask; with THRESH_OTSU the threshold value 0
    # is ignored and chosen automatically from the grayscale histogram.
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Morphological close then open to fill small holes and drop small specks
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    # Paint every pixel outside the mask white (modifies resized_image in place)
    resized_image[mask == 0] = (255, 255, 255)

    # Noise reduction: h=10, hColor=10, templateWindowSize=7, searchWindowSize=21
    denoised_image = cv2.fastNlMeansDenoisingColored(resized_image, None, 10, 10, 7, 21)

    # Final normalization to float32 in [0, 1]
    normalized_image = denoised_image.astype('float32') / 255.0

    return normalized_image


In [66]:
def preprocess_and_save_images(dataset_path, preprocessed_path):
    """Preprocess every image of a class-structured dataset and save the results.

    Each immediate sub-directory of ``dataset_path`` is treated as one class.
    Every file inside it is passed to ``preprocess_image_for_cnn``; the result
    is written under the same file name to
    ``preprocessed_path/<class folder>/``. Files that cannot be processed are
    reported on stdout and skipped.

    Args:
        dataset_path (str): Root folder containing one sub-folder per class.
        preprocessed_path (str): Root folder receiving the processed images.
    """
    # sorted() gives a reproducible processing order across runs.
    for class_folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)
        if not os.path.isdir(class_path):
            continue

        save_path = os.path.join(preprocessed_path, class_folder)
        # cv2.imwrite does not create missing directories, so make sure the
        # target exists even if the directory structure was not prepared.
        os.makedirs(save_path, exist_ok=True)

        for image_file in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_file)
            processed_image = preprocess_image_for_cnn(image_path)
            if processed_image is None:
                print(f"Skipping image: {image_file}")
                continue

            # Convert back to uint8 (0-255) before saving. Round instead of
            # truncating: float32 round-off in the /255 -> *255 round trip
            # could otherwise lower a pixel value by one.
            image_uint8 = np.clip(np.rint(processed_image * 255.0), 0, 255).astype(np.uint8)

            output_file = os.path.join(save_path, image_file)
            # cv2.imwrite signals failure through its return value.
            if not cv2.imwrite(output_file, image_uint8):
                print(f"Failed to save image: {output_file}")

In [None]:
if __name__ == "__main__":
    # Each job is (source dataset root, output root for the preprocessed copies).
    dataset_jobs = (
        (original_dataset_path, preprocessed_original_path),
        (augmented_dataset_path, preprocessed_augmented_path),
    )

    # Prepare all output folders first, so a missing dataset fails before
    # any image has been processed ...
    for source_dir, target_dir in dataset_jobs:
        create_directory_structure(source_dir, target_dir)

    # ... then run the preprocessing pipeline on each dataset in turn.
    for source_dir, target_dir in dataset_jobs:
        preprocess_and_save_images(source_dir, target_dir)

    print("Preprocessing complete with minimal tuning!")