In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from skimage import exposure
from skimage.color import rgb2lab, lab2rgb

# preprocessing

In [9]:
# Function for basic image preprocessing
def preprocess_images(input_folder, output_folder, target_size=(224, 224)):
    """Resize every image of a class-per-folder dataset and save the copies.

    The layout ``input_folder/<class_name>/<image>`` is mirrored under
    ``output_folder``: each readable image is resized to ``target_size`` and
    written under its original file name.

    Args:
        input_folder: Directory whose sub-directories each hold one class.
        output_folder: Destination root; class sub-directories are created on
            demand.
        target_size: Size tuple handed to ``cv2.resize`` (OpenCV interprets it
            as ``(width, height)``).

    Returns:
        None. All results are written to disk; unreadable or unwritable files
        are reported with ``print`` and skipped.
    """
    for class_name in os.listdir(input_folder):
        class_path = os.path.join(input_folder, class_name)

        # Only directories are classes. Stray files next to them (for example
        # desktop.ini or Thumbs.db on Windows) would make os.listdir() raise.
        if not os.path.isdir(class_path):
            continue

        output_class_path = os.path.join(output_folder, class_name)
        os.makedirs(output_class_path, exist_ok=True)

        for filename in os.listdir(class_path):
            img_path = os.path.join(class_path, filename)

            # cv2.imread signals failure by returning None instead of raising,
            # so it has to be checked before the array is used.
            img = cv2.imread(img_path)
            if img is None:
                print(f"Failed to read image: {img_path}")
                continue

            img = cv2.resize(img, target_size)

            # The image is stored as plain 8-bit pixels. Scaling to [-1, 1]
            # cannot survive a round trip through an image file, so any such
            # normalisation belongs at model-input time, not here.
            output_path = os.path.join(output_class_path, filename)
            if not cv2.imwrite(output_path, img):
                print(f"Failed to write image: {output_path}")

In [12]:

# Stage 1 locations: raw dataset (read) and resized copies (written).
# NOTE(review): machine-specific absolute paths; adjust before running elsewhere.
input_dataset_folder, preprocessed_folder = (
    "C:/Users/SANYA CHANDEL/Desktop/original_weeds_dataset",
    "C:/Users/SANYA CHANDEL/Desktop/new_preprocessed",
)

In [15]:
# Run the resize pass: raw dataset in, fixed-size copies out.
preprocess_images(
    input_folder=input_dataset_folder,
    output_folder=preprocessed_folder,
)

# clahe

In [16]:
import cv2
import os
from pathlib import Path
from tqdm import tqdm

In [17]:
# Function to apply CLAHE to an image (handles both colored and grayscale)
def apply_clahe(image, clip_limit=3.0, tile_grid_size=(8, 8)):
    """Return a contrast-enhanced copy of ``image`` using CLAHE.

    A 3-D array is treated as an RGB image: it is converted to LAB, CLAHE is
    applied to the L (lightness) channel only, and the result is converted
    back to RGB. Any other array is passed to CLAHE directly as a
    single-channel image.

    Args:
        image: Image array. Colour input must be in RGB channel order, because
            the conversion uses ``cv2.COLOR_RGB2LAB``. Presumably 8-bit;
            confirm against the caller.
        clip_limit: Forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns:
        The equalised image, RGB for colour input and single-channel otherwise.
    """
    # Both branches use the same equaliser, so build it once.
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)

    if image.ndim == 3:
        # cvtColor returns a fresh array, so writing the equalised lightness
        # back into `lab` never touches the caller's image.
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        lab[:, :, 0] = clahe.apply(lab[:, :, 0])
        return cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

    return clahe.apply(image)

In [19]:
# CLAHE stage locations: resized images (read) and equalised copies (written).
# NOTE(review): machine-specific absolute paths; adjust before running elsewhere.
input_folder, output_folder = (
    "C:/Users/SANYA CHANDEL/Desktop/new_preprocessed",
    "C:/Users/SANYA CHANDEL/Desktop/new_clahe",
)

In [20]:
# Apply CLAHE to every image under input_folder, mirroring the class
# sub-folder layout under output_folder.
for class_folder in os.listdir(input_folder):
    class_path = os.path.join(input_folder, class_folder)

    # Only directories are classes. A stray file in the dataset root (for
    # example desktop.ini or Thumbs.db) would make os.listdir() below raise.
    if not os.path.isdir(class_path):
        continue

    # Create the matching class folder on the output side.
    output_class_path = os.path.join(output_folder, class_folder)
    Path(output_class_path).mkdir(parents=True, exist_ok=True)

    # One labelled progress bar per class.
    for image_file in tqdm(os.listdir(class_path), desc=class_folder):
        image_path = os.path.join(class_path, image_file)

        # cv2.imread returns None (it does not raise) for unreadable files.
        img = cv2.imread(image_path)
        if img is None:
            print(f"Failed to read image: {image_path}")
            continue

        # apply_clahe works in RGB, while OpenCV loads BGR. The 2-D branch is
        # defensive: it only triggers if the array came back single-channel.
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        elif len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

        processed_img = apply_clahe(img)

        # Convert back to BGR, the channel order cv2.imwrite expects.
        output_path = os.path.join(output_class_path, image_file)
        if not cv2.imwrite(output_path, cv2.cvtColor(processed_img, cv2.COLOR_RGB2BGR)):
            print(f"Failed to write image: {output_path}")

100%|██████████████████████████████████████████████████████████████████████████████████| 51/51 [00:00<00:00, 68.17it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 58/58 [00:00<00:00, 81.08it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 48/48 [00:00<00:00, 71.30it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 75.07it/s]


# train-test split & augmentation

In [22]:
# Function for splitting dataset and applying augmentation
def split_and_augment(
    input_folder,
    output_train_folder,
    output_test_folder,
    augmentation_params,
    test_size=0.2,
    random_state=42,
    augmentations_per_image=5,
):
    """Split each class into train/test and write augmented training images.

    For every class sub-directory of ``input_folder`` the image files are split
    with ``train_test_split``. Each training image yields
    ``augmentations_per_image`` augmented variants saved as
    ``aug_<i>_<original name>`` under ``output_train_folder/<class>``; the
    un-augmented training image itself is not written. Test images are
    re-saved unchanged under ``output_test_folder/<class>``.

    Args:
        input_folder: Directory whose sub-directories each hold one class.
        output_train_folder: Destination root for augmented training images.
        output_test_folder: Destination root for test images.
        augmentation_params: Keyword arguments for ``ImageDataGenerator``.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.
        augmentations_per_image: Number of augmented variants per training image.

    Returns:
        None. All results are written to disk; unreadable files are reported
        with ``print`` and skipped.
    """
    # The generator configuration is identical for every class: build it once.
    datagen = ImageDataGenerator(**augmentation_params)

    for class_name in sorted(os.listdir(input_folder)):
        class_path = os.path.join(input_folder, class_name)

        # Only directories are classes; skip stray files in the dataset root.
        if not os.path.isdir(class_path):
            continue

        # os.listdir order is arbitrary. Sorting makes the split depend only
        # on the file names and random_state, so it is reproducible.
        images = sorted(
            os.path.join(class_path, filename)
            for filename in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, filename))
        )
        if not images:
            print(f"No images found in class folder: {class_path}")
            continue

        train_images, test_images = train_test_split(
            images, test_size=test_size, random_state=random_state
        )

        train_class_path = os.path.join(output_train_folder, class_name)
        test_class_path = os.path.join(output_test_folder, class_name)
        os.makedirs(train_class_path, exist_ok=True)
        os.makedirs(test_class_path, exist_ok=True)

        # Augmentation is applied to the training set only.
        for img_path in train_images:
            img = cv2.imread(img_path)
            if img is None:  # cv2.imread returns None instead of raising
                print(f"Failed to read image: {img_path}")
                continue

            # Augment in RGB scaled to [0, 1], with a leading batch axis as
            # required by ImageDataGenerator.flow.
            rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
            augmented_images = datagen.flow(np.expand_dims(rgb, axis=0), batch_size=1)

            for i in range(augmentations_per_image):
                # Builtin next() works on any iterator, whether or not the
                # Keras iterator exposes a .next() method.
                augmented = next(augmented_images)[0]

                # Round rather than truncate (plain astype floors, darkening
                # pixels by up to one level) and clip to the 8-bit range.
                augmented = np.clip(np.rint(augmented * 255.0), 0, 255).astype(np.uint8)

                # cv2.imwrite expects BGR. Without converting back, every saved
                # training image would have red and blue swapped relative to
                # the test images.
                output_path = os.path.join(
                    train_class_path, f"aug_{i}_{os.path.basename(img_path)}"
                )
                cv2.imwrite(output_path, cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR))

        # Test images are re-saved without augmentation.
        for img_path in test_images:
            img = cv2.imread(img_path)
            if img is None:
                print(f"Failed to read image: {img_path}")
                continue
            output_path = os.path.join(test_class_path, os.path.basename(img_path))
            cv2.imwrite(output_path, img)

In [23]:
# Split/augmentation stage locations: CLAHE output (read), then the train and
# test destinations (written).
# NOTE(review): machine-specific absolute paths; adjust before running elsewhere.
clahe_folder, train_aug_folder, test_aug_folder = (
    "C:/Users/SANYA CHANDEL/Desktop/new_clahe",
    "C:/Users/SANYA CHANDEL/Desktop/new_train",
    "C:/Users/SANYA CHANDEL/Desktop/new_test",
)

In [24]:
# Settings handed to ImageDataGenerator inside split_and_augment.
augmentation_params = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Split the CLAHE output into train/test and augment the training part.
split_and_augment(
    input_folder=clahe_folder,
    output_train_folder=train_aug_folder,
    output_test_folder=test_aug_folder,
    augmentation_params=augmentation_params,
)