In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm

INPUT_BASE_PATH = "Hair Diseases"
OUTPUT_BASE_PATH = "hair_preprocessed"
IMAGE_SIZE = (224, 224)

def preprocess_image(image):
    """
    Turn a BGR image into a contrast-enhanced, smoothed, fixed-size
    3-channel image.

    Steps, in the order they are applied: grayscale conversion, CLAHE,
    bilateral filtering, min-max rescaling to [0, 255], expansion back to
    three channels, and resizing to IMAGE_SIZE.
    """
    # Collapse to a single intensity channel; the next steps work on it.
    single_channel = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Contrast Limited Adaptive Histogram Equalization on 8x8 tiles.
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = equalizer.apply(single_channel)

    # Bilateral filter: neighbourhood diameter 9, both sigmas 75.
    smoothed = cv2.bilateralFilter(enhanced, d=9, sigmaColor=75, sigmaSpace=75)

    # Stretch intensities so they span the full [0, 255] range.
    stretched = cv2.normalize(smoothed, None, 0, 255, cv2.NORM_MINMAX)

    # Replicate the gray channel three times for CNN input compatibility.
    three_channel = cv2.cvtColor(stretched, cv2.COLOR_GRAY2BGR)

    # Bring every image to the common target size.
    return cv2.resize(three_channel, IMAGE_SIZE)

def process_directory(input_dir, output_dir):
    """
    Recursively preprocesses every image under input_dir and saves the
    results under output_dir, mirroring the sub-directory layout.

    Files are selected by extension (case-insensitive). Each one is read
    with cv2.imread, passed through preprocess_image, and written as a PNG
    named after the original file's stem. Problems with individual files
    (unreadable input, failed write, or an exception during processing) are
    reported with print() and do not stop the run.

    Args:
        input_dir: Root directory to scan for images.
        output_dir: Root directory that receives the processed PNG files.
            Sub-directories are created on demand, only for folders that
            contain at least one file with an image extension.
    """
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".jfif", ".webp")

    for root, _dirs, files in os.walk(input_dir):
        # Mirror the current folder's position relative to input_dir.
        save_dir = os.path.join(output_dir, os.path.relpath(root, input_dir))
        # Output file names already written for this folder in this run.
        written_names = set()

        # Sorted so the collision handling below is deterministic.
        for file in sorted(files):
            if not file.lower().endswith(image_extensions):
                continue

            input_path = os.path.join(root, file)
            os.makedirs(save_dir, exist_ok=True)

            try:
                img = cv2.imread(input_path)
                if img is None:
                    # cv2.imread reports an unreadable/corrupt file by
                    # returning None rather than raising, so say so
                    # explicitly instead of dropping the file silently.
                    print(f"Skipping unreadable image: {input_path}")
                    continue

                processed_img = preprocess_image(img)

                base_name, extension = os.path.splitext(file)
                output_name = base_name + ".png"
                if output_name in written_names:
                    # Two sources with the same stem (e.g. "a.jpg" and
                    # "a.png") would otherwise overwrite each other; tag
                    # the later one with its original extension.
                    output_name = f"{base_name}_{extension[1:].lower()}.png"

                output_path = os.path.join(save_dir, output_name)
                # cv2.imwrite returns False when the file was not saved.
                if cv2.imwrite(output_path, processed_img):
                    written_names.add(output_name)
                else:
                    print(f"Failed to write {output_path}")
            except Exception as e:
                print(f"Error processing {input_path}: {e}")

# Run the preprocessing over each dataset split under INPUT_BASE_PATH and
# write the results to the matching folder under OUTPUT_BASE_PATH.
for split in ['train', 'test', 'val']:
    input_path = os.path.join(INPUT_BASE_PATH, split)
    output_path = os.path.join(OUTPUT_BASE_PATH, split)

    print(f"\nProcessing {split} data...")
    # os.walk yields nothing for a missing directory, so without this check
    # a wrong base path or an absent split would look like a successful run.
    if not os.path.isdir(input_path):
        print(f"Warning: input directory not found, skipping: {input_path}")
        continue
    process_directory(input_path, output_path)

print("\n Hair image preprocessing complete!")


Processing train data...

Processing test data...

Processing val data...

 Hair image preprocessing complete!
