In [2]:
import os
import cv2
import numpy as np
from tqdm import tqdm

# Directory whose immediate sub-folders are scanned for images to preprocess.
INPUT_BASE_PATH = "."
# Root directory for the preprocessed copies; each processed input folder
# gets a same-named sub-folder underneath it.
OUTPUT_BASE_PATH = "teeth_preprocessed"
# Target size for resizing; handed to cv2.resize as the output size.
IMAGE_SIZE = (224, 224)

def preprocess_image(image):
    """
    Run the enhancement pipeline on a single BGR image.

    Steps, in execution order: convert to grayscale, apply CLAHE,
    smooth with a bilateral filter, min-max normalize to [0, 255],
    expand back to three channels, and finally resize to IMAGE_SIZE.

    Returns the resized three-channel image.
    """
    # Contrast-limited adaptive histogram equalizer (8x8 tiles, clip 2.0)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    # Single-channel intensity image is what CLAHE operates on
    luminance = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Bilateral filter: diameter 9, sigmaColor 75, sigmaSpace 75
    smoothed = cv2.bilateralFilter(equalizer.apply(luminance), 9, 75, 75)

    # Stretch intensities so they span the range [0, 255]
    stretched = cv2.normalize(smoothed, None, 0, 255, cv2.NORM_MINMAX)

    # Back to 3 channels for CNN compatibility, then resize as the last step
    return cv2.resize(cv2.cvtColor(stretched, cv2.COLOR_GRAY2BGR), IMAGE_SIZE)

def process_directory(input_dir, output_dir):
    """
    Recursively preprocess all images inside a directory and save the
    results as PNG files while maintaining the directory structure.

    Args:
        input_dir: Directory to walk for image files.
        output_dir: Directory under which the mirrored tree of
            preprocessed images is written; created as needed.

    Files are selected by extension (case-insensitive). Failures on a
    single image (unreadable file, failed write, or an exception raised
    while processing) are reported on stdout and do not stop the run.

    NOTE: every output is named ``<stem>.png``, so two inputs in the same
    directory that differ only by extension map to the same output file.
    """
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".jfif", ".webp")

    for root, _dirs, files in os.walk(input_dir):
        image_files = [
            name for name in files if name.lower().endswith(image_extensions)
        ]
        if not image_files:
            continue

        # Recreate directory structure in output path (once per directory,
        # and only for directories that actually contain images).
        save_dir = os.path.join(output_dir, os.path.relpath(root, input_dir))
        os.makedirs(save_dir, exist_ok=True)

        for name in image_files:
            input_path = os.path.join(root, name)
            output_path = os.path.join(
                save_dir, os.path.splitext(name)[0] + ".png"
            )

            try:
                img = cv2.imread(input_path)
                if img is None:
                    # imread signals an undecodable/missing file with None
                    # instead of raising; report it rather than drop it.
                    print(f"Skipping unreadable image: {input_path}")
                    continue

                # imwrite reports failure through its boolean return value.
                if not cv2.imwrite(output_path, preprocess_image(img)):
                    print(
                        f"Error processing {input_path}: "
                        f"could not write {output_path}"
                    )
            except Exception as e:
                # Best-effort batch job: log the failure and keep going.
                print(f"Error processing {input_path}: {e}")

# Top-level entries of INPUT_BASE_PATH that must not be processed.
# NOTE(review): the loop below compares these against bare names returned by
# os.listdir, so the entries containing "/" can never match there; they are
# kept unchanged pending confirmation of the intended behavior.
SKIP_FOLDERS = [
    "Caries_Gingivitus_ToothDiscoloration_Ulcer-yolo_annotated-Dataset",
    "cavity dataset/labels",
    "cavity dataset/data.yaml",
    "data.yaml",
]

print("Starting preprocessing...\n")

# The output root may live inside INPUT_BASE_PATH (it does with the default
# "." / "teeth_preprocessed" pair). Resolve it once so a re-run does not feed
# previously generated images back through the pipeline.
output_root = os.path.normcase(os.path.abspath(OUTPUT_BASE_PATH))

for folder in os.listdir(INPUT_BASE_PATH):
    folder_path = os.path.join(INPUT_BASE_PATH, folder)
    is_output_root = (
        os.path.normcase(os.path.abspath(folder_path)) == output_root
    )

    if folder in SKIP_FOLDERS or is_output_root:
        print(f"Skipping: {folder}")
        continue

    # Only directories are processed; loose files at the top level are ignored.
    if not os.path.isdir(folder_path):
        continue

    print(f"Processing folder: {folder}")
    process_directory(folder_path, os.path.join(OUTPUT_BASE_PATH, folder))

print("\n Preprocessing complete!")


Starting preprocessing...

Processing folder: Calculus
Skipping: Caries_Gingivitus_ToothDiscoloration_Ulcer-yolo_annotated-Dataset
Processing folder: Cavity Dataset
Processing folder: Data caries
Processing folder: Gingivitis
Processing folder: hypodontia
Processing folder: Mouth Ulcer
Processing folder: Tooth Discoloration

 Preprocessing complete!
