In [1]:
import cv2
import os
import numpy as np
from glob import glob

## Preprocessing

In [2]:
def preprocess_image(image_path, output_size=(256, 256)):
    """Load an image as grayscale, binarize it with Otsu's method, and resize it.

    Args:
        image_path: Path of the image file handed to ``cv2.imread``.
        output_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument, i.e. ``(width, height)``.

    Returns:
        The thresholded image resized to ``output_size``, or ``None`` when
        the image could not be loaded (an error line is printed in that case).

    NOTE(review): the resize runs *after* thresholding and uses area
    interpolation, so the result presumably contains intermediate gray levels
    rather than strictly 0/255 -- confirm whether downstream code needs a
    strict binary mask.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # A None result from the loader is treated as "file unreadable".
    if gray is None:
        print("Error loading image:", image_path)
        return None

    # The explicit threshold (0) is a placeholder: with THRESH_OTSU the
    # threshold is chosen automatically; pixels above it become 255.
    otsu_flags = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    thresholded = cv2.threshold(gray, 0, 255, otsu_flags)[1]

    # Optional clean-up hook: a morphological close with a small kernel, e.g.
    #   cv2.morphologyEx(thresholded, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
    # could be applied here to remove noise or fill small gaps.

    # Bring every image to the same size.
    return cv2.resize(thresholded, output_size, interpolation=cv2.INTER_AREA)

In [3]:
def _flat_output_name(rel_path, taken):
    """Pick a file name for ``rel_path`` that is not already in ``taken``.

    The plain base name is used when it is free. Otherwise the sub-directory
    components of ``rel_path`` are folded into the name (separators replaced
    by ``_``), and a numeric suffix is appended if that is taken as well.
    """
    name = os.path.basename(rel_path)
    if name not in taken:
        return name

    stem, ext = os.path.splitext(rel_path)
    flat_stem = stem.replace(os.sep, "_")
    candidate = flat_stem + ext
    counter = 1
    while candidate in taken:
        candidate = f"{flat_stem}_{counter}{ext}"
        counter += 1
    return candidate


def process_dataset(input_folder, output_folder, output_size=(256, 256),
                    preserve_structure=False):
    """Preprocess every PNG under ``input_folder`` and save it to ``output_folder``.

    The input folder is searched recursively. Each image is run through
    ``preprocess_image`` and written with ``cv2.imwrite``.

    Args:
        input_folder: Root directory searched (recursively) for ``*.png`` files.
        output_folder: Directory that receives the processed images; created
            if it does not exist.
        output_size: Target size forwarded to ``preprocess_image``.
        preserve_structure: When ``False`` (default) all outputs are written
            directly into ``output_folder`` under their base name. If two
            inputs from different sub-folders share a base name, the later one
            is given a disambiguated name instead of overwriting the earlier
            output. When ``True`` the sub-folder layout of ``input_folder`` is
            mirrored inside ``output_folder``.

    Returns:
        None. Progress and any failures are reported via ``print``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # Get list of image files in the input folder (assuming PNG images).
    # Sorted so that collision handling below is deterministic across runs.
    image_files = sorted(
        glob(os.path.join(input_folder, "**", "*.png"), recursive=True)
    )

    print(f"Found {len(image_files)} images in {input_folder}.")

    taken_names = set()   # flat output names already assigned in this run
    renamed_count = 0     # inputs whose base name collided with an earlier one
    failed_paths = []     # inputs that could not be loaded or written

    for image_path in image_files:
        preprocessed = preprocess_image(image_path, output_size)
        if preprocessed is None:
            failed_paths.append(image_path)
            continue

        rel_path = os.path.relpath(image_path, input_folder)
        if preserve_structure:
            out_rel = rel_path
        else:
            out_rel = _flat_output_name(rel_path, taken_names)
            if out_rel != os.path.basename(rel_path):
                renamed_count += 1
            taken_names.add(out_rel)

        output_path = os.path.join(output_folder, out_rel)
        if preserve_structure:
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(output_path, preprocessed):
            print("Error saving image:", output_path)
            failed_paths.append(image_path)

    if renamed_count:
        print(f"Renamed {renamed_count} images to avoid file name collisions.")
    if failed_paths:
        print(f"Failed to process {len(failed_paths)} images.")

    print("Preprocessing complete. Preprocessed images saved to:", output_folder)

In [4]:

if __name__ == "__main__":
    # Where the raw sketches live, and where the processed copies are written.
    input_folder, output_folder = "./sketches", "./processed_sketches"

    # Size every processed image is resized to.
    output_size = (256, 256)

    process_dataset(
        input_folder=input_folder,
        output_folder=output_folder,
        output_size=output_size,
    )


Found 20000 images in ./sketches.
Preprocessing complete. Preprocessed images saved to: ./processed_sketches
