In [10]:
import cv2
import os

In [11]:
def detect_and_crop_faces(image_path, cascade):
    """Detect faces in an image and return a list of cropped face regions.

    Args:
        image_path: Path of the image file; it is loaded as grayscale.
        cascade: Detector exposing ``detectMultiScale`` (for example a
            ``cv2.CascadeClassifier``).

    Returns:
        A list with one cropped grayscale sub-image per detection. The list
        is empty when nothing is detected or when the file cannot be read
        as an image.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None rather than raising, and
    # the caller feeds every file of a directory through here; skip anything
    # that is not a decodable image instead of crashing in detectMultiScale.
    if img is None:
        return []

    faces = cascade.detectMultiScale(img, scaleFactor=1.1, minNeighbors=4)
    # Each detection is an (x, y, w, h) rectangle; rows are indexed by y and
    # columns by x.
    return [img[y:y + h, x:x + w] for x, y, w, h in faces]

In [12]:
def process_images(input_folder, output_folders):
    """Crop the faces found in every image of a folder and spread them over output folders.

    Each input image is assigned to the output folder that currently holds
    the fewest faces, and all faces from that image are written there as
    ``<original stem>_face<index><original extension>``.

    Args:
        input_folder: Directory whose regular files are processed.
        output_folders: Non-empty sequence of destination directories; each
            one is created on first use.

    Raises:
        ValueError: If ``output_folders`` is empty.
        RuntimeError: If the Haar cascade file could not be loaded.
    """
    if not output_folders:
        raise ValueError("output_folders must contain at least one folder")

    # Load Haar Cascade
    cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(cascade_path)
    # A missing or corrupt cascade file yields an empty classifier rather
    # than an exception, so check explicitly before processing anything.
    if face_cascade.empty():
        raise RuntimeError(f"Could not load Haar cascade from {cascade_path}")

    # List all files in the input directory. os.listdir order is arbitrary,
    # so sort to make the folder distribution reproducible between runs.
    image_files = sorted(
        f for f in os.listdir(input_folder)
        if os.path.isfile(os.path.join(input_folder, f))
    )

    # Number of faces assigned to each output folder so far
    folder_counters = [0] * len(output_folders)

    # Process each file
    for file_name in image_files:
        image_path = os.path.join(input_folder, file_name)
        cropped_faces = detect_and_crop_faces(image_path, face_cascade)

        # Send this image's faces to the least-filled folder (first one on ties)
        folder_idx = folder_counters.index(min(folder_counters))
        output_folder = output_folders[folder_idx]
        folder_counters[folder_idx] += len(cropped_faces)

        # Create output folder if it doesn't exist
        os.makedirs(output_folder, exist_ok=True)

        # Save each face under its own name: without the face index, every
        # face of a multi-face image would overwrite the previous one.
        stem, ext = os.path.splitext(file_name)
        for face_id, face in enumerate(cropped_faces):
            face_file = os.path.join(output_folder, f'{stem}_face{face_id}{ext}')
            cv2.imwrite(face_file, face)

In [13]:
# Source directory of grayscale training images.
input_folder = r"D:\Challenge\grayscale_train"
# Ten destination folders, subset0 .. subset9. The raw prefix (rf"...") keeps
# the backslashes literal; without it, "\C" and "\s" are invalid escape
# sequences that Python warns about and will eventually reject.
output_folders = [rf"D:\Challenge\subset{i}" for i in range(10)]

process_images(input_folder, output_folders)

KeyboardInterrupt: 