In [1]:
import os
import cv2
from facenet_pytorch import MTCNN
from PIL import Image
import psutil
import time

In [2]:
# Filters high-resolution images: detects faces, crops them out, and saves only the face crops.
# Works incrementally (one image at a time).
# NOTE: running this can kill the kernel when there is not enough RAM; process using a GPU instead.
def crop_and_save_faces(input_folder, output_folder, device="cpu", image_extensions=None):
    """
    Detect faces in every image under ``input_folder`` and save each face crop as a JPEG.

    Images are handled one at a time: open, detect, crop, save, move on. Failures on a
    single image are logged and do not stop the run.

    Parameters:
    - input_folder: Path to the folder containing input images (searched recursively).
    - output_folder: Path to save cropped face images. Created if it does not exist.
    - device: Device passed to MTCNN. Defaults to "cpu" (the previous hard-coded value);
      pass e.g. "cuda" to run detection on a GPU.
    - image_extensions: Iterable of file extensions (including the dot) that count as
      images; matching is case-insensitive. Defaults to a set of common raster formats.
      Files with other extensions (e.g. ".DS_Store", annotation files) are ignored
      instead of being opened and reported as errors.

    Returns:
    - None. Crops are written to ``output_folder`` as
      "<original file name>_face<N>.jpg"; errors are appended to "error_log.txt" in the
      current working directory; progress is printed to stdout.
    """
    default_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".webp")
    allowed_suffixes = tuple(ext.lower() for ext in (image_extensions or default_extensions))

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Gather image paths, skipping anything that is not an image by extension
    image_paths = [
        os.path.join(root, file_name)
        for root, _, file_names in os.walk(input_folder)
        for file_name in file_names
        if file_name.lower().endswith(allowed_suffixes)
    ]

    total_images = len(image_paths)
    processed_images = 0
    errors = 0

    print(f"Total images found: {total_images}")

    # Only load the detector when there is something to process
    mtcnn = MTCNN(keep_all=True, device=device) if image_paths else None

    for file_path in image_paths:
        try:
            # Close the file handle as soon as the pixels are in memory;
            # convert() returns an independent image that outlives the file.
            with Image.open(file_path) as raw_image:
                image = raw_image.convert("RGB")

            # Detect faces
            boxes, _ = mtcnn.detect(image)

            if boxes is not None:  # If faces are detected
                width, height = image.size
                base_name = os.path.basename(file_path)
                for idx, box in enumerate(boxes):
                    left, top, right, bottom = map(int, box)
                    # Detector boxes are not guaranteed to lie inside the image:
                    # clamp them so crops are not padded with black pixels.
                    left, top = max(left, 0), max(top, 0)
                    right, bottom = min(right, width), min(bottom, height)
                    if right <= left or bottom <= top:
                        # Degenerate box: nothing to save, but keep the other faces
                        continue
                    face = image.crop((left, top, right, bottom))
                    output_path = os.path.join(output_folder, f"{base_name}_face{idx+1}.jpg")
                    face.save(output_path)
            processed_images += 1

            # Monitor memory usage. This is a best-effort back-off only:
            # sleeping does not itself release any memory.
            memory_info = psutil.virtual_memory()
            if memory_info.percent > 90:  # If memory usage exceeds 90%, pause
                print("High memory usage detected. Pausing for 30 seconds.")
                time.sleep(30)

        except Exception as e:
            # Record the failure and carry on with the next image
            errors += 1
            with open("error_log.txt", "a", encoding="utf-8") as log_file:
                log_file.write(f"Error processing {file_path}: {e}\n")
            print(f"Error processing file {file_path}: {e}")
        else:
            # Log progress every 100 successfully processed images
            if processed_images % 100 == 0:
                print(f"Processed {processed_images}/{total_images} images. Errors so far: {errors}")

    print(f"Processing complete. Total processed: {processed_images}. Total errors: {errors}.")

In [None]:
# WIDER FACE dataset: read the raw images and write the face crops to a sibling folder.
input_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/wider_face_dataset"
output_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/filtered_wider_face_dataset"
crop_and_save_faces(input_folder=input_folder, output_folder=output_folder)