In [1]:
import os
import cv2
from facenet_pytorch import MTCNN
import numpy as np
from PIL import Image

In [3]:
#function filters high res imgs and crops out faces, saves faces only
#kills kernel b/c not enough RAM, process using GPU
def crop_and_save_faces_batch(input_folder, output_folder, batch_size=10):
    """
    Detects faces in images, crops them, and saves them as individual files in batches.
    Includes error handling, progress saving, and logging.
    
    Parameters:
    - input_folder: Path to the folder containing input images.
    - output_folder: Path to save cropped face images.
    - batch_size: Number of images to process in one batch.
    """
    # Initialize MTCNN
    mtcnn = MTCNN(keep_all=True, device="cpu")  # Use GPU if available

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Gather all image paths
    image_paths = []
    for root, _, files in os.walk(input_folder):
        for file in files:
            image_paths.append(os.path.join(root, file))

    total_images = len(image_paths)
    processed_images = 0
    errors = 0

    print(f"Total images found: {total_images}")

    # Process images in batches
    for i in range(0, len(image_paths), batch_size):
        batch_paths = image_paths[i:i + batch_size]

        for file_path in batch_paths:
            try:
                # Open image
                image = Image.open(file_path).convert("RGB")
                # Detect faces
                boxes, _ = mtcnn.detect(image)

                if boxes is not None:  # If faces are detected
                    for idx, box in enumerate(boxes):
                        # Crop and save each face
                        left, top, right, bottom = map(int, box)
                        face = image.crop((left, top, right, bottom))
                        output_path = os.path.join(output_folder, f"{os.path.basename(file_path)}_face{idx+1}.jpg")
                        face.save(output_path)
                processed_images += 1

            except Exception as e:
                errors += 1
                with open("error_log.txt", "a") as log_file:
                    log_file.write(f"Error processing {file_path}: {e}\n")
                print(f"Error processing file {file_path}: {e}")
                continue
        
        # Log progress
        print(f"Processed {processed_images}/{total_images} images. Errors so far: {errors}")

    print(f"Processing complete. Total processed: {processed_images}. Total errors: {errors}.")

In [None]:
# mini dataset
input_folder =  "/Users/natalyagrokh/AI/ml_expressions/img_datasets/mini_imgset"
output_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/mini_curated_dataset"
filtered_count = crop_and_save_faces_batch(input_folder, output_folder)

In [2]:
#function filters by face, discarding all others
def filter_face_images(input_folder, output_folder, min_face_size=(50, 50)):
    os.makedirs(output_folder, exist_ok=True)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
    total_images = 0
    filtered_images = 0

    for root, _, files in os.walk(input_folder):
        for file in files:
            if file.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".tiff")):
                file_path = os.path.join(root, file)
                try:
                    # Read image
                    img = cv2.imread(file_path)
                    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                    # Detect faces
                    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=min_face_size)

                    # If at least one face is detected, save the image to the output folder
                    if len(faces) > 0:
                        filtered_images += 1
                        output_path = os.path.join(output_folder, file)
                        Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).save(output_path)

                except Exception as e:
                    print(f"Error processing file {file_path}: {e}")
                
                total_images += 1

    print(f"Total images processed: {total_images}")
    print(f"Filtered images with faces: {filtered_images}")
    return filtered_images

In [3]:
# function detects faces in (48,48) imgs
def filter_low_res(input_folder, output_folder):
    """
    Filters images with faces for low-resolution images (e.g., 48x48).
    Uses MTCNN for robust detection.

    Parameters:
    - input_folder: Path to the folder containing input images.
    - output_folder: Path to save filtered images.
    """
    # Initialize MTCNN
    mtcnn = MTCNN(keep_all=True, device="cpu")  # Use GPU if available
    
    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    total_images = 0
    filtered_images = 0

    for root, _, files in os.walk(input_folder):
        for file in files:
            total_images += 1
            file_path = os.path.join(root, file)
            try:
                # Open the image
                image = Image.open(file_path).convert("RGB")
                # Temporarily resize to improve detection
                resized_image = image.resize((256, 256))
                # Detect faces
                boxes, _ = mtcnn.detect(resized_image)
                if boxes is not None:  # At least one face detected
                    filtered_images += 1
                    # Save the original image in output folder
                    output_path = os.path.join(output_folder, file)
                    image.save(output_path)
            except Exception as e:
                print(f"Error processing file {file}: {e}")
    
    print(f"Total images processed: {total_images}")
    print(f"Filtered images with faces: {filtered_images}")

In [3]:
# ck_dataset
input_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/ck_dataset"
output_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/filtered_ck_dataset"
filtered_count = filter_face_images(input_folder, output_folder)

Total images processed: 981
Filtered images with faces: 0


In [4]:
# fer_2013_dataset
input_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/fer_2013_dataset"
output_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/filtered_fer_2013_dataset"
filtered_count = filter_face_images(input_folder, output_folder)

Total images processed: 35887
Filtered images with faces: 0


In [5]:
# jaffe_dataset
input_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/jaffe_dataset"
output_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/filtered_jaffe_dataset"
filtered_count = filter_face_images(input_folder, output_folder)

Total images processed: 213
Filtered images with faces: 213


In [6]:
# wider_face_dataset
input_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/wider_face_dataset"
output_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/filtered_wider_face_dataset"
filtered_count = filter_face_images(input_folder, output_folder)

Total images processed: 32203
Filtered images with faces: 23085


In [12]:
# ck_dataset
filter_low_res("/Users/natalyagrokh/AI/ml_expressions/img_datasets/ck_dataset",
               "/Users/natalyagrokh/AI/ml_expressions/img_datasets/filtered_ck_dataset")

Error processing file .DS_Store: cannot identify image file '/Users/natalyagrokh/AI/ml_expressions/img_datasets/ck_dataset/.DS_Store'
Error processing file 1.complete: cannot identify image file '/Users/natalyagrokh/AI/ml_expressions/img_datasets/ck_dataset/1.complete'
Error processing file .DS_Store: cannot identify image file '/Users/natalyagrokh/AI/ml_expressions/img_datasets/ck_dataset/versions/.DS_Store'
Error processing file .DS_Store: cannot identify image file '/Users/natalyagrokh/AI/ml_expressions/img_datasets/ck_dataset/versions/1/.DS_Store'
Total images processed: 985
Filtered images with faces: 981


In [None]:
# # ck_dataset - kills kernel b/c not enough RAM, processing on GPU best
# filter_low_res("/Users/natalyagrokh/AI/ml_expressions/img_datasets/fer_2013_dataset",
#                 "/Users/natalyagrokh/AI/ml_expressions/img_datasets/filtered_fer_2013_dataset")

In [None]:
# wider_face_dataset
input_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/wider_face_dataset"
output_folder = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/filtered_wider_face_dataset"
crop_and_save_faces_batch(input_folder, output_folder, batch_size=10)