In [1]:
import cv2
import os

In [3]:
# Paths to the pre-trained model files
prototxt_path = "deploy.prototxt"
caffemodel_path = "res10_300x300_ssd_iter_140000.caffemodel"

In [4]:
# Load the pre-trained face detection model
net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

In [9]:
# Directory containing the images
input_dir = "lfw_dataset_curated"
output_dir = "lfw_dataset_curated_2"

In [10]:
# Create output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

In [11]:
# Minimum confidence threshold to consider a detection as a face
confidence_threshold = 0.5

In [12]:
# Iterate over all images in the input directory
for filename in os.listdir(input_dir):
    if filename.endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff')):
        image_path = os.path.join(input_dir, filename)
        image = cv2.imread(image_path)
        
        # Get the dimensions of the image
        (h, w) = image.shape[:2]
        
        # Prepare the image for face detection
        blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
        
        # Pass the blob through the network and get the detections
        net.setInput(blob)
        detections = net.forward()
        
        # Check if any faces were detected
        face_detected = False
        for i in range(0, detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            
            # Filter out weak detections
            if confidence > confidence_threshold:
                face_detected = True
                break
        
        # If a face was detected, save the image to the output directory
        if face_detected:
            output_path = os.path.join(output_dir, filename)
            cv2.imwrite(output_path, image)
            print(f"Face detected in {filename}, saved to {output_path}")
        else:
            print(f"No face detected in {filename}, discarding...")

print("Face detection and filtering complete.")

Face detected in Junichiro_Koizumi_0014.jpg_face1.jpg, saved to lfw_dataset_curated_2/Junichiro_Koizumi_0014.jpg_face1.jpg
Face detected in George_W_Bush_0317.jpg_face1.jpg, saved to lfw_dataset_curated_2/George_W_Bush_0317.jpg_face1.jpg
Face detected in George_W_Bush_0247.jpg_face1.jpg, saved to lfw_dataset_curated_2/George_W_Bush_0247.jpg_face1.jpg
Face detected in Serena_Williams_0019.jpg_face1.jpg, saved to lfw_dataset_curated_2/Serena_Williams_0019.jpg_face1.jpg
Face detected in Igor_Ivanov_0001.jpg_face1.jpg, saved to lfw_dataset_curated_2/Igor_Ivanov_0001.jpg_face1.jpg
Face detected in John_Howard_0018.jpg_face1.jpg, saved to lfw_dataset_curated_2/John_Howard_0018.jpg_face1.jpg
Face detected in Pete_Sampras_0002.jpg_face1.jpg, saved to lfw_dataset_curated_2/Pete_Sampras_0002.jpg_face1.jpg
Face detected in Alan_Greenspan_0003.jpg_face1.jpg, saved to lfw_dataset_curated_2/Alan_Greenspan_0003.jpg_face1.jpg
Face detected in George_W_Bush_0483.jpg_face1.jpg, saved to lfw_dataset_cura