In [1]:
from ultralytics import YOLO
import cv2
import os
from multiprocessing import Pool

In [2]:
# Load YOLOv8 face detection model
# The detector is created once at module level; has_human_face() reads this
# global `model` when it calls model.predict().
# NOTE(review): 'yolov8n.pt' is presumably a general-purpose checkpoint rather
# than a face detector (see the inline reminder below) — confirm which classes
# it predicts before trusting the face/non-face split.
model = YOLO('yolov8n.pt')  # Replace with a face-specific model if available

2025-01-19 20:56:37.445255: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-01-19 20:56:40.079057: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/conda/envs/ml_expressions/lib:/usr/local/cuda/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64:/usr/local/cuda-11.8/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-12.6/lib64
2025-01-19 20:56:40.079260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dyna

TensorFlow Version: 2.11.0
GPU Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2025-01-19 20:56:42.369185: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-01-19 20:56:42.600999: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-01-19 20:56:42.604369: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [12]:
# Image preprocessing hook (currently a pass-through)
def preprocess_image(image):
    """
    Pass-through preprocessing step used before YOLO face detection.

    Parameters:
    - image: Input image as a NumPy array.

    Returns:
    - The very same image object, unmodified (YOLO handles resizing internally).
    """
    # Deliberately a no-op: the detector performs its own preprocessing, so
    # this function only exists as a single place to add custom steps later.
    untouched = image
    return untouched

In [13]:
# Function to check if an image contains a human face
def has_human_face(image_path, confidence_threshold=0.5, classes=None):
    """
    Detects if an image contains a human face using the module-level YOLOv8 `model`.

    Parameters:
    - image_path: Path to the image file (forwarded unchanged to model.predict).
    - confidence_threshold: Minimum confidence score for valid face detection.
    - classes: Optional class id filter forwarded to model.predict. When None
      (the default) no filter is passed and ANY detected box counts as a hit.
      NOTE(review): with general-purpose weights that means any detected
      object is treated as a face; pass the id(s) of the wanted class to
      restrict it — confirm the ids against the loaded model's class names.

    Returns:
    - True if at least one bounding box is detected with sufficient confidence,
      False otherwise. Prediction errors are printed and reported as False
      (best-effort behaviour, so one bad file does not abort a batch run).
    """
    predict_kwargs = {"conf": confidence_threshold, "save": False}
    if classes is not None:
        # Only forward the filter when the caller asked for one, so the default
        # call is exactly what it was before this parameter existed.
        predict_kwargs["classes"] = classes

    try:
        results = model.predict(image_path, **predict_kwargs)
        if not results:
            # No result object at all: nothing was detected.
            return False
        boxes = getattr(results[0], "boxes", None)
        # `boxes` can be missing/None for results that carry no detections;
        # treat that as "no face" instead of raising inside len().
        return boxes is not None and len(boxes) > 0
    except Exception as e:
        print(f"Error detecting face in {image_path}: {e}")
        return False

Cleaned dataset. Removed 0 unsupported files.


In [14]:
# Worker function for multiprocessing
def worker(args):
    """
    Classifies one image and saves it under the faces or non-faces folder.

    Parameters:
    - args: Tuple of (file_path, input_folder, output_folder_faces,
      output_folder_non_faces, confidence_threshold). A single tuple is used
      so the function can be passed directly to Pool.map.

    Returns:
    - True only when a face was detected AND the image was written to the
      faces folder. False for non-face images, unreadable files, failed
      writes and any other error. test_human_face_detection sums these
      values to report the number of face images, so the flag must not be
      True for images routed to the non-faces folder.
    """
    file_path, input_folder, output_folder_faces, output_folder_non_faces, confidence_threshold = args
    try:
        image = cv2.imread(file_path)
        if image is None:
            return False  # Skip unreadable images

        # Preprocess image
        processed_image = preprocess_image(image)

        # Check for human face and pick the destination root accordingly
        is_face = bool(has_human_face(file_path, confidence_threshold=confidence_threshold))
        destination_root = output_folder_faces if is_face else output_folder_non_faces

        # Mirror the file's location relative to the input root
        relative_path = os.path.relpath(file_path, input_folder)
        output_path = os.path.join(destination_root, relative_path)

        # Save the image
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        # cv2.imwrite signals failure through its return value rather than an
        # exception, so check it instead of reporting a write that never happened.
        if not cv2.imwrite(output_path, processed_image):
            print(f"Error processing file {file_path}: could not write {output_path}")
            return False
        return is_face
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return False

In [15]:

# Main function with batching
def test_human_face_detection(input_folder, output_folder_faces, output_folder_non_faces, batch_size=500, confidence_threshold=0.5):
    """
    Processes all images in the input folder for human face detection.

    Parameters:
    - input_folder: Root folder of the input dataset.
    - output_folder_faces: Folder for images detected as faces.
    - output_folder_non_faces: Folder for images detected as non-faces.
    - batch_size: Number of images to process before printing progress.
    - confidence_threshold: Minimum confidence score for valid face detection.
    """
    os.makedirs(output_folder_faces, exist_ok=True)
    os.makedirs(output_folder_non_faces, exist_ok=True)

    image_paths = [
        os.path.join(root, file)
        for root, _, files in os.walk(input_folder)
        for file in files
    ]

    total_images = len(image_paths)
    kept_images = 0

    # Process images in batches
    for i in range(0, total_images, batch_size):
        batch_paths = image_paths[i:i + batch_size]
        args = [
            (file_path, input_folder, output_folder_faces, output_folder_non_faces, confidence_threshold)
            for file_path in batch_paths
        ]

        # Use multiprocessing for batch
        with Pool(processes=4) as pool:
            results = pool.map(worker, args)
            kept_images += sum(results)

        print(f"Processed {min(i + batch_size, total_images)}/{total_images} images...")

    print(f"Total images processed: {total_images}")
    print(f"Images kept (faces): {kept_images}")
    print(f"Images sorted as non-faces: {total_images - kept_images}")

In [None]:
# Example usage
# NOTE(review): the three paths below are absolute locations under one user's
# home directory; point them at your own dataset before running this cell.
# Every file found under input_folder is written into one of the two output
# folders, keeping its path relative to input_folder.
input_folder = "/home/natalyagrokh/img_datasets/test_images"
output_folder_faces = "/home/natalyagrokh/img_datasets/combined_datasets_faces"
output_folder_non_faces = "/home/natalyagrokh/img_datasets/combined_datasets_non_faces"
# Runs the full sort: progress is printed every batch_size files, and
# detections below confidence_threshold are ignored.
test_human_face_detection(input_folder, output_folder_faces, output_folder_non_faces, batch_size=500, confidence_threshold=0.5)