In [1]:
import os
# Set ahead of the TensorFlow import in the next cell so the C++ logger sees it.
# Level 2 filters INFO and WARNING messages; error-level lines are still printed.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

In [2]:
from retinaface import RetinaFace
import cv2
import os
from multiprocessing import Pool
import tensorflow as tf

2025-01-26 00:26:03.244481: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
# Optional: cap the amount of GPU memory TensorFlow may claim on the first GPU.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # One logical device on the first physical GPU, limited to 4096 MB (~4 GB).
        memory_cap = tf.config.LogicalDeviceConfiguration(memory_limit=4096)
        tf.config.set_logical_device_configuration(gpus[0], [memory_cap])
    except RuntimeError as err:
        # Report the failure and carry on with TensorFlow's default allocation.
        print(err)

In [4]:
# Preprocessing helper: resize plus a histogram-equalized grayscale copy.
def preprocess_image(image, target_size=(640, 640)):
    """Resize ``image`` and derive an equalized grayscale version of the result.

    Args:
        image: Image array accepted by ``cv2.resize``. It is converted with
            ``cv2.COLOR_BGR2GRAY``, i.e. it is treated as a BGR image.
        target_size: Size tuple handed unchanged to ``cv2.resize``
            (defaults to ``(640, 640)``).

    Returns:
        tuple: ``(resized_image, equalized)`` where ``resized_image`` is the
        resized colour image and ``equalized`` is the output of
        ``cv2.equalizeHist`` applied to its grayscale conversion.
    """
    scaled = cv2.resize(image, target_size)
    # Equalization operates on a single channel, hence the grayscale step first.
    equalized_gray = cv2.equalizeHist(cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY))
    return scaled, equalized_gray

In [5]:
# Function to check if an image contains a human face
def has_human_face(image_path, confidence_threshold=0.9):
    """Return True when RetinaFace reports at least one sufficiently confident face.

    Args:
        image_path: Image location passed straight to ``RetinaFace.detect_faces``.
        confidence_threshold: Minimum ``score`` a detection must reach to count
            as a face (inclusive).

    Returns:
        bool: True if any detection has ``score >= confidence_threshold``.
        False when no detection qualifies, when the detector returns something
        other than a dict of detections, or when detection raises (the error
        is printed and swallowed so one bad file does not stop a batch).
    """
    try:
        faces = RetinaFace.detect_faces(image_path)

        # NOTE(review): some retinaface releases appear to return a non-dict
        # (a tuple of empty arrays) when nothing is detected. Treat that as a
        # plain "no face" result instead of letting `.values()` raise and be
        # reported below as a detection error. Confirm against the installed version.
        if not isinstance(faces, dict):
            return False

        return any(
            details['score'] >= confidence_threshold
            for details in faces.values()
        )
    except Exception as e:
        print(f"Error detecting face in {image_path}: {e}")
        return False

In [6]:
# Worker function defined at module level so multiprocessing can hand it to pool workers
def worker(args):
    """Classify one file as face / non-face and save its resized copy accordingly.

    Args:
        args: Tuple ``(file_path, input_folder, output_folder_faces,
            output_folder_non_faces, confidence_threshold)``. Everything is
            packed into one argument because the pool passes a single item
            per call.

    Returns:
        bool: True only when a face was detected and the resized image was
        written under ``output_folder_faces``. False for images without a
        face (still written under ``output_folder_non_faces``), for files
        ``cv2.imread`` cannot read, and for any processing error (printed,
        not raised).
    """
    file_path, input_folder, output_folder_faces, output_folder_non_faces, confidence_threshold = args
    try:
        image = cv2.imread(file_path)
        if image is None:
            return False  # Skip non-image files

        resized_image, _ = preprocess_image(image)

        # Detection runs on the original file; only the saved copy is resized.
        face_found = bool(has_human_face(file_path, confidence_threshold=confidence_threshold))
        destination_root = output_folder_faces if face_found else output_folder_non_faces

        # Mirror the file's position under input_folder inside the chosen output tree.
        relative_path = os.path.relpath(file_path, input_folder)
        output_path = os.path.join(destination_root, relative_path)

        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        cv2.imwrite(output_path, resized_image)

        # Return the detection outcome rather than a blanket True, so a caller
        # that tallies truthy results counts faces, not every processed image.
        return face_found
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return False

In [7]:
# Main function
def test_human_face_detection(input_folder, output_folder_faces, output_folder_non_faces, batch_size=100, confidence_threshold=0.9):
    os.makedirs(output_folder_faces, exist_ok=True)
    os.makedirs(output_folder_non_faces, exist_ok=True)
    image_paths = [
        os.path.join(root, file)
        for root, _, files in os.walk(input_folder)
        for file in files
    ]

    total_images = len(image_paths)
    kept_images = 0

    with Pool(processes=4) as pool:
        args = [
            (file_path, input_folder, output_folder_faces, output_folder_non_faces, confidence_threshold)
            for file_path in image_paths
        ]
        results = pool.imap(worker, args)
        for i, result in enumerate(results, start=1):
            if result:
                kept_images += 1

            # Print progress in batches
            if i % batch_size == 0:
                print(f"Processed {i}/{total_images} images...")

    print(f"Total images processed: {total_images}")
    print(f"Images kept (faces): {kept_images}")
    print(f"Images sorted as non-faces: {total_images - kept_images}")

In [None]:
# Example usage: run the face / non-face sorter on the small combined dataset.
input_folder = "/home/natalyagrokh/img_datasets/combined_datasets_small"
output_folder_faces = "/home/natalyagrokh/img_datasets/combined_datasets_small_lightcur"
output_folder_non_faces = "/home/natalyagrokh/img_datasets/combined_datasets_small_rejects"
test_human_face_detection(
    input_folder=input_folder,
    output_folder_faces=output_folder_faces,
    output_folder_non_faces=output_folder_non_faces,
    batch_size=500,  # progress line every 500 results
    confidence_threshold=0.9,
)

2025-01-26 00:26:10.635358: E tensorflow/stream_executor/cuda/cuda_driver.cc:1276] could not retrieve CUDA device count: CUDA_ERROR_NOT_INITIALIZED: initialization error
2025-01-26 00:26:10.641274: E tensorflow/stream_executor/cuda/cuda_driver.cc:1276] could not retrieve CUDA device count: CUDA_ERROR_NOT_INITIALIZED: initialization error
2025-01-26 00:26:10.676937: E tensorflow/stream_executor/cuda/cuda_driver.cc:1276] could not retrieve CUDA device count: CUDA_ERROR_NOT_INITIALIZED: initialization error
2025-01-26 00:26:10.702745: E tensorflow/stream_executor/cuda/cuda_driver.cc:1276] could not retrieve CUDA device count: CUDA_ERROR_NOT_INITIALIZED: initialization error


Processed 500/37079 images...
