In [None]:
import os
import cv2
import numpy as np
from retinaface import RetinaFace
import gc

# Initialize RetinaFace
detector = RetinaFace

def assess_face_quality(face_img, min_size=30, min_confidence=0.95):
    """Assess face quality based on size and detection confidence"""
    if face_img is None or face_img.size == 0:
        return False
    
    height, width = face_img.shape[:2]
    if height < min_size or width < min_size:
        return False
    
    # Add basic blur detection
    gray = cv2.cvtColor(face_img, cv2.COLOR_RGB2GRAY)
    blur_score = cv2.Laplacian(gray, cv2.CV_64F).var()
    if blur_score < 100:  # Adjust threshold as needed
        return False
    
    return True

def detect_faces_enhanced(image):
    """Enhanced face detection using multiple detection passes"""
    # First pass with RetinaFace
    detections = RetinaFace.detect_faces(image)
    face_boxes = []
    face_confidences = []
    
    if isinstance(detections, dict):
        for _, face_data in detections.items():
            facial_area = face_data['facial_area']
            confidence = face_data.get('score', 0.0)
            landmarks = face_data.get('landmarks', None)
            
            # Calculate face angle using landmarks if available
            if landmarks:
                left_eye = landmarks['left_eye']
                right_eye = landmarks['right_eye']
                angle = np.degrees(np.arctan2(
                    right_eye[1] - left_eye[1],
                    right_eye[0] - left_eye[0]
                ))
                
                # Skip faces with extreme angles
                if abs(angle) > 30:
                    continue
            
            x1, y1, x2, y2 = facial_area
            face_boxes.append([x1, y1, x2 - x1, y2 - y1])
            face_confidences.append(confidence)
    
    # Sort faces by confidence
    if face_boxes:
        sorted_faces = sorted(zip(face_boxes, face_confidences), 
                            key=lambda x: x[1], reverse=True)
        return [box for box, _ in sorted_faces]
    
    return []

def preprocess_face_enhanced(face_img, target_size=(224, 224)):
    try:
        if not assess_face_quality(face_img):
            return None
            
        # Enhance contrast using CLAHE
        lab = cv2.cvtColor(face_img, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        l = clahe.apply(l)
        lab = cv2.merge((l,a,b))
        face_img = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
        
        # Add padding to maintain aspect ratio
        height, width = face_img.shape[:2]
        aspect_ratio = width / height
        
        if aspect_ratio > 1:
            new_width = target_size[0]
            new_height = int(new_width / aspect_ratio)
            padding_top = (target_size[1] - new_height) // 2
            padding_bottom = target_size[1] - new_height - padding_top
            padding_left = 0
            padding_right = 0
        else:
            new_height = target_size[1]
            new_width = int(new_height * aspect_ratio)
            padding_left = (target_size[0] - new_width) // 2
            padding_right = target_size[0] - new_width - padding_left
            padding_top = 0
            padding_bottom = 0
            
        face_img = cv2.resize(face_img, (new_width, new_height))
        face_img = cv2.copyMakeBorder(
            face_img, 
            padding_top, padding_bottom, padding_left, padding_right,
            cv2.BORDER_CONSTANT, 
            value=[0, 0, 0]
        )
        
        return face_img
        
    except Exception as e:
        print(f"Error preprocessing face: {str(e)}")
        return None

In [10]:
def extract_and_save_faces_enhanced(images, labels, output_folder, batch_size=50):
    os.makedirs(output_folder, exist_ok=True)
    
    # Track statistics
    total_images = 0
    successful_detections = 0
    failed_detections = 0
    
    for batch_start in range(0, len(images), batch_size):
        batch_images = images[batch_start:batch_start + batch_size]
        batch_labels = labels[batch_start:batch_start + batch_size]
        
        for (filename, image), image_labels in zip(batch_images, batch_labels):
            total_images += 1
            print(f"Processing {filename}...")
            
            if image_labels == ["nothing"]:
                continue
                
            # Detect faces with enhanced detection
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            face_boxes = detect_faces_enhanced(rgb_image)
            print(face_boxes)
            
            if not face_boxes:
                failed_detections += 1
                print(f"No faces detected in {filename}")
                continue
                
            successful_detections += 1
            
            # Process detected faces
            for i, (box, label) in enumerate(zip(face_boxes, image_labels)):
                x, y, w, h = box
                
                # Dynamic margin based on face size
                margin = int(max(w, h) * 0.3)  # Increased margin
                x = max(0, x - margin)
                y = max(0, y - margin)
                w = min(w + 2 * margin, image.shape[1] - x)
                h = min(h + 2 * margin, image.shape[0] - y)
                
                face = image[y:y+h, x:x+w]
                processed_face = preprocess_face_enhanced(face)
                
                if processed_face is not None:
                    label_folder = os.path.join(output_folder, label.lower())
                    os.makedirs(label_folder, exist_ok=True)
                    face_path = os.path.join(label_folder, 
                                           f"{os.path.splitext(filename)[0]}_face_{i}.jpg")
                    cv2.imwrite(face_path, cv2.cvtColor(processed_face, cv2.COLOR_RGB2BGR))
    
    # Print statistics
    print(f"\nProcessing complete:")
    print(f"Total images processed: {total_images}")
    print(f"Successful detections: {successful_detections}")
    print(f"Failed detections: {failed_detections}")
    print(f"Success rate: {(successful_detections/total_images)*100:.2f}%")

In [11]:
def load_images(image_folder, label_map=None):
    images = []
    image_labels = []

    for filename in os.listdir(image_folder):
        img_path = os.path.join(image_folder, filename)
        img = cv2.imread(img_path)

        if img is not None:
            images.append((filename, img))
            if label_map:
                image_labels.append(label_map.get(filename, []))  # Default to empty list for test images

    return images, image_labels if label_map else None


In [12]:
# Paths
train_image_folder = "../data/olda_data/cleaned_images"
label_csv_path = "../data/labels/clean_data.csv"
output_folder = "../data/faces_retinaface_enhanced/train_faces"

# Load label data
import pandas as pd
label_data = pd.read_csv(label_csv_path)
label_data['label_name'] = label_data['label_name'].apply(eval)  # Convert string to list
label_map = dict(zip(label_data['image'].astype(str).str.zfill(4) + ".jpg", label_data['label_name']))

# Load training images and labels
train_images, train_labels = load_images(train_image_folder, label_map=label_map)

# Crop and save faces with enhanced processing
print("Processing training images...")
extract_and_save_faces_enhanced(train_images, labels=train_labels, output_folder=output_folder, batch_size=50)
print("Cropped faces have been saved.")

Processing training images...
Processing 0032.jpg...
[]
No faces detected in 0032.jpg
Processing 0195.jpg...
[]
No faces detected in 0195.jpg
Processing 0569.jpg...
[]
No faces detected in 0569.jpg
Processing 0601.jpg...
[]
No faces detected in 0601.jpg
Processing 0072.jpg...
[]
No faces detected in 0072.jpg
Processing 0703.jpg...
[]
No faces detected in 0703.jpg
Processing 0516.jpg...
[]
No faces detected in 0516.jpg
Processing 0387.jpg...
[]
No faces detected in 0387.jpg
Processing 0710.jpg...
[]
No faces detected in 0710.jpg
Processing 0427.jpg...
[]
No faces detected in 0427.jpg
Processing 0356.jpg...
[]
No faces detected in 0356.jpg
Processing 0463.jpg...
[]
No faces detected in 0463.jpg
Processing 0325.jpg...
[]
No faces detected in 0325.jpg
Processing 0235.jpg...
[]
No faces detected in 0235.jpg
Processing 0151.jpg...
[]
No faces detected in 0151.jpg
Processing 0281.jpg...
[]
No faces detected in 0281.jpg
Processing 0158.jpg...
[]
No faces detected in 0158.jpg
Processing 0590.jp

KeyboardInterrupt: 

In [None]:
def extract_and_save_test_faces_enhanced(images, output_folder, batch_size=50):
    os.makedirs(output_folder, exist_ok=True)
    
    # Track statistics
    total_images = 0
    successful_detections = 0
    failed_detections = 0
    
    for batch_start in range(0, len(images), batch_size):
        batch_images = images[batch_start:batch_start + batch_size]
        
        for filename, image in batch_images:
            total_images += 1
            print(f"Processing {filename}...")
            
            # Detect faces with enhanced detection
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            face_boxes = detect_faces_enhanced(rgb_image)
            
            if not face_boxes:
                failed_detections += 1
                print(f"No faces detected in {filename}")
                continue
                
            successful_detections += 1
            
            # Process and save each face
            for i, box in enumerate(face_boxes):
                x, y, w, h = box
                
                # Dynamic margin based on face size
                margin = int(max(w, h) * 0.3)
                x = max(0, x - margin)
                y = max(0, y - margin)
                w = min(w + 2 * margin, image.shape[1] - x)
                h = min(h + 2 * margin, image.shape[0] - y)
                
                face = image[y:y+h, x:x+w]
                processed_face = preprocess_face_enhanced(face)
                
                if processed_face is not None:
                    face_filename = f"{os.path.splitext(filename)[0]}_face_{i}.jpg"
                    face_path = os.path.join(output_folder, face_filename)
                    cv2.imwrite(face_path, cv2.cvtColor(processed_face, cv2.COLOR_RGB2BGR))
    
    # Print statistics
    print(f"\nProcessing complete:")
    print(f"Total images processed: {total_images}")
    print(f"Successful detections: {successful_detections}")
    print(f"Failed detections: {failed_detections}")
    print(f"Success rate: {(successful_detections/total_images)*100:.2f}%")

In [None]:
# Paths
test_image_folder = "../data/images/test_images/cleaned_images"
test_output_folder = "../data/faces_retinaface_enhanced/test_faces"

# Load test images
test_images, _ = load_images(test_image_folder)

# Crop and save test faces with enhanced processing
print("Processing test images...")
extract_and_save_test_faces_enhanced(test_images, output_folder=test_output_folder, batch_size=50)
print("Cropped faces from test set have been saved.")