In [4]:
def preprocess_face(face_img, target_size=(224, 224)):
    """Preprocess face image for model training."""
    try:
        if face_img is None or face_img.size == 0:
            return None
            
        # Convert to RGB if needed
        if len(face_img.shape) == 2:
            face_img = cv2.cvtColor(face_img, cv2.COLOR_GRAY2RGB)
        elif face_img.shape[2] == 4:
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGRA2RGB)
        elif face_img.shape[2] == 3:
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            
        # Resize while maintaining aspect ratio
        aspect_ratio = face_img.shape[1] / face_img.shape[0]
        if aspect_ratio > 1:
            # Width is larger
            new_width = target_size[0]
            new_height = int(new_width / aspect_ratio)
        else:
            # Height is larger
            new_height = target_size[1]
            new_width = int(new_height * aspect_ratio)
            
        resized = cv2.resize(face_img, (new_width, new_height), interpolation=cv2.INTER_AREA)
        
        # Create blank canvas
        final_img = np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8)
        
        # Center the image
        y_offset = (target_size[0] - new_height) // 2
        x_offset = (target_size[1] - new_width) // 2
        
        final_img[y_offset:y_offset+new_height, x_offset:x_offset+new_width] = resized
        
        return final_img
        
    except Exception as e:
        print(f"Error preprocessing face: {str(e)}")
        return None

# Update the face processing part in extract_and_save_faces
def extract_and_save_faces(images, labels, output_folder, batch_size=50, target_size=(224, 224)):
    """Extract faces from images and save them preprocessed into corresponding label folders."""
    os.makedirs(output_folder, exist_ok=True)
    
    for batch_start in range(0, len(images), batch_size):
        
        batch_images = images[batch_start:batch_start + batch_size]
        batch_labels = labels[batch_start:batch_start + batch_size]
        
        for (filename, image), image_labels in zip(batch_images, batch_labels):
            print(f"Processing {filename}...")
            
            if image_labels == ["nothing"]:
                continue
                
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            face_boxes = detect_faces(rgb_image)
            
            if len(face_boxes) != len(image_labels):
                detections = detector.detect_faces(rgb_image)
                face_boxes = sorted(
                    [d['box'] for d in detections if d['confidence'] > 0.85],
                    key=lambda x: x[0]
                )
                
                if len(face_boxes) != len(image_labels):
                    print(f"Skipping {filename} - Could not match face count")
                    continue
            
            for i, (box, label) in enumerate(zip(face_boxes, image_labels)):
                x, y, width, height = box
                
                # Add margin
                margin = int(max(width, height) * 0.2)
                x = max(0, x - margin)
                y = max(0, y - margin)
                width = min(width + 2*margin, image.shape[1] - x)
                height = min(height + 2*margin, image.shape[0] - y)
                
                face = image[y:y+height, x:x+width]
                
                if face.size == 0 or face.shape[0] < 50 or face.shape[1] < 50:
                    print(f"Warning: Face too small in {filename}")
                    continue
                
                # Preprocess face
                processed_face = preprocess_face(face, target_size)
                if processed_face is None:
                    print(f"Warning: Could not process face in {filename}")
                    continue
                
                # Save processed face
                label_folder = os.path.join(output_folder, label.lower())
                os.makedirs(label_folder, exist_ok=True)
                
                face_filename = f"{os.path.splitext(filename)[0]}_face_{i}.jpg"
                cv2.imwrite(os.path.join(label_folder, face_filename), cv2.cvtColor(processed_face, cv2.COLOR_RGB2BGR))
        
        gc.collect()



In [None]:
import os
import cv2
import numpy as np
from mtcnn import MTCNN
import gc

# Initialize MTCNN
detector = MTCNN()

def load_images(image_folder, label_map=None):
    """
    Load images from a folder, optionally mapping filenames to labels.
    Args:
        image_folder: Path to the folder containing images.
        label_map: Optional dictionary mapping filenames to labels.
    Returns:
        images: List of (filename, image) tuples.
        labels: List of labels corresponding to the images (or None if no labels).
    """
    images = []
    image_labels = []

    for filename in os.listdir(image_folder):
        img_path = os.path.join(image_folder, filename)
        img = cv2.imread(img_path)

        if img is not None:
            images.append((filename, img))
            if label_map:
                image_labels.append(label_map.get(filename, []))  # Default to empty list for test images

    return images, image_labels if label_map else None

# def detect_faces(image):
#     """
#     Detect faces in an image using MTCNN.
#     Args:
#         image: Input image (NumPy array).
#     Returns:
#         List of bounding boxes [(x, y, width, height)] for detected faces.
#     """
#     detections = detector.detect_faces(image)
#     return [detection['box'] for detection in detections if detection['confidence'] > 0.9]

# def crop_and_save_faces(images, labels, output_folder, batch_size=50, resize_dim=(224, 224)):
#     """
#     Crop faces from images and save them into respective folders.
#     Args:
#         images: List of (filename, image) tuples.
#         labels: Corresponding labels for the images (or None for test images).
#         output_folder: Folder to save cropped faces.
#         batch_size: Number of images to process per batch.
#         resize_dim: Tuple (width, height) to resize cropped faces.
#     """
#     # Create default folder
#     default_folder = os.path.join(output_folder, "default")
#     os.makedirs(default_folder, exist_ok=True)

#     # Create folders for labeled faces if labels are provided
#     if labels:
#         for label_list in labels:
#             for label in label_list:
#                 label_folder = os.path.join(output_folder, label)
#                 os.makedirs(label_folder, exist_ok=True)

#     # Process images in batches
#     for batch_start in range(0, len(images), batch_size):
#         batch_end = batch_start + batch_size
#         batch_images = images[batch_start:batch_end]
#         batch_labels = labels[batch_start:batch_end] if labels else None

#         for idx, (filename, img) in enumerate(batch_images):
#             # Resize image for faster processing
#             img_resized = cv2.resize(img, (640, 480))

#             # Detect faces
#             faces = detect_faces(img_resized)

#             # Scale coordinates back to original image size
#             scale_x = img.shape[1] / img_resized.shape[1]
#             scale_y = img.shape[0] / img_resized.shape[0]
#             faces = [(int(x * scale_x), int(y * scale_y), int(w * scale_x), int(h * scale_y)) for x, y, w, h in faces]

#             # Sort faces by x-coordinate
#             sorted_faces = sorted(faces, key=lambda box: box[0])

#             # Save each detected face
#             for i, (x, y, w, h) in enumerate(sorted_faces):
#                 face = img[y:y + h, x:x + w]
#                 face_resized = cv2.resize(face, resize_dim)
                
#                 face_resized = cv2.cvtColor(face_resized, cv2.COLOR_BGR2GRAY)

#                 # Save to the default folder
#                 face_path_default = os.path.join(default_folder, f"{filename}_face_{i}.jpg")
#                 cv2.imwrite(face_path_default, face_resized)

#                 # Save to the respective label folders if labels are provided
#                 if labels and len(batch_labels[idx]) > i:
#                     person_label = batch_labels[idx][i]
#                     label_folder = os.path.join(output_folder, person_label)
#                     face_path_label = os.path.join(label_folder, f"{filename}_{person_label}_face_{i}.jpg")
#                     cv2.imwrite(face_path_label, face_resized)

#         # Clear memory after processing the batch
#         gc.collect()



In [5]:
def extract_and_save_test_faces(images, output_folder, target_size=(224, 224), batch_size=50):
    """Extract faces from test images and save them sequentially."""
    os.makedirs(output_folder, exist_ok=True)
    
    for batch_start in range(0, len(images), batch_size):
        batch_images = images[batch_start:batch_start + batch_size]
        
        for filename, image in batch_images:
            print(f"Processing {filename}...")
            
            # Convert BGR to RGB for MTCNN
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            
            # Detect faces and sort left-to-right
            detections = detector.detect_faces(rgb_image)
            face_boxes = sorted(
                [d['box'] for d in detections if d['confidence'] > 0.85],
                key=lambda x: x[0]
            )
            
            # Process each face
            for i, box in enumerate(face_boxes):
                x, y, width, height = box
                
                # Add margin
                margin = int(max(width, height) * 0.2)
                x = max(0, x - margin)
                y = max(0, y - margin)
                width = min(width + 2*margin, image.shape[1] - x)
                height = min(height + 2*margin, image.shape[0] - y)
                
                face = image[y:y+height, x:x+width]
                
                if face.size == 0 or face.shape[0] < 50 or face.shape[1] < 50:
                    print(f"Warning: Face too small in {filename}")
                    continue
                
                # Preprocess face
                processed_face = preprocess_face(face, target_size)
                if processed_face is None:
                    print(f"Warning: Could not process face in {filename}")
                    continue
                
                # Save processed face with sequential numbering
                face_filename = f"{os.path.splitext(filename)[0]}_face_{i}.jpg"
                cv2.imwrite(os.path.join(output_folder, face_filename), 
                           cv2.cvtColor(processed_face, cv2.COLOR_RGB2BGR))
            
            print(f"Found {len(face_boxes)} faces in {filename}")
        
        gc.collect()

test_images = []
test_folder = "../data/images/test_images/cleaned_images"
for filename in sorted(os.listdir(test_folder)):
    if filename.endswith(('.jpg', '.jpeg', '.png')):
        img_path = os.path.join(test_folder, filename)
        img = cv2.imread(img_path)
        if img is not None:
            test_images.append((filename, img))

output_folder = "../data/faces3_test"
extract_and_save_test_faces(test_images, output_folder)

Processing 0037.jpg...
Found 1 faces in 0037.jpg
Processing 0039.jpg...
Found 3 faces in 0039.jpg
Processing 0040.jpg...
Found 4 faces in 0040.jpg
Processing 0043.jpg...
Found 2 faces in 0043.jpg
Processing 0045.jpg...
Found 1 faces in 0045.jpg
Processing 0051.jpg...
Found 0 faces in 0051.jpg
Processing 0058.jpg...
Found 1 faces in 0058.jpg
Processing 0062.jpg...
Found 1 faces in 0062.jpg
Processing 0066.jpg...
Found 2 faces in 0066.jpg
Processing 0078.jpg...
Found 1 faces in 0078.jpg
Processing 0103.jpg...
Found 1 faces in 0103.jpg
Processing 0125.jpg...
Found 1 faces in 0125.jpg
Processing 0139.jpg...
Found 1 faces in 0139.jpg
Processing 0160.jpg...
Found 0 faces in 0160.jpg
Processing 0161.jpg...
Found 1 faces in 0161.jpg
Processing 0166.jpg...
Found 1 faces in 0166.jpg
Processing 0168.jpg...
Found 1 faces in 0168.jpg
Processing 0174.jpg...
Found 1 faces in 0174.jpg
Processing 0179.jpg...
Found 1 faces in 0179.jpg
Processing 0182.jpg...
Found 4 faces in 0182.jpg
Processing 0194.jpg.

In [6]:
def extract_and_save_test_faces(images, output_folder, target_size=(224, 224), batch_size=50):
    """Extract faces from test images and save them sequentially."""
    os.makedirs(output_folder, exist_ok=True)
    
    for batch_start in range(0, len(images), batch_size):
        batch_images = images[batch_start:batch_start + batch_size]
        
        for filename, image in batch_images:
            print(f"Processing {filename}...")
            
            # Convert BGR to RGB for MTCNN
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            
            # Detect faces and sort left-to-right
            detections = detector.detect_faces(rgb_image)
            face_boxes = sorted(
                [d['box'] for d in detections if d['confidence'] > 0.85],
                key=lambda x: x[0]
            )
            
            # If no faces are detected, create a fallback face crop
            if not face_boxes:
                print(f"No faces detected in {filename}. Using fallback.")
                h, w, _ = image.shape
                fallback_margin = int(min(h, w) * 0.1)  # 10% margin
                x = max(0, w // 4 - fallback_margin)
                y = max(0, h // 4 - fallback_margin)
                width = min(w // 2 + fallback_margin * 2, w - x)
                height = min(h // 2 + fallback_margin * 2, h - y)
                face_boxes = [[x, y, width, height]]

            # Process each face
            for i, box in enumerate(face_boxes):
                x, y, width, height = box
                
                # Add margin
                margin = int(max(width, height) * 0.2)
                x = max(0, x - margin)
                y = max(0, y - margin)
                width = min(width + 2 * margin, image.shape[1] - x)
                height = min(height + 2 * margin, image.shape[0] - y)
                
                face = image[y:y+height, x:x+width]
                
                if face.size == 0 or face.shape[0] < 50 or face.shape[1] < 50:
                    print(f"Warning: Face too small in {filename}")
                    continue
                
                # Preprocess face
                processed_face = preprocess_face(face, target_size)
                if processed_face is None:
                    print(f"Warning: Could not process face in {filename}")
                    continue
                
                # Save processed face with sequential numbering
                face_filename = f"{os.path.splitext(filename)[0]}_face_{i}.jpg"
                cv2.imwrite(os.path.join(output_folder, face_filename), 
                            cv2.cvtColor(processed_face, cv2.COLOR_RGB2BGR))
            
            print(f"Found {len(face_boxes)} faces in {filename}")


test_images = []
test_folder = "../data/images/test_images/cleaned_images"
for filename in sorted(os.listdir(test_folder)):
    if filename.endswith(('.jpg', '.jpeg', '.png')):
        img_path = os.path.join(test_folder, filename)
        img = cv2.imread(img_path)
        if img is not None:
            test_images.append((filename, img))

output_folder = "../data/faces4_test"
extract_and_save_test_faces(test_images, output_folder)

Processing 0037.jpg...
Found 1 faces in 0037.jpg
Processing 0039.jpg...
Found 3 faces in 0039.jpg
Processing 0040.jpg...
Found 4 faces in 0040.jpg
Processing 0043.jpg...
Found 2 faces in 0043.jpg
Processing 0045.jpg...
Found 1 faces in 0045.jpg
Processing 0051.jpg...
No faces detected in 0051.jpg. Using fallback.
Found 1 faces in 0051.jpg
Processing 0058.jpg...
Found 1 faces in 0058.jpg
Processing 0062.jpg...
Found 1 faces in 0062.jpg
Processing 0066.jpg...
Found 2 faces in 0066.jpg
Processing 0078.jpg...
Found 1 faces in 0078.jpg
Processing 0103.jpg...
Found 1 faces in 0103.jpg
Processing 0125.jpg...
Found 1 faces in 0125.jpg
Processing 0139.jpg...
Found 1 faces in 0139.jpg
Processing 0160.jpg...
No faces detected in 0160.jpg. Using fallback.
Found 1 faces in 0160.jpg
Processing 0161.jpg...
Found 1 faces in 0161.jpg
Processing 0166.jpg...
Found 1 faces in 0166.jpg
Processing 0168.jpg...
Found 1 faces in 0168.jpg
Processing 0174.jpg...
Found 1 faces in 0174.jpg
Processing 0179.jpg...
F

In [None]:
# # Usage

# test_image_folder = "../data/images/test_images/cleaned_images"
# output_folder = "../data/images/test_faces"

# images_test, labels = load_images(test_image_folder)

# print(images_test[0][0])
# print(labels)
# output_folder = "../data/faces3_test"
# extract_and_save_faces(images_test, labels, output_folder)

# print("Preprocessed faces have been saved to the respective folders.")

0427.jpg
None


TypeError: 'NoneType' object is not subscriptable

In [3]:


output_folder = "../data/faces3_test"
extract_and_save_faces(images, labels, output_folder, target_size=(224, 224))


NameError: name 'images' is not defined

In [15]:
# Paths
train_image_folder = "../data/images/cleaned_images"
label_csv_path = "../data/labels/clean_data.csv"
output_folder = "../data/faces/train_faces"

# Load label data
import pandas as pd
label_data = pd.read_csv(label_csv_path)
label_data['label_name'] = label_data['label_name'].apply(eval)  # Convert string to list
label_map = dict(zip(label_data['image'].astype(str).str.zfill(4) + ".jpg", label_data['label_name']))

# Load training images and labels
train_images, train_labels = load_images(train_image_folder, label_map=label_map)

# Crop and save faces
print("Processing training images...")
crop_and_save_faces(train_images, labels=train_labels, output_folder=output_folder, batch_size=50)
print("Cropped faces have been saved.")


Processing training images...
Cropped faces have been saved.


In [19]:

# Paths
test_image_folder = "../data/images/test_images/cleaned_images"
output_folder = "../data/images/test_faces"

# Load test images (no labels required)
test_images, _ = load_images(test_image_folder)

# Crop and save faces
print("Processing test images...")
crop_and_save_faces(test_images, labels=None, output_folder=output_folder, batch_size=50)
print("Cropped faces have been saved.")


Processing test images...
Cropped faces have been saved.
