In [1]:
import os
import cv2
from PIL import Image
import numpy as np
from mtcnn import MTCNN
from tqdm import tqdm

2025-07-14 15:29:23.506228: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-14 15:29:23.507634: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-14 15:29:23.531988: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-14 15:29:23.532518: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Dataset locations, relative to the notebook's working directory:
#   input_dir  - the extracted LFW folder from Week 1 (read only here)
#   output_dir - where the cropped face images are written
input_dir, output_dir = "../data/lfw-funneled", "../data/processed_faces_mtcnn"

# Ensure the output folder exists; a no-op when it is already there.
os.makedirs(name=output_dir, exist_ok=True)

In [3]:
# Load MTCNN face detector.
# Built once with the constructor's default arguments and reused for every
# image; the processing loop calls detector.detect_faces() on RGB arrays.
detector = MTCNN()

In [4]:
# Loop over each person's folder, detect one face per image, and save a
# fixed-size RGB crop under the same person/file name inside output_dir.
FACE_SIZE = (160, 160)  # (width, height) of saved crops, for later embeddings
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.pgm')


def _crop_box(image, box):
    """Return the region of ``image`` covered by ``box``, clipped to the image.

    Args:
        image: Array indexed as ``image[row, col, ...]``.
        box: ``(x, y, w, h)`` from the detector. The origin may be negative
            and the box may extend past the image border.

    Returns:
        The cropped array view, or ``None`` when the clipped box is empty.
    """
    x, y, w, h = box
    img_h, img_w = image.shape[:2]
    # Derive the far edges from the unclamped origin. Clamping x/y first and
    # then adding w/h would shift the box and grow it past the detected edge.
    left, top = max(0, x), max(0, y)
    right, bottom = min(img_w, x + w), min(img_h, y + h)
    if right <= left or bottom <= top:
        return None
    return image[top:bottom, left:right]


# Sorted so that repeated runs visit folders and files in the same order.
person_dirs = sorted(
    d for d in os.listdir(input_dir) if os.path.isdir(os.path.join(input_dir, d))
)

unreadable_count = 0  # files cv2.imread could not decode
no_face_count = 0     # images with no usable detection

for person in tqdm(person_dirs, desc="Processing persons"):
    person_input_path = os.path.join(input_dir, person)
    person_output_path = os.path.join(output_dir, person)
    os.makedirs(person_output_path, exist_ok=True)

    for img_name in sorted(os.listdir(person_input_path)):
        if not img_name.lower().endswith(IMAGE_EXTENSIONS):
            continue

        img = cv2.imread(os.path.join(person_input_path, img_name))
        if img is None:
            unreadable_count += 1
            continue

        # OpenCV loads BGR; the detector and PIL both work on RGB here.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = detector.detect_faces(img_rgb)
        if not results:
            no_face_count += 1
            continue

        # Do not rely on detection order: keep the most confident face.
        # max() returns the first item on ties, so a result without a
        # 'confidence' entry falls back to the first detection.
        best = max(results, key=lambda r: r.get('confidence', 0.0))
        cropped_face = _crop_box(img_rgb, best['box'])
        if cropped_face is None:
            # A degenerate box would make cv2.resize raise and abort the run.
            no_face_count += 1
            continue

        # Resize cropped face to 160x160 for later embeddings
        cropped_face = cv2.resize(cropped_face, FACE_SIZE)

        # Save as RGB
        save_path = os.path.join(person_output_path, img_name)
        Image.fromarray(cropped_face).save(save_path)

print(
    f"Skipped {unreadable_count} unreadable image(s); "
    f"no face found in {no_face_count} image(s)."
)

Processing persons:   0%|          | 0/100 [00:00<?, ?it/s]



Processing persons:   1%|          | 1/100 [00:00<01:02,  1.59it/s]



Processing persons:   2%|▏         | 2/100 [00:01<01:01,  1.59it/s]



Processing persons:   3%|▎         | 3/100 [00:01<01:00,  1.60it/s]



Processing persons:   4%|▍         | 4/100 [00:02<00:48,  1.98it/s]



Processing persons:   5%|▌         | 5/100 [00:02<00:41,  2.31it/s]



Processing persons:   6%|▌         | 6/100 [00:02<00:36,  2.56it/s]



Processing persons:   7%|▋         | 7/100 [00:03<00:51,  1.81it/s]



Processing persons:   8%|▊         | 8/100 [00:04<00:43,  2.12it/s]



Processing persons:   9%|▉         | 9/100 [00:04<00:38,  2.37it/s]



Processing persons:  10%|█         | 10/100 [00:04<00:34,  2.60it/s]



Processing persons:  11%|█         | 11/100 [00:04<00:31,  2.79it/s]



Processing persons:  12%|█▏        | 12/100 [00:08<01:55,  1.31s/it]



Processing persons:  13%|█▎        | 13/100 [00:08<01:27,  1.01s/it]



Processing persons:  14%|█▍        | 14/100 [00:09<01:16,  1.13it/s]



Processing persons:  15%|█▌        | 15/100 [00:09<00:59,  1.42it/s]



Processing persons:  16%|█▌        | 16/100 [00:09<00:49,  1.71it/s]



Processing persons:  17%|█▋        | 17/100 [00:11<01:10,  1.18it/s]



Processing persons:  18%|█▊        | 18/100 [00:11<00:55,  1.47it/s]



Processing persons:  19%|█▉        | 19/100 [00:11<00:46,  1.75it/s]



Processing persons:  20%|██        | 20/100 [00:12<00:39,  2.04it/s]



Processing persons:  21%|██        | 21/100 [00:12<00:33,  2.34it/s]



Processing persons:  22%|██▏       | 22/100 [00:12<00:29,  2.64it/s]



Processing persons:  23%|██▎       | 23/100 [00:13<00:26,  2.90it/s]



Processing persons:  24%|██▍       | 24/100 [00:13<00:25,  3.00it/s]



Processing persons:  25%|██▌       | 25/100 [00:13<00:24,  3.08it/s]



Processing persons:  26%|██▌       | 26/100 [00:13<00:23,  3.19it/s]



Processing persons:  27%|██▋       | 27/100 [00:14<00:22,  3.24it/s]



Processing persons:  28%|██▊       | 28/100 [00:14<00:21,  3.28it/s]



Processing persons:  29%|██▉       | 29/100 [00:14<00:21,  3.28it/s]



Processing persons:  30%|███       | 30/100 [00:15<00:21,  3.30it/s]



Processing persons:  31%|███       | 31/100 [00:15<00:21,  3.28it/s]



Processing persons:  32%|███▏      | 32/100 [00:15<00:20,  3.28it/s]



Processing persons:  33%|███▎      | 33/100 [00:16<00:33,  2.03it/s]



Processing persons:  34%|███▍      | 34/100 [00:17<00:28,  2.30it/s]



Processing persons:  35%|███▌      | 35/100 [00:17<00:25,  2.53it/s]



Processing persons:  36%|███▌      | 36/100 [00:17<00:29,  2.17it/s]



Processing persons:  37%|███▋      | 37/100 [00:18<00:31,  2.00it/s]



Processing persons:  38%|███▊      | 38/100 [00:19<00:43,  1.44it/s]



Processing persons:  39%|███▉      | 39/100 [00:20<00:45,  1.33it/s]



Processing persons:  40%|████      | 40/100 [00:21<00:44,  1.35it/s]



Processing persons:  41%|████      | 41/100 [00:22<00:46,  1.27it/s]



Processing persons:  42%|████▏     | 42/100 [00:22<00:41,  1.38it/s]



Processing persons:  43%|████▎     | 43/100 [00:23<00:33,  1.69it/s]



Processing persons:  44%|████▍     | 44/100 [00:23<00:27,  2.01it/s]



Processing persons:  45%|████▌     | 45/100 [00:23<00:23,  2.33it/s]



Processing persons:  46%|████▌     | 46/100 [00:23<00:20,  2.61it/s]



Processing persons:  47%|████▋     | 47/100 [00:29<01:39,  1.87s/it]



Processing persons:  48%|████▊     | 48/100 [00:29<01:12,  1.40s/it]



Processing persons:  49%|████▉     | 49/100 [00:29<00:54,  1.07s/it]



Processing persons:  50%|█████     | 50/100 [00:31<00:55,  1.11s/it]



Processing persons:  51%|█████     | 51/100 [00:31<00:42,  1.15it/s]



Processing persons:  52%|█████▏    | 52/100 [00:31<00:33,  1.43it/s]



Processing persons:  53%|█████▎    | 53/100 [00:31<00:27,  1.73it/s]



Processing persons:  54%|█████▍    | 54/100 [00:32<00:22,  2.02it/s]



Processing persons:  55%|█████▌    | 55/100 [00:32<00:19,  2.28it/s]



Processing persons:  56%|█████▌    | 56/100 [00:32<00:17,  2.50it/s]



Processing persons:  57%|█████▋    | 57/100 [00:33<00:23,  1.79it/s]



Processing persons:  58%|█████▊    | 58/100 [00:34<00:20,  2.09it/s]



Processing persons:  59%|█████▉    | 59/100 [00:35<00:28,  1.46it/s]



Processing persons:  60%|██████    | 60/100 [00:35<00:22,  1.75it/s]



Processing persons:  61%|██████    | 61/100 [00:36<00:22,  1.71it/s]



Processing persons:  62%|██████▏   | 62/100 [00:36<00:19,  2.00it/s]



Processing persons:  63%|██████▎   | 63/100 [00:37<00:26,  1.40it/s]



Processing persons:  64%|██████▍   | 64/100 [00:37<00:21,  1.69it/s]



Processing persons:  65%|██████▌   | 65/100 [00:38<00:17,  1.98it/s]



Processing persons:  66%|██████▌   | 66/100 [00:38<00:15,  2.25it/s]



Processing persons:  67%|██████▋   | 67/100 [00:39<00:16,  2.00it/s]



Processing persons:  68%|██████▊   | 68/100 [00:39<00:14,  2.26it/s]



Processing persons:  69%|██████▉   | 69/100 [00:39<00:12,  2.48it/s]



Processing persons:  70%|███████   | 70/100 [00:40<00:16,  1.77it/s]



Processing persons:  71%|███████   | 71/100 [00:41<00:14,  2.05it/s]



Processing persons:  72%|███████▏  | 72/100 [00:41<00:12,  2.32it/s]



Processing persons:  73%|███████▎  | 73/100 [00:41<00:10,  2.53it/s]



Processing persons:  74%|███████▍  | 74/100 [00:41<00:09,  2.71it/s]



Processing persons:  75%|███████▌  | 75/100 [00:42<00:08,  2.87it/s]



Processing persons:  76%|███████▌  | 76/100 [00:42<00:08,  2.93it/s]



Processing persons:  77%|███████▋  | 77/100 [00:42<00:07,  3.04it/s]



Processing persons:  78%|███████▊  | 78/100 [00:43<00:07,  3.07it/s]



Processing persons:  79%|███████▉  | 79/100 [00:43<00:06,  3.11it/s]



Processing persons:  80%|████████  | 80/100 [00:43<00:06,  3.16it/s]



Processing persons:  81%|████████  | 81/100 [00:44<00:05,  3.20it/s]



Processing persons:  82%|████████▏ | 82/100 [00:44<00:06,  2.63it/s]



Processing persons:  83%|████████▎ | 83/100 [00:45<00:06,  2.66it/s]



Processing persons:  84%|████████▍ | 84/100 [00:45<00:07,  2.10it/s]



Processing persons:  85%|████████▌ | 85/100 [00:46<00:06,  2.26it/s]



Processing persons:  86%|████████▌ | 86/100 [00:47<00:11,  1.23it/s]



Processing persons:  87%|████████▋ | 87/100 [00:48<00:08,  1.51it/s]



Processing persons:  88%|████████▊ | 88/100 [00:49<00:08,  1.34it/s]



Processing persons:  89%|████████▉ | 89/100 [00:49<00:06,  1.63it/s]



Processing persons:  90%|█████████ | 90/100 [00:49<00:05,  1.92it/s]



Processing persons:  91%|█████████ | 91/100 [00:50<00:06,  1.35it/s]



Processing persons:  92%|█████████▏| 92/100 [00:51<00:04,  1.63it/s]



Processing persons:  93%|█████████▎| 93/100 [00:51<00:04,  1.62it/s]



Processing persons:  94%|█████████▍| 94/100 [00:53<00:05,  1.11it/s]



Processing persons:  95%|█████████▌| 95/100 [00:54<00:04,  1.22it/s]



Processing persons:  96%|█████████▌| 96/100 [00:54<00:02,  1.49it/s]



Processing persons:  97%|█████████▋| 97/100 [00:55<00:02,  1.07it/s]



Processing persons:  98%|█████████▊| 98/100 [00:56<00:01,  1.33it/s]



Processing persons:  99%|█████████▉| 99/100 [00:56<00:00,  1.59it/s]



Processing persons: 100%|██████████| 100/100 [00:56<00:00,  1.76it/s]


In [5]:
# Summary: count the person folders and the cropped images saved inside them.
# Only directories are treated as people, so a stray file in output_dir
# (e.g. an OS metadata file) neither inflates the people count nor makes
# os.listdir raise NotADirectoryError.
person_folders = [
    entry for entry in os.listdir(output_dir)
    if os.path.isdir(os.path.join(output_dir, entry))
]
total_people = len(person_folders)
total_cropped_images = sum(
    len(os.listdir(os.path.join(output_dir, person))) for person in person_folders
)

print(f"\n✅ Total People Processed: {total_people}")
print(f"✅ Total Cropped Face Images Saved: {total_cropped_images}")


✅ Total People Processed: 100
✅ Total Cropped Face Images Saved: 186
