## Crop faces

In [30]:
import os
from pathlib import Path

import torch
from facenet_pytorch import MTCNN
from PIL import ExifTags, Image, ImageOps
# optional: nice progress bars in notebooks
from tqdm import tqdm

In [17]:

def fix_orientation(image: Image.Image) -> Image.Image:
    """Return ``image`` turned upright according to its EXIF orientation tag.

    Uses Pillow's public ``ImageOps.exif_transpose``, which reads the tag via
    ``Image.getexif()``. That matters because callers pass images that have
    already gone through ``.convert("RGB")``: such images are plain
    ``Image.Image`` objects without the private ``_getexif()`` helper that
    only file-plugin classes provide, so a lookup through ``_getexif()``
    fails on them and no rotation is ever applied.

    ``exif_transpose`` also covers the mirrored orientations (2, 4, 5, 7) and
    removes the orientation tag from the result, so applying this function
    twice does not rotate twice.

    Args:
        image: The PIL image to normalise.

    Returns:
        An upright image. When the image has no orientation tag Pillow
        returns an unrotated copy. If the EXIF data cannot be processed the
        original ``image`` object is returned unchanged.
    """
    try:
        return ImageOps.exif_transpose(image)
    except Exception as exc:
        # Best effort: a photo with broken metadata should still be usable,
        # but say so instead of failing silently.
        print(f"[WARN] Could not apply EXIF orientation: {exc}")
        return image

In [None]:
class FacePreprocessor:
    """Detect a face with MTCNN and hand it back as a PIL image crop."""

    def __init__(self, image_size=160, device=None):
        """Create the MTCNN detector.

        Args:
            image_size: Forwarded to ``MTCNN(image_size=...)``.
            device: Torch device for the detector. When falsy, "cuda" is used
                if ``torch.cuda.is_available()`` and "cpu" otherwise.
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.mtcnn = MTCNN(image_size=image_size, margin=0, device=self.device)

    def crop(self, image: Image.Image):
        """Returns cropped face as PIL.Image or None.

        Args:
            image: Source image. It is passed through ``fix_orientation``
                before detection.

        Returns:
            The cropped face as an RGB ``PIL.Image``, or ``None`` when the
            detector returns no face or any step raises (a ``[WARN]`` line is
            printed in the latter case).
        """
        try:
            # Fix image orientation
            image = fix_orientation(image)

            # Detect face and return as tensor
            face_tensor = self.mtcnn(image)
            if face_tensor is None:
                return None

            # With post_process enabled (the MTCNN constructor default),
            # facenet-pytorch standardises the crop as (pixel - 127.5) / 128,
            # i.e. roughly [-1, 1]. Undo that to get real pixel values back;
            # clamping the standardised tensor to [0, 1] would turn every
            # pixel darker than mid-grey black.
            if getattr(self.mtcnn, "post_process", True):
                face_tensor = face_tensor * 128.0 + 127.5

            # Values are now on the 0-255 scale; round and clamp before the
            # cast so float noise cannot wrap around in uint8.
            pixels = face_tensor.round().clamp(0, 255).to(torch.uint8)

            # Convert tensor (C,H,W) to an (H,W,C) array for PIL
            hwc = pixels.permute(1, 2, 0).contiguous().cpu().numpy()
            return Image.fromarray(hwc)
        except Exception as e:
            print(f"[WARN] Could not process image: {e}")
            return None

In [19]:
# Paths for the "me" class, resolved relative to the notebook's working directory.
# NOTE: a later cell assigns these same three names for the "not_me" class;
# whichever of the two cells ran last decides what the processing cell uses.
base_dir = Path.cwd()
# input: raw photos
folder_path = base_dir.parent.joinpath("data", "raw", "me", "photos")
# output: cropped faces
dest_dir = base_dir.parent.joinpath("data", "cropped", "me", "photos")

In [25]:
# Paths for the "not_me" class, resolved relative to the notebook's working directory.
# NOTE: an earlier cell assigns these same three names for the "me" class;
# whichever of the two cells ran last decides what the processing cell uses.
base_dir = Path.cwd()
# input: raw photos
folder_path = base_dir.parent.joinpath("data", "raw", "not_me", "photos")
# output: cropped faces
dest_dir = base_dir.parent.joinpath("data", "cropped", "not_me", "photos")

In [None]:
# Crop faces from every image in `folder_path` and save them under the same
# file name in `dest_dir`, stopping once `max_saved` crops have been written.
face_preprocessor = FacePreprocessor()
# cap the number of saved cropped faces
max_saved = 10
saved_count = 0

print(f"Processing images in {folder_path}...")

# check that source folder exists and is a directory
# (isdir is already False for a missing path, so no separate exists() check)
if not os.path.isdir(folder_path):
    print(f"[ERROR] Folder path does not exist or is not a directory: {folder_path}")
else:
    # Create the destination up front; without it every save fails and the
    # failure would be reported as an unreadable source image.
    os.makedirs(dest_dir, exist_ok=True)

    # os.listdir order is unspecified; sort so the capped subset is the same
    # on every run.
    all_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

    # The bar counts saved crops, so its total is the cap (or the number of
    # candidate files when there are fewer than that).
    with tqdm(total=min(max_saved, len(all_files)), desc="Images", unit="file") as progress:
        for filename in all_files:
            if saved_count >= max_saved:
                # tqdm.write keeps messages from tearing the progress bar
                tqdm.write(f"Reached cap of {max_saved} saved crops. Stopping.")
                break

            img_path = os.path.join(folder_path, filename)

            try:
                # the context manager releases the file handle once the
                # pixels have been copied out by convert()
                with Image.open(img_path) as source_image:
                    img = source_image.convert("RGB")
            except Exception as e:
                tqdm.write(f"[WARN] Could not open {img_path}: {e}")
                continue

            cropped_face = face_preprocessor.crop(img)
            if cropped_face is None:
                tqdm.write(f"No face detected in {filename}")
                continue

            save_path = os.path.join(dest_dir, filename)
            try:
                cropped_face.save(save_path)
            except Exception as e:
                # reported separately so a write problem is not mistaken for
                # a bad input file
                tqdm.write(f"[WARN] Could not save {save_path}: {e}")
                continue

            saved_count += 1
            progress.update(1)
            tqdm.write(f"Saved cropped face to {save_path} (#{saved_count})")


Processing images in c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\raw\not_me\photos...


Images:   0%|          | 0/308 [00:00<?, ?file/s][A

Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10000.png (#1)

Images:   0%|          | 1/308 [00:00<00:54,  5.65file/s][A




Images:   1%|          | 2/308 [00:00<01:19,  3.86file/s][A

Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10001.png (#2)
Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10002.png (#3)


Images:   1%|▏         | 4/308 [00:00<00:52,  5.76file/s][A

Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10003.png (#4)


Images:   2%|▏         | 5/308 [00:00<00:56,  5.37file/s][A

Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10004.png (#5)
Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10005.png (#6)


Images:   2%|▏         | 6/308 [00:01<00:55,  5.39file/s][A

Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10006.png (#7)


Images:   3%|▎         | 8/308 [00:01<00:59,  5.03file/s][A

Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10007.png (#8)
Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10008.png (#9)


Images:   3%|▎         | 10/308 [00:01<00:58,  5.11file/s][A

Saved cropped face to c:\Users\areva\Desktop\U\ESPE3\identity_recognition\data\cropped\not_me\photos\10009.png (#10)
Reached cap of 10 saved crops. Stopping.



