# Extract faces from images

Preprocess the 
[Yale face dataset](https://www.kaggle.com/datasets/olgabelitskaya/yale-face-database/code?resource=download)
to obtain images where the face is centered.

In [None]:
from pathlib import Path

import cv2
from loguru import logger as lg
import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt


In [None]:
def crop_face(
    img_path: Path,
    haar_cascade: cv2.CascadeClassifier,
    w_crop: int,
    h_crop: int,
    data_crop_fol: Path,
) -> int:
    """Crop the face from the image and save it as a png file.

    The image is read with matplotlib, converted to a single-channel uint8
    array, searched for faces with the Haar cascade, and a window of
    (w_crop, h_crop) around the first detected face is written to
    ``data_crop_fol / f"{img_path.name}.png"``.

    If no face is found, the image is not saved.
    If more than one face is found, the first one is used.

    Args:
        img_path (Path): Path to the image.
        haar_cascade (cv2.CascadeClassifier): Haar cascade classifier.
        w_crop (int): Width of the cropped image.
        h_crop (int): Height of the cropped image.
        data_crop_fol (Path): Path to the folder where the cropped images will be saved.

    Returns:
        int: Number of faces found in the image.
    """
    # load the image; matplotlib documents float arrays in [0, 1] for png
    # files and integer arrays for the other formats
    pix = np.asarray(plt.imread(str(img_path)))

    # rescale float images first, otherwise the uint8 cast below would
    # truncate every pixel to 0 (assumes the floats are in [0, 1])
    if np.issubdtype(pix.dtype, np.floating):
        pix = np.clip(pix * 255.0, 0.0, 255.0).round()
    im = np.array(pix, dtype=np.uint8)

    # convert colour images to grayscale: the cropping step expects a
    # single-channel image (matplotlib returns the channels in RGB(A) order)
    if im.ndim == 3 and im.shape[2] in (3, 4):
        color_code = cv2.COLOR_RGBA2GRAY if im.shape[2] == 4 else cv2.COLOR_RGB2GRAY
        im = cv2.cvtColor(im, color_code)

    # detect faces in the image
    faces = haar_cascade.detectMultiScale(
        im,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
        flags=cv2.CASCADE_SCALE_IMAGE,
    )

    # sanity check
    # len() is used on purpose: the truth value of a multi-element ndarray
    # is ambiguous, so `not faces` is not safe here
    if len(faces) == 0:
        lg.warning(f"No faces found in {img_path}")
        return 0
    if len(faces) > 1:
        lg.warning(f"{len(faces)} faces found in {img_path}")

    # get the corner and the size of the face
    x, y, w, h = faces[0]

    # crop the image
    im_crop = crop_face_within_bounds(im, w_crop, h_crop, x, y, w, h)

    # save the image as a png file; the original name is kept in full and
    # ".png" is appended to it
    img_path_crop = data_crop_fol / f"{img_path.name}.png"
    # imwrite reports a failure through its return value, not an exception
    if not cv2.imwrite(str(img_path_crop), im_crop):
        lg.error(f"Could not save the cropped image to {img_path_crop}")

    return len(faces)


def crop_face_within_bounds(
    im: npt.NDArray[np.uint8],
    w_crop: int,
    h_crop: int,
    x: int,
    y: int,
    w: int,
    h: int,
) -> npt.NDArray[np.uint8]:
    """Crop the image within the bounds of the original image.

    The crop window is centered on the face bounding box. If the window
    would extend past an image border it is shifted back inside, so the
    result keeps the requested size. If the requested size is larger than
    the image along an axis, the full extent of that axis is returned.

    Args:
        im (npt.NDArray[np.uint8]): Image to crop, indexed as [row, column];
            any further axes (e.g. colour channels) are kept untouched.
        w_crop (int): Width of the cropped image.
        h_crop (int): Height of the cropped image.
        x (int): X coordinate of the top left corner of the face.
        y (int): Y coordinate of the top left corner of the face.
        w (int): Width of the face.
        h (int): Height of the face.

    Returns:
        npt.NDArray[np.uint8]: Cropped image, obtained by slicing ``im``.
    """
    # get the original dimensions (only the first two axes are spatial)
    h_orig, w_orig = im.shape[:2]

    # the crop cannot be larger than the image itself
    w_out = min(w_crop, w_orig)
    h_out = min(h_crop, h_orig)

    # compute the center of the face
    x_center = x + w // 2
    y_center = y + h // 2

    # top left corner of a window centered on the face
    x_left = x_center - w_out // 2
    y_top = y_center - h_out // 2

    # shift the window back inside the image; clamping the start and adding
    # the size afterwards keeps the output size exact, also for odd sizes,
    # and never produces a negative index (which numpy would wrap around)
    x_left = max(0, min(x_left, w_orig - w_out))
    y_top = max(0, min(y_top, h_orig - h_out))

    # crop the image
    return im[y_top : y_top + h_out, x_left : x_left + w_out]


In [None]:
# folder with the original images, located below the user's home directory
data_root_fol = Path.home() / "data" / "yaleface" / "data"


In [None]:
# file name of the haar algorithm; being a relative path, it is looked up
# from the current working directory
alg = "haarcascade_frontalface_default.xml"

# pass the algorithm to OpenCV
haar_cascade: cv2.CascadeClassifier = cv2.CascadeClassifier(alg)

# fail early with a clear message: an empty classifier means the file could
# not be loaded, and detection would only fail later on the first image
if haar_cascade.empty():
    raise RuntimeError(f"Could not load the Haar cascade from {alg}")

# create the output folder, next to the folder with the original images
# (exist_ok makes a separate existence check unnecessary)
data_crop_fol = data_root_fol.parent / "data_crop_tes"
data_crop_fol.mkdir(parents=True, exist_ok=True)
print(f"Saving cropped images in {data_crop_fol}")

# size of the cropped images, in pixels
w_crop = 160
h_crop = 180

for img_path in data_root_fol.iterdir():
    # sub-folders cannot be read as images
    if not img_path.is_file():
        continue
    crop_face(img_path, haar_cascade, w_crop, h_crop, data_crop_fol)
