<a href="https://colab.research.google.com/github/SharvaniBaggani/DietGPT/blob/main/utils_image_preprocess_py_%E2%80%94_face_detect%2C_align%2C_crop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# utils/image_preprocess.py
import cv2
import numpy as np
import mediapipe as mp

# Module-level shorthand for MediaPipe's face_mesh solution module; the
# FaceMesh detector used by detect_and_align_face_bgr is constructed from it.
mp_face = mp.solutions.face_mesh

def _padded_square_crop(img, center_x, center_y, half):
    """Cut the square of side ``2 * half`` centred on (center_x, center_y).

    Any part of the square that falls outside *img* is filled with zeros
    (black) so the result is always exactly square. Returns None when the
    square does not overlap the image at all.
    """
    side = 2 * half
    img_h, img_w = img.shape[:2]
    left, top = center_x - half, center_y - half

    # Portion of the requested square that actually lies inside the image.
    vis_x1, vis_y1 = max(0, left), max(0, top)
    vis_x2, vis_y2 = min(img_w, left + side), min(img_h, top + side)
    if vis_x2 <= vis_x1 or vis_y2 <= vis_y1:
        return None

    visible = img[vis_y1:vis_y2, vis_x1:vis_x2]
    if visible.shape[0] == side and visible.shape[1] == side:
        # Fully inside the image: no padding needed.
        return visible

    # Paste the visible part at its offset inside a black square canvas so
    # the face keeps its aspect ratio and stays centred.
    canvas = np.zeros((side, side) + img.shape[2:], dtype=img.dtype)
    canvas[vis_y1 - top:vis_y2 - top, vis_x1 - left:vis_x2 - left] = visible
    return canvas


def detect_and_align_face_bgr(
    image_bgr: np.ndarray, target_size: int = 224, margin: float = 1.2
) -> "np.ndarray | None":
    """Detect a face with MediaPipe FaceMesh and return a square face crop.

    The face region is taken as the bounding box of all FaceMesh landmarks
    (clamped to the image), turned into a square whose side is the longer
    box edge times ``margin``, cropped around the box centre, resized to
    ``target_size`` x ``target_size`` and scaled to floats in [0, 1].

    NOTE: despite the function name, no rotation / eye-based alignment is
    performed; the crop is only centred on the landmark bounding box.

    Args:
        image_bgr: Image array in BGR channel order (as read by cv2.imread).
        target_size: Side length in pixels of the returned square image.
        margin: Scale factor applied to the longer bounding-box edge to get
            the crop side (values > 1 add context around the face).

    Returns:
        A float32 RGB array of shape (target_size, target_size, 3) with
        values in [0, 1], or None when no face is detected or the resulting
        crop region is empty. Where the square crop extends past the image
        border it is padded with black instead of being stretched.
    """
    img_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    # Keep the FaceMesh graph open only for inference; copy the landmark
    # coordinates out before it is closed.
    with mp_face.FaceMesh(static_image_mode=True, max_num_faces=1) as face_mesh:
        results = face_mesh.process(img_rgb)
        if not results.multi_face_landmarks:
            return None  # no face found
        landmarks = results.multi_face_landmarks[0].landmark
        xs = [lm.x for lm in landmarks]
        ys = [lm.y for lm in landmarks]

    h, w = img_rgb.shape[:2]

    # Landmark coordinates are scaled by the image size to get pixels; clamp
    # the bounding box to the image.
    x_min = int(max(0, min(xs) * w))
    x_max = int(min(w, max(xs) * w))
    y_min = int(max(0, min(ys) * h))
    y_max = int(min(h, max(ys) * h))

    # Square crop: longer box edge scaled by the margin.
    half = int(max(x_max - x_min, y_max - y_min) * margin) // 2
    if half <= 0:
        # Degenerate box (e.g. zero/negative margin or landmarks entirely
        # off-image): nothing usable to crop.
        return None
    center_x = (x_min + x_max) // 2
    center_y = (y_min + y_max) // 2

    # Crop straight from the already-converted RGB image (avoids a second
    # colour conversion) and pad rather than distort at the image borders.
    crop_rgb = _padded_square_crop(img_rgb, center_x, center_y, half)
    if crop_rgb is None:
        return None

    resized = cv2.resize(crop_rgb, (target_size, target_size))
    # Normalize to [0, 1]
    return resized.astype(np.float32) / 255.0

# Quick test:
# img = cv2.imread("sample.jpg")
# out = detect_and_align_face_bgr(img)
# if out is not None: print(out.shape)




In [4]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting numpy<2 (from mediapipe)
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.3-py3-none-any.whl.metadata (1.6 kB)
INFO: pip is looking at multiple versions of jax to determine which version is compatible with other requirements. This could take a while.
Collecting jax (from mediapipe)
  Downloading jax-0.8.0-py3-none-any.whl.metadata (13 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.8.0-cp312-cp312-manylinux_2_27_x86_64.whl.metadata (1.3 kB)
Collecting jax (from mediapipe)
  Do