In [7]:

# ==== Colab-Safe Face Cropper | YOLOv5-face + (CPU) SCRFD fallback ====
# Input:  /content/images
# Output: /content/cropped_faces
# Aligns by eyes, square-crops, resizes to 320x320. Works on Colab Py3.12 + NumPy 2.x.
# ----------------------------------------------------------------------

# 1) Install deps compatible with current Colab (Py3.12 + NumPy 2.x)
!pip -q install "opencv-python-headless>=4.10.0.84" tqdm matplotlib
# Fallback detector only (CPU): InsightFace + onnxruntime (Py3.12-compatible)
!pip -q install insightface==0.7.3 onnxruntime==1.20.1

import os, sys, math, urllib.request, warnings
import cv2
import numpy as np
from glob import glob
from tqdm import tqdm

warnings.filterwarnings("ignore")

# ---------- Config ----------
INPUT_DIR  = "/content/images"
OUTPUT_DIR = "/content/cropped_faces"
SAVE_ALL_FACES = False      # False: only largest face per image; True: save all faces
TARGET_SIZE = (320, 320)    # final (width, height)
ASPECT_W, ASPECT_H = 1, 1   # enforce square
MARGIN = 1.5                # expand around face;
MIN_CROP_MIN_DIM = 40       # skip tiny crops
CONF_THRES = 0.25           # YOLOv5-face confidence threshold
IOU_THRES = 0.45            # YOLOv5-face NMS threshold
IMG_SIZE = 640              # YOLOv5-face inference size
# ----------------------------

os.makedirs(INPUT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ---------- Geometry helpers ----------
def rotate_align(image, kps):
    """
    Align image so eyes are horizontal using 5-point landmarks.
    kps expected: [ [x_le, y_le], [x_re, y_re], nose, lm, rm ]
    """
    kps = np.asarray(kps, dtype=np.float32)
    left_eye, right_eye = kps[0], kps[1]
    dx, dy = right_eye[0] - left_eye[0], right_eye[1] - left_eye[1]
    angle = math.degrees(math.atan2(dy, dx))
    center = ((left_eye[0] + right_eye[0]) / 2.0, (left_eye[1] + right_eye[1]) / 2.0)
    M = cv2.getRotationMatrix2D((float(center[0]), float(center[1])), angle, 1.0)
    h, w = image.shape[:2]
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
    # transform landmarks
    kps_h = np.hstack([kps, np.ones((kps.shape[0], 1), dtype=np.float32)])  # (5,3)
    kps_rot = (M @ kps_h.T).T
    return rotated, kps_rot

def bbox_from_landmarks(kps, scale=MARGIN):
    x_min, y_min = np.min(kps, axis=0)
    x_max, y_max = np.max(kps, axis=0)
    cx, cy = (x_min + x_max) / 2.0, (y_min + y_max) / 2.0
    w, h = (x_max - x_min), (y_max - y_min)
    w *= (1 + scale)
    h *= (1 + scale * 1.2)  # slightly more vertical room for forehead/chin
    return [cx - w/2, cy - h/2, cx + w/2, cy + h/2]

def enforce_aspect_and_pad(img, box, aspect=(ASPECT_W, ASPECT_H)):
    """
    Expand the shorter side to match target aspect, pad out-of-bounds, then crop.
    """
    h, w = img.shape[:2]
    x1, y1, x2, y2 = map(float, box)
    bw, bh = x2 - x1, y2 - y1
    target_ratio = aspect[0] / aspect[1]
    curr_ratio = (bw / bh) if bh > 1e-6 else target_ratio

    if curr_ratio > target_ratio:
        # too wide -> increase height
        new_bh = bw / target_ratio
        delta = (new_bh - bh) / 2.0
        y1 -= delta; y2 += delta
    else:
        # too tall -> increase width
        new_bw = bh * target_ratio
        delta = (new_bw - bw) / 2.0
        x1 -= delta; x2 += delta

    # pad out-of-bounds with replicate
    left_pad   = max(0, int(-np.floor(x1)))
    top_pad    = max(0, int(-np.floor(y1)))
    right_pad  = max(0, int(np.ceil(x2) - w))
    bottom_pad = max(0, int(np.ceil(y2) - h))

    if any([left_pad, top_pad, right_pad, bottom_pad]):
        img = cv2.copyMakeBorder(img, top_pad, bottom_pad, left_pad, right_pad, cv2.BORDER_REPLICATE)
        x1 += left_pad; x2 += left_pad
        y1 += top_pad;  y2 += top_pad

    x1i, y1i, x2i, y2i = map(lambda v: int(round(v)), [x1, y1, x2, y2])
    x1i = max(0, x1i); y1i = max(0, y1i)
    x2i = min(img.shape[1], x2i); y2i = min(img.shape[0], y2i)
    crop = img[y1i:y2i, x1i:x2i]
    return crop

def face_area_bbox(b):
    x1,y1,x2,y2 = b
    return max(0,(x2-x1))*max(0,(y2-y1))

# ---------- YOLOv5-face (primary) ----------
yolo_ok = False
try:
    YOLO_DIR = "/content/yolov5-face"
    WEIGHTS = "/content/yolov5s-face.pt"
    if not os.path.exists(YOLO_DIR):
        # Repo includes utilities for face landmarks + NMS
        !git clone -q https://github.com/deepcam-cn/yolov5-face.git {YOLO_DIR}
    sys.path.append(YOLO_DIR)

    # Download weights (primary + mirror)
    if not os.path.exists(WEIGHTS):
        urls = [
            "https://github.com/deepcam-cn/yolov5-face/releases/download/v1.0/yolov5s-face.pt",
            "https://huggingface.co/PinPong/yolov5-face/resolve/main/yolov5s-face.pt",
        ]
        for u in urls:
            try:
                urllib.request.urlretrieve(u, WEIGHTS)
                break
            except Exception:
                pass

    import torch
    from models.experimental import attempt_load
    from utils.augmentations import letterbox
    from utils.general import non_max_suppression_face, scale_coords
    try:
        # Some forks provide a helper for landmark scaling; if not found, we implement small helper
        from utils.general import scale_coords_landmarks
    except Exception:
        def scale_coords_landmarks(img_shape, coords, original_shape, ratio_pad=None):
            # coords: (N,10) [le_x,le_y,...,rm_y]
            if ratio_pad is None:
                gain = min(img_shape[0] / original_shape[0], img_shape[1] / original_shape[1])
                pad = ((img_shape[1] - original_shape[1] * gain) / 2, (img_shape[0] - original_shape[0] * gain) / 2)
            else:
                gain = ratio_pad[0]
                pad = ratio_pad[1]
            coords[:, 0::2] -= pad[0]  # x
            coords[:, 1::2] -= pad[1]  # y
            coords[:, 0::2] /= gain
            coords[:, 1::2] /= gain
            coords[:, 0::2] = coords[:, 0::2].clip(0, original_shape[1]-1)
            coords[:, 1::2] = coords[:, 1::2].clip(0, original_shape[0]-1)
            return coords

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = attempt_load(WEIGHTS, map_location=device)
    model.eval()
    stride = int(getattr(model, 'stride', torch.tensor([32])).max())

    @torch.no_grad()
    def yolo5_face_detect(img_bgr, conf_thres=CONF_THRES, iou_thres=IOU_THRES, img_size=IMG_SIZE):
        """
        Returns list of dicts: {'bbox':[x1,y1,x2,y2], 'score':float, 'kps':np.array(5,2)}
        """
        img0 = img_bgr
        h0, w0 = img0.shape[:2]
        # Letterbox
        im, ratio, (dw, dh) = letterbox(img0, new_shape=img_size, stride=stride, auto=True, scaleFill=False)
        im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR->RGB, to CHW
        im = np.ascontiguousarray(im)
        im = torch.from_numpy(im).to(device).float() / 255.0
        im = im.unsqueeze(0)

        pred = model(im)[0]
        # NMS specialized for face (with 5 landmarks)
        det = non_max_suppression_face(pred, conf_thres, iou_thres)[0]  # (n, 15): box(4)+conf(1)+landmarks(10)
        results = []
        if det is None or len(det) == 0:
            return results

        # Rescale boxes and landmarks
        det = det.cpu()
        boxes = det[:, :4]
        scores = det[:, 4]
        lms = det[:, 5:15]  # five (x,y) pairs -> 10 cols

        boxes = scale_coords(im.shape[2:], boxes, img0.shape).round()
        lms = scale_coords_landmarks(im.shape[2:], lms.clone(), img0.shape)
        for j in range(boxes.shape[0]):
            x1, y1, x2, y2 = boxes[j].tolist()
            score = float(scores[j].item())
            k = lms[j].view(5, 2).numpy()
            results.append({'bbox':[float(x1), float(y1), float(x2), float(y2)],
                            'score':score, 'kps':k.astype(np.float32)})
        return results

    yolo_ok = True
    print("[INFO] YOLOv5-face ready on", device.upper())
except Exception as e:
    print("[WARN] YOLOv5-face init failed:", e)
    yolo_ok = False

# ---------- SCRFD fallback (CPU only to avoid CUDA mismatches) ----------
scrfd = None
if not yolo_ok:
    try:
        from insightface.app import FaceAnalysis
        scrfd = FaceAnalysis(name="buffalo_l")
        scrfd.prepare(ctx_id=-1, det_size=(640, 640))  # CPU
        print("[INFO] Using SCRFD fallback (CPU, InsightFace).")
    except Exception as e:
        print("[ERROR] SCRFD fallback init failed:", e)

def process_with_yolov5_face(img):
    dets = yolo5_face_detect(img, conf_thres=CONF_THRES, iou_thres=IOU_THRES, img_size=IMG_SIZE)
    if not dets:
        return []
    # sort by area
    dets = sorted(dets, key=lambda d: face_area_bbox(d['bbox']), reverse=True)
    chosen = dets if SAVE_ALL_FACES else [dets[0]]
    outputs = []
    for d in chosen:
        kps = d.get('kps', None)
        if kps is not None and np.array(kps).shape == (5,2):
            aligned, kps_rot = rotate_align(img, kps)
            box = bbox_from_landmarks(kps_rot, scale=MARGIN)
            crop = enforce_aspect_and_pad(aligned, box, aspect=(ASPECT_W, ASPECT_H))
        else:
            x1,y1,x2,y2 = d['bbox']
            cx, cy = (x1+x2)/2.0, (y1+y2)/2.0
            bw, bh = (x2-x1), (y2-y1)
            bw *= (1 + MARGIN); bh *= (1 + MARGIN*1.2)
            box = [cx-bw/2.0, cy-bh/2.0, cx+bw/2.0, cy+bh/2.0]
            crop = enforce_aspect_and_pad(img, box, aspect=(ASPECT_W, ASPECT_H))
        if crop is not None and min(crop.shape[:2]) >= MIN_CROP_MIN_DIM:
            outputs.append(cv2.resize(crop, TARGET_SIZE, interpolation=cv2.INTER_AREA))
    return outputs

def process_with_scrfd(img):
    faces = scrfd.get(img)
    faces = [f for f in faces if getattr(f, "det_score", 1.0) >= 0.5]
    if not faces:
        return []
    faces = sorted(faces, key=lambda f: face_area_bbox(f.bbox.astype(float)), reverse=True)
    chosen = faces if SAVE_ALL_FACES else [faces[0]]
    outputs = []
    for f in chosen:
        kps = getattr(f, "kps", None)
        if kps is not None and np.array(kps).shape == (5,2):
            aligned, kps_rot = rotate_align(img, kps)
            box = bbox_from_landmarks(kps_rot, scale=MARGIN)
            crop = enforce_aspect_and_pad(aligned, box, aspect=(ASPECT_W, ASPECT_H))
        else:
            x1,y1,x2,y2 = f.bbox.astype(float)
            cx, cy = (x1+x2)/2.0, (y1+y2)/2.0
            bw, bh = (x2-x1), (y2-y1)
            bw *= (1 + MARGIN); bh *= (1 + MARGIN*1.2)
            box = [cx-bw/2.0, cy-bh/2.0, cx+bw/2.0, cy+bh/2.0]
            crop = enforce_aspect_and_pad(img, box, aspect=(ASPECT_W, ASPECT_H))
        if crop is not None and min(crop.shape[:2]) >= MIN_CROP_MIN_DIM:
            outputs.append(cv2.resize(crop, TARGET_SIZE, interpolation=cv2.INTER_AREA))
    return outputs

# ---------- Process folder ----------
paths = sorted(
    [p for ext in ("*.jpg","*.jpeg","*.png","*.bmp","*.webp","*.tif","*.tiff")
     for p in glob(os.path.join(INPUT_DIR, ext))]
)

print(f"Found {len(paths)} images in {INPUT_DIR}")
saved = 0
skipped = 0
for p in tqdm(paths):
    img = cv2.imread(p)
    if img is None:
        skipped += 1
        continue
    crops = []
    if yolo_ok:
        crops = process_with_yolov5_face(img)
        # optionally try fallback if nothing found
        if not crops and scrfd is not None:
            crops = process_with_scrfd(img)
    elif scrfd is not None:
        crops = process_with_scrfd(img)

    base = os.path.splitext(os.path.basename(p))[0]
    if not crops:
        skipped += 1
        continue
    if len(crops) == 1:
        out_path = os.path.join(OUTPUT_DIR, f"{base}.jpg")
        cv2.imwrite(out_path, crops[0]); saved += 1
    else:
        for i, c in enumerate(crops, start=1):
            out_path = os.path.join(OUTPUT_DIR, f"{base}_{i}.jpg")
            cv2.imwrite(out_path, c); saved += 1

print(f"Done.\nSaved crops: {saved}\nImages with no accepted face: {skipped}\nOutput: {OUTPUT_DIR}")

# ---------- Preview ----------
import matplotlib.pyplot as plt
sample_outs = sorted(glob(os.path.join(OUTPUT_DIR, "*.jpg")))[:8]
if sample_outs:
    cols = 4
    rows = int(np.ceil(len(sample_outs)/cols))
    plt.figure(figsize=(cols*3, rows*3))
    for i, p in enumerate(sample_outs, 1):
        im = cv2.cvtColor(cv2.imread(p), cv2.COLOR_BGR2RGB)
        plt.subplot(rows, cols, i)
        plt.imshow(im); plt.title(os.path.basename(p), fontsize=8); plt.axis('off')
    plt.tight_layout(); plt.show()
else:
    print("No output previews available yet.")


[WARN] YOLOv5-face init failed: No module named 'utils.augmentations'
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /root/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /root/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /root/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /root/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /root/.insightface/models/buffalo_l/w600k_r50.onnx recognition [

100%|██████████| 10/10 [00:07<00:00,  1.30it/s]


Done.
Saved crops: 9
Images with no accepted face: 1
Output: /content/cropped_faces
