# CELL 1 — Imports & Config

In [3]:
import cv2
import numpy as np
import torch
from ultralytics import YOLO
from insightface.app import FaceAnalysis
from scipy.spatial.distance import cosine
import random
import os


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/ubuntu_usr/miniconda3/envs/person_ident/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/ubuntu_usr/miniconda3/envs/person_ident/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/ubuntu_usr/miniconda3/envs/person_ident/lib/python3.9/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/ubuntu_usr/miniconda3/envs/person_ident/lib/python3.9/site-packages/t

AttributeError: _ARRAY_API not found

SystemError: <built-in function __import__> returned a result with an error set

  check_for_updates()


In [4]:
# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# Cosine-similarity cut-off on a 0-1 scale (0.50 == 50%); a face scoring
# above it is reported as the target.
SIM_THRESHOLD = 0.50


In [5]:
# Show which device was selected in the config cell.
print(DEVICE)

cuda


# CELL 2 — Load Models

#### YOLOv8 — Person Detection

In [6]:
# Person detector loaded from the "yolov8n.pt" weights file.
# Swap in "yolov8s.pt" if the GPU has capacity to spare.
yolo = YOLO("yolov8n.pt")


#### InsightFace (Face Recognition)

In [7]:
# Face analysis pipeline built from the "buffalo_l" model pack, restricted to
# the CPU execution provider.
face_app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])

# ctx_id must be -1 when running on the CPU, matching the provider list above.
face_app.prepare(ctx_id=-1, det_size=(640, 640))

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/ubuntu_usr/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/ubuntu_usr/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/ubuntu_usr/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/ubuntu_usr/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/ubuntu_usr/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None', 3, 112,

# CELL 3 — Reference Image Embedding

In [8]:
def get_reference_embedding(img_path):
    """Return the embedding of the largest face found in an image file.

    Parameters
    ----------
    img_path : str
        Path of the reference image; it is read with ``cv2.imread``.

    Returns
    -------
    The ``embedding`` attribute of the detected face whose bounding box
    covers the largest area.

    Raises
    ------
    ValueError
        If the image cannot be read, or if no face is detected in it.
    """
    image = cv2.imread(img_path)
    if image is None:
        raise ValueError(f"Image not found or unreadable: {img_path}")

    detected = face_app.get(image)
    if not detected:
        raise ValueError("No face found in reference image")

    def bbox_area(face):
        # bbox holds the corner coordinates as (x1, y1, x2, y2).
        return (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1])

    # max() yields the first face with the greatest area, i.e. the same
    # element a stable descending sort would put at index 0.
    return max(detected, key=bbox_area).embedding


In [9]:
# Diagnostic: list the ONNX Runtime execution providers available in this
# environment.
# NOTE(review): face_app is configured with CPUExecutionProvider only, so any
# GPU providers listed here are not used by it — confirm that is intended.
import onnxruntime as ort
print(ort.get_available_providers())


['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'AzureExecutionProvider', 'CPUExecutionProvider']


In [10]:
# Base directory for the data paths built in later cells: the kernel's
# current working directory.
abs_path = os.getcwd()
print(abs_path)

/mnt/f/keltron/projects/person_detection


In [11]:
# Reference photo passed to get_reference_embedding, resolved under abs_path.
# os.path.join supplies the separator instead of hand-concatenating "/".
img_path = os.path.join(abs_path, "data_check/chris_patt_img_test.jpg")
print(img_path)

/mnt/f/keltron/projects/person_detection/data_check/chris_patt_img_test.jpg


In [None]:
# Embedding of the reference face; faces found in the video frames are
# compared against it by cosine similarity.
ref_embedding = get_reference_embedding(img_path)


# CELL 4 — Video / Webcam Processing

Supports:

* Webcam
* USB camera
* Video file
* 4K HDR video (OpenCV handles it, but expect a lower processing FPS)

In [None]:
# Input video to search, resolved under abs_path.
# os.path.join supplies the separator instead of hand-concatenating "/".
vid_path = os.path.join(abs_path, "data_check/video_of_christ_patt.mp4")
print(vid_path)

In [None]:
# Open the input video for frame-by-frame reading.
# NOTE(review): the section text also lists webcam / USB camera sources —
# presumably selected by passing a device index instead of a path; confirm.
cap = cv2.VideoCapture(vid_path)


# CELL 5 — Core Matching + Visualization Logic

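This is where everything comes together: each frame goes through YOLO person detection, the face inside each person box is compared with the reference embedding, and the annotated frame is written to the output video.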

In [None]:
def random_color():
    """Return a 3-tuple of random integers, each drawn from 0 to 255 inclusive.

    The tuple is used as the drawing colour for non-matching detections.
    """
    first = random.randint(0, 255)
    second = random.randint(0, 255)
    third = random.randint(0, 255)
    return (first, second, third)


In [None]:
# --- setup video writer once ---
# Fail fast on an unopened source: it reports a 0x0 frame size, so carrying on
# would silently replace the output file with an empty one (this also happens
# when the cell is re-run after `cap` has already been released).
if not cap.isOpened():
    raise RuntimeError(
        "Video source is not open; re-run the cell that creates `cap`"
    )

fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(
    "person_search_output.mp4",
    fourcc,
    fps if fps > 0 else 25,  # fall back to 25 when the source reports no FPS
    (width, height)
)
if not out.isOpened():
    raise RuntimeError("Could not open person_search_output.mp4 for writing")

# Single source of truth for the match cut-off, on the same 0-100 scale as the
# percentages drawn on the frame.
threshold_pct = SIM_THRESHOLD * 100

frame_id = 0

# try/finally guarantees the capture and the writer are released (so the
# output file is finalised) even if a frame raises or the cell is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # classes=[0] keeps only the person class; verbose=False suppresses
        # the per-frame inference log that would flood the cell output.
        detections = yolo(frame, conf=0.4, classes=[0], verbose=False)[0]

        found = False

        for box in detections.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # safety clamp (avoid empty crops)
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(frame.shape[1], x2), min(frame.shape[0], y2)

            person_crop = frame[y1:y2, x1:x2]
            if person_crop.size == 0:
                continue

            faces = face_app.get(person_crop)
            if not faces:
                continue

            # Use the largest face in the crop (same rule as
            # get_reference_embedding) rather than whichever face the
            # detector happened to list first.
            face = max(
                faces,
                key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
            )
            similarity = 1 - cosine(ref_embedding, face.embedding)
            similarity_pct = similarity * 100

            if similarity_pct > threshold_pct:
                found = True
                label = f"TARGET FOUND: {similarity_pct:.1f}%"
                color = (0, 255, 0)
                thickness = 3
            else:
                label = f"Resemblance: {similarity_pct:.1f}%"
                color = random_color()
                thickness = 2

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
            cv2.putText(
                frame,
                label,
                (x1, max(0, y1 - 10)),  # keep the label inside the frame
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                color,
                2
            )

        # `found` is already decided by the SIM_THRESHOLD comparison above, so
        # no separate hard-coded percentage is needed here.
        if not found:
            cv2.putText(
                frame,
                "TARGET NOT FOUND",
                (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.0,
                (0, 0, 255),
                3
            )

        # write annotated frame to file
        out.write(frame)

        frame_id += 1
        if frame_id % 100 == 0:
            print(f"[INFO] processed {frame_id} frames")
finally:
    cap.release()
    out.release()
