In [None]:
import cv2
import torch
import numpy as np
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
import time
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
import pyttsx3

# --- One-time setup for the real-time face-recognition loop ----------------
# Everything below is module-level state shared by process_frame() (worker
# thread) and the capture/display loop (main thread).

# Initialize text-to-speech engine.
# NOTE(review): `engine` is not used anywhere in the visible code.
engine = pyttsx3.init()

# Pool that runs process_frame() off the main loop so display stays responsive.
executor = ThreadPoolExecutor(max_workers=2)
task_lock = Lock()  # Held by the main loop while it decides whether to submit a task

# Device setup: use CUDA when torch reports it available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load models:
#   mtcnn  - face detector / cropper (used for boxes and face tensors)
#   resnet - InceptionResnetV1 with 'vggface2' weights in eval mode, used to
#            turn face crops into embeddings
mtcnn = MTCNN(keep_all=True, device=device, min_face_size=25)
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Load the reference embeddings and their labels from the .npz archive.
# The archive must provide the keys 'embeddings' and 'names'; row i of
# `stored_embeddings` is matched to `name_list[i]` in process_frame().
# NOTE(review): the path is hard-coded to one user's desktop.
dataset_path = "/Users/rajataggarwal/Desktop/dataset"
data = np.load(f"{dataset_path}/embeddings.npz")
stored_embeddings = data['embeddings']
name_list = data['names']

# Webcam setup: open camera index 0 with OpenCV's default backend.
# Backend-specific alternatives that were tried are kept below for reference.
cap = cv2.VideoCapture(0)  # cap = cv2.VideoCapture(0, cv2.CAP_FFMPEG)
# cap = cv2.VideoCapture(0, cv2.CAP_AVFOUNDATION)  # macOS
# cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
# Request a 480x360 capture size.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 480)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)

# Abort early when the camera could not be opened.
# NOTE(review): exit() is the interactive/site helper; confirm it behaves as
# intended in the environment this runs in (script vs. notebook).
if not cap.isOpened():
    print("Error: Could not open camera.")
    exit()

# Maximum *squared* Euclidean distance between a live embedding and a stored
# one for the stored name to be accepted; larger distances yield "Unknown".
DISTANCE_THRESHOLD = 0.75
# Number of frames read from the camera so far (incremented by the main loop).
frame_counter = 0
# Latest results published by process_frame(): list of ((x1, y1, x2, y2), name).
recognized_faces = []
# True from the moment a frame is submitted until process_frame() finishes.
processing = False
# Wall-clock seconds taken by the most recent completed process_frame() call.
processing_time = 0.0

def process_frame(input_frame, resized_frame):
    """Detect and identify faces in one frame and publish the result.

    Runs on a worker thread. Faces are detected on ``resized_frame`` and each
    face embedding is compared against ``stored_embeddings`` using squared
    Euclidean distance; the closest stored name is used when that distance is
    below ``DISTANCE_THRESHOLD``, otherwise the face is labelled "Unknown".

    Args:
        input_frame: Full-size BGR frame; only its shape is used, to scale
            boxes back to display coordinates.
        resized_frame: Downscaled BGR copy of the same frame on which
            detection is actually performed.

    Side effects (module globals):
        recognized_faces: replaced with a list of ``((x1, y1, x2, y2), name)``
            in ``input_frame`` coordinates (empty when no face is found).
        processing_time: seconds spent in this call.
        processing: always reset to ``False`` on exit, including on failure,
            so the main loop can schedule the next frame.

    On any error the previous ``recognized_faces`` / ``processing_time`` are
    left untouched and the error is printed.
    """
    global recognized_faces, processing, processing_time
    start_time = time.time()

    try:
        img_rgb = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
        boxes, _ = mtcnn.detect(img_rgb)

        # Crop the faces from the boxes found above instead of calling
        # mtcnn(...) which would run the whole detector a second time.
        faces = None
        if boxes is not None:
            faces = mtcnn.extract(Image.fromarray(img_rgb), boxes, None)

        recognized = []
        if faces is not None:
            with torch.no_grad():
                embeddings = resnet(faces.to(device)).cpu().numpy()

            # Factors mapping detection coordinates back to the full frame.
            scale_x = input_frame.shape[1] / resized_frame.shape[1]
            scale_y = input_frame.shape[0] / resized_frame.shape[0]

            for box, embedding in zip(boxes, embeddings):
                # Squared L2 distance: skips the sqrt of np.linalg.norm, so
                # DISTANCE_THRESHOLD is expressed in squared units.
                distances = np.sum((stored_embeddings - embedding) ** 2, axis=1)
                min_idx = np.argmin(distances)

                if distances[min_idx] < DISTANCE_THRESHOLD:
                    name = name_list[min_idx]
                else:
                    name = "Unknown"

                x1, y1, x2, y2 = box
                scaled_box = (
                    int(x1 * scale_x),
                    int(y1 * scale_y),
                    int(x2 * scale_x),
                    int(y2 * scale_y),
                )
                recognized.append((scaled_box, name))

        recognized_faces = recognized
        processing_time = time.time() - start_time
    except Exception as exc:
        # Worker-thread boundary: the executor would otherwise swallow the
        # exception inside the Future and nobody would ever see it.
        print(f"Face processing failed: {exc}")
    finally:
        # Must run on every exit path; a stuck True would stop all further
        # recognition because the main loop only submits when this is False.
        processing = False

# --- Capture / display loop --------------------------------------------------
# Reads frames from the camera, hands every Nth frame to process_frame() on the
# worker pool (at most one in flight), and overlays the most recent results.
RUN_SECONDS = 10        # hard stop for the demo, in seconds
PROCESS_EVERY_N = 3     # only every Nth captured frame is sent for recognition
PROCESS_SIZE = (320, 240)  # (width, height) used for detection; smaller = faster
MIN_LABEL_Y = 20        # keep name labels inside the frame for faces near the top

print("Press 'q' to quit.")
start_time = time.time()

try:
    while time.time() - start_time < RUN_SECONDS:
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame from camera.")
            break

        frame_counter += 1
        display_frame = frame.copy()

        # The `processing` flag already guarantees a single outstanding task
        # (it is cleared only when process_frame() returns), so there is no
        # need to inspect the executor's private work queue.
        if frame_counter % PROCESS_EVERY_N == 0 and not processing:
            with task_lock:
                processing = True
                resized_frame = cv2.resize(frame, PROCESS_SIZE)
                executor.submit(process_frame, frame.copy(), resized_frame)

        # Draw the boxes and names from the latest completed recognition pass.
        for box, name in recognized_faces:
            x1, y1, x2, y2 = box
            cv2.rectangle(display_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Clamp so the label is not drawn above the top edge of the frame.
            label_y = max(y1 - 10, MIN_LABEL_Y)
            cv2.putText(display_frame, name, (x1, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        # Display FPS. This is the recognition rate (1 / last processing
        # time), not the camera/display frame rate.
        fps_text = f"FPS: {1/processing_time:.2f}" if processing_time > 0 else "FPS: N/A"
        cv2.putText(display_frame, fps_text, (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

        # Show the frame
        cv2.imshow("Hybrid Real-Time Face Recognition", display_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

finally:
    # Always give the camera and the window back, even on error/interrupt.
    cap.release()
    cv2.destroyAllWindows()


Using device: cpu
Press 'q' to quit.
