In [2]:
import cv2
import pickle
import numpy as np
from scipy.spatial.distance import cosine
from facenet_helper import FaceNetLoader 
import os



In [1]:
# Pickle file holding the face encodings created in the previous step.
ENCODINGS_FILE = "encodings_custom.pickle"

# Cosine-distance cut-off used when matching FaceNet embeddings.
#   smaller value -> stricter match (fewer false positives, but may miss you)
#   larger value  -> looser match (more false positives)
# Values around 0.4 - 0.5 are usually a good starting point for FaceNet.
THRESHOLD = 0.5

In [4]:
# Real-time face recognition: load the stored encodings, then repeatedly grab
# webcam frames, detect faces, embed each face with FaceNet and label it with
# the closest database entry (or "Unknown" when nothing is close enough).
if not os.path.exists(ENCODINGS_FILE):
    print(f"[ERROR] Encodings file '{ENCODINGS_FILE}' not found! Please run the training step first.")
else:
    # 1. Load the encodings database
    # NOTE(security): unpickling can execute arbitrary code. Only load an
    # encodings file that you generated yourself.
    print("[INFO] Loading encodings database...")
    with open(ENCODINGS_FILE, "rb") as encodings_fh:
        # Context manager guarantees the file handle is closed after loading.
        data = pickle.load(encodings_fh)
    database_embeddings = np.array(data["encodings"])
    database_names = data["names"]
    print(f"[INFO] Database loaded. Found {len(database_names)} faces.")

    # 2. Initialize FaceNet Loader (Uses keras-facenet internally)
    # We don't pass a path, so it uses the library's default high-quality weights
    facenet = FaceNetLoader()

    # 3. Initialize Face Detector (Haar Cascade is fast for Real-time)
    # This path loads the default xml from opencv library
    cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(cascade_path)

    # 4. Open Webcam
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("[ERROR] Could not access the webcam.")
    else:
        print("[INFO] Starting Video Stream... Press 'q' on the video window to stop.")

        # try/finally ensures the camera and the window are released even when
        # the loop is left through an exception (e.g. a kernel interrupt).
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    print("[ERROR] Failed to grab frame.")
                    break

                # Create a copy for drawing
                display_frame = frame.copy()

                # Convert to RGB for processing (FaceNet needs RGB)
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Haar cascades operate on grayscale images; derive it from the
                # original BGR frame so the channel weights are correct.
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # Detect faces
                # scaleFactor=1.1, minNeighbors=5 are standard tuning params
                faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))

                for (x, y, w, h) in faces:
                    # --- Step 1: Crop the face ---
                    # Crop from the RGB frame because that is what the embedder consumes.
                    face_roi = rgb_frame[y:y+h, x:x+w]

                    try:
                        # --- Step 2: Get Embedding ---
                        # The helper handles resizing to 160x160 and preprocessing
                        current_embedding = facenet.get_embedding(face_roi)

                        if current_embedding is None:
                            continue

                        # --- Step 3: Compare with Database ---
                        # Cosine distance between this face and every stored embedding.
                        distances = np.array(
                            [cosine(current_embedding, db_emb) for db_emb in database_embeddings]
                        )

                        # Find the minimum distance (the closest match)
                        if len(distances) > 0:
                            min_dist_idx = np.argmin(distances)
                            min_dist = distances[min_dist_idx]

                            # --- Step 4: Decision ---
                            if min_dist < THRESHOLD:
                                name = database_names[min_dist_idx]
                                color = (0, 255, 0) # Green
                                label_text = f"{name} ({min_dist:.2f})"
                            else:
                                name = "Unknown"
                                color = (0, 0, 255) # Red
                                label_text = f"Unknown ({min_dist:.2f})"
                        else:
                            # Empty database: nothing to compare against.
                            name = "Unknown"
                            color = (0, 0, 255)
                            label_text = "Unknown"

                        # --- Step 5: Draw on Screen ---
                        # Rectangle around face
                        cv2.rectangle(display_frame, (x, y), (x+w, y+h), color, 2)

                        # Label background
                        cv2.rectangle(display_frame, (x, y - 35), (x+w, y), color, cv2.FILLED)

                        # Name text
                        cv2.putText(display_frame, label_text, (x + 6, y - 6),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1)

                    except Exception as e:
                        # Best effort: a failure on one face must not stop the stream.
                        print(f"[WARNING] Error processing face: {e}")
                        continue

                # Show the frame
                cv2.imshow("FaceNet Real-Time Recognition", display_frame)

                # Break loop with 'q' key
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Cleanup
            cap.release()
            cv2.destroyAllWindows()
            print("[INFO] Program stopped.")

[INFO] Loading encodings database...
[INFO] Database loaded. Found 618 faces.
[INFO] Initializing FaceNet...
[INFO] Model loaded successfully.
[INFO] Starting Video Stream... Press 'q' on the video window to stop.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0