In [1]:
import numpy as np
import cv2
import mediapipe as mp

In [2]:
# Temporary in-memory vector DB: maps an integer entry ID to an
# (n_features, n_dim) array of uniform random values in [0, 1).
# NOTE(review): presumably a stand-in for real per-person embeddings.
num_entries = 10
n_features = 50
n_dim = 512
Vect_DB = {
    entry_id: np.random.rand(n_features, n_dim)
    for entry_id in range(num_entries)
}
#___________________________________________________________________________________________________________________________________

class FaceID:
    """Per-frame face identification with blink-based liveness detection.

    Each call to :meth:`infer` runs MediaPipe Face Mesh on one BGR frame,
    computes the eye aspect ratio (EAR) of both eyes, updates the blink
    counter, looks up the closest entry in ``db`` by cosine similarity and
    draws the results onto the frame.

    Attributes:
        db: Mapping of person key -> collection of reference vectors.
        buffer_size: Stored for best-frame buffering; not used by ``infer``.
        ear_threshold: An average EAR below this counts as "eyes closed".
        consecutive_frames: Minimum run of closed frames that counts as a blink.
        consecutive_closed: Length of the current run of closed frames.
        blink_count: Number of blinks counted so far.
        live_detected: Set to True on the first blink; never reset here.
        last_best: Always None at present (best-frame selection is disabled).
        frame_buffer: Empty list reserved for best-frame buffering.
    """

    # Length of the placeholder embedding returned by get_embed().
    EMBEDDING_DIM = 512

    def __init__(self, db, buffer_size=100, ear_threshold=0.2, consecutive_frames=3):
        self.db = db
        self.buffer_size = buffer_size
        self.ear_threshold = ear_threshold
        self.consecutive_frames = consecutive_frames

        # Liveness detection state.
        self.consecutive_closed = 0
        self.blink_count = 0
        self.live_detected = False

        # Best-frame storage (currently unused by infer()).
        self.last_best = None
        self.frame_buffer = []

        # Set up MediaPipe Face Mesh (video mode, single face).
        self.face_mesh = mp.solutions.face_mesh.FaceMesh(
            static_image_mode=False,
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )

        # Face Mesh landmark indices per eye, ordered p1..p6 as compute_ear()
        # expects: p1/p4 span the eye horizontally, (p2, p6) and (p3, p5) are
        # the two vertical pairs.
        self.left_eye_indices = [33, 160, 158, 133, 153, 144]
        self.right_eye_indices = [263, 387, 385, 362, 380, 373]

    def close(self):
        """Release the MediaPipe Face Mesh resources held by this instance."""
        self.face_mesh.close()

    def cosine_similarity(self, a, b):
        """Return the cosine similarity of vectors ``a`` and ``b``.

        Returns 0.0 when either vector has zero norm, instead of dividing by
        zero and producing NaN.
        """
        a = np.asarray(a)
        b = np.asarray(b)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return np.dot(a / norm_a, b / norm_b)

    def find_closest_person_max(self, query):
        """Find the DB entry whose best-matching vector is closest to ``query``.

        For every key the score is the maximum cosine similarity between
        ``query`` and any of that key's vectors. Zero-norm vectors score 0.0.
        Keys with no vectors are skipped.

        Args:
            query: 1-D embedding vector.

        Returns:
            ``(closest_key, scores)`` where ``scores`` maps key -> best
            similarity. ``closest_key`` is None when nothing could be scored
            (empty DB, or every entry empty).
        """
        query = np.asarray(query, dtype=float)
        query_norm = np.linalg.norm(query)

        scores = {}
        for key, vectors in self.db.items():
            matrix = np.atleast_2d(np.asarray(vectors, dtype=float))
            if matrix.size == 0:
                continue
            dots = matrix @ query
            denom = np.linalg.norm(matrix, axis=1) * query_norm
            # Rows with a zero denominator keep the pre-filled 0.0 score.
            sims = np.divide(dots, denom, out=np.zeros_like(dots), where=denom > 0)
            scores[key] = sims.max()

        if not scores:
            return None, scores
        return max(scores, key=scores.get), scores

    def get_embed(self, frame):
        """Return an embedding for ``frame``.

        Placeholder: ignores ``frame`` and returns a random vector of length
        ``EMBEDDING_DIM``.
        """
        return np.random.rand(self.EMBEDDING_DIM)

    def compute_ear(self, landmarks, eye_indices, image_width, image_height):
        """Compute the eye aspect ratio (EAR) for one eye.

        EAR = (|p2 - p6| + |p3 - p5|) / (2 * |p1 - p4|), with the six points
        taken from ``landmarks`` in the order given by ``eye_indices``.

        Args:
            landmarks: Indexable collection of objects with normalized ``x``
                and ``y`` attributes.
            eye_indices: Six landmark indices, ordered p1..p6.
            image_width: Frame width in pixels, used to scale ``x``.
            image_height: Frame height in pixels, used to scale ``y``.

        Returns:
            The EAR as a float, or NaN when p1 and p4 fall on the same pixel
            (the ratio is undefined).
        """
        # Coordinates are truncated to whole pixels, as before.
        points = np.array(
            [
                (int(landmarks[idx].x * image_width),
                 int(landmarks[idx].y * image_height))
                for idx in eye_indices
            ],
            dtype=float,
        )
        p1, p2, p3, p4, p5, p6 = points

        horizontal = np.linalg.norm(p1 - p4)
        if horizontal == 0:
            return float("nan")

        vertical1 = np.linalg.norm(p2 - p6)
        vertical2 = np.linalg.norm(p3 - p5)
        return (vertical1 + vertical2) / (2.0 * horizontal)

    def _update_blink_state(self, avg_ear):
        """Advance the blink counter with one frame's average EAR.

        A blink is counted when the eyes reopen after at least
        ``consecutive_frames`` closed frames. NaN measurements are ignored so
        an undefined EAR neither extends nor ends a closed run.
        """
        if np.isnan(avg_ear):
            return
        if avg_ear < self.ear_threshold:
            self.consecutive_closed += 1
            return
        if self.consecutive_closed >= self.consecutive_frames:
            self.blink_count += 1
            self.live_detected = True
        self.consecutive_closed = 0

    @staticmethod
    def _put_text(frame, text, origin, color=(0, 255, 0)):
        """Draw one line of overlay text onto ``frame`` in place."""
        cv2.putText(frame, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

    def infer(self, frame):
        """Process one BGR frame: detect face, update liveness, identify.

        The frame is annotated in place with the EAR, the closest DB key and
        the blink count (or "No face detected").

        Args:
            frame: BGR image array; modified in place.

        Returns:
            ``(frame, closest_person, live_detected, last_best)``.
            ``closest_person`` is None when no face is found.
        """
        image_height, image_width = frame.shape[:2]
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.face_mesh.process(rgb_frame)

        if not results.multi_face_landmarks:
            self._put_text(frame, "No face detected", (30, 30), color=(0, 0, 255))
            return frame, None, self.live_detected, self.last_best

        landmarks = results.multi_face_landmarks[0].landmark

        # Compute the embedding BEFORE any overlay is drawn, so the text
        # rendered below is not part of the image being embedded.
        embed = self.get_embed(frame)
        closest_person, _ = self.find_closest_person_max(embed)

        left_ear = self.compute_ear(landmarks, self.left_eye_indices, image_width, image_height)
        right_ear = self.compute_ear(landmarks, self.right_eye_indices, image_width, image_height)
        avg_ear = (left_ear + right_ear) / 2.0
        self._update_blink_state(avg_ear)

        self._put_text(frame, f'EAR: {avg_ear:.2f}', (30, 30))
        self._put_text(frame, f'Closest: {closest_person}', (30, 70))
        self._put_text(frame, f'Blinks: {self.blink_count}', (30, 110))

        # NOTE: best-frame buffering (frame_buffer / buffer_size) is not
        # active, so last_best stays None.
        return frame, closest_person, self.live_detected, self.last_best


In [3]:
# Webcam demo: run FaceID on frames from camera 0 until ESC is pressed or the
# camera stops delivering frames.
inference_engine = FaceID(db=Vect_DB, buffer_size=100)
cap = cv2.VideoCapture(0)
counter = 0
infer_interval = 1  # run inference on every Nth captured frame
ESC_KEY = 27

try:
    if not cap.isOpened():
        print("Could not open camera 0.")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        counter += 1
        if counter % infer_interval != 0:
            continue
        counter = 0

        processed_frame, closest_person, live_flag, best_frame = inference_engine.infer(frame)
        cv2.imshow("Processed Frame", processed_frame)

        # waitKey also services the HighGUI event loop so the window repaints.
        key = cv2.waitKey(1) & 0xFF
        if key == ESC_KEY:
            break
finally:
    # Always release the camera, windows and Face Mesh graph, even if
    # inference raised part-way through the loop.
    cap.release()
    cv2.destroyAllWindows()
    inference_engine.face_mesh.close()