In [3]:
# Use the %pip magic (not !pip) so the package is installed into the environment of the
# running kernel, and pin the version this notebook was executed with.
%pip install scikit-learn==1.6.1

Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.5.0-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
Using cached joblib-1.5.0-py3-none-any.whl (307 kB)
Downloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [scikit-learn]━━━━━[0m [32m2/3[0m [scikit-learn]
[1A[2KSuccessfully installed joblib-1.5.0 scikit-learn-1.6.1 threadpoolctl-3.6.0


In [10]:
import os
import pickle
import cv2
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
from torchvision import models, transforms
from sklearn.metrics.pairwise import cosine_similarity
from ultralytics import YOLO

# Load YOLOv12 model.
# The weights location can be overridden with the FACE_DETECTOR_WEIGHTS environment
# variable so the notebook is not tied to one machine; the default is the original path.
model_path = os.environ.get(
    'FACE_DETECTOR_WEIGHTS',
    '/home/himanshu/Downloads/face_detector_yolov12.pt',
)
model = YOLO(model_path)
model.eval()  # Set model to evaluation mode

# Preprocessing transform for ResNet-50: fixed 224x224 input, tensor conversion and
# per-channel normalisation with the mean/std conventionally used for torchvision's
# ImageNet-pretrained models.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load pretrained ResNet-50.
# `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the weight set that
# flag used to load, so the extracted features are unchanged.
resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
resnet.eval()

# Define the layers to extract features from.
# Values are positions in `resnet.children()`; 4 is layer1 and 7 is layer4.
feature_layers_indices = [4, 7]  # Example: after layer1 and layer4

# ======= FACE EMBEDDING FUNCTION (Flatten and Concatenate) =======
def get_face_embedding(face_image):
    """Compute an L2-normalised ResNet-50 feature embedding for a face crop.

    The crop is converted from BGR to RGB, preprocessed with the module-level
    ``transform`` and pushed through the children of ``resnet`` in order. The
    output of every child whose index is listed in ``feature_layers_indices``
    is flattened, and the pieces are concatenated into one row vector.

    Args:
        face_image: Face crop as a NumPy array in OpenCV's BGR channel order.

    Returns:
        A ``(1, D)`` NumPy array scaled to unit L2 norm, or an empty array
        (``size == 0``) when no embedding can be produced: the crop is
        ``None`` or empty, no feature layers are configured, or the extracted
        features have zero norm.
    """
    # An empty crop (e.g. a degenerate detection box) would make cv2.cvtColor raise,
    # so report "no embedding" with the same sentinel callers already check for.
    if face_image is None or np.size(face_image) == 0:
        return np.zeros(0)

    # Without any configured layer there is nothing to extract; running the whole
    # network child-by-child would also fail at the final fully connected layer.
    if not feature_layers_indices:
        return np.zeros(0)
    last_needed_index = max(feature_layers_indices)

    img = Image.fromarray(cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB))
    input_tensor = transform(img).unsqueeze(0).to(next(resnet.parameters()).device)

    flattened_embeddings = []
    with torch.no_grad():
        x = input_tensor
        for i, module in enumerate(resnet.children()):
            x = module(x)
            if i in feature_layers_indices:
                # Flatten everything after the batch dimension into one row.
                flattened_embeddings.append(torch.flatten(x, start_dim=1).cpu().numpy())
            if i >= last_needed_index:
                # Stop right after the deepest requested layer instead of also
                # evaluating the next module before noticing we are done.
                break

    if not flattened_embeddings:
        return np.zeros(0)

    concatenated_embedding = np.concatenate(flattened_embeddings, axis=1)
    norm = np.linalg.norm(concatenated_embedding)
    if norm == 0:
        # Avoid returning a NaN vector from a 0/0 division.
        return np.zeros(0)
    return concatenated_embedding / norm
# ======= FACE RECOGNITION FUNCTION =======
def recognize_face(embedding, known_embeddings, threshold=0.63):  # Adjust threshold
    """Return the enrolled name whose centroid is most similar to ``embedding``.

    Args:
        embedding: Query embedding (NumPy array); reshaped to a single row.
        known_embeddings: Mapping of name -> centroid array.
        threshold: Cosine similarity the best candidate must exceed.

    Returns:
        The best-matching name, or ``"Unknown"`` when there is nothing to
        compare against, the query is empty, or the best cosine similarity
        does not exceed ``threshold``.
    """
    if not known_embeddings or embedding.size == 0:
        return "Unknown"

    query_row = embedding.reshape(1, -1)
    top_name = "Unknown"
    top_score = -1.0

    for candidate, centroid in known_embeddings.items():
        similarity = cosine_similarity(query_row, centroid.reshape(1, -1))[0][0]
        if not similarity > top_score:
            continue
        top_name = candidate
        top_score = similarity

    if top_score > threshold:
        return top_name
    return "Unknown"
# ======= FACE RECOGNITION FUNCTION (Using Current Score as Measurement) =======
# def recognize_face(embedding, known_embeddings, threshold=0.50): # Adjust threshold
#     for name, centroid in known_embeddings.items():
#         score = cosine_similarity(embedding.reshape(1, -1), centroid.reshape(1, -1))[0][0]
#         # print(f"Cosine similarity with {name}: {score:.4f}") # Print current score
#         if score > threshold:
#             return name  # Return the name if the current score exceeds the threshold

#     return "Unknown" # Return "Unknown" if no match is found after checking all

# ======= FACE DETECTION (YOLOv12) =======
# Runs the YOLO model loaded above on a frame and returns expanded (x, y, w, h) face boxes.
def detect_faces_yolov12(frame, conf_threshold=0.63, device='cpu'):
    """Detect faces in a frame with the module-level YOLO ``model``.

    Every detected box is widened by 4% of its width on each side and
    extended upwards by 15% of its height (the bottom edge is kept), then
    clamped to the frame and converted to integer ``(x, y, w, h)``.

    Args:
        frame: Image as a NumPy array (``frame.shape[:2]`` is height, width).
        conf_threshold: Confidence threshold forwarded to ``model.predict``.
        device: Device string forwarded to ``model.predict``; defaults to
            ``'cpu'``, the value that was previously hard-coded.

    Returns:
        A list of ``(x, y, w, h)`` integer tuples. Empty when the frame is
        ``None``/empty or nothing is detected; boxes that end up with zero or
        negative area after clamping are dropped.
    """
    if frame is None or np.size(frame) == 0:
        return []

    results = model.predict(source=frame, device=device, conf=conf_threshold, verbose=False)
    boxes = results[0].boxes if results else None
    if boxes is None:
        return []

    # Only the first two dimensions are needed, so 2-D (grayscale) frames work too.
    frame_h, frame_w = frame.shape[:2]

    detected_faces = []
    for x1, y1, x2, y2 in boxes.xyxy.tolist():
        # Expand box: top +15%, sides +4%
        box_width = x2 - x1
        box_height = y2 - y1

        left = max(0, int(x1 - 0.04 * box_width))
        right = min(frame_w, int(x2 + 0.04 * box_width))
        top = max(0, int(y1 - 0.15 * box_height))
        bottom = min(frame_h, int(y2))  # bottom stays same

        # Convert to (x, y, w, h)
        w_box = right - left
        h_box = bottom - top
        if w_box <= 0 or h_box <= 0:
            # A zero-area box would produce an empty crop downstream.
            continue

        detected_faces.append((left, top, w_box, h_box))

    return detected_faces

# ======= ENROLLMENT AND WEIGHTED CLUSTERING (Flattened) =======
def enroll_faces_weighted(dataset_path):
    """Build one variance-weighted, L2-normalised centroid per enrolled person.

    ``dataset_path`` is expected to contain one sub-directory per person;
    every file inside is opened as an image and embedded with
    ``get_face_embedding``. Entries of ``dataset_path`` that are not
    directories are skipped, and files that fail to load or embed are
    reported with ``print`` and skipped.

    For each person, every feature dimension is weighted by the inverse of
    its variance across that person's images (plus a small epsilon), the
    weighted embeddings are summed over images, and the result is scaled to
    unit L2 norm.

    Args:
        dataset_path: Directory containing one folder of images per person.

    Returns:
        dict mapping person name -> centroid array of shape ``(1, D)``.
        People without any usable image are omitted.
    """
    human_embeddings = {}
    # Sorted so enrolment order (and printed messages) is deterministic.
    for human_name in sorted(os.listdir(dataset_path)):
        class_dir = os.path.join(dataset_path, human_name)
        if not os.path.isdir(class_dir):
            continue

        all_flattened_embeddings = []
        for img_file in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_file)
            try:
                # Context manager closes the underlying file promptly.
                with Image.open(img_path) as pil_img:
                    face_rgb = np.array(pil_img.convert('RGB'))
                # get_face_embedding expects OpenCV's BGR order (it converts BGR -> RGB
                # itself). Passing the PIL RGB array directly would swap red and blue for
                # enrolment only, so enrolled and live embeddings would not be comparable.
                face_bgr = cv2.cvtColor(face_rgb, cv2.COLOR_RGB2BGR)
                emb = get_face_embedding(face_bgr)
                if emb.size > 0:
                    all_flattened_embeddings.append(emb.reshape(-1))
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole enrolment.
                print(f"Error processing {img_path}: {e}")

        if not all_flattened_embeddings:
            print(f"No valid embeddings found for {human_name}")
            continue

        emb_array = np.stack(all_flattened_embeddings)  # Shape: (num_images, D)
        variances = np.var(emb_array, axis=0)  # Per-feature variance across images
        weights = 1.0 / (variances + 1e-5)  # Stable features get larger weights
        normalized_weights = weights / np.sum(weights)  # Sums to 1 over features

        # Broadcasting applies the per-feature weights to every image before summing.
        # The weights already sum to 1 and the result is re-normalised below, so no
        # further division by the weight sum is needed.
        weighted_centroid = np.sum(emb_array * normalized_weights, axis=0)

        norm = np.linalg.norm(weighted_centroid)
        if not np.isfinite(norm) or norm == 0:
            print(f"No valid embeddings found for {human_name}")
            continue

        # Keep the (1, D) row-vector layout of the per-image embeddings.
        human_embeddings[human_name] = (weighted_centroid / norm).reshape(1, -1)

    return human_embeddings

# ======= MAIN SCRIPT =======
if __name__ == "__main__":
    # Build the per-person centroids once, before the camera loop starts.
    dataset_path = "human_faces_dataset"  # Replace with your dataset path
    known_face_clusters = enroll_faces_weighted(dataset_path)

    cap = cv2.VideoCapture(0)

    # Variables to store the last recognized name and the time it was recognized
    last_recognized_name = "Unknown"
    recognition_start_time = 0
    persistence_duration = 1  # Keep the name for 1 second (in seconds)

    print("Starting face recognition... Press 'q' to quit.")
    if not cap.isOpened():
        print("Could not open the webcam (device index 0).")

    # try/finally guarantees the camera and the window are released even if
    # detection or embedding raises in the middle of the loop.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Mirror the frame (flip horizontally)
            frame = cv2.flip(frame, 1)
            frame_h, frame_w = frame.shape[:2]

            # Detect faces
            boxes = detect_faces_yolov12(frame)
            current_recognized_name = "Unknown"

            for (x, y, w, h) in boxes:
                # detect_faces_yolov12 already returns boxes expanded by +15% (top) and
                # +4% (sides); expanding again here would crop a larger region than the
                # detector reports, so the box is only clamped to the frame.
                x1 = max(0, int(x))
                y1 = max(0, int(y))
                x2 = min(frame_w, int(x + w))
                y2 = min(frame_h, int(y + h))

                face_crop = frame[y1:y2, x1:x2]
                if face_crop.size == 0:
                    # A degenerate box gives an empty crop that cannot be embedded.
                    continue

                emb = get_face_embedding(face_crop)
                if emb.size > 0:
                    name = recognize_face(emb, known_face_clusters, threshold=0.63)  # Adjust threshold

                    if name != "Unknown":
                        current_recognized_name = name
                        break  # If a face is recognized, we can stop processing other boxes for this frame

            current_time = cv2.getTickCount() / cv2.getTickFrequency()

            if current_recognized_name != "Unknown":
                last_recognized_name = current_recognized_name
                recognition_start_time = current_time
            elif (current_time - recognition_start_time) >= persistence_duration:
                # The last recognition is older than the persistence window: drop it.
                last_recognized_name = "Unknown"

            # Draw rectangles and text for all detected faces.
            # NOTE: every box is labelled with the same (most recently recognized) name.
            for (x, y, w, h) in boxes:
                x1 = max(0, int(x - 0.02 * w))
                y1 = max(0, int(y - 0.05 * h))
                x2 = min(frame_w, int(x + w + 0.02 * w))
                y2 = min(frame_h, int(y + h + 0.05 * h))
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Keep the label inside the image when the box touches the top edge.
                label_y = max(y1 - 10, 25)
                cv2.putText(frame, last_recognized_name, (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            cv2.imshow("Face Recognition", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()



Starting face recognition... Press 'q' to quit.
