In [8]:
import cv2
import numpy as np
from mtcnn.mtcnn import MTCNN
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.preprocessing import Normalizer
from PIL import Image
import pickle
import os
import json

In [18]:
# Load AlexNet with ImageNet-pretrained weights.
# NOTE(review): `pretrained=True` is deprecated in recent torchvision releases in
# favour of the `weights=` argument; it is kept here because the installed
# torchvision version is not visible from this notebook.
alexnet = models.alexnet(pretrained=True)

# Replace the last classifier layer so the network outputs 128-dimensional embeddings.
# The new layer is randomly initialised and never trained in this notebook, so its
# weights must be reproducible: otherwise embeddings saved to disk in one session
# cannot be compared with embeddings computed after a kernel restart.
# The seed is applied inside fork_rng, which restores the CPU RNG state on exit.
# NOTE(review): embeddings saved before this seed was introduced came from
# different random weights and presumably need to be re-captured.
EMBEDDING_DIM = 128
EMBEDDING_SEED = 0
with torch.random.fork_rng(devices=[]):
    torch.manual_seed(EMBEDDING_SEED)
    alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, EMBEDDING_DIM)
alexnet.eval()  # Set model to evaluation mode

# Preprocessing applied to every face crop before it is fed to the network:
# resize to 224x224, convert to a tensor, then normalise each channel with the
# mean/std conventionally used with torchvision's ImageNet-pretrained models.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Scales each embedding row to unit L2 norm.
l2_normalizer = Normalizer('l2')
# Face detector shared by the helper functions below.
detector = MTCNN()




In [19]:
# Helper functions
def preprocess_face(face):
    """Turn a cropped face image into a batch tensor for the model.

    The crop is converted from BGR to RGB channel order, wrapped in a PIL
    image, run through the module-level ``transform`` pipeline and given a
    leading batch dimension.

    Args:
        face: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB``).

    Returns:
        The transformed tensor with an extra dimension inserted at position 0.
    """
    rgb_pixels = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb_pixels)
    single_image = transform(pil_image)
    # The model is called on batches, so prepend a batch axis of size one.
    return single_image.unsqueeze(0)

def get_embedding(model, face):
    """Compute an L2-normalised embedding for one face crop.

    Args:
        model: Callable applied to the preprocessed face tensor; its output
            must support ``.numpy()``.
        face: Face image accepted by ``preprocess_face``.

    Returns:
        The model output as a NumPy array after it has been passed through
        ``l2_normalizer.transform``.
    """
    batch = preprocess_face(face)
    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        raw_output = model(batch)
    return l2_normalizer.transform(raw_output.numpy())

def detect_faces(frame):
    """Detect faces in a frame and crop each one out.

    Args:
        frame: Image array indexed as ``frame[row, col, ...]`` and treated as
            BGR (the channel order ``preprocess_face`` expects for the crops).

    Returns:
        A tuple ``(face_images, faces)`` of two index-aligned lists: the
        cropped regions of ``frame`` and the detector's result dict for each
        crop. Detections whose box does not overlap the frame are dropped
        from both lists. The ``'box'`` values in the dicts are left exactly
        as the detector reported them.
    """
    # MTCNN is documented to work on RGB input while the frame is BGR, so run
    # detection on a converted copy and crop from the original BGR frame.
    detections = detector.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    frame_height, frame_width = frame.shape[:2]

    face_images = []
    kept_detections = []
    for detection in detections:
        x, y, width, height = detection['box']
        # A reported box may start at a negative coordinate or run past the
        # frame edge. A negative slice start would wrap around and yield an
        # empty or wrong crop, so clamp the box to the frame first.
        left, top = max(x, 0), max(y, 0)
        right = min(x + width, frame_width)
        bottom = min(y + height, frame_height)
        if right <= left or bottom <= top:
            continue
        face_images.append(frame[top:bottom, left:right])
        kept_detections.append(detection)
    return face_images, kept_detections

def save_embeddings(embeddings, labels, file_path):
    """Pickle the embeddings and labels to ``file_path``.

    Both collections are stored together as one ``(embeddings, labels)``
    tuple. The file is opened in ``'wb'`` mode, so existing content is
    overwritten.
    """
    payload = (embeddings, labels)
    with open(file_path, 'wb') as handle:
        pickle.dump(payload, handle)

def load_embeddings(file_path):
    """Load previously pickled face data from ``file_path``.

    Returns:
        Whatever object was pickled in the file, or a tuple of two empty
        lists when the file does not exist.
    """
    if not os.path.exists(file_path):
        return ([], [])
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that this notebook (or another trusted source) wrote.
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)


In [24]:
def capture_known_faces(file_path='face_data.pkl', num_samples=5):
    """Capture face embeddings from the webcam and store them under a name.

    Prompts for a name, then reads frames from camera 0 until ``num_samples``
    face embeddings have been collected, the camera stops delivering frames,
    or 'q' is pressed in the preview window. The new embeddings are appended
    to those already stored in ``file_path`` and the result is written back.

    Args:
        file_path: Pickle file holding the known embeddings and labels.
        num_samples: Number of face embeddings to capture (default 5).

    Returns:
        A tuple ``(known_embeddings, known_labels)`` including the new entries.
    """
    known_embeddings, known_labels = load_embeddings(file_path)
    # Ask for the label before opening the camera so the device is not held
    # open while waiting for keyboard input.
    name = input("Enter your name: ")

    cap = cv2.VideoCapture(0)
    count = 0
    try:
        while count < num_samples:
            ret, frame = cap.read()
            if not ret:
                break

            face_images, faces = detect_faces(frame)

            for face_img, face in zip(face_images, faces):
                # Several faces in one frame must not push the total past the
                # requested number of samples.
                if count >= num_samples:
                    break
                # An empty crop cannot be colour-converted; skip it.
                if face_img.size == 0:
                    continue

                embedding = get_embedding(alexnet, face_img)
                known_embeddings.append(embedding)
                known_labels.append(name)
                count += 1

                x, y, width, height = face['box']
                cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 255, 0), 2)
                cv2.putText(frame, f"Captured {count}/{num_samples}", (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            cv2.imshow('Capture Known Faces', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even when
        # detection or embedding raises.
        cap.release()
        cv2.destroyAllWindows()

    save_embeddings(known_embeddings, known_labels, file_path)
    return known_embeddings, known_labels


In [25]:
def recognize_faces(model, file_path='face_data.pkl', output_file='output.json', threshold=0.6):
    """Recognise faces from the webcam against the stored embeddings.

    Reads frames from camera 0 until the camera stops delivering frames or
    'q' is pressed in the preview window. Every detected face is compared
    with the embeddings loaded from ``file_path``; matches are drawn in green
    with their label, non-matches in red. All per-face results are written
    to ``output_file`` as JSON when the loop ends.

    Args:
        model: Network passed to ``get_embedding`` to embed each face crop.
        file_path: Pickle file holding the known embeddings and labels.
        output_file: Path of the JSON file receiving the results.
        threshold: A face counts as recognised when the distance to its
            nearest stored embedding is below this value (default 0.6).
    """
    known_embeddings, known_labels = load_embeddings(file_path)
    results = []

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            face_images, faces = detect_faces(frame)

            for face_img, face in zip(face_images, faces):
                # An empty crop cannot be colour-converted; skip it.
                if face_img.size == 0:
                    continue

                embedding = get_embedding(model, face_img)
                identity, min_dist = _nearest_known_face(embedding, known_embeddings, known_labels)
                is_match = min_dist < threshold

                # Plain ints keep the box JSON-serialisable.
                x, y, width, height = (int(value) for value in face['box'])
                results.append({
                    'box': [x, y, width, height],
                    'identity': identity if is_match else 'Unknown',
                    # With no stored faces the distance is inf, which json
                    # would write as the non-standard token Infinity; store
                    # null instead.
                    'distance': float(min_dist) if np.isfinite(min_dist) else None,
                })

                if is_match:
                    cv2.putText(frame, identity, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                    cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 255, 0), 2)
                else:
                    cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 0, 255), 2)

            cv2.imshow('Real-time Face Recognition', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even when
        # detection or embedding raises.
        cap.release()
        cv2.destroyAllWindows()

    with open(output_file, 'w') as f:
        json.dump(results, f, indent=4)


def _nearest_known_face(embedding, known_embeddings, known_labels):
    """Return ``(label, distance)`` of the stored embedding closest to ``embedding``.

    Returns ``(None, inf)`` when there are no stored embeddings.
    """
    min_dist, identity = float("inf"), None
    for known_emb, label in zip(known_embeddings, known_labels):
        dist = np.linalg.norm(embedding - known_emb)
        if dist < min_dist:
            min_dist, identity = dist, label
    return identity, min_dist


In [27]:
# Main flow: show the menu, read one choice, run the chosen action, and
# repeat until the user selects "3" to quit.
while True:
    for menu_line in ("1. Capture new face data", "2. Recognize faces", "3. Quit"):
        print(menu_line)
    choice = input("Enter your choice: ")

    # Handle the exit option first so the remaining branches are only actions.
    if choice == '3':
        break

    if choice == '1':
        capture_known_faces()
    elif choice == '2':
        recognize_faces(alexnet)
    else:
        print("Invalid choice. Please try again.")


1. Capture new face data
2. Recognize faces
3. Quit


Enter your choice:  1
Enter your name:  sumathi


1. Capture new face data
2. Recognize faces
3. Quit


Enter your choice:  2


1. Capture new face data
2. Recognize faces
3. Quit


Enter your choice:  3
