In [2]:
pip install facenet-pytorch opencv-python torch torchvision

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
import cv2
import numpy as np
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
import warnings

warnings.filterwarnings("ignore")

class FaceRecognition:
    def __init__(self):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        # Properly initialized MTCNN
        self.mtcnn = MTCNN(
            image_size=160,
            margin=20,
            min_face_size=40,
            thresholds=[0.6, 0.7, 0.7],
            factor=0.709,
            post_process=True,
            device=self.device
        )
        
        # Properly initialized Resnet
        self.resnet = InceptionResnetV1(
            pretrained='vggface2',
            classify=False
        ).eval().to(self.device)
        
        self.known_embeddings = {}
        self.known_names = []

    def load_database(self, db_path="known_faces"):
        print("\n=== Loading Database ===")
        
        # Skip hidden directories
        person_dirs = [d for d in os.listdir(db_path) if not d.startswith('.')]
        
        for person_name in person_dirs:
            person_dir = os.path.join(db_path, person_name)
            if not os.path.isdir(person_dir):
                continue
                
            print(f"\nProcessing {person_name}:")
            embeddings = []
            
            image_files = [f for f in os.listdir(person_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
            
            for img_file in image_files:
                img_path = os.path.join(person_dir, img_file)
                try:
                    img = Image.open(img_path).convert('RGB')
                    
                    # Detect face and get properly aligned face tensor
                    face = self.mtcnn(img)
                    if face is None:
                        print(f"  ✗ {img_file}: No face detected")
                        continue
                        
                    # Ensure proper tensor dimensions [1, 3, 160, 160]
                    if face.dim() == 3:
                        face = face.unsqueeze(0)
                    
                    # Generate embedding
                    embedding = self.resnet(face.to(self.device)).detach().cpu()
                    embeddings.append(embedding)
                    print(f"  ✓ {img_file}: Success")
                    
                except Exception as e:
                    print(f"  ✗ {img_file}: Error - {str(e)}")
            
            if embeddings:
                self.known_embeddings[person_name] = torch.cat(embeddings)
                self.known_names.append(person_name)
        
        print("\n=== Database Summary ===")
        print(f"Loaded {len(self.known_names)} people")
        for name in self.known_names:
            print(f"- {name}: {self.known_embeddings[name].shape[0]} samples")

    def recognize(self, frame):
        try:
            img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            
            # Detect faces
            boxes, probs, landmarks = self.mtcnn.detect(img, landmarks=True)
            if boxes is None:
                return []
            
            results = []
            for i, (box, prob) in enumerate(zip(boxes, probs)):
                if prob < 0.9:
                    continue
                    
                x1, y1, x2, y2 = map(int, box)
                face_img = img.crop((x1, y1, x2, y2))
                
                # Get aligned face
                face = self.mtcnn(face_img)
                if face is None:
                    continue
                    
                # Ensure proper tensor dimensions
                if face.dim() == 3:
                    face = face.unsqueeze(0)
                
                # Generate embedding
                embedding = self.resnet(face.to(self.device)).detach().cpu()
                
                # Find matches
                name, confidence = self._match_face(embedding)
                print(f"Match: {name} (Confidence: {confidence:.2f})")
                
                results.append({
                    'box': box,
                    'name': name,
                    'confidence': confidence
                })
            
            return results
            
        except Exception as e:
            print(f"Recognition error: {str(e)}")
            return []

    def _match_face(self, embedding):
        if not self.known_embeddings:
            return "Unknown", 0.0
            
        min_dist = float('inf')
        best_name = "Unknown"
        
        for name, ref_embeddings in self.known_embeddings.items():
            dists = torch.cdist(embedding, ref_embeddings)
            current_min = dists.min().item()
            
            if current_min < min_dist:
                min_dist = current_min
                best_name = name
        
        threshold = 0.7  # Optimal threshold for face recognition
        confidence = max(0.0, 1.0 - min(min_dist / threshold, 1.0))
        
        return (best_name, confidence) if min_dist < threshold else ("Unknown", 0.0)

def main():
    print("=== Face Recognition System ===")
    recognizer = FaceRecognition()
    
    # Load database
    try:
        recognizer.load_database("known_faces")
    except Exception as e:
        print(f"Failed to load database: {str(e)}")
        return
    
    # Initialize camera
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open camera")
        return
    
    print("\nStarting recognition... (Press Q to quit)")
    
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame")
            break
        
        frame = cv2.flip(frame, 1)
        faces = recognizer.recognize(frame)
        
        # Draw results
        for face in faces:
            x1, y1, x2, y2 = map(int, face['box'])
            color = (0, 255, 0) if face['name'] != "Unknown" else (0, 0, 255)
            label = f"{face['name']} ({face['confidence']:.2f})"
            
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label, (x1, y1-10), 
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
        
        cv2.imshow("Face Recognition", frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()
    print("System stopped")

if __name__ == "__main__":
    main()

=== Face Recognition System ===

=== Loading Database ===

Processing Amar:
  ✓ Amar_1.jpg: Success
  ✗ person1_2.jpg: No face detected

Processing Arpita_singh:
  ✓ Arpita_singh.png: Success

Processing person1:
  ✗ sample.jpg: No face detected

Processing person2:
  ✓ person2_1.jpg: Success
  ✓ person2_1749916628.jpg: Success

Processing ronaldo:
  ✓ ronaldo_1.jpg: Success

=== Database Summary ===
Loaded 4 people
- Amar: 1 samples
- Arpita_singh: 1 samples
- person2: 2 samples
- ronaldo: 1 samples

Starting recognition... (Press Q to quit)
Match: person2 (Confidence: 0.10)
Match: Unknown (Confidence: 0.00)
Match: person2 (Confidence: 0.03)
Match: person2 (Confidence: 0.13)
Match: Unknown (Confidence: 0.00)
Match: person2 (Confidence: 0.02)
Match: Unknown (Confidence: 0.00)
Match: Unknown (Confidence: 0.00)
Match: Unknown (Confidence: 0.00)
Match: Unknown (Confidence: 0.00)
Match: Unknown (Confidence: 0.00)
Match: Unknown (Confidence: 0.00)
Match: Unknown (Confidence: 0.00)
Match: Un

In [1]:
import os
import cv2
import numpy as np
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
import warnings
from datetime import datetime

warnings.filterwarnings("ignore")

class FaceRecognition:
    def __init__(self):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.mtcnn = MTCNN(image_size=160, margin=20, min_face_size=40, 
                          thresholds=[0.6, 0.7, 0.7], device=self.device)
        self.resnet = InceptionResnetV1(pretrained='vggface2', classify=False).eval().to(self.device)
        self.known_embeddings = {}
        self.known_names = []
        self.recognition_history = []
        self.frame_count = 0
        self.fps = 0
        self.last_time = datetime.now()

    def load_database(self, db_path="known_faces"):
        print("\n=== Loading Database ===")
        person_dirs = [d for d in os.listdir(db_path) if not d.startswith('.')]
        
        for person_name in person_dirs:
            person_dir = os.path.join(db_path, person_name)
            if not os.path.isdir(person_dir):
                continue
                
            print(f"\nProcessing {person_name}:")
            embeddings = []
            
            image_files = [f for f in os.listdir(person_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
            
            for img_file in image_files:
                img_path = os.path.join(person_dir, img_file)
                try:
                    img = Image.open(img_path).convert('RGB')
                    face = self.mtcnn(img)
                    if face is None:
                        print(f"  ✗ {img_file}: No face detected")
                        continue
                        
                    if face.dim() == 3:
                        face = face.unsqueeze(0)
                    
                    embedding = self.resnet(face.to(self.device)).detach().cpu()
                    embeddings.append(embedding)
                    print(f"  ✓ {img_file}: Success")
                    
                except Exception as e:
                    print(f"  ✗ {img_file}: Error - {str(e)}")
            
            if embeddings:
                self.known_embeddings[person_name] = torch.cat(embeddings)
                self.known_names.append(person_name)
        
        print("\n=== Database Summary ===")
        print(f"Loaded {len(self.known_names)} people")

    def recognize(self, frame):
        try:
            self.frame_count += 1
            current_time = datetime.now()
            if (current_time - self.last_time).seconds >= 1:
                self.fps = self.frame_count
                self.frame_count = 0
                self.last_time = current_time
            
            img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            boxes, probs, _ = self.mtcnn.detect(img, landmarks=True)
            
            results = []
            if boxes is not None:
                for i, (box, prob) in enumerate(zip(boxes, probs)):
                    if prob < 0.9:
                        continue
                        
                    x1, y1, x2, y2 = map(int, box)
                    face_img = img.crop((x1, y1, x2, y2))
                    face = self.mtcnn(face_img)
                    
                    if face is not None:
                        if face.dim() == 3:
                            face = face.unsqueeze(0)
                        
                        embedding = self.resnet(face.to(self.device)).detach().cpu()
                        name, confidence = self._match_face(embedding)
                        
                        if name != "Unknown":
                            self.recognition_history.append((name, confidence))
                            if len(self.recognition_history) > 5:
                                self.recognition_history.pop(0)
                        
                        results.append({
                            'box': box,
                            'name': name,
                            'confidence': confidence
                        })
            
            return results
            
        except Exception as e:
            print(f"Recognition error: {str(e)}")
            return []

    def _match_face(self, embedding):
        if not self.known_embeddings:
            return "Unknown", 0.0
            
        min_dist = float('inf')
        best_name = "Unknown"
        
        for name, ref_embeddings in self.known_embeddings.items():
            dists = torch.cdist(embedding, ref_embeddings)
            current_min = dists.min().item()
            
            if current_min < min_dist:
                min_dist = current_min
                best_name = name
        
        threshold = 0.7
        confidence = max(0.0, 1.0 - min(min_dist / threshold, 1.0))
        
        return (best_name, confidence) if min_dist < threshold else ("Unknown", 0.0)

def draw_info_panel(frame, recognizer, width=300):
    """Draw attractive information panel on the left side"""
    height = frame.shape[0]
    panel = np.zeros((height, width, 3), dtype=np.uint8)
    
    # Gradient background
    cv2.rectangle(panel, (0, 0), (width, height), (40, 40, 60), -1)
    for i in range(height):
        alpha = i/height
        color = tuple(int(40 + alpha*40) for _ in range(3))
        cv2.line(panel, (0, i), (width, i), color, 1)
    
    # System info
    cv2.putText(panel, "FACE RECOGNITION SYSTEM", (10, 30), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 200, 255), 2)
    
    # FPS counter
    cv2.putText(panel, f"FPS: {recognizer.fps}", (10, 70), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
    
    # Database info
    cv2.putText(panel, f"Known Faces: {len(recognizer.known_names)}", (10, 110), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)
    
    # Recent recognitions
    cv2.putText(panel, "Recent Recognitions:", (10, 160), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 200, 255), 1)
    
    for i, (name, conf) in enumerate(recognizer.recognition_history):
        y_pos = 190 + i * 30
        cv2.putText(panel, f"{name}: {conf:.2f}", (20, y_pos), 
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    
    # Instructions
    cv2.putText(panel, "Instructions:", (10, height-100), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 0), 1)
    cv2.putText(panel, "Press 'Q' to quit", (20, height-70), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)
    
    # Add logo or decorative elements
    cv2.rectangle(panel, (width//2-30, height-40), (width//2+30, height-20), 
                  (0, 150, 255), -1)
    cv2.putText(panel, "AI", (width//2-15, height-25), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
    
    return np.hstack((panel, frame))

def main():
    print("=== Face Recognition System ===")
    recognizer = FaceRecognition()
    
    try:
        recognizer.load_database("known_faces")
    except Exception as e:
        print(f"Failed to load database: {str(e)}")
        return
    
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open camera")
        return
    
    print("\nStarting recognition... (Press Q to quit)")
    
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame")
            break
        
        frame = cv2.flip(frame, 1)
        faces = recognizer.recognize(frame)
        
        for face in faces:
            x1, y1, x2, y2 = map(int, face['box'])
            color = (0, 255, 0) if face['name'] != "Unknown" else (0, 0, 255)
            label = f"{face['name']} ({face['confidence']:.2f})"
            
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label, (x1, y1-10), 
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
        
        # Add information panel
        frame = draw_info_panel(frame, recognizer)
        
        cv2.imshow("Face Recognition System", frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()
    print("System stopped")

if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


=== Face Recognition System ===

=== Loading Database ===

Processing Amar:
  ✓ Amar_1.jpg: Success
  ✗ person1_2.jpg: No face detected

Processing Arpita_singh:
  ✓ Arpita_singh.png: Success

Processing person1:
  ✗ sample.jpg: No face detected

Processing person2:
  ✓ person2_1.jpg: Success
  ✓ person2_1749916628.jpg: Success

Processing ronaldo:
  ✓ ronaldo_1.jpg: Success

=== Database Summary ===
Loaded 4 people

Starting recognition... (Press Q to quit)
System stopped
