In [None]:
!pip install torch torchvision opencv-python facenet-pytorch
!pip install numpy
!git clone https://github.com/ultralytics/yolov5
!cd yolov5
!pip install -r requirements.txt


In [2]:
import cv2
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
import os
import numpy as np

# Mask detector: custom YOLOv5 weights ('last.pt') loaded through the local
# 'yolov5' checkout rather than downloaded from the hub.
yolo_model = torch.hub.load('yolov5', 'custom', path='last.pt', source='local')

# Labels indexed by the detector's predicted class id; the order has to match
# the class order used when the YOLOv5 model was trained.
class_names = ['without_mask', 'with_mask', 'mask_weared_incorrect']

# Face detector/aligner (MTCNN) and embedding network (FaceNet) used for
# identity recognition. Both are placed on the GPU when one is available.
mtcnn = MTCNN(
    image_size=160,
    margin=20,
    keep_all=True,
    device='cuda' if torch.cuda.is_available() else 'cpu',
)
facenet = InceptionResnetV1(pretrained='vggface2').eval()
facenet = facenet.cuda() if torch.cuda.is_available() else facenet

# Modified function to load known faces with and without masks
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _embed_first_face(img):
    """Run MTCNN on ``img`` and return the FaceNet embedding of one face.

    Args:
        img: RGB PIL image to search for a face.

    Returns:
        The tensor produced by ``facenet`` for a single-face batch, or ``None``
        when MTCNN reports no face.
    """
    faces = mtcnn(img)
    if isinstance(faces, list):
        faces = faces[0] if faces else None
    if faces is None:
        return None

    # A 3-D result is a single face crop. A 4-D result is a stack of crops
    # (the detector is created with keep_all=True); only the first crop is
    # embedded so each stored reference describes exactly one face.
    # NOTE(review): presumably the first crop is the largest face (MTCNN's
    # default select_largest ordering) - confirm against facenet-pytorch docs.
    batch = faces.unsqueeze(0) if faces.dim() == 3 else faces[:1]
    if batch.shape[0] == 0:
        return None

    if torch.cuda.is_available():
        batch = batch.cuda()
    # Inference only: skip autograd bookkeeping instead of detaching afterwards.
    with torch.no_grad():
        return facenet(batch)


def _collect_embeddings(image_dir):
    """Embed every supported image in ``image_dir``, split by mask category.

    Each image contributes up to two embeddings: one for the image as stored
    and one for its horizontal mirror. Images with no detected face contribute
    nothing.

    Returns:
        ``{'with_mask': [...], 'without_mask': [...]}`` lists of embeddings.
    """
    store = {'with_mask': [], 'without_mask': []}

    for img_file in os.listdir(image_dir):
        lowered = img_file.lower()
        if not lowered.endswith(_IMAGE_EXTENSIONS):
            continue

        # The mask category comes from the file name alone.
        # NOTE(review): a name such as "without_mask_1.jpg" also contains
        # "mask" and is therefore filed under 'with_mask' - confirm the
        # naming convention used for the reference images.
        bucket = 'with_mask' if 'mask' in lowered else 'without_mask'

        # Close the file handle promptly; convert() returns an independent copy.
        with Image.open(os.path.join(image_dir, img_file)) as handle:
            img = handle.convert('RGB')

        for variant in (img, img.transpose(Image.FLIP_LEFT_RIGHT)):
            emb = _embed_first_face(variant)
            if emb is not None:
                store[bucket].append(emb)

    return store


def load_known_faces(known_dir='known_faces'):
    """Build the gallery of reference face embeddings found under ``known_dir``.

    Two layouts are supported:

    * Flat - every non-hidden entry of ``known_dir`` is a file. All images
      belong to one person named after the directory itself.
    * Hierarchical - otherwise, each sub-directory of ``known_dir`` is one
      person and its images are that person's references. Plain files at the
      top level are ignored.

    Args:
        known_dir: Directory holding the reference images.

    Returns:
        ``{person_name: {'with_mask': [emb, ...], 'without_mask': [emb, ...]}}``.

    Raises:
        FileNotFoundError: If ``known_dir`` does not exist (from ``os.listdir``).
    """
    visible = [f for f in os.listdir(known_dir) if not f.startswith('.')]

    if all(os.path.isfile(os.path.join(known_dir, f)) for f in visible):
        # normpath drops a trailing separator, which would otherwise make
        # basename() return an empty person name.
        person_name = os.path.basename(os.path.normpath(known_dir))
        return {person_name: _collect_embeddings(known_dir)}

    embeddings = {}
    for person_name in os.listdir(known_dir):
        person_dir = os.path.join(known_dir, person_name)
        if os.path.isdir(person_dir):
            embeddings[person_name] = _collect_embeddings(person_dir)
    return embeddings

# Build the reference gallery once, then print a per-person summary of how
# many embeddings were collected in each mask category.
print("Loading known faces...")
known_embeddings = load_known_faces()
print(f"Loaded {len(known_embeddings)} persons")
for person in known_embeddings:
    data = known_embeddings[person]
    print(f"  - {person}: {len(data['without_mask'])} without mask, {len(data['with_mask'])} with mask")

# Modified function to recognize faces with or without masks
def recognize_face(face_embedding, mask_status, threshold=0.9, known=None):
    """Find the known person whose reference embedding is nearest to a face.

    Every reference embedding of every person is compared, in both the
    'with_mask' and 'without_mask' categories, using Euclidean distance.

    Args:
        face_embedding: Embedding tensor of the face to identify. Either a
            single vector or a batch of row vectors; for a batch, the smallest
            distance from any row is used.
        mask_status: Mask label of the detection. Kept for interface
            compatibility; it does not influence the search because both
            categories are always compared.
        threshold: Maximum distance at which a match is accepted.
        known: Optional gallery shaped like the result of ``load_known_faces``.
            Defaults to the module-level ``known_embeddings``.

    Returns:
        ``(identity, min_dist)`` where ``identity`` is the closest person's
        name, or ``"Unknown"`` when the gallery is empty or the closest
        distance is not below ``threshold``. ``min_dist`` is ``inf`` when
        nothing was compared.
    """
    gallery = known_embeddings if known is None else known

    min_dist = float('inf')
    identity = "Unknown"

    # Normalise the query to shape (rows, dim) so a bare vector and a batch
    # are handled the same way.
    query = face_embedding.detach()
    query = query.reshape(-1, query.shape[-1])
    if query.shape[0] == 0:
        return identity, min_dist

    for person_name, emb_dict in gallery.items():
        # Try both mask categories for better recognition
        for compare_type in ('with_mask', 'without_mask'):
            for known_emb in emb_dict[compare_type]:
                reference = known_emb.reshape(-1, known_emb.shape[-1])
                # Row-to-row distances. Taking one norm over the broadcast
                # difference would mix every row into a single inflated value
                # whenever either side holds more than one embedding.
                pairwise = (query[:, None, :] - reference[None, :, :]).norm(dim=-1)
                dist = pairwise.min().item()
                if dist < min_dist:
                    min_dist = dist
                    identity = person_name

    # Apply threshold
    if min_dist < threshold:
        return identity, min_dist
    return "Unknown", min_dist

# Start webcam for real-time detection

# Detections below this YOLOv5 confidence are ignored.
CONF_THRESHOLD = 0.5
# A 'without_mask' prediction whose confidence (in percent) exceeds this value
# is relabelled as 'with_mask' by the custom rule below.
WITHOUT_MASK_OVERRIDE_PCT = 91.8

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # The read below fails immediately in this case; say why the loop ends.
    print("Could not open webcam (device 0)")
print("Press 'q' to quit")

# try/finally guarantees the camera and the window are released even when the
# loop is interrupted or a frame raises part-way through processing.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Flip the frame horizontally to correct the mirror effect
        frame = cv2.flip(frame, 1)

        # Draw on a copy so face crops are taken from an unannotated frame
        display_frame = frame.copy()

        # Perform detection with YOLOv5
        # NOTE(review): the frame is passed in OpenCV's BGR channel order;
        # YOLOv5's documented OpenCV usage converts to RGB first - confirm
        # which order the model was trained and tuned with before changing it.
        results = yolo_model(frame)
        detections = results.xyxy[0]  # x1, y1, x2, y2, conf, class

        for det in detections:
            x1, y1, x2, y2, conf, cls = det.cpu().numpy()
            x1, y1, x2, y2, cls = int(x1), int(y1), int(x2), int(y2), int(cls)

            # Skip detections with low confidence
            if conf < CONF_THRESHOLD:
                continue

            face_crop = frame[y1:y2, x1:x2]
            if face_crop.size == 0:
                continue

            # Convert the crop to an RGB PIL image for MTCNN
            face_pil = Image.fromarray(cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB))

            # Get mask status from the model
            original_mask_status = class_names[cls]

            # Confidence in percent, reported only for class 0 ('without_mask')
            without_mask_conf = conf * 100 if cls == 0 else 0

            # Custom rule: a 'without_mask' prediction above the override
            # threshold is displayed as 'with_mask'; everything else keeps the
            # model's label.
            # NOTE(review): this inverts the model's most confident
            # 'without_mask' predictions - confirm this is intentional.
            if (original_mask_status == 'without_mask'
                    and without_mask_conf > WITHOUT_MASK_OVERRIDE_PCT):
                custom_mask_status = "with_mask"
            else:
                custom_mask_status = original_mask_status

            identity = "Unknown"
            min_distance = float('inf')

            # Try recognition on the crop and on its mirror image; keep
            # whichever attempt yields the smaller distance.
            for candidate in (face_pil, face_pil.transpose(Image.FLIP_LEFT_RIGHT)):
                face_tensors = mtcnn(candidate)
                if isinstance(face_tensors, list):
                    face_tensors = face_tensors[0] if face_tensors else None
                if face_tensors is None:
                    continue

                # The detector is created with keep_all=True and may return a
                # stack of several crops; embed only the first so the distance
                # is computed for a single face.
                if face_tensors.dim() == 3:
                    face_batch = face_tensors.unsqueeze(0)
                else:
                    face_batch = face_tensors[:1]
                if face_batch.shape[0] == 0:
                    continue

                if torch.cuda.is_available():
                    face_batch = face_batch.cuda()
                # Inference only: avoid building an autograd graph every frame.
                with torch.no_grad():
                    emb = facenet(face_batch)

                name, distance = recognize_face(emb, custom_mask_status)
                if distance < min_distance:
                    identity = name
                    min_distance = distance

            # Determine color based on custom mask status (BGR)
            if custom_mask_status == "with_mask":
                color = (0, 255, 0)  # Green
            elif custom_mask_status == "mask_weared_incorrect":
                color = (0, 165, 255)  # Orange
            else:
                color = (0, 0, 255)  # Red

            # Draw bounding box and label
            label = f"{identity} - {custom_mask_status} ({min_distance:.2f})"
            cv2.rectangle(display_frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(display_frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

            # Display the without_mask confidence for debugging
            cv2.putText(display_frame, f"Without mask conf: {without_mask_conf:.1f}",
                        (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            # Show both original and custom status when the rule changed the label
            if original_mask_status != custom_mask_status:
                cv2.putText(display_frame, f"Model: {original_mask_status}, Custom: {custom_mask_status}",
                            (x1, y2 + 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        # Display detection information
        if len(detections) > 0:
            text = f"Detections: {len(detections)}"
            cv2.putText(display_frame, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        cv2.imshow("Face & Mask Recognition", display_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

YOLOv5  v7.0-411-gf4d8a84c Python-3.12.4 torch-2.2.2+cpu CPU

Fusing layers... 
Model summary: 157 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


Loading known faces...
Loaded 1 persons
  - sushmit: 10 without mask, 0 with mask
Press 'q' to quit
