In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1
from torchvision import transforms
from PIL import Image
import numpy as np
import warnings
import torch
import cv2
warnings.filterwarnings('ignore')

In [2]:
# Pick the first CUDA device when one is available, otherwise fall back to
# the CPU, and report the choice so the cell output records it.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cpu


In [4]:
# Build the network with its classification head enabled and a single output
# class. No pretrained weights are requested here; the weights are expected
# to be loaded from a checkpoint afterwards.
resnet = InceptionResnetV1(pretrained=None, classify=True, num_classes=1)
# Move the parameters onto the selected device.
resnet = resnet.to(device)

In [5]:
# Load the saved weights from facenet_model_final.pth into the network.
# map_location remaps every stored tensor onto `device`, so a checkpoint that
# was saved from a CUDA device still loads on a CPU-only machine (and vice
# versa) instead of failing during deserialization.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
resnet.load_state_dict(
    torch.load('facenet_model_final.pth', map_location=device)
)
# Switch the module to evaluation mode. Kept as the last expression so the
# cell still displays the model summary.
resnet.eval()

InceptionResnetV1(
  (conv2d_1a): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2a): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2b): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (maxpool_3a): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2d_3b): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_4a): 

In [6]:
# Face detector, placed on the same device as the recognition network and
# constructed with keep_all=True.
mtcnn = MTCNN(device=device, keep_all=True)

In [7]:
def preprocess(face):
    """Convert a cropped face image into a one-item batch tensor.

    The image is resized to 160x160, converted to a tensor, normalized
    per channel, and finally given a leading batch dimension.

    Args:
        face: The face crop to convert. The caller in this notebook passes
            the result of ``Image.fromarray`` (a PIL image).

    Returns:
        The transformed tensor with an extra dimension inserted at index 0.

    NOTE(review): the mean/std values look like the usual ImageNet
    statistics -- confirm they match what the checkpoint was trained with.
    """
    steps = [
        transforms.Resize((160, 160)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
    pipeline = transforms.Compose(steps)
    tensor = pipeline(face)
    return tensor.unsqueeze(0)

In [8]:
def capture_and_detect():
    """Stream frames from the default webcam, detect faces and annotate them.

    For every frame the function:
      1. converts the BGR frame delivered by OpenCV to RGB,
      2. asks ``mtcnn`` for face bounding boxes,
      3. draws each box, crops the face, preprocesses it and runs it
         through ``resnet``,
      4. writes a label above the box and shows the frame in a window
         named 'Webcam'.

    The loop ends when a frame cannot be read or when the user presses 'q'
    in the preview window. The camera handle and all OpenCV windows are
    released on every exit path, including when an exception is raised.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(0)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # MTCNN is fed RGB data, whereas OpenCV delivers BGR frames.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, _ = mtcnn.detect(rgb_frame)

            if boxes is not None:
                frame_h, frame_w = frame.shape[:2]

                for box in boxes:
                    x1, y1, x2, y2 = (int(v) for v in box[:4])

                    # Draw a rectangle around the face.
                    frame = cv2.rectangle(frame, (x1, y1), (x2, y2),
                                          (0, 255, 0), 2)

                    # Detector boxes may extend past the frame border. A
                    # negative index would wrap around when slicing and an
                    # inverted range yields an empty crop, so clamp the
                    # crop window to the frame and skip degenerate boxes.
                    crop_x1, crop_y1 = max(x1, 0), max(y1, 0)
                    crop_x2, crop_y2 = min(x2, frame_w), min(y2, frame_h)
                    if crop_x2 <= crop_x1 or crop_y2 <= crop_y1:
                        continue

                    # Crop and preprocess the face. The tensor must live on
                    # the same device as the model, otherwise the forward
                    # pass fails when running on CUDA.
                    face = Image.fromarray(
                        rgb_frame[crop_y1:crop_y2, crop_x1:crop_x2]
                    )
                    face_tensor = preprocess(face).to(device)

                    # Perform face recognition.
                    # TODO: the model output is computed but not yet used
                    # to choose the label below.
                    with torch.no_grad():
                        embedding = resnet(face_tensor)

                    # Display the result on the frame.
                    label = 'MHossein'  # Replace this with actual label based on your recognition logic
                    cv2.putText(frame, label, (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            # Show the frame.
            cv2.imshow('Webcam', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even if
        # detection or inference raised part-way through the loop.
        cap.release()
        cv2.destroyAllWindows()


In [9]:
# Run the capture and detection loop. This call blocks until 'q' is pressed
# in the preview window or a frame can no longer be read from the webcam.
capture_and_detect()