In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms
from sklearn.metrics.pairwise import cosine_similarity
import cv2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))


Running on device: cpu


  return torch._C._cuda_getDeviceCount() > 0


In [3]:
mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20,
              thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
              device=device)

In [4]:
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)


In [5]:
preprocess = transforms.Compose([
    transforms.Resize((160, 160)),      # Resize ảnh về 160x160
    transforms.ToTensor(),              # Chuyển ảnh thành tensor
    transforms.Normalize(               # Chuẩn hóa giá trị pixel
        mean=[0.5, 0.5, 0.5], 
        std=[0.5, 0.5, 0.5]
    )
])

In [6]:
folder_path = "Data"
image_paths = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(('.jpg', '.png'))]

print("Danh sách ảnh:", image_paths)

Danh sách ảnh: ['Data/ac84e587-7413-11ef-bf8a-e81f155e7428.jpg', 'Data/583448a4-aad6-11ef-b52d-c403a8a5aea1.jpg', 'Data/a7b937a7-7413-11ef-b46f-e81f155e7428.jpg', 'Data/ab16ce1e-7413-11ef-a7e1-e81f155e7428.jpg', 'Data/a6a96299-7413-11ef-8665-e81f155e7428.jpg']


In [7]:
def extract_embedding(model, image_path, device):
    image = Image.open(image_path).convert("RGB")
    input_tensor = preprocess(image).unsqueeze(0).to(device)
    with torch.no_grad():
        embedding = model(input_tensor)
    return embedding.cpu().numpy()

In [8]:
embeddings = {}
for file_name in os.listdir(folder_path):
    if file_name.endswith(('.jpg', '.png')):  # Chỉ lấy ảnh có đuôi .jpg hoặc .png
        # Tải ảnh và tiền xử lý
        image_path = os.path.join(folder_path, file_name)
        image = Image.open(image_path).convert("RGB")
        input_tensor = preprocess(image).unsqueeze(0).to(device)

        # Trích xuất embedding
        with torch.no_grad():
            embedding = model(input_tensor).cpu().numpy()

        # Lưu embedding theo tên file
        person_name = os.path.splitext(file_name)[0]  # Lấy tên file mà không có phần mở rộng
        embeddings[person_name] = embedding

# Lưu embeddings vào file để tái sử dụng
np.save("reference_embeddings.npy", embeddings)
print("Embedding của các ảnh tham chiếu đã được lưu.")

Embedding của các ảnh tham chiếu đã được lưu.


In [9]:
reference_embeddings = np.load("reference_embeddings.npy", allow_pickle=True).item()

def cosine_similarity(embedding1, embedding2):
    return np.dot(embedding1, embedding2.T) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))

In [10]:
# Mở camera
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Không thể truy cập camera.")
        break
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    box, prob = mtcnn.detect(rgb_frame)

    if box is not None:
        for (x1, y1, x2, y2) in box:
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            face = rgb_frame[y1:y2, x1:x2]  # Cắt khuôn mặt
            face_image = Image.fromarray(face).resize((160, 160))

            # Trích xuất embedding từ khuôn mặt live cam
            input_tensor = transforms.Compose([
                transforms.Resize((160, 160)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
            ])(face_image).unsqueeze(0).to(device)
            with torch.no_grad():
                live_embedding = model(input_tensor).cpu().numpy()

            # So sánh với embedding tham chiếu
            matched_name = "Unknown"
            max_similarity = 0
            for name, ref_embedding in reference_embeddings.items():
                similarity = cosine_similarity(live_embedding, ref_embedding)
                if similarity > max_similarity and similarity > 0.8:  # Ngưỡng: 0.8
                    matched_name = name
                    max_similarity = similarity
            max_similarity = float(max_similarity)

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{matched_name} ({max_similarity:.2f})", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow('Live Face Recognition', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


  max_similarity = float(max_similarity)
