In [3]:
import torch

# Check if CUDA (GPU) is available
if torch.cuda.is_available():
    print("PyTorch is running on GPU!")
    print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
else:
    print("PyTorch is running on CPU.")


PyTorch is running on GPU!
CUDA Device: NVIDIA GeForce RTX 4050 Laptop GPU


In [4]:
torch.cuda.empty_cache()

In [5]:
import os
import torch
import pickle
import numpy as np
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score

# Set device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize MTCNN
mtcnn = MTCNN(keep_all=True, device=device, post_process=False)

# Initialize FaceNet
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Function to extract face embeddings with memory optimization
def extract_face_embeddings(image_path, mtcnn, facenet, max_faces=10):
    try:
        img = Image.open(image_path).convert('RGB')  # Load image
        img = img.resize((160, 160))  # Resize image to reduce memory usage
        faces = mtcnn(img)  # Detect faces
        
        if faces is not None:
            # Limit number of faces to process
            faces = faces[:max_faces]
            faces = faces.to(device)  # Move to GPU if available
            
            # Process faces in smaller batches
            batch_size = 4
            embeddings_list = []
            
            for i in range(0, len(faces), batch_size):
                batch = faces[i:i+batch_size]
                batch_embeddings = facenet(batch)
                embeddings_list.append(batch_embeddings)
            
            embeddings = torch.cat(embeddings_list, dim=0)
            torch.cuda.empty_cache()  # Clear GPU cache
            return embeddings
        return None
    except RuntimeError as e:
        print(f"Error processing {image_path}: {e}")
        return None

# Function to build a dataset of face embeddings with memory optimization
def build_dataset(folder_path, mtcnn, facenet):
    embeddings_list = []
    labels = []

    for label, person_name in enumerate(os.listdir(folder_path)):
        person_folder = os.path.join(folder_path, person_name)
        
        # Process a limited number of images per person to reduce memory load
        image_files = os.listdir(person_folder)[:50]  
        
        for img_name in image_files:
            img_path = os.path.join(person_folder, img_name)
            embeddings = extract_face_embeddings(img_path, mtcnn, facenet)
            
            if embeddings is not None:
                embeddings_list.append(embeddings)
                labels.extend([label] * len(embeddings))
            
            torch.cuda.empty_cache()  # Clear GPU cache after each image

    if embeddings_list:
        return torch.cat(embeddings_list, dim=0), np.array(labels)
    return None, None

def is_my_face(embeddings, my_face_embeddings, threshold):
    distances = [torch.norm(embeddings - my_face_embedding).item() for my_face_embedding in my_face_embeddings]
    min_distance = min(distances)  # Find the smallest distance
    return min_distance < threshold, min_distance

# Main script
if __name__ == "__main__":
    # Path to your dataset
    dataset_folder = r"D:\DB\face_reg\mtcnn_mod"

    # Build the dataset
    embeddings, labels = build_dataset(dataset_folder, mtcnn, facenet)

    if embeddings is not None and labels is not None:
        print("Dataset embeddings and labels have been successfully extracted!")

        # Split dataset into your face and others
        my_face_embeddings = embeddings[labels == 0]  # Assuming label 0 is your face
        others_embeddings = embeddings[labels == 1]  # Assuming label 1 is others

        # Test the model on the dataset
        y_true = []
        y_pred = []
        distances = []

        # Test on your face images
        for embedding in my_face_embeddings:
            is_me, distance = is_my_face(embedding.unsqueeze(0), my_face_embeddings, threshold=0.8)
            y_true.append(1)  # 1 = your face
            y_pred.append(1 if is_me else 0)
            distances.append(distance)

        # Test on others' face images
        for embedding in others_embeddings:
            is_me, distance = is_my_face(embedding.unsqueeze(0), my_face_embeddings, threshold=0.8)
            y_true.append(0)  # 0 = not your face
            y_pred.append(1 if is_me else 0)
            distances.append(distance)

        # Calculate metrics
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred)
        confusion = confusion_matrix(y_true, y_pred)

        # Print metrics
        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print("Confusion Matrix:")
        print(confusion)

        # Print average distance for your face and others
        avg_distance_my_face = np.mean([d for d, true in zip(distances, y_true) if true == 1])
        avg_distance_others = np.mean([d for d, true in zip(distances, y_true) if true == 0])
        print(f"Average distance (your face): {avg_distance_my_face:.4f}")
        print(f"Average distance (others): {avg_distance_others:.4f}")

        # Save the embeddings and threshold to a file
        model_data = {
            'embeddings': my_face_embeddings,
            'threshold': 0.8  # You can adjust this threshold
        }

        with open('my_face_model.pkl', 'wb') as f:
            pickle.dump(model_data, f)

        print("Model saved to 'my_face_model.pkl'.")
    else:
        print("No embeddings were extracted. Check your input folder.")

# Rest of the script remains the same...

  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(cached_file)


Dataset embeddings and labels have been successfully extracted!
Accuracy: 0.4322
Precision: 0.4322
Recall: 1.0000
Confusion Matrix:
[[ 0 67]
 [ 0 51]]
Average distance (your face): 0.0000
Average distance (others): 0.2586
Model saved to 'my_face_model.pkl'.


In [6]:
import os
import torch
import pickle
import numpy as np
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score
from torchvision import transforms
from sklearn.neighbors import KNeighborsClassifier

# Set device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize MTCNN
mtcnn = MTCNN(keep_all=True, device=device, post_process=False)

# Initialize FaceNet
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Data augmentation transformations
data_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.Resize((160, 160)),
])

# Function to extract face embeddings with memory optimization
def extract_face_embeddings(image_path, mtcnn, facenet, max_faces=10):
    try:
        img = Image.open(image_path).convert('RGB')  # Load image
        img = data_transforms(img)  # Apply data augmentation
        faces = mtcnn(img)  # Detect faces
        
        if faces is not None:
            # Limit number of faces to process
            faces = faces[:max_faces]
            faces = faces.to(device)  # Move to GPU if available
            
            # Process faces in smaller batches
            batch_size = 4
            embeddings_list = []
            
            for i in range(0, len(faces), batch_size):
                batch = faces[i:i+batch_size]
                batch_embeddings = facenet(batch)
                embeddings_list.append(batch_embeddings)
            
            embeddings = torch.cat(embeddings_list, dim=0)
            torch.cuda.empty_cache()  # Clear GPU cache
            return embeddings
        return None
    except RuntimeError as e:
        print(f"Error processing {image_path}: {e}")
        return None

# Function to build a dataset of face embeddings with memory optimization
def build_dataset(folder_path, mtcnn, facenet):
    embeddings_list = []
    labels = []

    for label, person_name in enumerate(os.listdir(folder_path)):
        person_folder = os.path.join(folder_path, person_name)
        
        # Process a limited number of images per person to reduce memory load
        image_files = os.listdir(person_folder)[:50]  
        
        for img_name in image_files:
            img_path = os.path.join(person_folder, img_name)
            embeddings = extract_face_embeddings(img_path, mtcnn, facenet)
            
            if embeddings is not None:
                embeddings_list.append(embeddings)
                labels.extend([label] * len(embeddings))
            
            torch.cuda.empty_cache()  # Clear GPU cache after each image

    if embeddings_list:
        return torch.cat(embeddings_list, dim=0), np.array(labels)
    return None, None

def train_knn_classifier(embeddings, labels):
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(embeddings.detach().cpu().numpy(), labels)  # Detach tensor before converting to NumPy
    return knn

# Main script
if __name__ == "__main__":
    # Path to your dataset
    dataset_folder = r"D:\DB\face_reg\mtcnn_mod"

    # Build the dataset
    embeddings, labels = build_dataset(dataset_folder, mtcnn, facenet)

    if embeddings is not None and labels is not None:
        print("Dataset embeddings and labels have been successfully extracted!")

        # Train a k-NN classifier
        knn_classifier = train_knn_classifier(embeddings, labels)

        # Test the model on the dataset
        y_true = labels
        y_pred = knn_classifier.predict(embeddings.detach().cpu().numpy())  # Detach tensor before converting to NumPy

        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred)
        confusion = confusion_matrix(y_true, y_pred)

        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print("Confusion Matrix:")
        print(confusion)

        model_data = {
            'embeddings': embeddings,
            'labels': labels,
            'knn_classifier': knn_classifier
        }

        with open('my_face_model.pkl', 'wb') as f:
            pickle.dump(model_data, f)

        print("Model saved to 'my_face_model.pkl'.")
    else:
        print("No embeddings were extracted. Check your input folder.")

Dataset embeddings and labels have been successfully extracted!
Accuracy: 0.9231
Precision: 0.9531
Recall: 0.9104
Confusion Matrix:
[[47  3]
 [ 6 61]]
Model saved to 'my_face_model.pkl'.


In [None]:
import cv2
import torch
import pickle
import numpy as np
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
from scipy.spatial.distance import euclidean

device = 'cuda' if torch.cuda.is_available() else 'cpu'

mtcnn = MTCNN(keep_all=True, device=device, post_process=False)

facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

with open('my_face_model.pkl', 'rb') as f:
    model_data = pickle.load(f)
    embeddings = model_data['embeddings']
    labels = model_data['labels']
    knn_classifier = model_data['knn_classifier']

    embeddings = [emb.detach().cpu().numpy() if torch.is_tensor(emb) else emb for emb in embeddings]

def recognize_face(embedding, knn_classifier):
    label = knn_classifier.predict(embedding.detach().cpu().numpy().reshape(1, -1))
    return label[0]  

def calculate_distance(embedding1, embedding2):
    embedding1 = embedding1.flatten()
    embedding2 = embedding2.flatten()
    return euclidean(embedding1, embedding2)

cap = cv2.VideoCapture(0)  # Use 0 for the default camera

while True:
    ret, frame = cap.read()  # Read a frame from the camera
    if not ret:
        break

    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    boxes, _ = mtcnn.detect(pil_image)

    if boxes is not None:
        boxes = boxes.astype(int)

        for i, box in enumerate(boxes):
            face = pil_image.crop((box[0], box[1], box[2], box[3]))
            face = face.resize((160, 160))  # Resize face to match FaceNet input size

            face_tensor = mtcnn(face)  # Shape: [1, 3, 160, 160]

            if face_tensor is not None:
                # Move the tensor to the correct device
                face_tensor = face_tensor.to(device)

                # Remove extra batch dimension if present
                if face_tensor.dim() == 5:  # Check if tensor has 5 dimensions
                    face_tensor = face_tensor.squeeze(0)  # Remove extra batch dimension

                # Ensure the tensor has the correct shape [batch_size, channels, height, width]
                if face_tensor.dim() == 4:
                    face_tensor = face_tensor.squeeze(0)  # Remove extra batch dimension

                # Extract embedding for the face
                embedding = facenet(face_tensor.unsqueeze(0))  # Add batch dimension

                # Move the embedding tensor to CPU and convert to NumPy
                embedding_np = embedding.detach().cpu().numpy()

                # Recognize if the face is yours
                label = recognize_face(embedding, knn_classifier)

                # Calculate the distance between the detected face and the stored embeddings
                distances = [calculate_distance(embedding_np, emb) for emb in embeddings]
                avg_distance = np.mean(distances)  # Average distance to all stored embeddings

                # Draw a bounding box and label
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
                label_text = "You" if label == 0 else "Unknown"
                cv2.putText(frame, f"{label_text} (Distance: {avg_distance:.2f})", (box[0], box[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            else:
                print("No face detected in the cropped region.")
    else:
        print("No faces detected in the frame.")

    # Display the frame
    cv2.imshow('Real-Time Face Recognition', frame)

    # Exit on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the camera and close the window
cap.release()
cv2.destroyAllWindows()

  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(cached_file)


No face detected in the cropped region.
No face detected in the cropped region.
No face detected in the cropped region.
No face detected in the cropped region.
No face detected in the cropped region.
No face detected in the cropped region.
No faces detected in the frame.
No faces detected in the frame.
No faces detected in the frame.
No face detected in the cropped region.
No face detected in the cropped region.
No face detected in the cropped region.
