In [1]:
import torch
print(torch.__version__)

2.1.0


In [12]:
import cv2
import numpy as np
from mtcnn import MTCNN
from deep_sort_realtime.deepsort_tracker import DeepSort
import face_recognition
import os
import pickle

In [3]:
# Initialize DeepSORT tracker
tracker = DeepSort(max_age=10)

In [4]:
# Load pre-trained MTCNN model for face detection
mtcnn = MTCNN()

In [5]:
# Open video file
video_path = "Class Room Entrance.mp4"
cap = cv2.VideoCapture(video_path)
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))

In [6]:
# Define codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Use "mp4v" for mp4 format
output_path = "output_video.mp4"
out = cv2.VideoWriter(output_path, fourcc, 20.0, (frame_width, frame_height))

In [7]:
frame_id = 0
skip_frames = 2  # Skip the frames

In [8]:
# Dictionary to track whether the face image and embedding have been saved for each ID
saved_data = {}

In [9]:
# Create a directory to save face images
os.makedirs("faces(V2)", exist_ok=True)

In [10]:
while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_id += 1

    # Skip frames until reaching the nth frame
    if frame_id <= skip_frames:
        continue

    # Detect faces using MTCNN
    faces = mtcnn.detect_faces(frame)
    bbs = [(face['box'], face['confidence'], face['keypoints']) for face in faces]

    # Update tracker with the detected faces
    tracks = tracker.update_tracks(bbs, frame=frame)

    # Draw bounding boxes and IDs on the frame
    for track in tracks:
        if not track.is_confirmed():
            continue
        track_id = track.track_id

        # Check if data for this ID has already been saved
        if track_id in saved_data:
            continue

        ltrb = track.to_ltrb()

        # Draw bounding box
        cv2.rectangle(frame, (int(ltrb[0]), int(ltrb[1])), (int(ltrb[2]), int(ltrb[3])), (0, 255, 0), 2)

        # Get facial landmarks using face_recognition library
        face_locations = [(int(ltrb[1]), int(ltrb[2]), int(ltrb[3]), int(ltrb[0]))]  # (top, right, bottom, left)
        landmarks = face_recognition.face_landmarks(frame, face_locations)

        if landmarks:
            # Get face embedding
            face_encoding = face_recognition.face_encodings(frame, face_locations)[0]

            # Check if the face embedding is not empty before saving
            if len(face_encoding) > 0:
                # Save the face embedding
                saved_data[track_id] = {
                    "embedding": face_encoding,
                    "image_path": f"faces(V2)/{track_id}_frame{frame_id}.png"
                }

                # Save the face image
                face_image = frame[int(ltrb[1]):int(ltrb[3]), int(ltrb[0]):int(ltrb[2])]
                cv2.imwrite(saved_data[track_id]["image_path"], face_image)

                # Draw label
                label = f"ID: {track_id}"
                cv2.putText(frame, label, (int(ltrb[0]), int(ltrb[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Write the frame with bounding boxes to the output video
    out.write(frame)

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()



In [11]:
saved_data

{'1': {'embedding': array([-0.09345853,  0.08243297,  0.10074709,  0.0128797 , -0.01806457,
         -0.09152298,  0.01573261, -0.16753612,  0.15951666, -0.12044359,
          0.18417245, -0.01720166, -0.18613513, -0.13084017,  0.03334838,
          0.10668251, -0.17681073, -0.18979412,  0.03168201, -0.07258323,
          0.07295881,  0.02574221,  0.03813753,  0.10490339, -0.16000868,
         -0.31494191, -0.09209922, -0.13581327,  0.03662596,  0.03800499,
         -0.00055083,  0.11550677, -0.24290365,  0.02384472, -0.03773398,
          0.09062831,  0.06182844, -0.03312555,  0.14425136,  0.02394245,
         -0.23330134, -0.0928498 , -0.01706029,  0.23016128,  0.19735605,
         -0.02951657,  0.02020999,  0.01981216,  0.06221039, -0.20211273,
          0.06228185,  0.13060887,  0.08904135,  0.03112529, -0.06803882,
         -0.23067471, -0.03865461,  0.0121895 , -0.22021085,  0.0317857 ,
          0.02546017, -0.17944708, -0.07192829,  0.00739376,  0.20836982,
          0.13338818

In [13]:
# Save the saved_data dictionary to a file
output_data_path = "saved_data.pkl"
with open(output_data_path, 'wb') as f:
    pickle.dump(saved_data, f)

In [14]:
saved_embeddings_list = [data["embedding"] for data in saved_data.values()]

# Compare faces
unique_faces = set()

for i in range(len(saved_embeddings_list)):
    for j in range(i + 1, len(saved_embeddings_list)):
        # Compare embeddings using face_recognition.compare_faces
        is_same_person = face_recognition.compare_faces([saved_embeddings_list[i]], saved_embeddings_list[j])[0]

        # If the embeddings represent the same person, add them to the set of unique faces
        if is_same_person:
            unique_faces.add(i)
            unique_faces.add(j)

# Print the number of unique faces
print(f"Number of unique faces: {len(unique_faces)}")

Number of unique faces: 32
