In [2]:
import cv2
import torch
from facenet_pytorch import InceptionResnetV1, MTCNN
from PIL import Image
import numpy as np

In [3]:
# Step 1: Set up the compute device, the MTCNN face detector and the FaceNet embedder.
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# keep_all=True asks MTCNN to return every face it detects in an image.
mtcnn = MTCNN(device=device, keep_all=True)

# InceptionResnetV1 with the 'vggface2' pre-trained weights, switched to eval mode
# and placed on the selected device.
facenet = InceptionResnetV1(pretrained='vggface2').to(device).eval()

  0%|          | 0.00/107M [00:00<?, ?B/s]

In [17]:

# Step 2: Load video file and derive the sampling interval (in frames) for ~1 sample/second
video_path = 'C:/Users/oskik/PycharmProjects/InżynierkaSandbox/S01_boris/C01/sliced.mp4'  # Replace with your video path
cap = cv2.VideoCapture(video_path)

# Fail fast: continuing with an unopened capture would only surface later as a
# confusing error (or an empty result) in the processing cell.
if not cap.isOpened():
    raise IOError(f"Error: Could not open video: {video_path}")
print("Video opened successfully!")

# Frames per second as reported by the backend (0 when the property is unavailable).
fps = cap.get(cv2.CAP_PROP_FPS)

if fps > 0:
    # Round instead of truncating so that e.g. 29.97 fps samples every 30 frames
    # rather than every 29 (which drifts from the 1-second grid over time).
    # Clamp to >= 1 so a sub-1 fps value can never produce a zero interval.
    frame_interval = max(1, int(round(fps)))
else:
    # Unknown/invalid FPS (0 or NaN): a zero interval would break the
    # `frame_count % frame_interval` check downstream.
    print("Warning: video reports no valid FPS; sampling every frame instead.")
    frame_interval = 1

Video opened successfully!


In [18]:

# Step 3: Process video at 1 second intervals
frame_count = 0
# Each entry is (sample_index, embeddings) where sample_index is
# frame_count // interval and embeddings is the NumPy array produced by FaceNet
# for the faces MTCNN returned on that frame.
frame_embeddings = []

# A previous run of this cell releases the capture; reopen it so re-running the
# cell does not silently produce zero embeddings.
if not cap.isOpened():
    cap.open(video_path)
if not cap.isOpened():
    raise IOError(f"Could not open video: {video_path}")

# A zero interval would raise ZeroDivisionError in the modulo below.
sample_every = max(1, frame_interval)

try:
    # Inference only: no_grad avoids building autograd graphs for every frame.
    with torch.no_grad():
        while True:
            # grab() advances to the next frame without decoding it, so frames
            # that are skipped cost far less than a full read().
            if not cap.grab():
                break

            # Check if the frame is at the sampling interval
            if frame_count % sample_every == 0:
                ret, frame = cap.retrieve()  # decode only the frames we use
                if not ret:
                    break

                # Convert frame to RGB (OpenCV uses BGR by default)
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame_pil = Image.fromarray(frame_rgb)

                # Steps 4-5: Detect and crop faces in a single MTCNN pass.
                # The result is None when no face is found, so a separate
                # mtcnn.detect() call beforehand is redundant work.
                faces = mtcnn(frame_pil)

                # Step 6: Generate embeddings for each face
                if faces is not None:
                    embeddings = facenet(faces.to(device))
                    frame_embeddings.append(
                        (frame_count // sample_every, embeddings.cpu().numpy())
                    )  # Store embeddings with timestamp

            frame_count += 1
finally:
    # Step 7: Release video capture, even if the loop is interrupted or fails
    cap.release()

KeyboardInterrupt: 

In [6]:

# Summarise the run: report how many sampled timestamps produced face embeddings.
n_timestamps = len(frame_embeddings)
print("Processed frames at 1-second intervals.")
print("Extracted embeddings for {} timestamps (seconds).".format(n_timestamps))


Processed frames at 1-second intervals.
Extracted embeddings for 0 timestamps (seconds).
