In [None]:
import cv2 
import os
import numpy as np 
from tqdm import tqdm
import torch.nn as nn
from torchvision import models

### Extract Frames

In [None]:
def extract_frames(video_path, output_dir, num_frames):
    """
    Extracts a fixed number of evenly spaced frames from a video.

    Frames are saved in ``output_dir`` as ``frame_0000.jpg``, ``frame_0001.jpg``, ...
    Extraction stops early as soon as a frame cannot be read, so fewer than
    ``num_frames`` files may be written.

    Args:
        video_path (str): Path to the input video file.
        output_dir (str): Directory where extracted frames will be saved.
            Created if it does not exist.
        num_frames (int): Number of frames to extract. Values <= 0 extract
            nothing.
    """

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Nothing to extract. This also protects the step computation below,
    # which would otherwise divide by zero when num_frames == 0.
    if num_frames <= 0:
        return

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    try:
        # Get the total number of frames in the video
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Interval between extracted frames; at least 1 so that short videos
        # (or videos reporting a frame count of 0) still advance.
        step = max(total_frames // num_frames, 1)

        count = 0       # Current frame index
        extracted = 0   # Number of frames extracted

        while extracted < num_frames:
            # Move to the specific frame position
            cap.set(cv2.CAP_PROP_POS_FRAMES, count)

            # Read the frame
            ret, frame = cap.read()
            if not ret:
                break  # Stop if there are no more frames to read

            # Save the extracted frame as an image file
            frame_path = os.path.join(output_dir, f"frame_{extracted:04d}.jpg")
            cv2.imwrite(frame_path, frame)

            # Move to the next frame position
            count += step
            extracted += 1
    finally:
        # Always release the capture object, even if reading/writing fails
        cap.release()

In [None]:
def preprocess_videos(root_dir, output_dir, num_frames=16):
    """
    Preprocess videos from the FaceForensics++ dataset.

    For every entry ``<method>`` in ``root_dir`` that contains a
    ``c23/videos`` folder, frames of each video are extracted into
    ``output_dir/<method>/<video name without extension>/``.

    Args:
        root_dir (str): Root directory containing the manipulated_sequences folder.
        output_dir (str): Directory where processed frames will be saved.
        num_frames (int): Number of frames to extract from each video.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Traverse each manipulation method (e.g., Deepfakes, FaceSwap)
    for method in os.listdir(root_dir):
        print(method)
        method_path = os.path.join(root_dir, method, "c23", "videos")
        if not os.path.isdir(method_path):
            continue  # Skip if not a valid directory

        # Create an output directory for this method
        method_output_dir = os.path.join(output_dir, method)
        os.makedirs(method_output_dir, exist_ok=True)

        # List the folder once so the progress-bar total always matches the
        # entries that are actually iterated.
        videos = os.listdir(method_path)

        # Traverse each video in the folder
        for video_file in tqdm(videos, desc=f"Processing videos for {method}", unit="video"):
            video_path = os.path.join(method_path, video_file)
            video_output_dir = os.path.join(method_output_dir, os.path.splitext(video_file)[0])
            os.makedirs(video_output_dir, exist_ok=True)

            # Best effort: a failure on one video is reported and the
            # remaining videos are still processed.
            try:
                extract_frames(video_path, video_output_dir, num_frames)
            except Exception as e:
                print(f"Error processing {video_file}: {e}")


### Extract Faces

In [None]:
# Build the YuNet face detector from its ONNX weights.
#   model           - path to the ONNX model file
#   config          - left empty: no additional configuration is required
#   input_size      - (width, height) the detector is initialised with
#   score_threshold - minimum confidence for a detection to be kept
#   nms_threshold   - Non-Maximum Suppression threshold for overlapping detections
#   top_k           - limit on the number of top detections retained
face_detector = cv2.FaceDetectorYN_create(
    model="./face_detection_yunet_2023mar.onnx",
    config="",
    input_size=(320, 320),
    score_threshold=0.9,
    nms_threshold=0.3,
    top_k=5000,
)

In [None]:
def _clamped_crop_box(x, y, w_box, h_box, margin, frame_w, frame_h):
    """Return (x1, y1, x2, y2) of the box grown by ``margin`` and clipped to the frame, or None if empty."""
    # Work with corners rather than (origin, size): clamping the origin must
    # not push the opposite edge further out than box + margin.
    x1 = max(x - margin, 0)
    y1 = max(y - margin, 0)
    x2 = min(x + w_box + margin, frame_w)
    y2 = min(y + h_box + margin, frame_h)
    if x2 <= x1 or y2 <= y1:
        return None
    return x1, y1, x2, y2


def process_all_videos(root_dir, output_dir, face_detector, margin=50):
    """
    Processes all JPEG images in the root directory and its subdirectories,
    detects faces, crops them with an optional margin, and saves the results
    in a mirrored directory structure.

    Each crop is saved as ``<frame name>_face_<i><ext>`` in the output
    directory that mirrors the frame's location under ``root_dir``. The
    mirrored directory is created even when no face is found in a frame.
    Unreadable images and detections whose clipped region is empty are skipped.

    Parameters:
        root_dir (str): Path to the directory containing image files.
        output_dir (str): Directory where cropped face images will be saved.
        face_detector (cv2.FaceDetectorYN): Initialized OpenCV face detector.
            Its input size is reset to each image's size before detection.
        margin (int): Additional pixels to include around detected faces.

    Returns:
    - None
    """

    # 1. Gather all .jpg files from all subdirectories
    jpg_files = []
    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.lower().endswith(".jpg"):  # Ensure case-insensitive match
                jpg_files.append(os.path.join(dirpath, filename))

    # Nothing to process
    if not jpg_files:
        return

    # 2. Track progress; the context manager closes the bar even on errors
    with tqdm(total=len(jpg_files), desc="Processing Frames") as pbar:
        # 3. Loop through all image files and process them
        for frame_path in jpg_files:
            # Read the image
            frame = cv2.imread(frame_path)
            if frame is None:  # Skip if the image is unreadable
                pbar.update(1)
                continue

            # Get image dimensions
            h, w = frame.shape[:2]

            # Set input size for the face detector
            face_detector.setInputSize((w, h))

            # Detect faces in the image
            _, faces = face_detector.detect(frame)

            cropped_images = []  # List to store cropped face images

            # If faces are detected, extract and crop them
            if faces is not None and len(faces) > 0:
                for face in faces:
                    # The first four values are used as the bounding box (x, y, width, height)
                    x, y, w_box, h_box = face[:4].astype(int)

                    box = _clamped_crop_box(x, y, w_box, h_box, margin, w, h)
                    if box is None:
                        # Box lies completely outside the frame; writing an
                        # empty image would fail.
                        continue

                    x1, y1, x2, y2 = box
                    cropped_images.append(frame[y1:y2, x1:x2])

            # Compute the corresponding output directory for saving cropped faces
            rel_dir = os.path.relpath(os.path.dirname(frame_path), root_dir)
            out_dir = os.path.join(output_dir, rel_dir)
            os.makedirs(out_dir, exist_ok=True)  # Create the output directory if it doesn't exist

            # Save each cropped face with an index
            base_name, ext = os.path.splitext(os.path.basename(frame_path))
            for i, crop in enumerate(cropped_images):
                out_path = os.path.join(out_dir, f"{base_name}_face_{i}{ext}")
                cv2.imwrite(out_path, crop)  # Save the cropped face

            # Update the progress bar
            pbar.update(1)