In [1]:
import cv2
import dlib
import json
import os
from tqdm import tqdm

# Read the per-video metadata. The processing loop iterates this mapping as
# {video filename: info dict} and reads each entry's 'label'.
metadata_file = 'train_sample_videos/metadata.json'
with open(metadata_file, 'r') as json_file:
    metadata = json.load(json_file)

# One dlib frontal face detector instance, reused for every frame.
detector = dlib.get_frontal_face_detector()

# Output layout: <dataset_folder>/fake and <dataset_folder>/true.
dataset_folder = 'dataset'
fake_folder = os.path.join(dataset_folder, 'fake')
true_folder = os.path.join(dataset_folder, 'true')

# exist_ok=True keeps this idempotent, so re-running the cell is harmless.
for output_dir in (dataset_folder, fake_folder, true_folder):
    os.makedirs(output_dir, exist_ok=True)

# Upper bound on the number of face crops saved per video.
max_frames_per_video = 8

# Iterate through each video in metadata and save up to max_frames_per_video
# face crops per video. Crops from videos labelled 'REAL' go to true_folder;
# every other label goes to fake_folder.
video_dir = 'train_sample_videos'
total_videos = len(metadata)
progress_bar = tqdm(metadata.items(), total=total_videos, desc='Processing Videos')

for video_filename, video_info in progress_bar:
    video_path = os.path.join(video_dir, video_filename)
    label = video_info['label']
    folder = true_folder if label == 'REAL' else fake_folder

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Missing or undecodable file: report it instead of silently
            # producing zero crops for this video.
            tqdm.write(f'Could not open video, skipping: {video_path}')
            continue

        extracted_frames = 0
        frame_id = 0

        # Read until the stream ends instead of trusting CAP_PROP_FRAME_COUNT,
        # which can be missing or inaccurate for some containers.
        while extracted_frames < max_frames_per_video:
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = detector(gray)

            for face_id, face in enumerate(faces):
                x, y, w, h = face.left(), face.top(), face.width(), face.height()

                # The detector may return boxes that extend past the image
                # border. A negative slice start would wrap around in NumPy
                # and yield an empty crop, so clamp the box to the frame
                # (NumPy already clamps the upper bounds).
                x0, y0 = max(x, 0), max(y, 0)
                x1, y1 = max(x + w, 0), max(y + h, 0)
                face_img = frame[y0:y1, x0:x1]

                # Nothing left after clamping: skip this detection.
                if face_img.size == 0:
                    continue

                # The first face keeps the original file name; additional
                # faces in the same frame get a suffix so they no longer
                # overwrite each other.
                suffix = '' if face_id == 0 else f'_face_{face_id}'
                filename = f'{os.path.basename(video_path)}_frame_{frame_id}{suffix}.png'
                filepath = os.path.join(folder, filename)

                # Only count crops that were actually written to disk.
                if cv2.imwrite(filepath, face_img):
                    extracted_frames += 1
                    if extracted_frames >= max_frames_per_video:
                        break

            frame_id += 1
    finally:
        # Always free the decoder handle, even if a frame fails to process.
        cap.release()

progress_bar.close()
print("Processing complete.")


Processing Videos: 100%|███████████████████████| 400/400 [19:56<00:00,  2.99s/it]

Processing complete.



