# In [28]:
import os
import cv2
from tqdm import tqdm

def extract_frames_from_video(video_path, output_dir, base_name):
    """Extract every frame of a video and save each one as a JPEG image.

    Frames are written to ``output_dir`` as ``{base_name}_{index}.jpg``, where
    ``index`` counts the decoded frames starting at 0.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory that receives the images; created if missing.
        base_name: Prefix used for the saved image file names.

    Returns:
        The number of frames that ``cv2.imwrite`` reported as written. This
        is 0 when the capture never opens (the read loop is skipped).
    """
    # cv2.imwrite reports failure through its return value only, so a missing
    # target directory would otherwise produce no images and no error.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    frames_written = 0
    try:
        frame_count = 0
        # Read frames until the capture closes or runs out of frames.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_name = f"{base_name}_{frame_count}.jpg"
            if cv2.imwrite(os.path.join(output_dir, frame_name), frame):
                frames_written += 1
            # The index always advances so file names keep matching the
            # frame's position in the video even if one write fails.
            frame_count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()
    return frames_written

def create_dataset(root_dir, new_dataset_dir):
    """Build a dataset of frames from the videos found under ``root_dir``.

    Walks ``root_dir`` recursively and extracts the frames of every ``.mov``
    or ``.mp4`` file (extension matched case-insensitively) into
    ``new_dataset_dir/attack`` or ``new_dataset_dir/real``.

    The category is chosen from the path of the directory that holds the
    video: ``'attack'`` is checked first, then ``'real'``; directories whose
    path contains neither are skipped. NOTE: this is a substring match on the
    whole path as yielded by ``os.walk`` (including ``root_dir`` itself), so a
    parent folder whose name contains either word also influences the result.

    Args:
        root_dir: Directory tree to search for videos.
        new_dataset_dir: Directory that receives the ``attack`` and ``real``
            subdirectories; all three are created if missing.
    """
    video_extensions = ('.mov', '.mp4')

    # Ensure the new dataset directories exist.
    os.makedirs(new_dataset_dir, exist_ok=True)
    attack_dir = os.path.join(new_dataset_dir, 'attack')
    real_dir = os.path.join(new_dataset_dir, 'real')
    os.makedirs(attack_dir, exist_ok=True)
    os.makedirs(real_dir, exist_ok=True)

    for root, _dirs, files in os.walk(root_dir):
        # The category depends only on the directory, so decide it once per
        # directory instead of once per file.
        if 'attack' in root:
            output_subdir = attack_dir
        elif 'real' in root:
            output_subdir = real_dir
        else:
            continue  # Not an 'attack' or 'real' directory.

        for file in tqdm(files):
            # Lower-case the name so '.MOV' / '.MP4' files are not skipped.
            if not file.lower().endswith(video_extensions):
                continue
            file_path = os.path.join(root, file)
            base_name = os.path.splitext(file)[0]
            extract_frames_from_video(file_path, output_subdir, base_name)

# Input: directory tree holding the source videos.
# Output: directory that will receive the extracted frames.
root_dir = r'C:\Users\Pepper\Desktop\MSU-MFSD-Dataset\root\scene01'
new_dataset_dir = r'C:\Users\Pepper\Desktop\MSU-MFSD-Dataset\new_dataset'

# Extract the frames of every video under root_dir into new_dataset_dir.
create_dataset(root_dir=root_dir, new_dataset_dir=new_dataset_dir)


# Cell output (tqdm progress, one bar per directory visited):
# 0it [00:00, ?it/s]
# 100%|████████████████████████████████████████████████████████████████████████████████| 420/420 [11:42<00:00,  1.67s/it]
# 100%|████████████████████████████████████████████████████████████████████████████████| 140/140 [04:13<00:00,  1.81s/it]
