In [2]:
# Colab-only: mount the user's Google Drive at /content/drive so that paths
# under that mount point are readable and writable from this runtime.
# `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
!pip install torch torchvision numpy matplotlib face_recognition tqdm opencv-python




In [5]:
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import face_recognition
from tqdm.autonotebook import tqdm
import json
import glob
import copy



  from tqdm.autonotebook import tqdm


In [14]:
def frame_extract(path):
    """
    Lazily yield the frames of the video at the given path, in order.

    The underlying ``cv2.VideoCapture`` is always released, both when the
    video is exhausted and when the consumer stops iterating early (e.g.
    ``break`` in a ``for`` loop or an explicit ``generator.close()``).

    Args:
        path (str): Path to the video file.
    Yields:
        numpy.ndarray: Frame image as returned by ``cv2.VideoCapture.read``.
        Nothing is yielded if the first read fails (e.g. unreadable file).
    """
    vidObj = cv2.VideoCapture(path)
    try:
        success, image = vidObj.read()
        while success:
            yield image
            success, image = vidObj.read()
    finally:
        # Callers typically stop after a fixed number of frames; without this
        # the capture handle would stay open for every partially-read video.
        vidObj.release()


In [15]:
def _write_face_crops(frames, writer, face_size):
    """
    Detect faces in a batch of frames and write one resized crop per frame.

    Args:
        frames (list[numpy.ndarray]): Frames as decoded by OpenCV (BGR order).
            All frames are expected to share one size -- they come from a
            single video.
        writer (cv2.VideoWriter): Open writer receiving the cropped faces.
        face_size (tuple[int, int]): (width, height) each crop is resized to.
    """
    # OpenCV decodes to BGR while face_recognition expects RGB, so detection
    # runs on RGB copies. The crops written below still come from the BGR
    # originals, which is the channel order VideoWriter expects.
    rgb_frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames]
    detections = face_recognition.batch_face_locations(rgb_frames)

    for frame, locations in zip(frames, detections):
        if len(locations) == 0:  # No face found in this frame
            continue
        # Only the first detection is used; boxes are (top, right, bottom, left).
        top, right, bottom, left = locations[0]
        try:
            cropped_face = cv2.resize(frame[top:bottom, left:right], face_size)
            writer.write(cropped_face)
        except Exception as e:
            # Best effort: a bad crop (e.g. empty box) skips the frame only.
            print(f"Error processing frame: {e}")


def create_face_videos(input_folder, output_folder, max_frames=150,
                       batch_size=4, face_size=(112, 112), fps=30):
    """
    Extract faces from videos in the input folder and save the processed videos in the output folder.

    For every ``*.mp4`` file directly inside ``input_folder`` a video with the
    same file name is written to ``output_folder``, containing the first
    detected face of each processed frame, resized to ``face_size``.
    A video whose output file already exists is skipped -- including an
    output left incomplete by an interrupted earlier run.

    Args:
        input_folder (str): Path to the folder containing input videos.
        output_folder (str): Path to save processed videos (created if missing).
        max_frames (int): Maximum number of leading frames processed per video.
        batch_size (int): Number of frames sent to the face detector at once.
        face_size (tuple[int, int]): (width, height) of the output video.
        fps (int): Frame rate recorded in the output video.
    """
    os.makedirs(output_folder, exist_ok=True)

    video_files = glob.glob(os.path.join(input_folder, '*.mp4'))
    print(f"Found {len(video_files)} videos in {input_folder}")

    for path in tqdm(video_files):
        out_path = os.path.join(output_folder, os.path.basename(path))

        if os.path.exists(out_path):
            print(f"File already exists: {out_path}")
            continue

        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, face_size)
        try:
            frames = []
            for idx, frame in enumerate(frame_extract(path)):
                if idx >= max_frames:  # Exactly max_frames frames are kept
                    break

                frames.append(frame)

                if len(frames) == batch_size:
                    _write_face_crops(frames, out, face_size)
                    frames = []

            # Flush the trailing partial batch; otherwise up to
            # batch_size - 1 frames per video would be silently dropped.
            if frames:
                _write_face_crops(frames, out, face_size)
        finally:
            # Finalise the file even if detection or decoding raised.
            out.release()


In [16]:
# Source dataset root and the root under which processed videos are written.
base_folder = '/content/drive/My Drive/DeepFake Dataset'
output_base_folder = '/content/drive/My Drive/Processed_Videos'

# Every immediate sub-directory of the dataset root is one group of videos.
subfolders = list(
    entry.path
    for entry in os.scandir(base_folder)
    if entry.is_dir()
)

for subfolder in subfolders:
    print(f"Processing folder: {subfolder}")

    # Mirror the sub-directory name under the output root, then extract the
    # face videos for that group.
    output_folder = os.path.join(output_base_folder, os.path.split(subfolder)[1])
    create_face_videos(subfolder, output_folder)


Processing folder: /content/drive/My Drive/DeepFake Dataset/DFDC_Real_Face
Found 1726 videos in /content/drive/My Drive/DeepFake Dataset/DFDC_Real_Face


  0%|          | 0/1726 [00:00<?, ?it/s]

Processing folder: /content/drive/My Drive/DeepFake Dataset/DFDC_Fake_Face
Found 377 videos in /content/drive/My Drive/DeepFake Dataset/DFDC_Fake_Face


  0%|          | 0/377 [00:00<?, ?it/s]

Processing folder: /content/drive/My Drive/DeepFake Dataset/FF_Face
Found 1989 videos in /content/drive/My Drive/DeepFake Dataset/FF_Face


  0%|          | 0/1989 [00:00<?, ?it/s]

Processing folder: /content/drive/My Drive/DeepFake Dataset/Celebrity_Fake_Face
Found 596 videos in /content/drive/My Drive/DeepFake Dataset/Celebrity_Fake_Face


  0%|          | 0/596 [00:00<?, ?it/s]

Processing folder: /content/drive/My Drive/DeepFake Dataset/Celebrity_Real_Face
Found 588 videos in /content/drive/My Drive/DeepFake Dataset/Celebrity_Real_Face


  0%|          | 0/588 [00:00<?, ?it/s]