### Mount Google Drive

In [None]:
# Attach Google Drive to the Colab filesystem so later cells can read/write it
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import torch

# Pick CUDA when a GPU is visible to torch, otherwise fall back to the CPU.
# (On Colab this ends up as 'cpu' when the hardware accelerator is 'None'.)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

# Smoke test: build a small tensor and move it to the chosen device
tensor = torch.tensor([1.0, 2.0])
tensor = tensor.to(device)


In [None]:
# Import necessary libraries
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm.autonotebook import tqdm
import glob

In [None]:
import face_recognition

In [None]:
# Collect every .mp4 under the raw dataset's 'train' directory ...
video_files = glob.glob("/content/drive/MyDrive/raw dataset (videos)/train/*.mp4")

# ... and every .mp4 under its 'test' directory
video_files1 = glob.glob("/content/drive/MyDrive/raw dataset (videos)/test/*.mp4")

# video_files now holds the train paths followed by the test paths
video_files.extend(video_files1)

In [None]:
def frame_extract(path):
    """
    Generator function to extract frames from a video file.

    The underlying capture is released as soon as the generator finishes,
    is closed early by the consumer, or is garbage-collected.

    Args:
    - path: Path to the video file.

    Yields:
    - image: Each frame extracted from the video, in reading order.
      Nothing is yielded if the first read fails (e.g. the file cannot
      be opened).
    """
    vidObj = cv2.VideoCapture(path)
    try:
        while True:
            success, image = vidObj.read()
            if not success:
                # End of stream or read failure: stop iterating.
                break
            yield image
    finally:
        # Runs on exhaustion, on generator.close(), and on errors, so the
        # capture handle is never left open.
        vidObj.release()

In [None]:
def _write_face_crops(out, frames, face_size):
    """Detect faces in a batch of frames and append one resized crop per frame to `out`."""
    faces = face_recognition.batch_face_locations(frames)
    for frame, locations in zip(frames, faces):
        if not locations:
            # No face found in this frame: nothing to write.
            continue
        # Only the first reported face of each frame is used.
        top, right, bottom, left = locations[0]
        try:
            out.write(cv2.resize(frame[top:bottom, left:right, :], face_size))
        except cv2.error as err:
            # Best effort: e.g. an empty crop cannot be resized; skip that frame.
            print("Skipping frame, could not crop/resize face:", err)


def create_face_videos(path_list, train_out_dir, test_out_dir, *,
                       max_frames=60, batch_size=4, face_size=(256, 256), fps=30):
    """
    Function to create cropped face videos from a list of video files.

    For each input video, at most `max_frames` leading frames are read in
    batches of `batch_size`; for every frame in which a face is detected,
    the first detected face is cropped, resized to `face_size` and appended
    to an output video with the same file name as the input. Videos whose
    output file already exists are skipped. A trailing batch with fewer
    than `batch_size` frames is discarded.

    Args:
    - path_list: List of paths to the video files.
    - train_out_dir: Directory to save the cropped face videos for inputs
      whose directory path contains 'train'.
    - test_out_dir: Directory to save the cropped face videos for all
      other inputs.
    - max_frames: Maximum number of leading frames considered per video.
    - batch_size: Number of frames passed to the face detector at once.
    - face_size: (width, height) of the written face crops.
    - fps: Frame rate of the written videos.
    """
    # os.path.join keeps the pattern inside the directory whether or not the
    # caller passed a trailing slash (plain concatenation produced 'train*.mp4').
    already_present_train = glob.glob(os.path.join(train_out_dir, '*.mp4'))
    already_present_test = glob.glob(os.path.join(test_out_dir, '*.mp4'))
    print("Number of train videos already present:", len(already_present_train))
    print("Number of test videos already present:", len(already_present_test))

    for path in tqdm(path_list):
        # Route on the directory part only, so a file whose *name* happens to
        # contain 'train' is not sent to the wrong split.
        out_dir = train_out_dir if 'train' in os.path.dirname(path) else test_out_dir
        out_path = os.path.join(out_dir, os.path.basename(path))
        if os.path.exists(out_path):
            print("File already exists:", out_path)
            continue

        os.makedirs(out_dir, exist_ok=True)
        # NOTE(review): MJPG fourcc is written into a file that keeps the
        # input's extension (.mp4 for the globbed inputs) -- confirm the
        # resulting files are readable by the downstream loader.
        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, face_size)
        try:
            frames = []
            for idx, frame in enumerate(frame_extract(path)):
                if idx >= max_frames:
                    # Stop decoding once enough frames were taken.
                    break
                frames.append(frame)
                if len(frames) == batch_size:
                    _write_face_crops(out, frames, face_size)
                    frames = []
        finally:
            # Always finalize the output file, even if detection fails.
            out.release()

In [None]:
# Write cropped-face videos for every collected video into the
# preprocessed dataset's train/test directories
create_face_videos(
    video_files,
    train_out_dir='/content/drive/MyDrive/preprocessed dataset/train',
    test_out_dir='/content/drive/MyDrive/preprocessed dataset/test',
)