In [None]:
# Download the reubensuju/celeb-df-v2 dataset archive from Kaggle.
# -p pins the download folder to /content, which is where the unzip step
# expects to find celeb-df-v2.zip, instead of relying on the kernel's current
# working directory.
!kaggle datasets download -d reubensuju/celeb-df-v2 -p /content

In [None]:
# Extract the archive into /content, where the later cells look for the
# YouTube-real/ and Celeb-synthesis/ folders.
# -n never overwrites existing files, so re-running this cell does not stop at
# unzip's interactive "replace?" prompt; -d makes the target independent of the
# current working directory.
!unzip -q -n /content/celeb-df-v2.zip -d /content

In [None]:
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

# Create the directory if it doesn't exist
os.makedirs("/content/frame", exist_ok=True)

# Sanity check on one real video: write every frame as frame1.jpg, frame2.jpg, ...
cap = cv2.VideoCapture("/content/YouTube-real/00004.mp4")
if not cap.isOpened():
    # Without this check a wrong path silently produces zero frames.
    cap.release()
    raise FileNotFoundError("Could not open video: /content/YouTube-real/00004.mp4")

c = 1  # 1-based index used in the output file name
try:
    while True:
        ret, frame = cap.read()
        if not ret:  # no more frames could be read
            break
        # imwrite reports failure through its return value rather than raising.
        if not cv2.imwrite(f"/content/frame/frame{c}.jpg", frame):
            raise OSError(f"Could not write /content/frame/frame{c}.jpg")
        c += 1
finally:
    # Release the capture even if a write fails part-way through.
    cap.release()
# No cv2.destroyAllWindows() here: this cell never opens a HighGUI window.


In [None]:
import shutil as s

# Delete the scratch frame directory. A missing directory is tolerated so the
# cell can be re-run (or run on a fresh kernel) without raising.
try:
    s.rmtree("/content/frame")
except FileNotFoundError:
    pass

In [None]:
import os
import shutil

def copy_videos(src_directory, dest_directory, limit=300):
    """Copy up to ``limit`` video files from one directory into another.

    Candidates are regular files directly inside ``src_directory`` whose name
    ends with a known video extension (case-insensitive). They are taken in
    sorted filename order so the same subset is selected on every run;
    ``os.listdir`` on its own gives no ordering guarantee.

    Args:
        src_directory (str): Directory to scan (not recursive).
        dest_directory (str): Directory to copy into; created if missing.
        limit (int | None): Maximum number of videos to copy. ``None`` copies
            every video; values <= 0 copy nothing.

    Returns:
        None
    """
    # Ensure the destination directory exists
    os.makedirs(dest_directory, exist_ok=True)

    video_extensions = ('.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv')
    video_files = sorted(
        name
        for name in os.listdir(src_directory)
        if name.lower().endswith(video_extensions)
        # A directory called e.g. "clips.mp4" would make copy2 fail; skip it.
        and os.path.isfile(os.path.join(src_directory, name))
    )

    if limit is not None:
        # max() keeps a negative limit meaning "copy nothing" instead of
        # turning into a from-the-end slice.
        video_files = video_files[:max(limit, 0)]

    for video_file in video_files:
        src_path = os.path.join(src_directory, video_file)
        dest_path = os.path.join(dest_directory, video_file)
        shutil.copy2(src_path, dest_path)  # copy2 also preserves file metadata
        print(f"Copied: {video_file} to {dest_directory}")

# Example usage: stage the real and the synthesized videos, one call per class.
for source_dir, staged_dir in (
    ('/content/YouTube-real', '/content/Dataset/Real_Videos'),
    ('/content/Celeb-synthesis', '/content/Dataset/Fake_Videos'),
):
    copy_videos(source_dir, staged_dir)


In [None]:
import os
import cv2

def video_to_frames(video_dir, output_dir, frame_limit=64):
    """
    Converts videos in a given directory to frames, limiting to a specified number of frames.

    For each video, every ``frame_interval``-th frame is saved, where the
    interval is the reported frame count divided by ``frame_limit`` (at least
    1). Frames are written to ``output_dir`` as ``<video stem>_frameNNNN.jpg``.

    Args:
        video_dir (str): Path to the directory containing video files.
        output_dir (str): Path to the directory where frames will be saved.
        frame_limit (int): Maximum number of frames to extract from each video.

    Returns:
        None

    Raises:
        ValueError: If ``frame_limit`` is less than 1.
    """
    # Reject 0 up front: it would otherwise surface as a ZeroDivisionError
    # when the sampling interval is computed.
    if frame_limit < 1:
        raise ValueError(f"frame_limit must be a positive integer, got {frame_limit!r}")

    os.makedirs(output_dir, exist_ok=True)

    video_extensions = ('.mp4', '.avi', '.mov', '.mkv')  # Add more extensions as needed

    for filename in sorted(os.listdir(video_dir)):
        # Case-insensitive match so files such as "CLIP.MP4" are not skipped.
        if not filename.lower().endswith(video_extensions):
            continue

        video_path = os.path.join(video_dir, filename)
        stem = os.path.splitext(filename)[0]
        cap = cv2.VideoCapture(video_path)
        saved_count = 0
        try:
            if not cap.isOpened():
                print(f"Could not open video: {video_path}")
                continue

            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_interval = max(1, frame_count // frame_limit)  # Calculate frame interval

            count = 0  # index of the frame just read
            while saved_count < frame_limit:
                ret, frame = cap.read()
                if not ret:
                    break

                if count % frame_interval == 0:
                    frame_filename = os.path.join(output_dir, f"{stem}_frame{saved_count:04d}.jpg")
                    # Count a frame only if it was actually written; imwrite
                    # reports failure through its return value.
                    if cv2.imwrite(frame_filename, frame):
                        saved_count += 1

                count += 1
        finally:
            # Release the capture even if reading or writing raised.
            cap.release()

        print(f"Processed {filename}: saved {saved_count} frames.")

    print("Processing complete.")

# Example usage: extract frames for the fake videos first, then the real ones.
for staged_dir, frames_dir in (
    ('/content/Dataset/Fake_Videos', '/content/Frames/Fake_Videos'),
    ('/content/Dataset/Real_Videos', '/content/Frames/Real_Videos'),
):
    video_to_frames(staged_dir, frames_dir)


In [None]:
import cv2
import os

def detect_and_crop_faces(input_dir, output_dir, cascade_path='/content/haarcascade_frontalface_alt.xml'):
    """Detect faces in every image of a directory and save each face as a JPEG.

    Each readable image in ``input_dir`` is converted to grayscale and passed
    to a Haar cascade detector. Every detected rectangle is cropped from the
    original colour image and written to ``output_dir`` as
    ``<image stem>_face_<i>.jpg``. Files that cannot be read as images are
    reported and skipped.

    Args:
        input_dir (str): Directory containing the input images.
        output_dir (str): Directory for the cropped faces; created if missing.
        cascade_path (str): Path to the Haar cascade XML file.

    Returns:
        None

    Raises:
        ValueError: If no cascade could be loaded from ``cascade_path``.
    """
    # Load Haar Cascade for face detection
    face_cascade = cv2.CascadeClassifier(cascade_path)
    # A missing or invalid XML file does not raise here; it leaves an empty
    # classifier that would only fail later inside detectMultiScale.
    if face_cascade.empty():
        raise ValueError(f"Could not load Haar cascade from {cascade_path!r}")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Iterate through each file in the input directory (sorted for stable logs)
    for filename in sorted(os.listdir(input_dir)):
        file_path = os.path.join(input_dir, filename)

        # Read the image
        image = cv2.imread(file_path)
        if image is None:
            print(f"Could not read image: {file_path}")
            continue

        # Convert image to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Detect faces
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

        # Crop and save each detected face
        stem = os.path.splitext(filename)[0]
        for i, (x, y, w, h) in enumerate(faces):
            face = image[y:y+h, x:x+w]
            output_file_path = os.path.join(output_dir, f"{stem}_face_{i}.jpg")
            # imwrite reports failure through its return value; surface it.
            if not cv2.imwrite(output_file_path, face):
                print(f"Could not write face crop: {output_file_path}")

    print("Face detection and cropping completed.")

# Example usage: crop faces from the fake frames first, then the real ones.
for frames_dir, faces_dir in (
    ("/content/Frames/Fake_Videos", "/content/Face_Cropped/Fake"),
    ("/content/Frames/Real_Videos", "/content/Face_Cropped/Real"),
):
    detect_and_crop_faces(frames_dir, faces_dir)


In [None]:
# Report how many files each stage produced: frames (fake, real), then face
# crops (fake, real), one count per line.
for counted_dir in (
    "/content/Frames/Fake_Videos",
    "/content/Frames/Real_Videos",
    "/content/Face_Cropped/Fake",
    "/content/Face_Cropped/Real",
):
    print(len(os.listdir(counted_dir)))

In [None]:
# Recursively archive /content/Dataset (the copied source videos) into
# dataset.zip, written relative to the current working directory.
# NOTE(review): the extracted frames (/content/Frames) and the face crops
# (/content/Face_Cropped) are not included in this archive — confirm that is
# the intended export.
!zip -r dataset.zip /content/Dataset