<a href="https://colab.research.google.com/github/AnuBaluguri/DeepFake-Detection/blob/main/PreprocessingForVideos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from google.colab import drive


In [None]:
# Mount Google Drive at /content/drive so the dataset paths used below resolve.
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Dataset locations on the mounted Google Drive.
# Both video folders live under the same project directory, so the shared
# prefix is spelled out once and the two class folders are derived from it.
DATA_ROOT = '/content/drive/MyDrive/Deepfake Detection Data'
deepfake_videos_path = f'{DATA_ROOT}/Deepfakes/videos'
original_videos_path = f'{DATA_ROOT}/Original/videos'

In [None]:
# Helper function to extract frames from videos
def extract_frames_recursive(folder_path, label, frame_limit=30, img_size=(299, 299)):
    """
    Extract the leading frames of every video found under ``folder_path``.

    Walks ``folder_path`` and all of its subfolders, reads up to ``frame_limit``
    frames from the start of each video, resizes every frame to ``img_size`` and
    divides the pixel values by 255.

    Folders and files are visited in sorted order, so repeated runs return the
    frames in the same order regardless of how the filesystem lists entries
    (this keeps any seeded shuffle/split done by the caller reproducible).
    Video extensions are matched case-insensitively ('.MP4' is accepted).

    Args:
        folder_path: Path to the folder containing videos and subfolders.
        label: Class label attached to every extracted frame
            (0 for original, 1 for deepfake).
        frame_limit: Maximum number of frames to extract per video; shorter or
            unreadable videos contribute fewer (possibly zero) frames.
        img_size: Target size handed to ``cv2.resize`` as ``dsize``
            (default: 299x299 for XceptionNet).
    Returns:
        frames: List of processed frames, one array per frame.
        labels: List with one ``label`` entry per returned frame.

    NOTE(review): OpenCV decodes frames in BGR channel order and no BGR->RGB
    conversion is applied here -- confirm the downstream model expects that.
    NOTE(review): dividing by 255.0 presumably yields float64 arrays (assuming
    uint8 input frames), which is memory hungry for large datasets -- consider
    float32 if the consumer allows it.
    """
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv')  # Supported video formats
    frames = []
    labels = []

    for root, dirs, files in os.walk(folder_path):
        # Sorting in place fixes the order in which os.walk descends into subfolders.
        dirs.sort()
        for video_file in sorted(files):
            if not video_file.lower().endswith(video_extensions):
                continue

            video_file_path = os.path.join(root, video_file)
            cap = cv2.VideoCapture(video_file_path)
            try:
                count = 0
                while count < frame_limit and cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        # End of stream or decode failure: keep what was read so far.
                        break
                    frame = cv2.resize(frame, img_size)  # Resize to the requested size
                    frame = frame / 255.0  # Normalize to [0, 1]
                    frames.append(frame)
                    labels.append(label)
                    count += 1
            finally:
                # Release the capture even if resizing/normalizing raised.
                cap.release()

    return frames, labels



In [None]:
# Build the positive class (label 1) from the deepfake videos.
print("Processing deepfake videos...")
deepfake_frames, deepfake_labels = extract_frames_recursive(
    folder_path=deepfake_videos_path,
    label=1,
)


Processing deepfake videos...


In [None]:
# Build the negative class (label 0) from the original, unmanipulated videos.
print("Processing original videos...")
original_frames, original_labels = extract_frames_recursive(
    folder_path=original_videos_path,
    label=0,
)


Processing original videos...


In [None]:
# Combine both classes into single arrays, deepfake samples first.
# Nothing is shuffled in this cell; the two classes stay in contiguous runs.
frames = np.array([*deepfake_frames, *original_frames])
labels = np.array([*deepfake_labels, *original_labels])


In [None]:
# Convert integer class labels (0 = original, 1 = deepfake) to one-hot vectors.
# The cell overwrites `labels` with its own encoding, so it is guarded: on a
# re-run the labels are already 2-D and encoding them again would add a
# spurious extra axis.
if labels.ndim == 1:
    labels = to_categorical(labels, num_classes=2)


In [None]:
# Hold out 20% of the frames for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is made per frame, so frames taken from the same video
# can end up in both the training and the test set -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    frames,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Persist the four split arrays to Google Drive, one .npy file per array,
# each named after the variable it holds.
for split_name, split_array in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    np.save(f'/content/drive/MyDrive/Deepfake Detection Data/{split_name}.npy', split_array)


In [None]:
# Final status message, shown once the preceding cells (including the np.save calls) have run.
print("Preprocessing complete. Data saved to Google Drive.")


Preprocessing complete. Data saved to Google Drive.
