In [2]:
import cv2
import os
from glob import glob

def extract_frames(video_path, output_folder, frame_interval=10):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_count % frame_interval == 0:
            frame_path = os.path.join(output_folder, f"{os.path.basename(video_path)}_frame_{frame_count}.jpg")
            cv2.imwrite(frame_path, frame)
        frame_count += 1

    cap.release()
    print(f"Frames extracted for {video_path}")

# Process Real Videos
real_videos = glob("Deepfake-Dataset/Celeb-real/*.mp4") + glob("Deepfake-Dataset/YouTube-real/*.mp4")
for video in real_videos:
    extract_frames(video, "dataset/frames/real")

# Process Fake Videos
fake_videos = glob("Deepfake-Dataset/Celeb-synthesis/*.mp4")
for video in fake_videos:
    extract_frames(video, "dataset/frames/fake")


Frames extracted for Deepfake-Dataset/Celeb-real\id0_0000.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id0_0001.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id0_0002.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id0_0003.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id0_0004.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id0_0005.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id0_0006.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id0_0007.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id0_0008.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id0_0009.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id10_0000.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id10_0001.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id10_0002.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id10_0003.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id10_0004.mp4
Frames extracted for Deepfake-Dataset/Celeb-real\id10_0005.mp4
Fr

**Resizing the Images**

In [3]:
from PIL import Image
import glob

def resize_images(input_folder, output_folder, img_size=(224, 224)):
    os.makedirs(output_folder, exist_ok=True)
    image_paths = glob.glob(os.path.join(input_folder, "*.jpg"))

    for img_path in image_paths:
        img = Image.open(img_path)
        img = img.resize(img_size)
        img.save(os.path.join(output_folder, os.path.basename(img_path)))

# Resize real and fake images
resize_images("dataset/frames/real", "dataset/train/real")
resize_images("dataset/frames/fake", "dataset/train/fake")
resize_images("dataset/frames/real", "dataset/test/real")
resize_images("dataset/frames/fake", "dataset/test/fake")


In [21]:
import os
import shutil

# Define paths
resized_folder = "dataset/resized_frames"
real_folder = os.path.join(resized_folder, "real")
fake_folder = os.path.join(resized_folder, "fake")

# Create necessary directories
os.makedirs(real_folder, exist_ok=True)
os.makedirs(fake_folder, exist_ok=True)

# Function to move resized images while keeping real and fake separate
def move_resized_images(source_folder, dest_folder):
    for category in ["real", "fake"]:
        category_path = os.path.join(source_folder, category)
        dest_category_path = os.path.join(dest_folder, category)

        if os.path.exists(category_path):
            os.makedirs(dest_category_path, exist_ok=True)  # Ensure destination exists

            for file in os.listdir(category_path):
                shutil.move(os.path.join(category_path, file), os.path.join(dest_category_path, file))

            print(f"Moved resized {category} images from {source_folder} to {dest_category_path}")

# Move images from train and test folders to resized_frames
move_resized_images("dataset/train", resized_folder)
move_resized_images("dataset/test", resized_folder)


Moved resized real images from dataset/train to dataset/resized_frames\real
Moved resized fake images from dataset/train to dataset/resized_frames\fake
Moved resized real images from dataset/test to dataset/resized_frames\real
Moved resized fake images from dataset/test to dataset/resized_frames\fake


In [5]:
import glob
import random

def split_resized_data(source_folder, train_folder, test_folder, split_ratio=0.8):
    for category in ["real", "fake"]:
        category_path = os.path.join(source_folder, category)
        train_category_path = os.path.join(train_folder, category)
        test_category_path = os.path.join(test_folder, category)

        if not os.path.exists(category_path):
            continue

        os.makedirs(train_category_path, exist_ok=True)
        os.makedirs(test_category_path, exist_ok=True)

        image_paths = glob.glob(os.path.join(category_path, "*.jpg"))
        random.shuffle(image_paths)

        split_point = int(len(image_paths) * split_ratio)
        train_images = image_paths[:split_point]
        test_images = image_paths[split_point:]

        for img in train_images:
            shutil.move(img, os.path.join(train_category_path, os.path.basename(img)))

        for img in test_images:
            shutil.move(img, os.path.join(test_category_path, os.path.basename(img)))

        print(f"Split {category}: {len(train_images)} train, {len(test_images)} test")

# Now split correctly into train and test
split_resized_data("dataset/resized_frames", "dataset/train", "dataset/test")


Split real: 14175 train, 3544 test
Split fake: 25305 train, 6327 test


In [6]:
print("Train Real:", len(os.listdir("dataset/train/real")))
print("Train Fake:", len(os.listdir("dataset/train/fake")))
print("Test Real:", len(os.listdir("dataset/test/real")))
print("Test Fake:", len(os.listdir("dataset/test/fake")))



Train Real: 14175
Train Fake: 25305
Test Real: 3544
Test Fake: 6327


In [7]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import os


In [8]:
train_dir = "dataset/train"
test_dir = "dataset/test"
img_size = (224, 224)  # Adjust if needed
batch_size = 32


In [9]:
train_datagen = ImageDataGenerator(rescale=1.0/255, rotation_range=20, zoom_range=0.2, horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1.0/255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary'
)


Found 39480 images belonging to 2 classes.
Found 9871 images belonging to 2 classes.


In [11]:
from tensorflow.keras import Input

model = Sequential([
    Input(shape=(224, 224, 3)),  # Explicit Input layer
    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    
    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification (Real or Fake)
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()  # Show model architecture


In [12]:
history = model.fit(train_generator, validation_data=test_generator, epochs=5)


  self._warn_if_super_not_called()


Epoch 1/5
[1m1234/1234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1856s[0m 2s/step - accuracy: 0.7255 - loss: 0.5921 - val_accuracy: 0.8287 - val_loss: 0.4302
Epoch 2/5
[1m1234/1234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1221s[0m 990ms/step - accuracy: 0.8319 - loss: 0.4156 - val_accuracy: 0.8193 - val_loss: 0.4268
Epoch 3/5
[1m1234/1234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1281s[0m 1s/step - accuracy: 0.8519 - loss: 0.3813 - val_accuracy: 0.8571 - val_loss: 0.3761
Epoch 4/5
[1m1234/1234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1213s[0m 983ms/step - accuracy: 0.8589 - loss: 0.3693 - val_accuracy: 0.8660 - val_loss: 0.3586
Epoch 5/5
[1m1234/1234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1097s[0m 888ms/step - accuracy: 0.8642 - loss: 0.3564 - val_accuracy: 0.8708 - val_loss: 0.3557


In [13]:
model.save("deepfake_model.h5")




In [14]:
model.save("deepfake_model.keras")


In [15]:
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc*100:.2f}%")


[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 119ms/step - accuracy: 0.8734 - loss: 0.3499
Test Accuracy: 87.08%


In [19]:
from tensorflow.keras.preprocessing import image
import numpy as np

img_path = "fake_10009.jpg"  # Change this to your image path
img = image.load_img(img_path, target_size=(224, 224))  # Match the model's input size
img_array = image.img_to_array(img) / 255.0
img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

prediction = model.predict(img_array)
print("Deepfake" if prediction > 0.5 else "Real")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Real
