In [None]:
import os
import cv2
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# Disable oneDNN optimizations for TensorFlow (if needed)
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

In [None]:
# Function to extract frames from videos
def extract_frames(video_path, label, max_frames=10):
    video_frames = []
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    while cap.isOpened() and frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (224, 224))  # Resize frames to 224x224
        if len(frame.shape) == 2:  # Convert grayscale to RGB if needed
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        video_frames.append([frame, label])
        frame_count += 1
    cap.release()
    return video_frames

# Function to load data from directories
def load_data(paths, label):
    start_time = time.time()
    data = []
    for path in paths:
        print(f"Processing path: {path}")
        for video in os.listdir(path):
            video_path = os.path.join(path, video)
            data.extend(extract_frames(video_path, label))
    end_time = time.time()
    print(f"Loaded data from {paths}. Time taken: {end_time - start_time:.2f} seconds")
    return data


In [25]:
# Paths to datasets
real_paths = ["D:\\Dataset_Celeb_df\\Celeb-DF\\Celeb-real", "D:\\Dataset_Celeb_df\\Celeb-DF\\YouTube-real"]
synthetic_paths = ["D:\\Dataset_Celeb_df\\Celeb-DF\\Celeb-synthesis"]

# Load and label data
real_data = load_data(real_paths, 0)  # 0 for real
synthetic_data = load_data(synthetic_paths, 1)  # 1 for synthetic

Processing path: D:\Dataset_Celeb_df\Celeb-DF\Celeb-real
Processing path: D:\Dataset_Celeb_df\Celeb-DF\YouTube-real
Loaded data from ['D:\\Dataset_Celeb_df\\Celeb-DF\\Celeb-real', 'D:\\Dataset_Celeb_df\\Celeb-DF\\YouTube-real']. Time taken: 21.77 seconds
Processing path: D:\Dataset_Celeb_df\Celeb-DF\Celeb-synthesis
Loaded data from ['D:\\Dataset_Celeb_df\\Celeb-DF\\Celeb-synthesis']. Time taken: 39.19 seconds


In [27]:
# Combine and split data
all_data = real_data + synthetic_data
X, y = zip(*all_data)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to numpy arrays
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

# Verify shapes
print(f"X_train shape: {X_train.shape}")  # Should be (num_samples, 224, 224, 3)
print(f"y_train shape: {y_train.shape}")  # Should be (num_samples,)

X_train shape: (7856, 224, 224, 3)
y_train shape: (7856,)


In [29]:
# Data Augmentation for training data
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

# Create the training generator
train_generator = train_datagen.flow(
    X_train,  # Input data (images)
    y_train,  # Labels
    batch_size=32,
    shuffle=True
)

# Validation Data Generator (No Augmentation, Only Rescaling)
test_datagen = ImageDataGenerator(rescale=1./255)

# Create the validation generator
test_generator = test_datagen.flow(
    X_test,  # Input data (images)
    y_test,  # Labels
    batch_size=32,
    shuffle=False
)


In [31]:
# Build the model using EfficientNetB0 (Transfer Learning)
def build_model():
    base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False  # Freeze the base model

    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create the model
model = build_model()

# Compute class weights for imbalanced data
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))

# Learning rate scheduler
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)

In [None]:
history = model.fit(
    train_generator,
    steps_per_epoch=len(X_train) // 32,
    epochs=20,
    validation_data=test_generator,
    validation_steps=len(X_test) // 32,
    callbacks=[reduce_lr]
)

Epoch 1/20
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6735 - loss: 0.6371

In [None]:
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    start_time = time.time()
    loss, accuracy = model.evaluate(test_generator)
    end_time = time.time()
    print(f"Test Accuracy: {accuracy*100:.2f}%")
    print(f"Evaluation completed in {end_time - start_time:.2f} seconds")

evaluate_model(model, X_test, y_test)


In [None]:
# Save the trained model
model.save("deepfake_model_improved.h5")
print("Model saved as deepfake_model_improved.h5")