In [1]:
import os
import cv2
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import random

## Extracting video frames and loading them

In [2]:
def extract_frames(video_path, label, max_frames=10):
    video_frames = []
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    while cap.isOpened() and frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (224, 224))
        # Ensure the frame has 3 channels (convert grayscale to RGB)
        if len(frame.shape) == 2:
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        video_frames.append([frame, label])
        frame_count += 1
    cap.release()
    return video_frames


def load_data(paths, label):
    start_time = time.time()
    data = []
    for path in paths:
        print(f"Processing path: {path}")
        for video in os.listdir(path):
            video_path = os.path.join(path, video)
            data.extend(extract_frames(video_path, label))
    end_time = time.time()
    print(f"Loaded data from {paths}. Time taken: {end_time - start_time:.2f} seconds")
    return data


## Paths for datasets

In [3]:
real_paths = ["D:\\Dataset_Celeb_df\\Celeb-DF\\Celeb-real",
             "D:\\Dataset_Celeb_df\\Celeb-DF\\YouTube-real"]
synthetic_paths = ["D:\\Dataset_Celeb_df\\Celeb-DF\\Celeb-synthesis"]

## Loading and labeling data

In [7]:

real_data = load_data(real_paths, 0) # 0 for real
synthetic_data = load_data(synthetic_paths, 1) # 1 for synthetic


Processing path: D:\Dataset_Celeb_df\Celeb-DF\Celeb-real
Processing path: D:\Dataset_Celeb_df\Celeb-DF\YouTube-real
Loaded data from ['D:\\Dataset_Celeb_df\\Celeb-DF\\Celeb-real', 'D:\\Dataset_Celeb_df\\Celeb-DF\\YouTube-real']. Time taken: 23.82 seconds
Processing path: D:\Dataset_Celeb_df\Celeb-DF\Celeb-synthesis
Loaded data from ['D:\\Dataset_Celeb_df\\Celeb-DF\\Celeb-synthesis']. Time taken: 53.75 seconds


## Combine and split data

In [51]:
all_data = real_data + synthetic_data
X, y = zip(*all_data)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# After splitting the data
X_train = np.array([x for x in X_train])  # Convert list of frames to 4D array
X_test = np.array([x for x in X_test])    # Same for test data
y_train = np.array(y_train)
y_test = np.array(y_test)

# Verify shapes
print(f"X_train shape: {X_train.shape}")  # Should be (num_samples, 224, 224, 3)
print(f"y_train shape: {y_train.shape}")  # Should be (num_samples,)

X_train shape: (7856, 224, 224, 3)
y_train shape: (7856,)


## Data Augmentation for training data

In [53]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_generator = train_datagen.flow(np.array(X_train), np.array(y_train), batch_size=32)

def build_model():
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

my_model = build_model()
# Compute class weights
class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Train the model

In [None]:
import tensorflow as tf
import numpy as np
import time

# Validation Data Generator (No Augmentation, Only Rescaling)
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow(
    X_test,  
    y_test,  
    batch_size=32,
    shuffle=False  
)

def train_model(model, train_generator, X_test, y_test, class_weights, epochs=5):
    start_time = time.time()

    # Debug: Check generator output
    X_batch, y_batch = next(iter(train_generator))
    print("X_batch type:", type(X_batch))
    print("X_batch shape:", X_batch.shape)
    print("X_batch dtype:", X_batch.dtype)
    print("y_batch type:", type(y_batch))
    print("y_batch shape:", y_batch.shape)
    print("y_batch dtype:", y_batch.dtype)

    # Normalize test data
    X_test = np.array(X_test, dtype=np.float32) / 255.0
    y_test = np.array(y_test, dtype=np.int32)

    # Fix steps_per_epoch
    steps_per_epoch = train_generator.n // train_generator.batch_size 

    # Training with Proper Validation Generator
    history = my_model.fit(
    train_generator,
    steps_per_epoch=len(X_train) // 32,
    epochs=5,  # Train for more epochs
    validation_data=test_generator,  
    validation_steps=len(X_test) // 32  
)


    end_time = time.time()
    print(f"Training completed in {end_time - start_time:.2f} seconds")
    
    return history

# Call function correctly
train_history = train_model(my_model, train_generator, X_test, y_test, class_weights, epochs=5)



X_batch type: <class 'numpy.ndarray'>
X_batch shape: (32, 224, 224, 3)
X_batch dtype: float32
y_batch type: <class 'numpy.ndarray'>
y_batch shape: (32,)
y_batch dtype: int32


  self._warn_if_super_not_called()


Epoch 1/5
[1m209/245[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m1:41[0m 3s/step - accuracy: 0.6726 - loss: 0.6863

### Evaluate the Model

In [39]:
def evaluate_model(model, X_test, y_test):
    start_time = time.time()
    loss, accuracy = model.evaluate(np.array(X_test) / 255.0, np.array(y_test))
    end_time = time.time()
    print(f"Test Accuracy: {accuracy*100:.2f}%")
    print(f"Evaluation completed in {end_time - start_time:.2f} seconds")

evaluate_model(my_model, X_test, y_test)

[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 740ms/step - accuracy: 0.7556 - loss: 0.5229
Test Accuracy: 74.16%
Evaluation completed in 51.84 seconds


In [41]:
# Save the trained model
my_model.save("deepfake_model.h5")  # Saves in HDF5 format

