<a href="https://colab.research.google.com/github/MANAV8527/COMPARE-THE-PERFORMANCE-OF-SGD-ADAM-AND-RMS-PROP-OPTIMIZERS/blob/main/DeepFake_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Step 1: Importing Required Libraries**

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LSTM, TimeDistributed
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import seaborn as sns

**Step 2: Define Parameters and Video Paths**

In [None]:
# Number of frames per model input sequence, and the size passed to
# cv2.resize for every frame / face crop.
SEQUENCE_LENGTH = 10
FRAME_SIZE = (224, 224)
# Root folder under which the extracted frames of each video are stored.
OUTPUT_FOLDER = '/content/frames'

# Input clips follow a fixed naming scheme: v1..v11 for the "real" list and
# vs1..vs11 for the "fake" list.
video_paths_real = [f"/content/v{n}.mp4" for n in range(1, 12)]
video_paths_fake = [f"/content/vs{n}.mp4" for n in range(1, 12)]

video_paths = video_paths_real + video_paths_fake

# Fail fast on the first missing upload instead of part-way through
# frame extraction.
for path in video_paths:
    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")
    print(f"[✓] Found: {path}")


[✓] Found: /content/v1.mp4
[✓] Found: /content/v2.mp4
[✓] Found: /content/v3.mp4
[✓] Found: /content/v4.mp4
[✓] Found: /content/v5.mp4
[✓] Found: /content/v6.mp4
[✓] Found: /content/v7.mp4
[✓] Found: /content/v8.mp4
[✓] Found: /content/v9.mp4
[✓] Found: /content/v10.mp4
[✓] Found: /content/v11.mp4
[✓] Found: /content/vs1.mp4
[✓] Found: /content/vs2.mp4
[✓] Found: /content/vs3.mp4
[✓] Found: /content/vs4.mp4
[✓] Found: /content/vs5.mp4
[✓] Found: /content/vs6.mp4
[✓] Found: /content/vs7.mp4
[✓] Found: /content/vs8.mp4
[✓] Found: /content/vs9.mp4
[✓] Found: /content/vs10.mp4
[✓] Found: /content/vs11.mp4


**Step 3: Extract Frames from Videos**

In [None]:
def extract_frames(video_path, output_folder):
    """Dump every frame of a video to ``output_folder`` as JPEG files.

    Frames are written as ``0.jpg``, ``1.jpg``, ... in decoding order.

    Args:
        video_path: Path of the video file to decode.
        output_folder: Directory receiving the JPEG files; it is created
            if it does not exist yet.

    Returns:
        The number of frames successfully written. This is 0 when the
        video cannot be opened or contains no decodable frame.
    """
    if output_folder:
        # cv2.imwrite reports failure only through its return value, so make
        # sure the destination exists up front.
        os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    frames_written = 0
    try:
        frame_index = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a decode failure): stop reading.
                break
            frame_path = os.path.join(output_folder, f"{frame_index}.jpg")
            if cv2.imwrite(frame_path, frame):
                frames_written += 1
            # The index always advances so file names keep matching the
            # frame's position in the video.
            frame_index += 1
    finally:
        # Release the decoder even if reading or writing raised.
        cap.release()
    return frames_written


**Step 4: Preprocess Frames**

In [None]:
# Lazily created, shared Haar cascade face detector (see _get_face_cascade).
_FACE_CASCADE = None


def _get_face_cascade():
    """Return the frontal-face Haar cascade, loading it on first use only."""
    global _FACE_CASCADE
    if _FACE_CASCADE is None:
        _FACE_CASCADE = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    return _FACE_CASCADE


def preprocess_frames(frame_path):
    """Turn one frame image into a list of normalised model inputs.

    Args:
        frame_path: Path of the image file to load.

    Returns:
        A list of arrays resized to ``FRAME_SIZE`` with values scaled to
        the range [0, 1]:

        * ``[]`` when the image cannot be read;
        * a single entry holding the whole frame when no face is detected;
        * otherwise one entry per detected face, cropped to its bounding
          box.

        The channel order is left as loaded by ``cv2.imread`` (BGR).
    """
    # Loading the cascade XML is expensive and this function runs once per
    # frame, so the detector is created once and reused.
    face_cascade = _get_face_cascade()

    img = cv2.imread(frame_path)
    if img is None:
        return []

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)

    # detectMultiScale may return an empty tuple or an ndarray, so test the
    # length rather than the truthiness.
    if len(faces) == 0:
        return [cv2.resize(img, FRAME_SIZE) / 255.0]

    return [
        cv2.resize(img[y:y+h, x:x+w], FRAME_SIZE) / 255.0
        for (x, y, w, h) in faces
    ]


**Step 5: Load Dataset (Frames + Labels)**

In [None]:
def load_dataset(video_paths, output_folder, fake_prefix="vs"):
    """Extract, preprocess and label the frames of every video.

    Each video is decoded into ``<output_folder>/<video name>/`` and every
    extracted frame is passed through ``preprocess_frames``.

    Args:
        video_paths: Iterable of video file paths.
        output_folder: Root directory for the extracted frames.
        fake_prefix: File-name prefix identifying fake videos. Videos whose
            base name starts with it get label 1, all others label 0.

    Returns:
        A ``(frames, labels)`` tuple of NumPy arrays with one label per
        preprocessed frame / face crop, in video order and, within a video,
        in frame order.
    """
    def frame_order(file_name):
        # Frames are named "<index>.jpg"; a plain string sort would put
        # "10.jpg" before "2.jpg" and scramble the temporal order.
        stem = os.path.splitext(file_name)[0]
        if stem.isdecimal():
            return (0, int(stem), stem)
        return (1, 0, stem)

    frames = []
    labels = []
    for video_path in video_paths:
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        # Label from the file name itself. A substring test such as
        # `"v1" in path` misses real videos like v3..v9.
        label = 1 if video_name.startswith(fake_prefix) else 0

        video_output_folder = os.path.join(output_folder, video_name)
        os.makedirs(video_output_folder, exist_ok=True)
        extract_frames(video_path, video_output_folder)

        frame_files = sorted(
            (f for f in os.listdir(video_output_folder) if f.endswith('.jpg')),
            key=frame_order,
        )
        for f in frame_files:
            faces = preprocess_frames(os.path.join(video_output_folder, f))
            # A frame may yield zero, one or several crops; each gets the
            # label of its source video.
            frames.extend(faces)
            labels.extend([label] * len(faces))
    return np.array(frames), np.array(labels)


**Step 6: Create Sequences**

In [None]:
def create_sequences_per_video(frames, labels, sequence_length):
    """Group consecutive frames into fixed-length, non-overlapping sequences.

    Frames are consumed in order. Every time ``sequence_length`` frames have
    been collected they are emitted as one sequence, labelled with the label
    of the frame at position ``sequence_length // 2`` of that group.
    Trailing frames that do not fill a complete group are dropped.

    Args:
        frames: Indexable collection of frames.
        labels: Indexable collection with one label per frame.
        sequence_length: Number of frames per sequence.

    Returns:
        A ``(sequences, sequence_labels)`` tuple of NumPy arrays.
    """
    windows = []
    window_labels = []
    pending_frames = []
    pending_labels = []

    total = len(frames)
    position = 0
    while position < total:
        pending_frames.append(frames[position])
        pending_labels.append(labels[position])
        position += 1

        if position % sequence_length != 0:
            continue

        # A full group has been collected: emit it and start a new one.
        windows.append(np.array(pending_frames))
        window_labels.append(pending_labels[sequence_length // 2])
        pending_frames = []
        pending_labels = []

    return np.array(windows), np.array(window_labels)

**Step 7: Prepare Training and Validation Data**

In [None]:
frames, labels = load_dataset(video_paths, OUTPUT_FOLDER)
print(f"Total frames: {len(frames)}, Labels: {len(labels)}")

# Build the sequences while the frames are still in their original order.
# Shuffling or splitting individual frames first would turn every
# "sequence" into random frames drawn from different videos and classes,
# leaving the LSTM no temporal signal to learn from.
all_seq, all_seq_labels = create_sequences_per_video(frames, labels, SEQUENCE_LENGTH)

# Shuffle and split at sequence level (train_test_split shuffles by default).
# NOTE: sequences from the same video can still land on both sides of the
# split; a per-video split would be needed to rule that out.
train_seq, val_seq, train_seq_labels, val_seq_labels = train_test_split(
    all_seq, all_seq_labels, test_size=0.2, random_state=42)

print(f"Train shape: {train_seq.shape}, Validation shape: {val_seq.shape}")


Total frames: 2358, Labels: 2358


**Step 8: Build CNN-LSTM Model**

In [None]:
# CNN-LSTM classifier: the same small CNN is applied to every frame of a
# sequence (TimeDistributed), the per-frame feature vectors are fed to an
# LSTM, and a sigmoid unit outputs a single probability per sequence.
model = Sequential([
    # The input shape follows FRAME_SIZE so it stays in sync with the
    # preprocessing. cv2.resize takes (width, height) whereas the image
    # arrays are laid out as (height, width, channels).
    TimeDistributed(Conv2D(32, (3, 3), activation='relu'),
                    input_shape=(SEQUENCE_LENGTH, FRAME_SIZE[1], FRAME_SIZE[0], 3)),
    TimeDistributed(MaxPooling2D((2, 2))),
    TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
    TimeDistributed(MaxPooling2D((2, 2))),
    TimeDistributed(Flatten()),
    LSTM(64),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])


NameError: name 'Sequential' is not defined

**Step 9: Train the Model**

In [None]:
# Stop once the validation loss has not improved for 5 epochs and roll the
# model back to its best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Keep only the best checkpoint on disk, judged by the callback's default
# monitored metric since none is given.
checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)
callbacks = [early_stopping, checkpoint]

history = model.fit(
    x=train_seq,
    y=train_seq_labels,
    validation_data=(val_seq, val_seq_labels),
    epochs=10,
    batch_size=8,
    callbacks=callbacks,
)


**Step 10: Plot Accuracy and Loss Graph**

In [None]:
def plot_history(history):
    """Plot training/validation loss and accuracy side by side.

    Args:
        history: Object whose ``history`` attribute maps the keys ``loss``,
            ``val_loss``, ``accuracy`` and ``val_accuracy`` to per-epoch
            values (as returned by ``model.fit``).
    """
    metrics = history.history
    # One entry per subplot: (title, y-axis label, curves), where each curve
    # is (history key, legend label, colour).
    panels = (
        ("Loss Over Epochs", "Loss",
         (('loss', 'Train Loss', 'red'),
          ('val_loss', 'Val Loss', 'blue'))),
        ("Accuracy Over Epochs", "Accuracy",
         (('accuracy', 'Train Accuracy', 'green'),
          ('val_accuracy', 'Val Accuracy', 'orange'))),
    )

    plt.figure(figsize=(12, 5))
    for slot, (title, y_label, curves) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        for key, legend_label, colour in curves:
            plt.plot(metrics[key], label=legend_label, color=colour, linewidth=2)
        plt.title(title)
        plt.xlabel("Epochs")
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()


plot_history(history)


**Step 11: Evaluate the Model**

In [None]:
# Predicted probabilities for the validation sequences, rounded to hard
# 0/1 class labels in a flat array.
preds = model.predict(val_seq)
preds_class = np.round(preds.flatten())

acc = accuracy_score(val_seq_labels, preds_class)
report = classification_report(val_seq_labels, preds_class)

print(f"\n🎯 Accuracy: {acc:.2f}")
print("\n📋 Classification Report:\n", report)


def plot_confusion_matrix(true_labels, pred_labels):
    """Draw the confusion matrix of the predictions as an annotated heatmap.

    Args:
        true_labels: Ground-truth class labels.
        pred_labels: Predicted class labels, aligned with ``true_labels``.
    """
    class_names = ['Real', 'Fake']
    matrix = confusion_matrix(true_labels, pred_labels)

    plt.figure(figsize=(6, 5))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='coolwarm',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()


plot_confusion_matrix(val_seq_labels, preds_class)


**Step 12: Show Sample Predictions**

In [None]:
def show_sample_predictions(sequences, labels, preds, num_samples=5):
    """Display randomly chosen sequences with their true and predicted class.

    Each chosen sequence is drawn as one row of frames. The title is green
    when the prediction matches the label and red otherwise.

    Args:
        sequences: Indexable collection of frame sequences.
        labels: True label per sequence (0 = real, anything else = fake).
        preds: Predicted class per sequence; each value is converted with
            ``int``.
        num_samples: Maximum number of sequences to show. Fewer are shown
            when fewer sequences are available.
    """
    # Sampling without replacement raises if more samples are requested
    # than exist, so clamp the request to the population size.
    sample_count = min(num_samples, len(sequences))
    if sample_count <= 0:
        return

    idxs = np.random.choice(len(sequences), sample_count, replace=False)
    for idx in idxs:
        seq = sequences[idx]
        label = labels[idx]
        pred = int(preds[idx])

        plt.figure(figsize=(12, 1))
        for j in range(len(seq)):
            plt.subplot(1, len(seq), j+1)
            # Frames come from cv2.imread (BGR) while matplotlib expects
            # RGB, so reverse the channel axis for display.
            plt.imshow(np.asarray(seq[j])[..., ::-1])
            plt.axis('off')
        plt.suptitle(f"True: {'Real' if label==0 else 'Fake'} | Predicted: {'Real' if pred==0 else 'Fake'}",
                     fontsize=12, color='green' if label==pred else 'red')
        plt.tight_layout()
        plt.show()

# Show a few random validation sequences with their true and predicted labels.
show_sample_predictions(val_seq, val_seq_labels, preds_class)


In [None]:
# Colab-only: mount the user's Google Drive at /content/drive.
# NOTE(review): nothing above reads from or writes to the Drive mount —
# confirm whether this cell is still needed or should run earlier.
from google.colab import drive
drive.mount('/content/drive')