# 🧠 Deepfake Detection using EfficientNetB4
This notebook extracts the first frame of each Celeb-DF v2 video and trains a classification head on top of a frozen, ImageNet-pretrained EfficientNetB4 to label each frame as real or fake.

In [None]:
import os
import zipfile

from google.colab import drive

# Make Google Drive available under /content/drive so the archive can be read.
drive.mount('/content/drive')

# Where the zipped dataset lives on Drive, and where it is unpacked locally.
zip_path = "/content/drive/MyDrive/deepfake_project1/archive.zip"
extract_path = "/content/celeb-df-v2"

# Unpack every member of the archive; the `with` block closes the zip file.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

print("✅ Dataset extracted to:", extract_path)


In [None]:
import cv2
import os

# One output folder per class; both are created up front (no error if present).
real_frame_dir = "/content/frames/real"
fake_frame_dir = "/content/frames/fake"
for frame_dir in (real_frame_dir, fake_frame_dir):
    os.makedirs(frame_dir, exist_ok=True)

# Function to extract 1 frame per video
def extract_frames(video_dir, output_dir, label):
    """Save the first frame of every ``.mp4`` file in *video_dir* as a JPEG.

    Frames are written to *output_dir* as ``{label}_{n}.jpg`` where ``n`` is
    the number of frames saved so far, starting at 0.

    Args:
        video_dir: Directory scanned (non-recursively) for ``.mp4`` files.
        output_dir: Existing directory that receives the JPEG files.
        label: Prefix used for the output file names (e.g. ``"real"``).

    Returns:
        The number of frames that were actually written to disk.
    """
    count = 0
    skipped = 0
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # video -> frame index mapping identical from one run to the next.
    for filename in sorted(os.listdir(video_dir)):
        if not filename.lower().endswith('.mp4'):
            continue
        cap = cv2.VideoCapture(os.path.join(video_dir, filename))
        try:
            success, frame = cap.read()
            if not success:
                # Unreadable or empty video: nothing to save.
                skipped += 1
                continue
            frame_path = os.path.join(output_dir, f"{label}_{count}.jpg")
            # imwrite returns False when the image could not be saved, so
            # only advance the index for frames that really exist on disk.
            if cv2.imwrite(frame_path, frame):
                count += 1
            else:
                skipped += 1
        finally:
            # Release the capture even if reading or writing raised.
            cap.release()
    if skipped:
        print(f"⚠️ {label}: skipped {skipped} video(s) that could not be read or saved.")
    return count

# Run the extraction once per class: (video folder, frame folder, file prefix).
for video_root, frame_root, prefix in (
    ("/content/celeb-df-v2/Celeb-real", real_frame_dir, "real"),
    ("/content/celeb-df-v2/Celeb-synthesis", fake_frame_dir, "fake"),
):
    extract_frames(video_root, frame_root, prefix)
print("✅ Frame extraction completed.")


In [None]:
import os
import shutil
import random

# Target layout: /content/frames/{train,val}/{real,fake}
train_real = "/content/frames/train/real"
train_fake = "/content/frames/train/fake"
val_real = "/content/frames/val/real"
val_fake = "/content/frames/val/fake"

# Make sure all four class folders exist before any file is copied.
for folder in (train_real, train_fake, val_real, val_fake):
    os.makedirs(folder, exist_ok=True)

# Helper function to split and copy
def split_and_copy(source_dir, train_dir, val_dir, split_ratio=0.8, seed=42):
    """Copy the files in *source_dir* into a train folder and a val folder.

    The regular files directly inside *source_dir* are shuffled, the first
    ``int(n * split_ratio)`` are copied to *train_dir* and the remainder to
    *val_dir*. Sub-directories are ignored. Both target directories are
    created if they do not exist.

    Args:
        source_dir: Directory whose files are split.
        train_dir: Destination for the training portion.
        val_dir: Destination for the validation portion.
        split_ratio: Fraction of files sent to *train_dir*; must be in [0, 1].
        seed: Seed for the shuffle. With the same seed and the same source
            files the split is identical on every call, so re-running does
            not put a file into both targets. Pass ``None`` to shuffle with
            the global ``random`` state instead.

    Returns:
        A ``(train_count, val_count)`` tuple with the number of files copied
        to each destination.

    Raises:
        ValueError: If *split_ratio* is outside [0, 1].
    """
    if not 0 <= split_ratio <= 1:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    # Sorted so the shuffle starts from the same order on every run
    # (os.listdir() order is arbitrary); directories cannot be copied with
    # shutil.copy, so they are filtered out.
    files = sorted(
        name for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )
    if seed is None:
        random.shuffle(files)
    else:
        # Private generator: leaves the global random state untouched.
        random.Random(seed).shuffle(files)

    split_index = int(len(files) * split_ratio)
    portions = (
        (train_dir, files[:split_index]),
        (val_dir, files[split_index:]),
    )
    for target_dir, names in portions:
        os.makedirs(target_dir, exist_ok=True)
        for name in names:
            shutil.copy(os.path.join(source_dir, name), os.path.join(target_dir, name))

    return split_index, len(files) - split_index

# Split each class folder into its train/val counterparts.
for class_source, class_train, class_val in (
    ("/content/frames/real", train_real, val_real),
    ("/content/frames/fake", train_fake, val_fake),
):
    split_and_copy(class_source, class_train, class_val)
print("✅ Frame split complete — Train/Val folders are now ready.")


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = 380  # EfficientNetB4 input size
batch_size = 16

train_dir = '/content/frames/train'
val_dir = '/content/frames/val'

# NOTE: no `rescale=1./255` on purpose. Keras' EfficientNet models include
# their own input normalisation and expect raw pixel values in [0, 255];
# pre-scaling to [0, 1] would feed the pretrained backbone inputs far outside
# the range it was trained on.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=10,
    shear_range=0.1,
    zoom_range=0.1,
)

# Validation images get no augmentation and, as above, no rescaling.
val_datagen = ImageDataGenerator()

# Class labels are taken from the sub-folder names of each directory.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary'
)


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Backbone: EfficientNetB4 with ImageNet weights and without its classifier
# top; its weights are frozen so only the new head is trained.
base_model = EfficientNetB4(
    include_top=False,
    weights='imagenet',
    input_shape=(img_size, img_size, 3),
)
base_model.trainable = False

# Classification head, applied in order on top of the backbone output.
head_layers = [
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.25),
]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)
predictions = Dense(1, activation='sigmoid')(x)  # single sigmoid unit: binary output

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Callbacks: stop once val_accuracy has not improved for 3 epochs (restoring
# the best weights) and save the best-scoring model to Drive.
checkpoint_path = "/content/drive/MyDrive/deepfake_project1/best_model_b4.h5"
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=3,
    restore_best_weights=True,
)
checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Train for at most 15 epochs, validating after each one.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,
    callbacks=[early_stop, checkpoint],
)
