In [1]:
!pip install tensorflow kaggle split-folders -q

In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [11]:
#Delete all the preexisting files in colab uploads for avoiding overwriting
!rm -rf /content/*

In [12]:
#Downloading datasets
#Dataset1
# !kaggle competitions download -c deepfake-detection-challenge
# !unzip deepfake-detection-challenge.zip -d /content/deepfake-detection-challenge

#Dataset2
!kaggle datasets download -d hungle3401/faceforensics
!unzip faceforensics.zip -d faceforensics

Dataset URL: https://www.kaggle.com/datasets/hungle3401/faceforensics
License(s): DbCL-1.0
Archive:  faceforensics.zip
  inflating: faceforensics/FF++/fake/01_02__outside_talking_still_laughing__YVGY8LOK.mp4  
  inflating: faceforensics/FF++/fake/01_02__walk_down_hall_angry__YVGY8LOK.mp4  
  inflating: faceforensics/FF++/fake/01_03__hugging_happy__ISF9SP4G.mp4  
  inflating: faceforensics/FF++/fake/01_03__podium_speech_happy__480LQD1C.mp4  
  inflating: faceforensics/FF++/fake/01_03__talking_against_wall__JZUXXFRB.mp4  
  inflating: faceforensics/FF++/fake/01_11__meeting_serious__9OM3VE0Y.mp4  
  inflating: faceforensics/FF++/fake/01_11__secret_conversation__4OJNJLOO.mp4  
  inflating: faceforensics/FF++/fake/01_11__talking_against_wall__9229VVZ3.mp4  
  inflating: faceforensics/FF++/fake/01_11__walking_outside_cafe_disgusted__FAFWDR4W.mp4  
  inflating: faceforensics/FF++/fake/01_12__outside_talking_pan_laughing__TNI7KUZ6.mp4  
  inflating: faceforensics/FF++/fake/01_20__outside_talki

In [13]:
import tensorflow as tf
import os
import splitfolders
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Define image size, batch size, and data directories
IMG_HEIGHT, IMG_WIDTH = 300, 300
BATCH_SIZE = 32

# Path to your dataset
input_folder = '/content/faceforensics/FF++'
output_folder = '/content/output_faceforensics_FF++'

# Define paths to training, validation, and test directories
TRAIN_DIR = os.path.join(output_folder, 'train')
VAL_DIR = os.path.join(output_folder, 'val')
TEST_DIR = os.path.join(output_folder, 'test')

# Split dataset: 80% train, 10% val, 10% test
splitfolders.ratio(input_folder, output=output_folder, seed=1337, ratio=(.8, .1, .1))

Copying files: 400 files [00:47,  8.45 files/s]


In [14]:
import os
import cv2

def extract_frames_from_video(video_path, output_folder, frame_rate=1):
    """
    Extract frames from a video and save them as images.
    Extracts one frame per second by default.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps == 0:
        print(f"Warning: Could not determine FPS for {video_path}. Using default 30 FPS.")
        fps = 30

    frame_interval = int(fps * frame_rate)
    frame_count = 0
    saved_frame_count = 0
    success, frame = cap.read()

    while success:
        if frame_count % frame_interval == 0:
            image_path = os.path.join(output_folder, f"frame_{saved_frame_count:05d}.jpg")
            cv2.imwrite(image_path, frame)
            saved_frame_count += 1
        frame_count += 1
        success, frame = cap.read()
    cap.release()
    print(f"Extracted {saved_frame_count} frames from {video_path}")

# Base directories for videos and output frames
video_base_dir = '/content/faceforensics/FF++'  # Videos stored here, with subfolders "real" and "fake"
frames_output_dir = '/content/faceforensics_frames'  # Where extracted frames will be saved

# Process only the first 3 videos per class ("real" and "fake")
classes = ['real', 'fake']
for cls in classes:
    video_dir = os.path.join(video_base_dir, cls)
    output_class_dir = os.path.join(frames_output_dir, cls)
    if not os.path.exists(output_class_dir):
        os.makedirs(output_class_dir)

    # Get list of MP4 videos and process only the first 25 mp4 videos
    video_files = [f for f in os.listdir(video_dir) if f.lower().endswith('.mp4')]
    for video_file in video_files:
        video_path = os.path.join(video_dir, video_file)
        # Create a subfolder for each video to avoid filename conflicts (optional)
        video_name = os.path.splitext(video_file)[0]
        output_video_dir = os.path.join(output_class_dir, video_name)
        os.makedirs(output_video_dir, exist_ok=True)
        extract_frames_from_video(video_path, output_video_dir, frame_rate=1)


Extracted 39 frames from /content/faceforensics/FF++/real/14__outside_talking_still_laughing.mp4
Extracted 40 frames from /content/faceforensics/FF++/real/11__secret_conversation.mp4
Extracted 16 frames from /content/faceforensics/FF++/real/08__exit_phone_room.mp4
Extracted 7 frames from /content/faceforensics/FF++/real/14__hugging_happy.mp4
Extracted 44 frames from /content/faceforensics/FF++/real/01__meeting_serious.mp4
Extracted 38 frames from /content/faceforensics/FF++/real/01__podium_speech_happy.mp4
Extracted 39 frames from /content/faceforensics/FF++/real/12__outside_talking_pan_laughing.mp4
Extracted 43 frames from /content/faceforensics/FF++/real/06__walking_and_outside_surprised.mp4
Extracted 37 frames from /content/faceforensics/FF++/real/15__kitchen_still.mp4
Extracted 40 frames from /content/faceforensics/FF++/real/09__outside_talking_still_laughing.mp4
Extracted 42 frames from /content/faceforensics/FF++/real/10__walking_down_street_outside_angry.mp4
Extracted 43 frames 

In [15]:
# Step 2: Flatten the directory structure so images are directly inside class folders.
import shutil

def flatten_directory(source_dir, target_dir):
    """
    Move all image files from nested subdirectories of source_dir into target_dir.
    """
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for root, dirs, files in os.walk(source_dir):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                source_file = os.path.join(root, file)
                target_file = os.path.join(target_dir, file)
                # If duplicate names exist, consider modifying target_file to avoid overwriting
                shutil.move(source_file, target_file)

flattened_frames_dir = '/content/faceforensics_frames_flat'
for cls in classes:
    source_class_dir = os.path.join(frames_output_dir, cls)
    target_class_dir = os.path.join(flattened_frames_dir, cls)
    flatten_directory(source_class_dir, target_class_dir)

# Optional: Verify flattened structure (lists first 5 files for each class)
for cls in classes:
    target_class_dir = os.path.join(flattened_frames_dir, cls)
    print(f"Class '{cls}' has {len(os.listdir(target_class_dir))} images. Sample files:", os.listdir(target_class_dir)[:5])


Class 'real' has 68 images. Sample files: ['frame_00012.jpg', 'frame_00011.jpg', 'frame_00026.jpg', 'frame_00058.jpg', 'frame_00028.jpg']
Class 'fake' has 66 images. Sample files: ['frame_00012.jpg', 'frame_00011.jpg', 'frame_00026.jpg', 'frame_00058.jpg', 'frame_00028.jpg']


In [16]:
# Step 3: Split the flattened dataset into train, validation, and test sets using splitfolders
import splitfolders

input_folder = flattened_frames_dir  # This folder now contains 'real' and 'fake'
output_folder = '/content/output_faceforensics_frames'
# Splitting into 80% train, 10% validation, 10% test
splitfolders.ratio(input_folder, output=output_folder, seed=1337, ratio=(0.8, 0.1, 0.1))

# Define paths for each split
train_dir = os.path.join(output_folder, 'train')
val_dir = os.path.join(output_folder, 'val')
test_dir = os.path.join(output_folder, 'test')


Copying files: 134 files [00:00, 547.54 files/s]


In [17]:
# Step 4: Create data generators using Keras ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import preprocess_input

IMG_HEIGHT, IMG_WIDTH = 300, 300
BATCH_SIZE = 32

# Data augmentation for training; simple preprocessing for validation/test
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)
val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

print("Train samples:", train_generator.samples)
print("Validation samples:", val_generator.samples)
print("Test samples:", test_generator.samples)

Found 106 images belonging to 2 classes.
Found 12 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Train samples: 106
Validation samples: 12
Test samples: 16


In [None]:
# Step 5: Build and compile the EfficientNetB3 model
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

# Load EfficientNetB3 without the top classification layers
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid', kernel_regularizer=regularizers.l2(0.001))(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model for initial training
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Step 6: Train the model
model.fit(
    train_generator,
    epochs=500,
    validation_data=val_generator
)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
[1m43941136/43941136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 11s/step - accuracy: 0.5518 - loss: 1.0367 - val_accuracy: 0.7500 - val_loss: 0.7963
Epoch 2/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 840ms/step - accuracy: 0.6864 - loss: 0.8656 - val_accuracy: 0.8333 - val_loss: 0.7151
Epoch 3/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 840ms/step - accuracy: 0.7721 - loss: 0.7661 - val_accuracy: 0.8333 - val_loss: 0.6672
Epoch 4/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.8754 - loss: 0.6715 - val_accuracy: 0.8333 - val_loss: 0.6306
Epoch 5/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 820ms/step - accuracy: 0.8529 - loss: 0.6164 - val_accuracy: 0.8333 - val_loss: 0.5952
Epoch 6/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.8491 - loss: 0.6205 - val_accuracy: 0.8333 - val_loss: 0.5600
Epoch 7/500
[1m4/4[0m [32m━━━━━━━━━━━━

In [None]:
# Save the model in Keras v3 format
model.save('/content/EB3_initial.keras')

from google.colab import files
files.download('/content/EB3_initial.keras')

In [None]:
import numpy as np
#Using the model for making prediction
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image # Import the 'image' module
from tensorflow.keras.applications.efficientnet import preprocess_input # Assuming you want to use EfficientNet's preprocessing

model = load_model('/content/EB3_initial.keras')

# Path to the image you want to predict on
img_path = '/content/sample_fake.jpg'

# Load and preprocess the image
img = image.load_img(img_path, target_size=(300, 300))
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)  # Create batch dimension
img_array = preprocess_input(img_array)

# Make the prediction
prediction = model.predict(img_array)

# Interpret the prediction (threshold = 0.5)
if prediction[0][0] > 0.5:
    print("Predicted: Fake")
else:
    print("Predicted: Real")

In [None]:
#Model evaluation on the test dataset
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Define test data directory and parameters
test_dir = "path_to_test_folder"  # Update with actual test folder path
batch_size = 32
img_height, img_width = IMG_HEIGHT, IMG_WIDTH  # Ensure these match training

# Data Generator for Test Set
test_datagen = ImageDataGenerator(rescale=1.0/255)  # Rescale pixel values
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',  # Ensure it matches your training class mode
    shuffle=False  # No shuffling to align predictions with labels
)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Generate Predictions
y_pred_probs = model.predict(test_generator)
y_pred = (y_pred_probs > 0.5).astype(int)  # Convert to binary class (0 or 1)
y_true = test_generator.classes  # True labels

# Compute Classification Metrics
print("Classification Report:")
print(classification_report(y_true, y_pred))

# Compute Confusion Matrix
print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))
