## Connecting to Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import zipfile
import os

# Path to the zip file in Drive
zip_path = "/content/drive/MyDrive/ModelTrain/DFD/Dataset/DFD.zip"

# Unzip it to /content/
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall("/content/deepfake_dataset")

# Set dataset path
dataset_path = "/content/deepfake_dataset"

In [3]:
import os

# List top-level files/folders
print(os.listdir("/content/deepfake_dataset"))

# If it's nested, explore further:
for root, dirs, files in os.walk("/content/deepfake_dataset"):
    print("Root:", root)
    print("Dirs:", dirs)
    print("Files:", files[:5])  # just print the first 5 files
    print("===")

['DFD']
Root: /content/deepfake_dataset
Dirs: ['DFD']
Files: []
===
Root: /content/deepfake_dataset/DFD
Dirs: ['DFD_manipulated_sequences', 'DFD_original sequences']
Files: []
===
Root: /content/deepfake_dataset/DFD/DFD_manipulated_sequences
Dirs: []
Files: ['01_09__talking_angry_couch__O8HNNX43.mp4', '01_04__walking_outside_cafe_disgusted__0XUW13RW.mp4', '01_11__talking_against_wall__9229VVZ3.mp4', '01_03__meeting_serious__JZUXXFRB.mp4', '01_15__walking_down_street_outside_angry__02HILKYO.mp4']
===
Root: /content/deepfake_dataset/DFD/DFD_original sequences
Dirs: []
Files: ['01__exit_phone_room.mp4', '01__outside_talking_still_laughing.mp4', '01__hugging_happy.mp4', '01__kitchen_pan.mp4', '01__outside_talking_pan_laughing.mp4']
===


 ## Module Imports

In [4]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [5]:
import tensorflow as tf

if tf.config.list_physical_devices('GPU'):
    print("✅ GPU is available:", tf.config.list_physical_devices('GPU'))
else:
    print("❌ GPU not available.")

❌ GPU not available.


## Function to extract frames from a video

In [6]:
REAL_PATH = "/content/deepfake_dataset/DFD/DFD_original sequences"
FAKE_PATH = "/content/deepfake_dataset/DFD/DFD_manipulated_sequences"

In [7]:
OUTPUT_FRAME_SIZE = (128, 128)  # Frame dimensions
FRAME_COUNT = 10  # Number of frames to extract per video

# Function to extract frames from a video
def extract_frames(video_path, output_size=(128, 128), frame_count=10):
    cap = cv2.VideoCapture(video_path)
    frames = []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = max(total_frames // frame_count, 1)  # Uniform sampling

    for i in range(frame_count):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i * step)
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, output_size)
        frames.append(frame)
    cap.release()
    return np.array(frames)

In [8]:
# Prepare data and labels
data = []
labels = []

# Process real videos
print("Processing real videos...")
for video_file in tqdm(os.listdir(REAL_PATH)):
    video_path = os.path.join(REAL_PATH, video_file)
    frames = extract_frames(video_path, output_size=OUTPUT_FRAME_SIZE, frame_count=FRAME_COUNT)
    if len(frames) == FRAME_COUNT:  # Ensure correct frame count
        data.append(frames)
        labels.append(0)  # Label 0 for real

Processing real videos...


100%|██████████| 10/10 [00:55<00:00,  5.57s/it]


In [9]:
# Process fake videos
print("Processing fake videos...")
for video_file in tqdm(os.listdir(FAKE_PATH)):
    video_path = os.path.join(FAKE_PATH, video_file)
    frames = extract_frames(video_path, output_size=OUTPUT_FRAME_SIZE, frame_count=FRAME_COUNT)
    if len(frames) == FRAME_COUNT:
        data.append(frames)
        labels.append(1)  # Label 1 for fake

Processing fake videos...


100%|██████████| 100/100 [09:00<00:00,  5.41s/it]


In [10]:
# Convert to numpy arrays
data = np.array(data)  # Shape: (num_videos, num_frames, 128, 128, 3)
labels = np.array(labels)

In [13]:
print(labels)

[0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [14]:
print("Data shape:", data.shape)       # Should be (num_videos, num_frames, 128, 128, 3)
print("Labels shape:", labels.shape)   # Should be (num_videos,)
print("Data type:", data.dtype)
print("Labels type:", labels.dtype)

Data shape: (110, 10, 128, 128, 3)
Labels shape: (110,)
Data type: uint8
Labels type: int64


## Spliting Dataset: 60% train, 20% test, 20% validation

In [15]:
# Split into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(data, labels, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [16]:
# Normalize data
X_train = X_train / 255.0
X_val = X_val / 255.0
X_test = X_test / 255.0

## One-hot encoded

In [17]:
# Convert labels to categorical
y_train = to_categorical(y_train, num_classes=2) # for real [1,0]  and   fake [0,1]
y_val = to_categorical(y_val, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

In [18]:
print(f"Data shapes: Train - {X_train.shape}, Validation - {X_val.shape}, Test - {X_test.shape}")

Data shapes: Train - (66, 10, 128, 128, 3), Validation - (22, 10, 128, 128, 3), Test - (22, 10, 128, 128, 3)


## Augment frames to avoit overfitting

In [20]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [21]:
# Augment frames
datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=10,
    zoom_range=0.1,
    brightness_range=[0.8, 1.2]
)

In [22]:
# Function to augment extracted frames
def augment_frames(frames):
    augmented_frames = []  # Create an empty list to store augmented frames
    for frame in frames:
        frame = datagen.random_transform(frame)  # Apply random augmentation
        augmented_frames.append(frame)  # Add the augmented frame to the list
    return np.array(augmented_frames)  # Return as a NumPy array

In [23]:
augmented_data = []  # List to store augmented frames
augmented_labels = []  # List to store augmented labels

for i in range(len(X_train)):  # Loop through each video in training data
    augmented_frames = augment_frames(X_train[i])  # Augment frames of the video
    augmented_data.append(augmented_frames)  # Add the augmented frames to the list
    augmented_labels.append(y_train[i])  # Add the corresponding label to the list

In [24]:
# Combine original and augmented data
X_train_augmented = np.concatenate((X_train, np.array(augmented_data)))
y_train_augmented = np.concatenate((y_train, np.array(augmented_labels)))

In [25]:
print(f"Augmented Train Data: {X_train_augmented.shape}")

Augmented Train Data: (132, 10, 128, 128, 3)


## Training Dataset

In [26]:
import tensorflow as tf
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Dense, Flatten, TimeDistributed, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout

In [27]:
def build_improved_model(input_shape=(FRAME_COUNT, 128, 128, 3)):
    model = Sequential([
        TimeDistributed(Xception(weights='imagenet', include_top=False, input_shape=(128, 128, 3))),
        TimeDistributed(Flatten()),
        Dropout(0.5),  # Add dropout for regularization
        LSTM(128, return_sequences=False),
        Dropout(0.5),  # Add dropout
        Dense(64, activation='relu'),
        Dense(2, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [28]:
model = build_improved_model()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [29]:
model.summary()

In [30]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

checkpoint_path = '/content/drive/MyDrive/ModelTrain/DFD/Model/deepfake_detection_model.keras'  # Save to Drive

checkpoint = ModelCheckpoint(
    checkpoint_path,          # File path to save the model
    monitor="val_accuracy",   # Monitor validation accuracy during training
    save_best_only=True,      # Only save model when val_accuracy improves
    verbose=1                # Print a message when model is saved
)


In [31]:
lr_scheduler = ReduceLROnPlateau(
    monitor="val_loss",  # Watch validation loss
    factor=0.5,          # Reduce learning rate by half when triggered
    patience=3,          # Wait 3 epochs with no improvement before reducing
    verbose=1            # Print a message when learning rate is reduced
)

In [32]:
# Train the model
history = model.fit(
    X_train_augmented, y_train_augmented,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=10,
    callbacks=[checkpoint, lr_scheduler]
)
model.save("deepfake_detection_model.keras")

Epoch 1/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17s/step - accuracy: 0.7577 - loss: 0.4994 
Epoch 1: val_accuracy improved from -inf to 0.86364, saving model to /content/drive/MyDrive/ModelTrain/DFD/Model/deepfake_detection_model.keras
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m361s[0m 18s/step - accuracy: 0.7638 - loss: 0.5025 - val_accuracy: 0.8636 - val_loss: 0.7898 - learning_rate: 1.0000e-04
Epoch 2/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18s/step - accuracy: 0.9312 - loss: 0.2732 
Epoch 2: val_accuracy did not improve from 0.86364
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m318s[0m 19s/step - accuracy: 0.9297 - loss: 0.2768 - val_accuracy: 0.8636 - val_loss: 0.8050 - learning_rate: 1.0000e-04
Epoch 3/50
[1m 1/14[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m4:30[0m 21s/step - accuracy: 0.8000 - loss: 0.4542

KeyboardInterrupt: 

## Accuracy Test

In [34]:
from sklearn.metrics import classification_report, accuracy_score

# Load the best saved model
from tensorflow.keras.models import load_model
model = load_model('/content/drive/MyDrive/ModelTrain/DFD/Model/deepfake_detection_model.keras')

In [36]:
# Predict class probabilities on test set
y_pred_prob = model.predict(X_test)

# Convert probabilities to class labels (argmax)
y_pred = y_pred_prob.argmax(axis=1)

# Convert one-hot encoded test labels to class indices
y_true = y_test.argmax(axis=1)

# Print accuracy
print("Test Accuracy:", accuracy_score(y_true, y_pred))

print("Classification Report:")
# Print classification report (precision, recall, f1-score)
print(classification_report(y_true, y_pred, target_names=['Real', 'Fake']))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step
Test Accuracy: 0.9545454545454546
Classification Report:
              precision    recall  f1-score   support

        Real       0.00      0.00      0.00         1
        Fake       0.95      1.00      0.98        21

    accuracy                           0.95        22
   macro avg       0.48      0.50      0.49        22
weighted avg       0.91      0.95      0.93        22



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Real Time Test

In [46]:
from tensorflow.keras.models import load_model

# Load the model for real-time detection
loaded_model = load_model('/content/drive/MyDrive/ModelTrain/DFD/Model/deepfake_detection_model.keras')

def predict_video(video_path, model, output_size=(128, 128), frame_count=10):
    frames = extract_frames(video_path, output_size, frame_count)
    frames = frames / 255.0  # Normalize
    frames = np.expand_dims(frames, axis=0)  # Add batch dimension
    prediction = model.predict(frames)
    label = "FAKE" if np.argmax(prediction) == 1 else "REAL"
    confidence = prediction[0][np.argmax(prediction)]
    print(f"Prediction: {label} (Confidence: {confidence:.2f})")

# Test prediction on a video
real_sample_path = os.path.join('/content/drive/MyDrive/ModelTrain/DFD/Test/r.mp4')  # Replace with real video path
fake_sample_path = os.path.join('/content/drive/MyDrive/ModelTrain/DFD/Test/f.mp4')  # Replace with fake video path

print("Real Video Prediction:")
predict_video(real_sample_path, model)

print("Fake Video Prediction:")
predict_video(fake_sample_path, model)

Real Video Prediction:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 606ms/step
Prediction: FAKE (Confidence: 0.99)
Fake Video Prediction:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 630ms/step
Prediction: FAKE (Confidence: 0.99)
