    Configure your Graphics Card 

In [1]:
import tensorflow as tf

# Enable on-demand GPU memory allocation for every visible GPU.
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Memory growth set")
    except RuntimeError as e:
        # Per the TensorFlow GPU guide, memory growth must be configured
        # before the GPUs are initialized; report the problem and continue.
        print(e)
else:
    print("No GPU detected; running on CPU.")

# Query GPU memory statistics only when a GPU exists: asking for 'GPU:0'
# on a CPU-only machine raises instead of returning a result.
gpu_memory_info = tf.config.experimental.get_memory_info('GPU:0') if gpus else None
gpu_memory_info


2025-10-25 15:05:22.603574: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-25 15:05:22.643297: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-25 15:05:23.981802: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Memory growth set


I0000 00:00:1761384925.546842    5048 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2071 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:02:00.0, compute capability: 8.6


{'current': 0, 'peak': 0}

    Import Dependencies

In [None]:
# import warnings
# warnings.filterwarnings("ignore")

import cv2
import gc
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing import image # type: ignore
from tensorflow.keras.preprocessing.image import ImageDataGenerator # type: ignore
from tensorflow.keras.models import Sequential,load_model,Model # type: ignore
from tensorflow.keras.layers import GlobalAveragePooling2D ,Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input# type: ignore
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau # type: ignore
from tensorflow.keras.applications import EfficientNetB0,EfficientNetB4 # type: ignore
from tensorflow.keras.applications.efficientnet import preprocess_input # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from tensorflow.keras.utils import get_file # type: ignore


from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    confusion_matrix,
    classification_report,
    f1_score
)

from imblearn.over_sampling import RandomOverSampler

import gradio as gr
import datetime

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from tqdm import tqdm as notebook_tqdm
from pathlib import Path

print("All dependencies imported successfully.")

All dependencies imported successfully.


    Gather the data

Dataset paths

In [3]:
# Root folder of the dataset. Set the DEEPFAKE_DATA_DIR environment variable
# to point the notebook at another location; the default is the original
# author's local path.
data_path = Path(
    os.environ.get(
        "DEEPFAKE_DATA_DIR",
        "/home/uppercase/Workspace/Projects/Deepfake_Detection/dataset",
    )
)

# One sub-folder per split.
train_path = data_path / "Train"
val_path = data_path / "Validation"
test_path = data_path / "Test"

In [4]:
def load_data_from_directory(directory, pattern="**/*.jpg"):
    """Collect image file paths and their labels from a directory tree.

    Each matching file is labelled with the name of its immediate parent
    folder (e.g. ``<directory>/Fake/img.jpg`` -> ``"Fake"``).

    Args:
        directory: Root folder to search, as a ``str`` or ``pathlib.Path``.
        pattern: Glob pattern relative to ``directory``. The default matches
            ``.jpg`` files at any depth.

    Returns:
        A tuple ``(files_names, labels)`` of two equally long lists: the file
        paths as strings, in sorted path order, and the matching labels.
    """
    # Coerce so that plain string paths work as well as Path objects.
    root = Path(directory)
    matched = sorted(root.glob(pattern))

    files_names = [str(file) for file in matched]
    labels = [file.parent.name for file in matched]

    return files_names, labels


In [5]:
# Scan the three split folders in one pass; each call yields a
# (file paths, labels) pair that is unpacked into its own pair of names.
(train_files, train_labels), (val_files, val_labels), (test_files, test_labels) = (
    load_data_from_directory(split_dir)
    for split_dir in (train_path, val_path, test_path)
)

In [6]:
# Sanity check: one line per split showing the number of files and the
# number of labels (the two numbers should match).
print(
    *(
        f"{len(split_files)} {len(split_labels)}"
        for split_files, split_labels in (
            (train_files, train_labels),
            (val_files, val_labels),
            (test_files, test_labels),
        )
    ),
    sep="\n",
)

140002 140002
39428 39428
10905 10905


    Generate Dataframes

In [7]:
# Wrap each split's parallel path/label lists in a two-column DataFrame
# ('image' = file path, 'label' = parent folder name).
train_df = pd.DataFrame(dict(image=train_files, label=train_labels))
val_df = pd.DataFrame(dict(image=val_files, label=val_labels))
test_df = pd.DataFrame(dict(image=test_files, label=test_labels))

print("DataFrames created successfully.")

DataFrames created successfully.


In [8]:
# Preview the first rows of the training frame (file path and label columns).
train_df.head()

Unnamed: 0,image,label
0,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake
1,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake
2,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake
3,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake
4,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake


In [9]:
# List the distinct class names found in the training labels.
train_df['label'].unique()

array(['Fake', 'Real'], dtype=object)

Separate features (X) and labels (y), then randomly oversample the training set

In [10]:
# Separate the target column from the remaining column(s). Only file paths
# are resampled here; no image data is loaded.
y_train = train_df['label']
X_train = train_df.drop(columns='label')

# Randomly oversample the training rows with a fixed seed for reproducibility.
ros = RandomOverSampler(random_state=83)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Rebuild the training frame from the resampled features and labels.
train_df = pd.DataFrame(X_resampled, columns=X_train.columns).assign(
    label=y_resampled.astype(str)
)

# Drop the temporary views and reclaim memory.
del X_train, y_train

gc.collect()


0

In [11]:
# (rows, columns) of the training frame after oversampling.
train_df.shape

(140002, 2)

    Preprocess the data 

In [12]:
# Remove, in place, every row whose label is missing from each split.
for frame in (train_df, test_df, val_df):
    frame.dropna(subset=['label'], inplace=True)

In [13]:
# Class balance of the training set: number of rows per label.
train_df['label'].value_counts()

label
Fake    70001
Real    70001
Name: count, dtype: int64

In [14]:
# Report, split by split, any label that is not one of the two expected
# classes. Nothing is printed when all labels are valid.
for name, df in {'train_df': train_df, 'val_df': val_df, 'test_df': test_df}.items():
    invalid_labels = df.loc[~df['label'].isin(['Real', 'Fake'])]
    if invalid_labels.empty:
        continue
    print(f"Warning: Invalid labels found in {name}: {invalid_labels['label'].unique()}")

In [15]:
# Force the label column of every split to plain strings.
for frame in (train_df, val_df, test_df):
    frame['label'] = frame['label'].astype(str)

Label Mapping 

In [16]:
# Class name -> integer id, and the inverse lookup derived from it.
label2id = {'Real' : 0, 'Fake': 1}
id2label = {class_id: class_name for class_name, class_id in label2id.items()}

# Add the integer-encoded label next to the string label in every split.
for frame in (train_df, val_df, test_df):
    frame['label_int'] = frame['label'].map(label2id)


In [17]:
# Preview the training frame again, now including the 'label_int' column.
train_df.head()

Unnamed: 0,image,label,label_int
0,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake,1
1,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake,1
2,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake,1
3,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake,1
4,/home/uppercase/Workspace/Projects/Deepfake_De...,Fake,1


Image data generators

In [18]:
# Number of images per batch and the target size every image is resized to
# by the data generators.
BATCH_SIZE = 16
IMG_SIZE = (224, 224)

In [19]:
# Keras EfficientNet models contain their own input rescaling/normalisation
# and expect raw pixel values in [0, 255]; `preprocess_input` for EfficientNet
# is a pass-through. Dividing by 255 here as well would hand the network
# almost-black images, so no `rescale` is applied.

# Training generator: light geometric augmentation on top of preprocessing.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation/test generator: preprocessing only, no augmentation.
val_test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
# Build the batch iterators that stream images from the paths stored in each
# DataFrame. class_mode='categorical' yields one-hot labels; per the Keras
# docs the class indices are inferred in alphanumeric order of the label
# strings (Fake=0, Real=1), which is the reverse of `label2id`.
#
# NOTE: `train_datagen` is rebound from the ImageDataGenerator to its
# iterator, so the cell that creates the generators must be re-run before
# this cell can be executed a second time.
train_datagen = train_datagen.flow_from_dataframe(
    train_df,
    x_col='image',
    y_col='label',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    color_mode='rgb'
)

# Validation and test iterators keep the DataFrame order (shuffle=False) so
# predictions can be aligned with the rows they came from.
val_datagen = val_test_datagen.flow_from_dataframe(
    val_df,
    x_col='image',
    y_col='label',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=False
)

test_datagen = val_test_datagen.flow_from_dataframe(
    test_df,
    x_col='image',
    y_col='label',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=False
)

Found 140002 validated image filenames belonging to 2 classes.
Found 39428 validated image filenames belonging to 2 classes.
Found 10905 validated image filenames belonging to 2 classes.


In [21]:
# Integer-encoded labels of each split as NumPy arrays. This rebinds
# train_labels / val_labels / test_labels, which previously held the
# string labels returned by load_data_from_directory.
train_labels, val_labels, test_labels = (
    frame['label_int'].to_numpy() for frame in (train_df, val_df, test_df)
)

In [22]:
# Number of images the training iterator will draw from.
train_datagen.samples

140002

In [23]:
# Plain-text preview of the first rows of every split, one frame after another.
print(train_df.head(), val_df.head(), test_df.head(), sep="\n")

                                               image label  label_int
0  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
1  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
2  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
3  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
4  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
                                               image label  label_int
0  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
1  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
2  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
3  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
4  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
                                               image label  label_int
0  /home/uppercase/Workspace/Projects/Deepfake_De...  Fake          1
1  /home/uppercase/W

    Model Train 

In [24]:
# Show whether Keras uses channels-last or channels-first image tensors.
print(f"Keras image_data_format: {tf.keras.backend.image_data_format()}")

Keras image_data_format: channels_last


In [25]:
# Reset all state generated by Keras before a new model is built.
tf.keras.backend.clear_session()   # important


In [None]:
# --- 1. Fetch the ImageNet "no-top" EfficientNetB4 weights -----------------
# get_file stores the download under the Keras cache ("models" sub-folder).
weights_url = "https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5"
weights_path = get_file(
    fname="efficientnetb4_notop.h5",
    origin=weights_url,
    cache_subdir="models"
)
print("Downloaded weights to:", weights_path)
print("weights file size (MB):", round(Path(weights_path).stat().st_size / (1024*1024), 2))

# Create the output folder up front: a missing "../models" directory would
# otherwise only surface as a failed save after the whole training run.
model_out_path = Path("../models/deepfake_detector_efficientnetb4.h5")
model_out_path.parent.mkdir(parents=True, exist_ok=True)

# --- 2. Build the backbone and load the weights explicitly -----------------
# NOTE(review): the architecture is built with weights=None and the file is
# loaded by hand, presumably to work around a problem with
# weights='imagenet' -- confirm. The stem kernel shape is printed before and
# after loading as a sanity check.
tf.keras.backend.clear_session()
base_model = EfficientNetB4(weights=None, include_top=False, input_shape=(224,224,3))
stem = base_model.get_layer("stem_conv")
print("Built base (weights=None). stem_conv kernel shape (before load):", tuple(stem.weights[0].shape))

base_model.load_weights(weights_path)
print("Weights loaded successfully.")

stem_after = base_model.get_layer("stem_conv")
print("stem_conv kernel shape (after load):", tuple(stem_after.weights[0].shape))
print("Base model input shape:", base_model.input_shape)

# --- 3. Classification head: global average pooling + 2-way softmax --------
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(2, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# --- 4. Freeze the backbone so only the new head is trained ----------------
for layer in base_model.layers:
    layer.trainable = False

# --- 5. Train the head, then save the model and show its summary -----------
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(
    train_datagen,
    epochs=5,
    validation_data=val_datagen
)
model.save(str(model_out_path))

model.summary()

Downloaded weights to: /home/uppercase/.keras/models/efficientnetb0_notop.h5
weights file size (MB): 15.93
Built base (weights=None). stem_conv kernel shape (before load): (3, 3, 3, 32)
Weights loaded successfully.
stem_conv kernel shape (after load): (3, 3, 3, 32)
Base model input shape: (None, 224, 224, 3)


In [None]:
# base_model.trainable = False
# model = Sequential([
#     base_model,
#     GlobalAveragePooling2D(),
#     Dense(256, activation='relu'),
#     Dropout(0.5),
#     Dense(2, activation='softmax')
# ])
# model.summary()


In [None]:
# model.compile(optimizer=Adam(),
#               loss='categorical_crossentropy',
#                 metrics=['accuracy',tf.keras.metrics.AUC(name='AUC')])

In [28]:
# Timestamped TensorBoard directory so successive runs do not overwrite
# each other's logs.
log_dir = f"../logs/fit/{datetime.datetime.now():%Y%m%d-%H%M%S}"

# Training callbacks.
# NOTE(review): none of the active model.fit calls visible in this notebook
# pass this list; only a commented-out cell does.
callbacks = [
    # Write TensorBoard logs, including weight histograms every epoch.
    TensorBoard(log_dir=log_dir, histogram_freq=1),
    # Keep only the checkpoint with the best validation accuracy.
    ModelCheckpoint(
        filepath='deepfake_model.h5',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1,
    ),
    # Stop once validation loss has not improved for 6 epochs and roll back
    # to the best weights seen.
    EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True),
    # Halve the learning rate after 5 epochs without validation-loss
    # improvement, never going below 1e-7.
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1,
    ),
]


In [29]:
# Force a full garbage-collection pass before the next training step; the
# displayed value is the number of unreachable objects found.
gc.collect()

13

In [None]:
# Fine-tuning: unfreeze the last 20 backbone layers, but keep
# BatchNormalization layers frozen. The Keras EfficientNet fine-tuning guide
# recommends this so their statistics are not updated during fine-tuning.
for layer in base_model.layers[-20:]:
    if not isinstance(layer, BatchNormalization):
        layer.trainable = True

# Recompile so the new trainable flags take effect, with a much lower
# learning rate than the initial head training.
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Continue training with the partially unfrozen backbone.
model.fit(
    train_datagen,
    epochs=5,
    validation_data=val_datagen
)

# Save the fine-tuned model to the working directory.
model.save("deepfake_detector_efficientnetb4_finetuned.h5")

In [None]:
# history = model.fit(
#     train_datagen,
#     epochs= 12,
#     validation_data=val_datagen,
#     callbacks=callbacks
# )

In [None]:
# Test iterator read straight from the Test folder (one sub-folder per class).
# It reuses the validation/test ImageDataGenerator, the dataset path and the
# image size defined earlier, so test images are preprocessed the same way
# as validation images instead of through a second hand-written pipeline.
test_datagen = val_test_datagen
test_generator = test_datagen.flow_from_directory(
    str(test_path),
    target_size=IMG_SIZE,
    batch_size=32,
    class_mode='categorical',
    shuffle=False  # keep file order so predictions align with .classes
)

In [None]:
# evaluate() returns one value per compiled metric. The active compile calls
# track only loss and accuracy, so unpacking three values would raise a
# ValueError; read the metrics by name instead.
eval_metrics = model.evaluate(test_generator, return_dict=True)
test_loss = eval_metrics['loss']
test_accuracy = eval_metrics['accuracy']

# Use the model's own AUC if it was compiled with a metric named 'AUC';
# otherwise compute it from the predicted probability of class index 1.
test_auc = eval_metrics.get('AUC')
if test_auc is None:
    test_probs = model.predict(test_generator)
    test_auc = roc_auc_score(test_generator.classes, test_probs[:, 1])

print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test AUC: {test_auc:.4f}")


In [None]:
# Save the current model to an HDF5 file in the working directory.
# NOTE(review): the model is also saved under two other file names earlier in
# the notebook -- confirm which artifact is the one to keep.
model.save("deepfake_detection_model.h5")

In [None]:
# Per-class probabilities for every test image, in generator order.
y_pred_probs = model.predict(test_generator)

# Ground-truth class indices as assigned by the generator, and the
# predicted class (index of the highest probability) for each image.
y_true = test_generator.classes
y_pred = y_pred_probs.argmax(axis=1)
