In [None]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import plot_model
from tensorflow.keras.metrics import AUC

In [None]:
# Root folders of the two image datasets that are merged into one corpus.
# Both are relative to the notebook's working directory.
second_main_path = 'datasets/deepfake-and-real-images/Dataset'
main_path = 'datasets/140k-real-and-fake-faces/real_vs_fake/real-vs-fake'

def create_dataframe(base_path, sub_dir, label_map):
    """Index the files of one dataset split into a DataFrame.

    For every ``label_dir -> label`` entry of ``label_map`` the folder
    ``base_path/sub_dir/label_dir`` is listed and each regular file found in
    it becomes one row. Folders that do not exist are skipped.

    Args:
        base_path: Root folder of a dataset.
        sub_dir: Name of the split folder inside ``base_path``.
        label_map: Mapping from class-folder name to the label recorded for
            every file in that folder.

    Returns:
        A ``pandas.DataFrame`` with the columns ``file_path`` and ``label``.
        Rows are grouped by ``label_map`` order and sorted by file name
        within each folder. The frame is empty (but keeps both columns) when
        none of the folders exist.
    """
    data = {"file_path": [], "label": []}
    # Identity (device, inode) of every folder already indexed. Two keys that
    # differ only in case (e.g. 'real' and 'Real') resolve to the same folder
    # on a case-insensitive filesystem; without this guard its files would be
    # recorded twice.
    visited = set()
    for label_dir, label in label_map.items():
        folder_path = os.path.join(base_path, sub_dir, label_dir)
        if not os.path.isdir(folder_path):
            continue
        folder_stat = os.stat(folder_path)
        folder_id = (folder_stat.st_dev, folder_stat.st_ino)
        if folder_id in visited:
            continue
        visited.add(folder_id)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # manifest (and any seeded shuffle applied to it) reproducible.
        for img_file in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, img_file)
            # Nested folders are not samples.
            if os.path.isfile(file_path):
                data["file_path"].append(file_path)
                data["label"].append(label)
    return pd.DataFrame(data)

# Class-folder name -> numeric label (real = 1, fake = 0). Both the lowercase
# and the capitalised spelling of each folder name are listed.
label_mapping = {
    'real': 1,
    'fake': 0,
    'Real': 1,
    'Fake': 0,
}

# Split name -> [split folder under main_path, split folder under second_main_path].
sub_dirs_mapping = {
    'train': ['train', 'Train'],
    'test': ['test', 'Test'],
    'valid': ['valid', 'Validation'],
}


In [None]:
# Write one CSV manifest per split (train.csv / test.csv / valid.csv), merging
# the matching split folder of both datasets.
for sub_dir_key, sub_dirs in sub_dirs_mapping.items():
    split_frames = []
    for base_path, sub_dir in [(main_path, sub_dirs[0]), (second_main_path, sub_dirs[1])]:
        df = create_dataframe(base_path, sub_dir, label_mapping)
        # Empty frames add no rows; leaving them out also avoids pandas'
        # warning about concatenating empty entries.
        if not df.empty:
            split_frames.append(df)
    if not split_frames:
        # Fail here instead of writing a header-only CSV that would only
        # surface as an obscure error in a later cell.
        raise FileNotFoundError(
            f"No files found for split '{sub_dir_key}' under "
            f"{main_path!r} or {second_main_path!r}"
        )
    # Concatenate once instead of growing a frame inside the loop.
    combined_data = pd.concat(split_frames, ignore_index=True)
    csv_path = f"{sub_dir_key}.csv"
    combined_data.to_csv(csv_path, index=False)


def _load_shuffled_split(split_csv):
    """Read a split manifest, shuffle its rows and cast labels to strings.

    Args:
        split_csv: Path of a CSV with ``file_path`` and ``label`` columns.

    Returns:
        A DataFrame with a fresh 0..n-1 index, rows in a fixed pseudo-random
        order (``random_state=42``) and the ``label`` column cast to ``str``.
    """
    return (
        pd.read_csv(split_csv)
        .sample(frac=1, random_state=42)
        .reset_index(drop=True)
        # flow_from_dataframe(class_mode='binary') expects string class values.
        .astype({"label": str})
    )


train_df = _load_shuffled_split("train.csv")
valid_df = _load_shuffled_split("valid.csv")
test_df = _load_shuffled_split("test.csv")

In [None]:
IMAGE_SIZE = (256, 256)
BATCH_SIZE = 64

# Keyword arguments common to the train, validation and test iterators.
flow_options = dict(
    x_col='file_path',
    y_col='label',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

# Pixel values are rescaled by 1/255 everywhere; only the training generator
# additionally applies random horizontal flips.
train_datagen = ImageDataGenerator(rescale=1.0/255, horizontal_flip=True)
datagen = ImageDataGenerator(rescale=1.0/255)

train_generator = train_datagen.flow_from_dataframe(dataframe=train_df, **flow_options)

# Validation and test iterators keep the dataframe order (shuffle=False).
valid_generator = datagen.flow_from_dataframe(dataframe=valid_df, shuffle=False, **flow_options)
test_generator = datagen.flow_from_dataframe(dataframe=test_df, shuffle=False, **flow_options)

In [None]:
# Build and compile the network inside the MirroredStrategy scope so that the
# strategy manages the variables created for it.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # ImageNet-pretrained DenseNet121 backbone without its classification head.
    # The input shape is derived from IMAGE_SIZE so the network always matches
    # the size the data generators resize images to.
    base_model = DenseNet121(
        weights="imagenet",
        include_top=False,
        input_shape=(*IMAGE_SIZE, 3),
    )
    # Classification head: pooled backbone features -> 512-unit ReLU layer ->
    # batch norm -> dropout -> single sigmoid unit.
    x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
    x = tf.keras.layers.Dense(512, activation='relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.3)(x)
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.models.Model(inputs=base_model.input, outputs=x)
    # Evaluation results follow this order: loss, accuracy, auc.
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", AUC(name='auc')])

# Render a diagram of the architecture to model.png.
plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
)

# File the checkpoint callback writes to; it is overwritten only when an epoch
# reaches a new maximum of val_accuracy.
checkpoint_path = "Combined_best_model.keras"
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)

# Train for 11 epochs, validating after each one.
fit_options = dict(
    validation_data=valid_generator,
    epochs=11,
    callbacks=[model_checkpoint],
)
history = model.fit(train_generator, **fit_options)

In [None]:
# Restore the weights stored at checkpoint_path before scoring the test split.
model.load_weights(checkpoint_path)

# evaluate() returns the loss followed by the compiled metrics, in order.
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy, test_auc = test_metrics

for report_line in (
    f"Test Loss: {test_loss}",
    f"Test Accuracy: {test_accuracy:.2f}",
    f"Test AUC: {test_auc:.2f}",
):
    print(report_line)