In [1]:
# ---- Global config & paths ----
import pathlib, yaml

ROOT = pathlib.Path().resolve()
CFG = yaml.safe_load(open(ROOT.parent / "configs" / "base.yaml"))

DATA_DIR = ROOT / CFG["paths"]["data_dir"]
WEIGHTS_DIR = ROOT / CFG["paths"]["weights_dir"]
WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)
WEIGHTS_PATH = WEIGHTS_DIR / CFG["paths"]["weights_name"]

RUNS_DIR = ROOT / CFG["paths"]["runs_dir"]
RUNS_DIR.mkdir(parents=True, exist_ok=True)

print("DATA_DIR:", DATA_DIR)
print("WEIGHTS_PATH:", WEIGHTS_PATH)
print("RUNS_DIR:", RUNS_DIR)

DATA_DIR: /Users/igazielinska/Documents/projects/deepfake-detector/notebooks/data
WEIGHTS_PATH: /Users/igazielinska/Documents/projects/deepfake-detector/notebooks/models/deepfake_detector_final.h5
RUNS_DIR: /Users/igazielinska/Documents/projects/deepfake-detector/notebooks/runs


In [2]:
import cv2
import mediapipe as mp
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Concatenate


In [3]:
selected_indices = [0, 4, 9, 10, 33, 46, 54, 55, 58, 127, 149, 175, 205, 263, 276, 284, 285, 288, 356, 400, 425, 468, 473]

In [4]:
def load_images_and_landmarks(selected_indices):
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, refine_landmarks=True)

    def process_folder(folder_path, label):
        images, landmarks, labels = [], [], []
        for root, _, files in os.walk(folder_path):
            for file in files:
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img_path = os.path.join(root, file)
                    image = cv2.imread(img_path)
                    if image is None:
                        continue
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    resized_image = cv2.resize(image, (256, 256))
                    results = face_mesh.process(resized_image)
                    if results.multi_face_landmarks:
                        face_landmarks = results.multi_face_landmarks[0]
                        landmark_coords = [
                            (lm.x, lm.y, lm.z)
                            for i, lm in enumerate(face_landmarks.landmark)
                            if i in selected_indices
                        ]
                        images.append(resized_image)
                        landmarks.append(landmark_coords)
                        labels.append(label)
        return (
            np.asarray(images, dtype=np.uint8),
            np.asarray(landmarks, dtype=np.float32),
            np.asarray(labels, dtype=np.int64),
        )

    def load_data(subset):
        subset_images, subset_landmarks, subset_labels = [], [], []
        subset_path = DATA_DIR / subset
        for label in [0, 1]:
            label_path = subset_path / str(label)
            if not label_path.exists():
                continue
            for subfolder in os.listdir(label_path):
                subfolder_path = label_path / subfolder
                if subfolder_path.is_dir():
                    images, landmarks, labels = process_folder(str(subfolder_path), label)
                    subset_images.append(images)
                    subset_landmarks.append(landmarks)
                    subset_labels.append(labels)
        return (
            np.concatenate(subset_images, axis=0),
            np.concatenate(subset_landmarks, axis=0),
            np.concatenate(subset_labels, axis=0),
        )

    X_train_images, X_train_landmarks, y_train = load_data("train")
    X_test_images, X_test_landmarks, y_test = load_data("test")
    X_val_images, X_val_landmarks, y_val = load_data("validate")

    return (
        X_train_images, X_train_landmarks, y_train,
        X_test_images, X_test_landmarks, y_test,
        X_val_images, X_val_landmarks, y_val,
    )

In [5]:
X_train_images, X_train_landmarks, y_train, \
X_test_images, X_test_landmarks, y_test, \
X_val_images, X_val_landmarks, y_val = load_images_and_landmarks(selected_indices)

I0000 00:00:1760718353.586755 7448313 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1760718353.599260 7449333 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1760718353.604828 7449328 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


ValueError: need at least one array to concatenate

In [5]:
print("Train Images Shape:", X_train_images.shape)
print("Train Landmarks Shape:", X_train_landmarks.shape)
print("Train Labels Shape:", y_train.shape)
print("Test Images Shape:", X_test_images.shape)
print("Test Landmarks Shape:", X_test_landmarks.shape)
print("Test Labels Shape:", y_test.shape)
print("Validation Images Shape:", X_val_images.shape)
print("Validation Landmarks Shape:", X_val_landmarks.shape)
print("Validation Labels Shape:", y_val.shape)

Train Images Shape: (39992, 256, 256, 3)
Train Landmarks Shape: (39989, 21, 3)
Train Labels Shape: (39989,)
Test Images Shape: (14000, 256, 256, 3)
Test Landmarks Shape: (13995, 21, 3)
Test Labels Shape: (13995,)
Validation Images Shape: (24190, 256, 256, 3)
Validation Landmarks Shape: (24187, 21, 3)
Validation Labels Shape: (24187,)


In [10]:
def filter_data(images, landmarks, labels):
    filtered_images = []
    filtered_landmarks = []
    filtered_labels = []

    for img, lnd, lbl in zip(images, landmarks, labels):
        if len(lnd) == 49:
            filtered_images.append(img)
            filtered_landmarks.append(lnd)
            filtered_labels.append(lbl)

    return (np.array(filtered_images), np.array(filtered_landmarks), np.array(filtered_labels))

In [11]:
X_train_images, X_train_landmarks, y_train = filter_data(X_train_images, X_train_landmarks, y_train)
X_test_images, X_test_landmarks, y_test = filter_data(X_test_images, X_test_landmarks, y_test)
X_val_images, X_val_landmarks, y_val = filter_data(X_val_images, X_val_landmarks, y_val)


assert X_train_images.shape[0] == X_train_landmarks.shape[0] == y_train.shape[0], "Mismatch in number of training samples"
assert X_test_images.shape[0] == X_test_landmarks.shape[0] == y_test.shape[0], "Mismatch in number of test samples"
assert X_val_images.shape[0] == X_val_landmarks.shape[0] == y_val.shape[0], "Mismatch in number of validation samples"



In [12]:
np.save(RUNS_DIR / 'X_train_images.npy', X_train_images)
np.save(RUNS_DIR / 'X_train_landmarks.npy', X_train_landmarks)
np.save(RUNS_DIR / 'y_train.npy', y_train)

In [13]:
np.save(RUNS_DIR / 'X_test_images.npy', X_test_images)
np.save(RUNS_DIR / 'X_test_landmarks.npy', X_test_landmarks)
np.save(RUNS_DIR / 'y_test.npy', y_test)

In [14]:
np.save(RUNS_DIR / 'X_val_images.npy', X_val_images)
np.save(RUNS_DIR / 'X_val_landmarks.npy', X_val_landmarks)
np.save(RUNS_DIR / 'y_val.npy', y_val)

MODEL:
-------------------

In [15]:
image_input = Input(shape=(256, 256, 3))
x = Conv2D(32, (3, 3), activation='relu')(image_input)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
img_output = Dense(1, activation='sigmoid')(x)

landmark_input = Input(shape=(21, 3))
y = Flatten()(landmark_input)
y = Dense(128, activation='relu')(y)
y = Dropout(0.5)(y)
lnd_output = Dense(63, activation='linear')(y)  # 21 landmarks * 3 coordinates = 63

combined = Concatenate()([x, y])
final_output = Dense(1, activation='sigmoid')(combined)

model = Model(inputs=[image_input, landmark_input], outputs=[final_output])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_3 (Conv2D)              (None, 254, 254, 32  896         ['input_3[0][0]']                
                                )                                                                 
                                                                                                  
 max_pooling2d_3 (MaxPooling2D)  (None, 127, 127, 32  0          ['conv2d_3[0][0]']               
                                )                                                           

In [39]:
history = model.fit(
    [X_train_images, X_train_landmarks], y_train,
    epochs=5,
    batch_size=32,
    validation_data=([X_val_images, X_val_landmarks], y_val)
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [40]:
preds = model.predict([X_test_images, X_test_landmarks])



In [54]:
predicted_class = (preds > 0.5).astype(int)

In [55]:
from sklearn.metrics import confusion_matrix, classification_report
c_matrix = confusion_matrix(y_test, predicted_class)
print(c_matrix)

[[6161  837]
 [ 952 6045]]


In [56]:
class_report = classification_report(y_test, predicted_class)
print(class_report)

              precision    recall  f1-score   support

           0       0.87      0.88      0.87      6998
           1       0.88      0.86      0.87      6997

    accuracy                           0.87     13995
   macro avg       0.87      0.87      0.87     13995
weighted avg       0.87      0.87      0.87     13995



In [44]:
model.save(str(WEIGHTS_PATH))

In [57]:
def load_images_and_landmarks_from_extra_test(selected_indices):
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, refine_landmarks=True)

    def process_folder(folder_path, label):
        images, landmarks, labels = [], [], []
        for root, _, files in os.walk(folder_path):
            for file in files:
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img_path = os.path.join(root, file)
                    image = cv2.imread(img_path)
                    if image is None:
                        continue
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    resized_image = cv2.resize(image, (256, 256))
                    results = face_mesh.process(resized_image)
                    if results.multi_face_landmarks:
                        fl = results.multi_face_landmarks[0]
                        landmark_coords = [(lm.x, lm.y, lm.z) for i, lm in enumerate(fl.landmark) if i in selected_indices]
                        images.append(resized_image)
                        landmarks.append(landmark_coords)
                        labels.append(label)
        return (
            np.asarray(images, dtype=np.uint8),
            np.asarray(landmarks, dtype=np.float32),
            np.asarray(labels, dtype=np.int64),
        )

    base_path = DATA_DIR / "extra_test"
    imgs, lnds, lbls = [], [], []
    for label in [0, 1]:
        folder = base_path / str(label)
        if not folder.exists():
            continue
        X, L, y = process_folder(str(folder), label)
        imgs.append(X); lnds.append(L); lbls.append(y)

    return np.concatenate(imgs, 0), np.concatenate(lnds, 0), np.concatenate(lbls, 0)

In [58]:
base_path = "dataset/extra_test"
X_images_extra, X_landmarks_extra, y_labels_extra = load_images_and_landmarks_from_extra_test(base_path, selected_indices)

In [59]:
X_images_extra, X_landmarks_extra, y_labels_extra = load_images_and_landmarks_from_extra_test(selected_indices)

In [97]:
preds = model.predict([X_images_extra, X_landmarks_extra])



In [107]:
predicted_class = (preds > 0.05).astype(int)

In [108]:
c_matrix = confusion_matrix(y_labels_extra, predicted_class)
print(c_matrix)

[[ 53 521]
 [ 69 631]]


In [109]:
class_report = classification_report(y_labels_extra, predicted_class)
print(class_report)

              precision    recall  f1-score   support

           0       0.43      0.09      0.15       574
           1       0.55      0.90      0.68       700

    accuracy                           0.54      1274
   macro avg       0.49      0.50      0.42      1274
weighted avg       0.50      0.54      0.44      1274

