In [21]:
import os
import random
import numpy as np
import cv2
import tensorflow as tf
from mtcnn.mtcnn import MTCNN

In [22]:
detector = MTCNN()

In [23]:
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD  = np.array([0.229, 0.224, 0.225], dtype=np.float32)

In [24]:
def face_crop(frame, size=224):
    try:
        h, w, _ = frame.shape
        faces = detector.detect_faces(frame)

        if faces:
            best = max(faces, key=lambda d: d.get('confidence', 0.0))
            x, y, fw, fh = best['box']
            x = max(0, x); y = max(0, y)
            x2 = min(w, x + fw); y2 = min(h, y + fh)
            face = frame[y:y2, x:x2]
        else:
            # fallback: center crop
            crop_side = int(min(h, w) * 0.6)
            cx, cy = w // 2, h // 2
            x = cx - crop_side // 2
            y = cy - crop_side // 2
            face = frame[y:y + crop_side, x:x + crop_side]

        face = cv2.resize(face, (size, size))
        face = face.astype(np.float32) / 255.0
        face = (face - IMAGENET_MEAN) / IMAGENET_STD
        return face
    except Exception as e:
        print("Error in face_crop:", e)
        return None

In [25]:
def data_generator(root_dir, batch_size=16, img_size=224, is_training=True):
    
    real_dir = os.path.join(root_dir, "original")
    fake_dirs = [
        os.path.join(root_dir, d)
        for d in ["Deepfakes", "Face2Face", "FaceSwap", "NeuralTextures"]
        if os.path.exists(os.path.join(root_dir, d))
    ]

    real_files = [os.path.join(real_dir, f) for f in os.listdir(real_dir) if f.lower().endswith((".jpg", ".png"))]
    fake_files = []
    
    for fd in fake_dirs:
        fake_files.extend([os.path.join(fd, f) for f in os.listdir(fd) if f.lower().endswith((".jpg", ".png"))])

    split_real = int(0.8 * len(real_files))
    split_fake = int(0.8 * len(fake_files))

    if is_training:
        real_list = real_files[:split_real]
        fake_list = fake_files[:split_fake]
    else:
        real_list = real_files[split_real:]
        fake_list = fake_files[split_fake:]

    print(f" Real: {len(real_list)}  |  Fake: {len(fake_list)}")

    while True:
        half = batch_size // 2

        real_batch = random.sample(real_list, half)
        fake_batch = random.sample(fake_list, half)

        batch_files = real_batch + fake_batch
        batch_labels = [0]*half + [1]*half

        combined = list(zip(batch_files, batch_labels))
        random.shuffle(combined)

        batch_x, batch_y = [], []
        for path, label in combined:
            img = cv2.imread(path)
            if img is None:
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            face = face_crop(img, size=img_size)
            if face is not None:
                batch_x.append(face)
                batch_y.append(label)

        batch_x = np.array(batch_x)
        batch_y = tf.keras.utils.to_categorical(batch_y, num_classes=2)

        yield batch_x, batch_y

In [26]:
import os

root_dir = "FF++"
for cls in ["original", "Deepfakes", "Face2Face", "FaceSwap", "NeuralTextures"]:
    path = os.path.join(root_dir, cls)
    if os.path.exists(path):
        print(cls, ":", len(os.listdir(path)), "files")
    else:
        print(cls, "folder not found ")

original : 15 files
Deepfakes : 14 files
Face2Face : 12 files
FaceSwap : 12 files
NeuralTextures : 14 files


In [27]:
train_gen = data_generator("FF++", batch_size=32, is_training=True)
val_gen   = data_generator("FF++", batch_size=32, is_training=False)

In [28]:
train_gen = data_generator("FF++", batch_size=32, is_training=True)
val_gen   = data_generator("FF++", batch_size=32, is_training=False)

X, y = next(train_gen)
print("Batch shape:", X.shape, y.shape)
print("Real/Fake ratio:", np.sum(y[:,1]==0), "/", np.sum(y[:,1]==1))


 Real: 12  |  Fake: 41


ValueError: Sample larger than population or is negative

In [None]:
steps_per_epoch = min(len(real_list), len(fake_list)) // (batch_size // 2)
validation_steps=steps_per_epoch // 4, 
    
