In [3]:
!pip install facenet-pytorch -q


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [23]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import time
from facenet_pytorch import InceptionResnetV1
import cv2
import torch

# Initialize FaceNet model (pretrained)
face_embedding_model = InceptionResnetV1(pretrained='vggface2').eval()

BATCH_SIZE = 32
IMAGE_SIZE = (160, 160)  # Input size required by FaceNet

# Load the data from directories
train_dir = "D:/Deep Fake Detection/train"
test_dir = "D:/Deep Fake Detection/test"
validation_dir = "D:/Deep Fake Detection/Validation"

# Using TensorFlow's image_dataset_from_directory function to load data
train_data = tf.keras.preprocessing.image_dataset_from_directory(
    train_dir,
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE,
    label_mode='binary'  # Assuming binary classification
)

validation_data = tf.keras.preprocessing.image_dataset_from_directory(
    validation_dir,
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE,
    label_mode='binary'
)

test_data = tf.keras.preprocessing.image_dataset_from_directory(
    test_dir,
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE,
    label_mode='binary'
)

# Define class names (based on the folder structure)
class_names = train_data.class_names
print("Class names:", class_names)

# Data Preprocessing (Modify to resize images for FaceNet)
def preprocess_image(image, label):
    image = tf.image.resize(image, IMAGE_SIZE)  # Resize to 160x160 for FaceNet
    image = image / 255.0  # Normalize image
    return image, label

train_data = train_data.map(preprocess_image)
validation_data = validation_data.map(preprocess_image)
test_data = test_data.map(preprocess_image)

# Function to generate face embeddings
def get_face_embedding(image_batch):
    # Convert the whole batch from channels-last (TensorFlow) to channels-first (PyTorch)
    image_batch = tf.transpose(image_batch, perm=[0, 3, 1, 2])  # [batch_size, channels, height, width]

    embeddings = []
    for img in image_batch:
        img = torch.tensor(img.numpy())  # Convert to PyTorch tensor
        img = img.unsqueeze(0)  # img becomes [1, channels, height, width]

        embedding = face_embedding_model(img).detach().numpy()  # Get embedding
        embeddings.append(embedding)

    return np.array(embeddings)


Found 1840 files belonging to 2 classes.
Found 48 files belonging to 2 classes.
Found 151 files belonging to 2 classes.
Class names: ['fake', 'real']


In [24]:
# Test: Get embeddings for a batch of images
for image_batch, label_batch in train_data.take(1):
    embeddings = get_face_embedding(image_batch)
    print("Face embeddings shape:", embeddings.shape)  # Check embedding shape


Face embeddings shape: (32, 1, 512)


In [25]:
# Define the CNN Model (Modify as needed)
model = tf.keras.models.Sequential()

In [26]:
# Add face embeddings layer instead of raw image input
model.add(tf.keras.layers.InputLayer(input_shape=(512,)))  # FaceNet embedding size is 512

model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(32, activation='relu'))

model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # Binary classification output

model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

In [27]:
# Training Loop (Embedding extraction included in the loop)
start_time = time.time()

In [42]:
import numpy as np
import tensorflow as tf


history = None


start_time = time.time()

for image_batch, label_batch in train_data.take(1):

    image_embeddings = get_face_embedding(image_batch)


    print("Raw embedding shape:", tf.shape(image_embeddings).numpy())


    image_embeddings = tf.squeeze(image_embeddings)  # Use tf.squeeze instead of np.squeeze


    print("Squeezed embedding shape:", tf.shape(image_embeddings).numpy())  # Should be (batch_size, 512)


    label_batch = tf.squeeze(label_batch)  # Shape should become (batch_size,)


    print("Label batch shape:", tf.shape(label_batch).numpy())  # Should be (batch_size,)
    print("Label values:", label_batch.numpy())  # Check the label values to ensure they are 0 and 1 only


    try:
        history = model.fit(image_embeddings, label_batch, epochs=20, validation_data=validation_data)


        if history is None:
            print("Training did not return any history, something went wrong.")
        else:
            print("History object returned:", history.history)

    except Exception as e:
        print(f"An error occurred during training: {e}")

end_time = time.time()
print(f'Total time for training {(end_time-start_time):.3f} seconds')

Raw embedding shape: [ 32   1 512]
Squeezed embedding shape: [ 32 512]
Label batch shape: [32]
Label values: [1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0.
 1. 0. 1. 0. 1. 1. 1. 0.]
Epoch 1/20
An error occurred during training: Cannot take the length of shape with unknown rank.
Total time for training 2.833 seconds
