In [7]:
import cv2
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input as preprocess_vgg
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input as preprocess_resnet
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [8]:
# Cap TensorFlow's allocation on the first GPU (memory_limit is given in MB)
# so the notebook does not grab the whole device.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        memory_cap = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)
        tf.config.experimental.set_virtual_device_configuration(gpus[0], [memory_cap])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Virtual devices have to be configured before the GPU is initialised;
        # report the problem and carry on with TensorFlow's default behaviour.
        print(err)

1 Physical GPUs, 1 Logical GPUs


In [9]:
# Constants
# Passed as the target size to cv2.resize in load_images_from_directory; it matches
# the (224, 224, 3) input shape used by the VGG16-based model later in the notebook.
IMG_SIZE = (224, 224)  # Target image size for feature extraction

In [10]:
# Cache for the ImageNet backbones so they are built (and their weights loaded)
# once per kernel instead of once per image.
_BACKBONE_CACHE = {}


def _get_backbones():
    """Return the cached (VGG16, ResNet50) average-pooled extractors, building them on first use."""
    if not _BACKBONE_CACHE:
        _BACKBONE_CACHE['vgg'] = VGG16(weights='imagenet', include_top=False, pooling='avg')
        _BACKBONE_CACHE['resnet'] = ResNet50(weights='imagenet', include_top=False, pooling='avg')
    return _BACKBONE_CACHE['vgg'], _BACKBONE_CACHE['resnet']


def load_images_from_directory(directory, label):
    """
    Load every image in a directory and compute per-image features.

    Files are visited in sorted order and selected by extension
    (.jpeg / .jpg / .png, case-insensitive). Files that OpenCV cannot decode
    are reported and skipped.

    Parameters:
    - directory (str): Path to the directory containing images.
    - label (int): Label for the samples (0 for real, 1 for fake).

    Returns:
    - data (np.array): Object array with one dict per image, holding:
        'img_array'       RGB image resized to IMG_SIZE, as returned by img_to_array
                          (raw pixel values, not normalised),
        'vgg_features'    flattened average-pooled VGG16 output,
        'resnet_features' flattened average-pooled ResNet50 output,
        'texture'         variance of the Laplacian of the grayscale image,
        'color_mean'      per-channel mean of 'img_array',
        'image_quality'   same value as 'texture' (kept for compatibility).
    - labels (np.array): Array with `label` repeated once per loaded image.
    """
    data = []
    labels = []
    vgg_model = resnet_model = None

    # os.listdir returns entries in arbitrary order; sort for reproducible sample order.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(('.jpeg', '.jpg', '.png')):
            continue

        img = cv2.imread(os.path.join(directory, filename))
        if img is None:
            print(f"Skipped unreadable image: {filename}")
            continue

        # Build the backbones lazily so a directory without images costs nothing.
        if vgg_model is None:
            vgg_model, resnet_model = _get_backbones()

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_resized = cv2.resize(img_rgb, IMG_SIZE)
        img_array = img_to_array(img_resized)

        # preprocess_input overwrites float NumPy inputs in place, so each network
        # gets its own copy; otherwise img_array (and color_mean) would be corrupted
        # and both networks would see doubly mean-subtracted pixels.
        img_vgg = preprocess_vgg(img_array.copy())
        img_resnet = preprocess_resnet(img_array.copy())

        # Variance of the Laplacian, computed once and reported under both keys.
        gray = cv2.cvtColor(img_resized, cv2.COLOR_RGB2GRAY)
        laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()

        data.append({
            'img_array': img_array,
            'vgg_features': vgg_model.predict(np.expand_dims(img_vgg, axis=0), verbose=0).flatten(),
            'resnet_features': resnet_model.predict(np.expand_dims(img_resnet, axis=0), verbose=0).flatten(),
            'texture': laplacian_var,
            'color_mean': np.mean(img_array, axis=(0, 1)),
            'image_quality': laplacian_var
        })
        labels.append(label)
        print(f"Processed image {len(data)}: {filename}")

    return np.array(data), np.array(labels)

In [11]:
# Load real and fake images
# Directories scanned by load_images_from_directory; paths are relative to the
# notebook's working directory.
REAL_DIR = "./DFFD/real"
FAKE_DIR = "./DFFD/fake"

In [12]:
# Each call returns (array of per-image feature dicts, array of labels).
x_real, y_real = load_images_from_directory(REAL_DIR, label=0)  # Label 0 for real
x_fake, y_fake = load_images_from_directory(FAKE_DIR, label=1)  # Label 1 for fake

Processed image 1: Celeb-DF-Real-150.mp4_027.jpeg
Processed image 2: Celeb-DF-Real-150.mp4_028.jpeg
Processed image 3: Celeb-DF-Real-150.mp4_029.jpeg
Processed image 4: Celeb-DF-Real-152.mp4_023.jpeg
Processed image 5: Celeb-DF-Real-152.mp4_024.jpeg
Processed image 6: Celeb-DF-Real-152.mp4_029.jpeg
Processed image 7: Celeb-DF-Real-152.mp4_030.jpeg
Processed image 8: Celeb-DF-Real-153.mp4_022.jpeg
Processed image 9: Celeb-DF-Real-153.mp4_028.jpeg
Processed image 10: Celeb-DF-Real-153.mp4_029.jpeg
Processed image 11: Celeb-DF-Real-153.mp4_030.jpeg
Processed image 12: Celeb-DF-Real-154.mp4_027.jpeg
Processed image 13: Celeb-DF-Real-154.mp4_028.jpeg
Processed image 14: Celeb-DF-Real-154.mp4_029.jpeg
Processed image 15: Celeb-DF-Real-154.mp4_030.jpeg
Processed image 16: Celeb-DF-Real-157.mp4_025.jpeg
Processed image 17: Celeb-DF-Real-157.mp4_026.jpeg
Processed image 18: Celeb-DF-Real-157.mp4_027.jpeg
Processed image 19: Celeb-DF-Real-158.mp4_024.jpeg
Processed image 20: Celeb-DF-Real-158.mp

In [13]:
def load_or_preprocess_data(save_dir='./save', real_dir="./DFFD/real", fake_dir="./DFFD/fake"):
    """
    Return the real/fake datasets, loading them from cache when available.

    If all four cache files (x_real.npy, y_real.npy, x_fake.npy, y_fake.npy)
    exist in `save_dir` they are loaded. Otherwise the images are processed
    with load_images_from_directory and the results are written to `save_dir`
    (created if missing) for the next run.

    Parameters:
    - save_dir (str): Directory holding the cached .npy files.
    - real_dir (str): Directory of real images (label 0), used on a cache miss.
    - fake_dir (str): Directory of fake images (label 1), used on a cache miss.

    Returns:
    - x_real, y_real, x_fake, y_fake (np.array): Samples and labels per class.
    """
    names = ('x_real', 'y_real', 'x_fake', 'y_fake')
    paths = {name: os.path.join(save_dir, f'{name}.npy') for name in names}

    if all(os.path.exists(path) for path in paths.values()):
        # The sample arrays are object arrays of dicts, which NumPy stores with pickle.
        # NOTE(security): allow_pickle=True can execute arbitrary code while loading;
        # only use cache files produced by this notebook.
        x_real, y_real, x_fake, y_fake = (
            np.load(paths[name], allow_pickle=True) for name in names
        )
        print("Preprocessed data loaded successfully.")
    else:
        x_real, y_real = load_images_from_directory(real_dir, label=0)  # Label 0 for real
        x_fake, y_fake = load_images_from_directory(fake_dir, label=1)  # Label 1 for fake

        # np.save does not create missing parent directories.
        os.makedirs(save_dir, exist_ok=True)
        for name, array in zip(names, (x_real, y_real, x_fake, y_fake)):
            np.save(paths[name], array)
        print("Images preprocessed and saved successfully.")

    return x_real, y_real, x_fake, y_fake

In [15]:
# Save preprocessed data
# np.save does not create missing parent directories, so make sure ./save exists first.
os.makedirs('./save', exist_ok=True)
np.save('./save/x_real.npy', x_real)
np.save('./save/y_real.npy', y_real)
np.save('./save/x_fake.npy', x_fake)
np.save('./save/y_fake.npy', y_fake)

print("Preprocessed data saved successfully.")

Preprocessed data saved successfully.


In [16]:
# Rebind the four arrays from the .npy files cached under ./save.
x_real, y_real, x_fake, y_fake = load_or_preprocess_data()

ValueError: Object arrays cannot be loaded when allow_pickle=False

In [None]:
# Stack both classes into one dataset: real samples first, fake samples after
# (np.concatenate joins along axis 0 by default).
x_data = np.concatenate((x_real, x_fake))
y_data = np.concatenate((y_real, y_fake))

In [None]:
# Model inputs are the stored image arrays; targets are the combined labels.
features_labels = y_data
features_data = [sample['img_array'] for sample in x_data]

In [None]:
# Split the data into training and validation sets (80/20). The split is
# stratified on the labels and uses a fixed random_state so it is repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    features_data,
    features_labels,
    test_size=0.2,
    random_state=42,
    stratify=features_labels,
)

In [None]:
# Data augmentation: random geometric transforms applied on the fly to training batches
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
# datagen.fit(...) is deliberately not called: it only computes statistics for
# featurewise_center, featurewise_std_normalization and zca_whitening, none of
# which are enabled here, so it would just copy the whole training set in memory.

In [None]:
# Define the feature extractor using VGG16
def create_feature_extractor():
    """
    Build a model that maps a 224x224x3 image to average-pooled VGG16 features.

    The ImageNet-pretrained VGG16 convolutional base (without its classifier
    head) is followed by a GlobalAveragePooling2D layer.

    Returns:
    - tf.keras.Model taking the VGG16 input and producing the pooled feature vector.
    """
    # NOTE(review): the backbone is not frozen here, so training a model built on
    # top of it also updates the VGG16 weights -- confirm that is intended.
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    pooled = GlobalAveragePooling2D()(backbone.output)
    return tf.keras.Model(inputs=backbone.input, outputs=pooled)

In [None]:
# Build the model
def create_model():
    """
    Assemble and compile the binary real/fake classifier.

    The stack is: image input -> VGG16 feature extractor -> Dense(64, relu)
    -> Dropout(0.5) -> Dense(1, sigmoid). It is compiled with the Adam
    optimizer, binary cross-entropy loss and an accuracy metric.

    Returns:
    - The compiled Sequential model.
    """
    feature_extractor = create_feature_extractor()

    classifier = Sequential([
        Input(shape=(224, 224, 3)),       # Input shape for single images
        feature_extractor,                # Feature extraction
        Dense(64, activation='relu'),     # Dense layer for intermediate features
        Dropout(0.5),                     # Dropout for regularization
        Dense(1, activation='sigmoid'),   # Output layer: binary classification
    ])
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

In [None]:
# Create and summarize the model
# create_model() returns the model already compiled; summary() prints its layers.
model = create_model()
model.summary()

In [None]:
# Learning rate scheduler: halve the learning rate once val_loss has stopped
# improving for 3 epochs, never going below 1e-6.
lr_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

In [None]:
# Train the model: augmented batches of 16 for training, the untouched
# validation split for monitoring val_loss.
train_batches = datagen.flow(np.array(x_train), np.array(y_train), batch_size=16)
validation_set = (np.array(x_val), np.array(y_val))
model.fit(
    train_batches,
    epochs=10,
    validation_data=validation_set,
    callbacks=[lr_reduction],
)

In [None]:
# Save the model
# Written to the current working directory; the .h5 suffix selects Keras' HDF5 format.
model.save('mine.h5')