In [None]:
import cv2
import os

def extract_frames_from_videos(video_folder, output_folder, fps):
    """Sample frames from every video in a folder and save them as JPEG files.

    Each ``.mp4``/``.avi``/``.mov`` file directly inside ``video_folder``
    (extension matched case-insensitively) gets its own sub-folder under
    ``output_folder`` named after the video without its extension. Every
    N-th decoded frame is written there as ``frame_<index>.jpg``, where
    ``N = int(source_fps / fps)`` clamped to a minimum of 1 and ``<index>``
    is the zero-based position of the frame in the video.

    Args:
        video_folder: Directory containing the input video files.
        output_folder: Directory under which the per-video frame folders
            are created (created itself if missing).
        fps: Target number of frames to keep per second of video.

    Raises:
        ValueError: If ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")

    os.makedirs(output_folder, exist_ok=True)

    # Match extensions case-insensitively so files such as "CLIP.MP4" are not skipped.
    video_extensions = ('.mp4', '.avi', '.mov')
    video_files = sorted(
        f for f in os.listdir(video_folder)
        if f.lower().endswith(video_extensions)
    )

    for video_file in video_files:
        video_path = os.path.join(video_folder, video_file)
        video_output_folder = os.path.join(output_folder, os.path.splitext(video_file)[0])
        os.makedirs(video_output_folder, exist_ok=True)

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print(f"Warning: could not open {video_path}; skipping.")
                continue

            original_fps = cap.get(cv2.CAP_PROP_FPS)
            if original_fps > 0:
                # Clamp to 1: when the source frame rate is below the requested
                # rate the integer ratio is 0, which would make the modulo below
                # divide by zero. In that case every frame is kept.
                frame_interval = max(1, int(original_fps / fps))
            else:
                # The reported frame rate is unusable (zero, negative or NaN).
                print(f"Warning: unknown frame rate for {video_path}; keeping every frame.")
                frame_interval = 1

            count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                if count % frame_interval == 0:
                    frame_filename = os.path.join(video_output_folder, f"frame_{count}.jpg")
                    cv2.imwrite(frame_filename, frame)

                count += 1
        finally:
            # Always release the capture handle, even if reading or writing fails.
            cap.release()

        print(f"Frames extracted from {video_file} and saved to {video_output_folder}")

# Input locations: the dataset root holds one sub-folder of videos per class
dataset_folder = '/kaggle/input/ff-first100/FF++100each'
real_videos_folder, fake_videos_folder = (
    os.path.join(dataset_folder, class_name) for class_name in ('Real', 'Fake')
)

# Output locations: extracted frames are written per class under ./Frames
output_folder_real, output_folder_fake = './Frames/Real', './Frames/Fake'

# Sample 2 frames per second from every video, real videos first and then fake ones
for _videos_dir, _frames_dir in (
    (real_videos_folder, output_folder_real),
    (fake_videos_folder, output_folder_fake),
):
    extract_frames_from_videos(_videos_dir, _frames_dir, fps=2)

In [None]:
from mtcnn import MTCNN
import cv2
import os

def detect_and_crop_faces(input_folder, output_folder):
    """Detect faces in extracted frames with MTCNN and save each face crop.

    ``input_folder`` is scanned for sub-folders (one per video). Every file in
    a sub-folder is read with ``cv2.imread``; each detected face is cropped
    from the frame and written to the matching sub-folder under
    ``output_folder`` as ``<frame name>_face_<i>.jpg``.

    Bounding boxes are clipped to the image: a box that starts outside the
    frame or overhangs an edge is cropped to its visible part, and a box with
    no visible area is reported and skipped.

    Args:
        input_folder: Directory containing one sub-folder of frames per video.
        output_folder: Directory under which the per-video crop folders are
            created (created itself if missing).
    """
    os.makedirs(output_folder, exist_ok=True)

    detector = MTCNN()

    # Each sub-folder of the input folder holds the frames of one video.
    for video_folder in os.listdir(input_folder):
        video_folder_path = os.path.join(input_folder, video_folder)
        if not os.path.isdir(video_folder_path):
            continue

        video_output_folder = os.path.join(output_folder, video_folder)
        os.makedirs(video_output_folder, exist_ok=True)

        for frame_file in os.listdir(video_folder_path):
            frame_path = os.path.join(video_folder_path, frame_file)

            image = cv2.imread(frame_path)
            if image is None:
                print(f"Warning: {frame_path} could not be loaded.")
                continue
            image_height, image_width = image.shape[:2]

            # cv2.imread returns BGR while MTCNN expects RGB, so convert for
            # detection only; crops are taken from the BGR image because
            # cv2.imwrite expects BGR.
            faces = detector.detect_faces(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if not faces:
                print(f"No faces detected in {frame_path}.")
                continue

            frame_stem = os.path.splitext(frame_file)[0]
            for i, face in enumerate(faces):
                x, y, width, height = face['box']

                # Compute the far corner from the raw origin before clamping,
                # so a negative origin shrinks the crop instead of shifting it.
                left, top = max(0, x), max(0, y)
                right = min(image_width, x + width)
                bottom = min(image_height, y + height)

                # Nothing of the box lies inside the frame (or it has zero area).
                if right <= left or bottom <= top:
                    print(f"Invalid cropping coordinates for face {i} in {frame_path}.")
                    continue

                cropped_face = image[top:bottom, left:right]

                cropped_face_filename = os.path.join(video_output_folder, f"{frame_stem}_face_{i}.jpg")
                cv2.imwrite(cropped_face_filename, cropped_face)
                print(f"Saved cropped face to {cropped_face_filename}")

# Frame folders to read from (one sub-folder per video) for each class
input_folder_real = '/kaggle/input/first100vids/kaggle/working/Frames/Real'
input_folder_fake = '/kaggle/input/first100vids/kaggle/working/Frames/Fake'

# Destination folders for the cropped faces of each class
output_folder_real = './cropped_faces/Real'
output_folder_fake = './cropped_faces/Fake'

# Run face detection and cropping on the Real frames first, then on the Fake frames
for _frames_dir, _faces_dir in (
    (input_folder_real, output_folder_real),
    (input_folder_fake, output_folder_fake),
):
    detect_and_crop_faces(_frames_dir, _faces_dir)

In [None]:
import cv2
import os

def resize_cropped_faces(input_folder, output_folder, target_size):
    """Resize every face crop to ``target_size`` and mirror the folder layout.

    Each sub-folder of ``input_folder`` (one per video) is recreated under
    ``output_folder``; every readable image inside is resized with
    ``cv2.resize`` and saved under its original file name. Files that
    ``cv2.imread`` cannot load are reported and skipped; entries of
    ``input_folder`` that are not directories are ignored.

    Args:
        input_folder: Directory containing one sub-folder of face crops per video.
        output_folder: Directory that receives the resized images.
        target_size: Size passed to ``cv2.resize`` as its ``dsize`` argument.
    """
    def _ensure_dir(path):
        # Create the directory only when nothing exists at that path yet.
        if not os.path.exists(path):
            os.makedirs(path)

    _ensure_dir(output_folder)

    for entry in os.listdir(input_folder):
        source_dir = os.path.join(input_folder, entry)
        if not os.path.isdir(source_dir):
            continue

        destination_dir = os.path.join(output_folder, entry)
        _ensure_dir(destination_dir)

        for face_file in os.listdir(source_dir):
            face_path = os.path.join(source_dir, face_file)
            image = cv2.imread(face_path)

            if image is not None:
                resized_image = cv2.resize(image, target_size)
                resized_image_filename = os.path.join(destination_dir, face_file)
                cv2.imwrite(resized_image_filename, resized_image)
                print(f"Resized image saved to {resized_image_filename}")
            else:
                print(f"Warning: {face_path} could not be loaded.")

# Cropped-face folders to read from for each class
input_folder_real = '/kaggle/working/cropped_faces/Real'
input_folder_fake = '/kaggle/working/cropped_faces/Fake'

# Destination folders for the resized faces of each class
output_folder_real = './resized_faces/Real'
output_folder_fake = './resized_faces/Fake'

# Bring every crop to 224x224, Real first and then Fake
for _crops_dir, _resized_dir in (
    (input_folder_real, output_folder_real),
    (input_folder_fake, output_folder_fake),
):
    resize_cropped_faces(_crops_dir, _resized_dir, target_size=(224, 224))

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
import os
import numpy as np

# Helper that collects image file paths and their class labels from a folder
def load_images_from_folder(folder, target_size=(224, 224)):
    """Collect file paths and binary labels from ``folder/real`` and ``folder/fake``.

    Entries of ``real`` are listed first (label 0), followed by entries of
    ``fake`` (label 1). No image is opened here; only paths are gathered.

    Args:
        folder: Directory containing the ``real`` and ``fake`` sub-folders.
        target_size: Accepted for interface compatibility; not used by this function.

    Returns:
        A tuple ``(image_paths, labels)`` where ``image_paths`` is a list of
        path strings and ``labels`` is a NumPy array aligned with it.
    """
    class_ids = {'real': 0, 'fake': 1}  # Real -> 0, Fake -> 1
    image_paths, labels = [], []

    for class_name, class_id in class_ids.items():
        class_dir = os.path.join(folder, class_name)
        file_names = os.listdir(class_dir)
        image_paths.extend(os.path.join(class_dir, name) for name in file_names)
        labels.extend([class_id] * len(file_names))

    return image_paths, np.array(labels)

# Locations of the training and validation splits
train_folder = '/kaggle/input/preprocessed-faces/data/train'  # Replace with your train folder path
val_folder = '/kaggle/input/preprocessed-faces/data/val'      # Replace with your validation folder path

# Gather (paths, labels) for the training split first, then for the validation split
(train_image_paths, train_labels), (val_image_paths, val_labels) = (
    load_images_from_folder(split_folder) for split_folder in (train_folder, val_folder)
)

# Helper that turns an image file into a normalized array
def preprocess_image(image_path, target_size=(224, 224)):
    """Load one image and return it as an array scaled by 1/255.

    Args:
        image_path: Path of the image file, passed to ``load_img``.
        target_size: Size forwarded to ``load_img`` as ``target_size``.

    Returns:
        The result of ``img_to_array`` on the loaded image, divided by 255.0.
    """
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    return pixels / 255.0  # Normalize to [0, 1]

# Helper that applies one random augmentation (used for the training set)
def augment_image(image):
    """Return a randomly transformed copy of ``image``.

    A Keras ``ImageDataGenerator`` is configured with rotation, shift, shear,
    zoom and horizontal-flip settings, and its ``random_transform`` result
    for ``image`` is returned.

    Args:
        image: Image array handed to ``ImageDataGenerator.random_transform``.
    """
    augmentation_options = dict(
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
    )
    augmenter = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)
    return augmenter.random_transform(image)

# Custom data generator for batch processing
def custom_data_generator(image_paths, labels, batch_size, is_training=True):
    """Yield ``(images, labels)`` batches forever.

    Each pass walks over all samples in steps of ``batch_size``; the final
    batch of a pass may be smaller. Images are loaded with
    ``preprocess_image`` and, when ``is_training`` is true, the sample order
    is reshuffled on every pass and each image goes through ``augment_image``.

    Args:
        image_paths: Sequence of image file paths.
        labels: Labels aligned with ``image_paths`` (any array-like).
        batch_size: Maximum number of samples per batch; must be positive.
        is_training: Enables shuffling and augmentation.

    Yields:
        Tuple ``(batch_images, batch_labels)`` of NumPy arrays.

    Raises:
        ValueError: On the first ``next()`` if ``image_paths`` is empty, if
            ``labels`` has a different length, or if ``batch_size`` is not
            positive. Without these checks the endless loop below would spin
            forever without ever yielding.
    """
    num_samples = len(image_paths)
    if num_samples == 0:
        raise ValueError("image_paths is empty; there is nothing to generate.")
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size!r}")

    # Accept plain lists as well: the batch lookup below relies on array indexing.
    labels = np.asarray(labels)
    if len(labels) != num_samples:
        raise ValueError(
            f"image_paths and labels differ in length: {num_samples} vs {len(labels)}"
        )

    while True:
        indices = np.arange(num_samples)
        if is_training:
            np.random.shuffle(indices)  # Shuffle for training set

        for start in range(0, num_samples, batch_size):
            # Slicing past the end simply yields the shorter final batch.
            batch_indices = indices[start:start + batch_size]
            batch_images = []

            for i in batch_indices:
                img_array = preprocess_image(image_paths[i])

                if is_training:
                    img_array = augment_image(img_array)

                batch_images.append(img_array)

            yield np.array(batch_images), labels[batch_indices]

# Parameters
batch_size = 32

# Steps per pass use ceiling division because custom_data_generator also yields
# the shorter final batch. With floor division that batch would spill over into
# the next epoch, so each validation run would cover a different subset of the
# data, and a dataset smaller than one batch would give zero steps.
train_steps = (len(train_image_paths) + batch_size - 1) // batch_size
val_steps = (len(val_image_paths) + batch_size - 1) // batch_size

# Create data generators (shuffling and augmentation only for the training set)
train_generator = custom_data_generator(train_image_paths, train_labels, batch_size, is_training=True)
val_generator = custom_data_generator(val_image_paths, val_labels, batch_size, is_training=False)

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the base ResNet50 model pre-trained on ImageNet.
# include_top=False leaves out the original classification head so a custom
# binary head can be attached below.
# NOTE(review): the data cell of this notebook scales pixels by 1/255, whereas
# Keras documents resnet.preprocess_input as the preprocessing expected by
# these ImageNet weights - confirm the mismatch is intended.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze all layers in the base model initially so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers on top of ResNet: global pooling, two dense layers with
# dropout, and a single sigmoid unit for the binary (real vs. fake) output
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid')(x)

# Create the full model
model = Model(inputs=base_model.input, outputs=predictions)

# Optional fine-tuning step, currently disabled: uncomment to unfreeze the
# last few layers of the ResNet model (must happen before compile to take effect)
# for layer in base_model.layers[-10:]:  # Unfreeze last 10 layers for training
#     layer.trainable = True

# Compile the model: binary cross-entropy matches the single sigmoid output
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Callbacks available for the training run
# NOTE(review): early_stopping is created here but is not part of the callback
# list given to model.fit() below, so it currently has no effect on training.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6,
)
# Keep the weights of the epoch with the highest validation accuracy
checkpoint = ModelCheckpoint(
    'best_resnet.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Train on the generator-backed datasets
active_callbacks = [checkpoint, reduce_lr]
history = model.fit(
    train_generator,
    epochs=30,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
    callbacks=active_callbacks,
)

# Also store the model as it is after the final epoch
model.save('last_resnet.keras')

In [None]:
import matplotlib.pyplot as plt

def print_and_plot_metrics(history):
    """Print the final value of every logged metric and plot the learning curves.

    The last entry of each series in ``history.history`` is printed, then a
    figure with two side-by-side panels is shown: training/validation accuracy
    on the left and training/validation loss on the right.

    Args:
        history: Object exposing a ``history`` dict of per-epoch metric lists;
            the keys ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``
            are read for plotting.
    """
    metrics = history.history
    epochs = range(1, len(metrics['accuracy']) + 1)

    # Final value of each recorded metric
    print("Training and Validation Metrics:")
    for name, values in metrics.items():
        print(f"{name}: {values[-1]}")

    # One panel per metric: (key in the history dict, label used in the figure)
    panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))

    plt.figure(figsize=(12, 6))
    for position, (key, pretty_name) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs, metrics[key], label=f'Training {pretty_name}')
        plt.plot(epochs, metrics[f'val_{key}'], label=f'Validation {pretty_name}')
        plt.title(f'{pretty_name} over Epochs')
        plt.xlabel('Epochs')
        plt.ylabel(pretty_name)
        plt.legend()

    plt.show()

# Print and plot the metrics of the training run; `history` is the object
# returned by model.fit() in the training cell, so that cell must run first
print_and_plot_metrics(history)