In [2]:
# List extracted files
import os
for root, dirs, files in os.walk(extraction_path):
    print(f"Root: {root}, Dirs: {dirs}, Files: {len(files)}")


Root: ./celeb-df-v2/, Dirs: ['Celeb-real', 'Celeb-synthesis', 'YouTube-real'], Files: 1
Root: ./celeb-df-v2/Celeb-real, Dirs: [], Files: 590
Root: ./celeb-df-v2/Celeb-synthesis, Dirs: [], Files: 5639
Root: ./celeb-df-v2/YouTube-real, Dirs: [], Files: 300


In [4]:
import shutil

# Paths
dataset_path = "./celeb-df-v2/"
real_path = os.path.join(dataset_path, "real")
fake_path = os.path.join(dataset_path, "fake")
os.makedirs(real_path, exist_ok=True)
os.makedirs(fake_path, exist_ok=True)

# Move real videos
for subfolder in ["Celeb-real", "YouTube-real"]:
    subfolder_path = os.path.join(dataset_path, subfolder)
    for file in os.listdir(subfolder_path):
        shutil.move(os.path.join(subfolder_path, file), os.path.join(real_path, file))

# Move fake videos
for file in os.listdir(os.path.join(dataset_path, "Celeb-synthesis")):
    shutil.move(os.path.join(dataset_path, "Celeb-synthesis", file), os.path.join(fake_path, file))

print("Dataset reorganized successfully!")


Dataset reorganized successfully!


In [6]:
# Verify counts
num_real = len(os.listdir(real_path))
num_fake = len(os.listdir(fake_path))

print(f"Number of real videos: {num_real}")
print(f"Number of fake videos: {num_fake}")


Number of real videos: 890
Number of fake videos: 5639


In [8]:
import cv2

def extract_frames(video_path, output_folder, num_frames=10):
    """
    Extract a specific number of frames from a video file.
    """
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    interval = frame_count // num_frames
    
    os.makedirs(output_folder, exist_ok=True)
    
    for i in range(num_frames):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i * interval)
        ret, frame = cap.read()
        if ret:
            frame_path = os.path.join(output_folder, f"frame_{i}.jpg")
            cv2.imwrite(frame_path, frame)
    cap.release()

# Example: Extract frames from one video
extract_frames("./celeb-df-v2/real/video1.mp4", "./frames/real/video1/", num_frames=10)


In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Build the model
def create_model(input_shape=(224, 224, 3)):
    base_model = tf.keras.applications.EfficientNetB0(include_top=False, input_shape=input_shape, weights="imagenet")
    base_model.trainable = False  # Freeze EfficientNet layers
    
    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(1, activation='sigmoid')  # Binary classification: Real (0) or Fake (1)
    ])
    return model

model = create_model()
model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [20]:
import os
print("Train path exists:", os.path.exists(train_path))
print("Validation path exists:", os.path.exists(val_path))
if os.path.exists(train_path):
    print("Train contents:", os.listdir(train_path))
if os.path.exists(val_path):
    print("Validation contents:", os.listdir(val_path))
import os
print("Train path exists:", os.path.exists(train_path))
print("Validation path exists:", os.path.exists(val_path))
if os.path.exists(train_path):
    print("Train contents:", os.listdir(train_path))
if os.path.exists(val_path):
    print("Validation contents:", os.listdir(val_path))



Train path exists: False
Validation path exists: False
Train path exists: False
Validation path exists: False


In [24]:
import os

# Directories to check
train_real_dir = os.path.join(train_path, "real")
train_fake_dir = os.path.join(train_path, "fake")
val_real_dir = os.path.join(val_path, "real")
val_fake_dir = os.path.join(val_path, "fake")

# Check if directories exist
print("Train Real Directory Exists:", os.path.exists(train_real_dir))
print("Train Fake Directory Exists:", os.path.exists(train_fake_dir))
print("Validation Real Directory Exists:", os.path.exists(val_real_dir))
print("Validation Fake Directory Exists:", os.path.exists(val_fake_dir))




Train Real Directory Exists: False
Train Fake Directory Exists: False
Validation Real Directory Exists: False
Validation Fake Directory Exists: False


In [32]:
import cv2
import os

# Function to extract frames from videos
def extract_frames(video_path, output_dir, label):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    video_capture = cv2.VideoCapture(video_path)
    success, frame = video_capture.read()
    count = 0
    while success:
        # Save frame as image file
        frame_path = os.path.join(output_dir, f"{label}_frame{count}.jpg")
        cv2.imwrite(frame_path, frame)
        success, frame = video_capture.read()
        count += 1
    video_capture.release()

# Paths to video directories
video_dirs = {
    "real": "./celeb-df-v2/Celeb-real",
    "fake": "./celeb-df-v2/Celeb-synthesis",
}

# Output directories
output_dirs = {
    "train_real": "./frames/train/real",
    "train_fake": "./frames/train/fake",
    "val_real": "./frames/val/real",
    "val_fake": "./frames/val/fake",
}

# Split videos into train and validation sets
for label, video_dir in video_dirs.items():
    videos = os.listdir(video_dir)
    split_index = int(len(videos) * 0.8)  # 80% train, 20% validation
    train_videos = videos[:split_index]
    val_videos = videos[split_index:]
    
    for video in train_videos:
        extract_frames(os.path.join(video_dir, video), output_dirs[f"train_{label}"], label)
    for video in val_videos:
        extract_frames(os.path.join(video_dir, video), output_dirs[f"val_{label}"], label)


In [34]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data generators
train_datagen = ImageDataGenerator(rescale=1.0/255.0, validation_split=0.2)
train_generator = train_datagen.flow_from_directory(
    train_path, 
    target_size=(224, 224), 
    batch_size=32, 
    class_mode='binary', 
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_path, 
    target_size=(224, 224), 
    batch_size=32, 
    class_mode='binary', 
    subset='validation'
)

# Print details
print("Classes (train):", train_generator.class_indices)
print("Number of samples in training set:", train_generator.samples)
print("Number of samples in validation set:", val_generator.samples)


FileNotFoundError: [WinError 3] The system cannot find the path specified: './frames/train'