In [2]:
import torch
import torchvision

# Report the installed library versions and whether a CUDA device is usable.
print("PyTorch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)

has_cuda = torch.cuda.is_available()
print("CUDA available:", has_cuda)

# CUDA details are only queried when a device is actually present.
if has_cuda:
    print("CUDA version:", torch.version.cuda)
    print("Device name:", torch.cuda.get_device_name(0))

PyTorch version: 2.2.0+cu121
Torchvision version: 0.17.0+cu121
CUDA available: True
CUDA version: 12.1
Device name: NVIDIA GeForce RTX 3050 Laptop GPU


In [19]:
import os

def count_videos(folders=None, extensions=('.mp4', '.avi', '.mov')):
    """Print, and return, the number of video files in each dataset folder.

    Args:
        folders: Optional mapping of display label to directory path. When
            omitted, the three hard-coded dataset folders are used (two with
            real videos, one with fake videos).
        extensions: File extension(s) that identify a video. Matching is
            case-insensitive, so ``clip.MP4`` is counted as well.

    Returns:
        A dict mapping each label to its video count, or ``None`` when one of
        the folders is not an existing directory (an error line is printed
        for the first such folder and nothing is counted).
    """
    if folders is None:
        folders = {
            "Celeb-Real (Real)": r"D:\DL package\venv\DL Dataset\Celeb-real",      # Real videos
            "YouTube-Real (Real)": r"D:\DL package\venv\DL Dataset\YouTube-real",  # Real videos
            "Celeb-Fake (Fake)": r"D:\DL package\venv\DL Dataset\Celeb-synthesis",  # Fake videos
        }

    # Accept a single extension string as well as a tuple/list of them.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # Check every path up front so nothing is printed for a partial result.
    # isdir (rather than exists) also rejects a plain file, which would
    # otherwise blow up when the folder is listed.
    for folder in folders.values():
        if not os.path.isdir(folder):
            print(f"Error: Path not found → {folder}")
            return None

    def count_folder_videos(folder):
        """Count regular files in ``folder`` whose name ends with a video suffix."""
        with os.scandir(folder) as entries:
            return sum(
                1
                for entry in entries
                if entry.is_file() and entry.name.lower().endswith(suffixes)
            )

    # Count everything first, then report, so a listing error prints nothing.
    counts = {label: count_folder_videos(path) for label, path in folders.items()}
    for label, count in counts.items():
        print(f"{label}: {count}")

    return counts


if __name__ == '__main__':
    count_videos()


Error: Path not found → D:\DL package\venv\DL Dataset\Celeb-real


Data Loading

In [5]:
import os
import torch
import random
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# Locations of the pre-extracted frame files, one sub-folder per class.
DATASET_DIR = r'D:\DL package\venv\frames'
REAL_DIR, FAKE_DIR = (os.path.join(DATASET_DIR, sub) for sub in ('real', 'fake'))

# Tally the saved '.pt' frame files in each class folder.
num_real = sum(1 for name in os.listdir(REAL_DIR) if name.endswith('.pt'))
num_fake = sum(1 for name in os.listdir(FAKE_DIR) if name.endswith('.pt'))

print(f"Total Real Frames: {num_real}")
print(f"Total Fake Frames: {num_fake}")

class DeepFakeDataset(Dataset):
    """Dataset over files readable by ``torch.load``.

    Each loaded object is indexed with the keys ``'frame'`` and ``'label'``.

    Args:
        files: Sequence of file paths, one per sample.
        transform: Optional callable applied to the frame on every access.
        preload: If truthy, every file is loaded once in the constructor and
            kept in ``self.data``; otherwise only the paths are kept in
            ``self.files`` and each access reads the file again.
    """

    def __init__(self, files, transform=None, preload=True):
        self.transform = transform
        self.preload = preload

        if not preload:
            # Lazy mode: remember the paths and defer loading to __getitem__.
            self.files = files
            return

        # Eager mode: pay the full loading cost once, up front.
        loaded = []
        for path in files:
            loaded.append(torch.load(path))
        self.data = loaded

    def __len__(self):
        if self.preload:
            return len(self.data)
        return len(self.files)

    def __getitem__(self, idx):
        # Take the sample from memory, or read it from disk in lazy mode.
        sample = self.data[idx] if self.preload else torch.load(self.files[idx])

        frame = sample['frame']
        label = sample['label']

        # The transform touches only the frame; the label passes through.
        if self.transform:
            frame = self.transform(frame)

        return frame, label


# Define augmentation (for training only)
def add_gaussian_noise(x, std=0.02):
    """Return ``x`` plus zero-mean Gaussian noise with standard deviation ``std``.

    Defined as a named function rather than a lambda: a lambda cannot be
    pickled, which breaks any DataLoader whose worker processes are spawned
    and therefore need a pickled copy of the dataset and its transform.
    """
    return x + std * torch.randn_like(x)


# NOTE(review): the transforms operate on whatever DeepFakeDataset yields as
# 'frame' - presumably a float image tensor; confirm range/dtype, since the
# added noise is not clamped afterwards.
augmentation = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),  # Minor crop to preserve facial artifacts
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05),  # Subtle adjustments
    transforms.GaussianBlur(kernel_size=3),  # Mimic compression blur (applied to every sample, not randomly)
    transforms.RandomApply([transforms.Lambda(add_gaussian_noise)], p=0.3),  # Small Gaussian noise, 30% of samples
])

# Get all file paths. os.listdir returns entries in arbitrary order, so the
# lists are sorted to give the seeded shuffle below a stable starting point.
real_files = sorted(os.path.join(REAL_DIR, f) for f in os.listdir(REAL_DIR) if f.endswith('.pt'))
fake_files = sorted(os.path.join(FAKE_DIR, f) for f in os.listdir(FAKE_DIR) if f.endswith('.pt'))
all_files = real_files + fake_files

# Shuffle before splitting. A dedicated, fixed-seed generator keeps the
# train/val/test membership identical across kernel restarts without touching
# the global `random` state.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(all_files)

# NOTE(review): the split is per file. If several .pt files come from the same
# source video, frames of one video can land in different splits and inflate
# validation/test scores - confirm, and split by video if so.

# Split into train (80%), validation (10%), and test (10%)
train_size = int(0.8 * len(all_files))
val_size = int(0.1 * len(all_files))
test_size = len(all_files) - train_size - val_size  # remainder, so no file is dropped by rounding

train_files = all_files[:train_size]
val_files = all_files[train_size:train_size + val_size]
test_files = all_files[train_size + val_size:]

# Custom batching (collate) function for the DataLoaders

def collate_fn(batch):
    """
    Collate (frame, label) samples into a 5D frame batch and a label tensor.

    Args:
        batch: Iterable of (frame, label) pairs; the frames must be tensors
            that can be stacked together.

    Returns:
        A (frames, labels) tuple. ``frames`` is the stacked frames with a
        length-1 axis inserted at position 1 (batch, 1, *frame_shape);
        ``labels`` is ``torch.tensor`` applied to the tuple of labels.
    """
    # Transpose the list of pairs into one tuple of frames and one of labels.
    frame_seq, label_seq = zip(*batch)

    # Stack along a new leading batch axis, then add the seq_length=1 axis
    # right after it.
    stacked = torch.stack(frame_seq, dim=0)
    stacked = stacked.unsqueeze(dim=1)

    return stacked, torch.tensor(label_seq)


# Create datasets
# preload=True makes each dataset load every file once, here, and keep the
# results in memory. Augmentation is attached to the training set only.
train_dataset = DeepFakeDataset(train_files, transform=augmentation, preload=True)
val_dataset = DeepFakeDataset(val_files, transform=None, preload=True)
test_dataset = DeepFakeDataset(test_files, transform=None, preload=True)

# Create DataLoaders
batch_size = 32

# Load batches in the main process. The samples are already in memory, so
# worker processes have no disk I/O to hide. The dataset paths are Windows
# paths, and on Windows DataLoader workers are spawned: each worker would need
# a pickled copy of the whole preloaded dataset, and the dataset class,
# transform and collate_fn defined in this notebook are presumably not
# importable from a spawned worker.
NUM_WORKERS = 0

# Pinned (page-locked) host memory only benefits host-to-GPU copies.
PIN_MEMORY = torch.cuda.is_available()

# One shared settings dict keeps the three loaders consistent.
loader_kwargs = dict(
    batch_size=batch_size,
    collate_fn=collate_fn,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)  # reshuffled every epoch
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

print(f"Train: {len(train_dataset)}, Validation: {len(val_dataset)}, Test: {len(test_dataset)}")


Total Real Frames: 9864
Total Fake Frames: 9898
Train: 15809, Validation: 1976, Test: 1977
