In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split
import os
import time
import warnings
from tqdm import tqdm

# --- 1. Configuration ---
# --- PLEASE UPDATE THIS PATH ---
# Point this to your new dataset folder that contains the 'real' and 'fake' subfolders.
DATA_DIR = "/Users/visheshbishnoi/Desktop/pd"  
# -----------------------------

IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32
INITIAL_EPOCHS = 10
FINE_TUNE_EPOCHS = 5
TOTAL_EPOCHS = INITIAL_EPOCHS + FINE_TUNE_EPOCHS
VALIDATION_SPLIT = 0.2 # Use 20% of the data for validation
NUM_WORKERS = 2 # Number of parallel workers for data loading

# --- 2. Device Setup (Auto-detect M2 GPU) ---
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple M2 GPU (mps).")
else:
    device = torch.device("cpu")
    print("MPS not available. Using CPU.")

# --- 3. Data Transforms ---
# PyTorch models pre-trained on ImageNet use these specific normalization values
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(IMG_HEIGHT, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# --- 4. Model Creation Functions ---
def create_resnet50_model():
    """Creates a ResNet50 model for transfer learning."""
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
    
    # Freeze all base model parameters
    for param in model.parameters():
        param.requires_grad = False
        
    # Replace the final classifier (fc) layer
    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_ftrs, 512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, 1) # Output is 1 logit
    )
    return model

def create_efficientnet_model():
    """Creates an EfficientNet_B0 model for transfer learning."""
    # Note: torchvision has EfficientNet_B0, not V2 B0. 
    # This is the closest equivalent.
    model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
    
    # Freeze all base model parameters
    for param in model.parameters():
        param.requires_grad = False
        
    # Replace the final classifier layer
    num_ftrs = model.classifier[1].in_features
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(num_ftrs, 512),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(512, 1) # Output is 1 logit
    )
    return model

# --- 5. Helper Functions for Training Loop ---
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Runs a single training epoch."""
    model.train() # Set model to training mode
    running_loss = 0.0
    running_corrects = 0
    
    # Use tqdm for a progress bar
    pbar = tqdm(loader, desc="Training", leave=False)
    for inputs, labels in pbar:
        inputs = inputs.to(device)
        labels = labels.to(device).float().view(-1, 1)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        with torch.set_grad_enabled(True):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            # Predict (apply sigmoid to logits, then threshold at 0.5)
            preds = torch.sigmoid(outputs) > 0.5
            
            # Backward pass + optimize
            loss.backward()
            optimizer.step()
            
        # Statistics
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
        
        pbar.set_postfix(loss=loss.item())

    epoch_loss = running_loss / len(loader.dataset.indices)
    epoch_acc = running_corrects.float() / len(loader.dataset.indices)
    
    return epoch_loss, epoch_acc.item()

def validate_one_epoch(model, loader, criterion, device):
    """Runs a single validation epoch."""
    model.eval() # Set model to evaluate mode
    running_loss = 0.0
    running_corrects = 0
    
    # Use tqdm for a progress bar
    pbar = tqdm(loader, desc="Validating", leave=False)
    # No gradients needed for validation
    with torch.no_grad():
        for inputs, labels in pbar:
            inputs = inputs.to(device)
            labels = labels.to(device).float().view(-1, 1)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            preds = torch.sigmoid(outputs) > 0.5
            
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(loader.dataset.indices)
    epoch_acc = running_corrects.float() / len(loader.dataset.indices)
    
    return epoch_loss, epoch_acc.item()

# --- 6. Main Training Function ---
def train_model_pytorch(model_name, model_creator_func):
    """
    Trains and saves a single PyTorch model using a two-phase approach.
    """
    print(f"\n--- Training Model: {model_name} ---")
    
    # --- Data Loading ---
    print(f"\n[{model_name}] Stage 1/6: Loading and splitting data...")
    start_time = time.time()
    
    # Load the full dataset
    full_dataset = datasets.ImageFolder(DATA_DIR)
    
    # Split the dataset
    total_size = len(full_dataset)
    val_size = int(total_size * VALIDATION_SPLIT)
    train_size = total_size - val_size
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
    
    # Assign the correct transforms to each split
    train_dataset.dataset.transform = data_transforms['train']
    val_dataset.dataset.transform = data_transforms['val']
    
    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
    
    print(f"Data loaded in {time.time() - start_time:.2f}s")
    print(f"Found {len(train_dataset)} training images and {len(val_dataset)} validation images.")
    print(f"Class indices: {full_dataset.class_to_idx}")

    # --- Model Creation ---
    print(f"\n[{model_name}] Stage 2/6: Creating model architecture...")
    start_time = time.time()
    model = model_creator_func()
    model = model.to(device) # Move model to M2 GPU
    print(f"[{model_name}] Model created in {time.time() - start_time:.2f}s")

    # --- Phase 1: Training the Classifier Head ---
    print(f"\n[{model_name}] Stage 3/6: Compiling and starting Phase 1 Training (Classifier Head)...")
    
    # Use BCEWithLogitsLoss for numerical stability (takes raw logits)
    criterion = nn.BCEWithLogitsLoss()
    
    # Optimize only the parameters of the new classifier head
    if model_name == "ResNet50":
        optimizer = optim.Adam(model.fc.parameters(), lr=0.0001)
    else: # EfficientNet
        optimizer = optim.Adam(model.classifier.parameters(), lr=0.0001)
    
    start_time = time.time()
    for epoch in range(INITIAL_EPOCHS):
        print(f"\nEpoch {epoch+1}/{TOTAL_EPOCHS}")
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate_one_epoch(model, val_loader, criterion, device)
        print(f"Epoch {epoch+1} Results: Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")
    
    print(f"[{model_name}] Phase 1 Training complete in {(time.time() - start_time)/60:.2f} minutes.")

    # --- Phase 2: Fine-Tuning ---
    print(f"\n[{model_name}] Stage 4/6: Preparing model for Fine-Tuning...")
    
    # Unfreeze all model parameters
    for param in model.parameters():
        param.requires_grad = True

    # Re-compile with a very low learning rate for fine-tuning
    print(f"\n[{model_name}] Stage 5/6: Re-compiling model for Fine-Tuning...")
    start_time = time.time()
    # Create a new optimizer for ALL parameters with a low LR
    optimizer = optim.Adam(model.parameters(), lr=0.00001)
    print(f"[{model_name}] Model re-compiled in {time.time() - start_time:.2f}s")
    
    print(f"\n[{model_name}] Stage 6/6: Starting Phase 2 Training (Fine-Tuning)...")
    start_time = time.time()
    # Continue training
    for epoch in range(INITIAL_EPOCHS, TOTAL_EPOCHS):
        print(f"\nEpoch {epoch+1}/{TOTAL_EPOCHS}")
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate_one_epoch(model, val_loader, criterion, device)
        print(f"Epoch {epoch+1} Results: Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")
    
    print(f"[{model_name}] Phase 2 Training complete in {(time.time() - start_time)/60:.2f} minutes.")
    
    # --- Save Model ---
    model_filename = f'deepfake_detector_{model_name}.pth' # .pth is the standard PyTorch extension
    print(f"\n[{model_name}] Saving final model to {model_filename}...")
    start_time = time.time()
    # Save the model's learned parameters (state dictionary)
    torch.save(model.state_dict(), model_filename)
    print(f"[{model_name}] Model saved in {time.time() - start_time:.2f}s")
    
    print(f"\n--- {model_name} Training Complete ---")

# --- 7. Main Execution Block ---
if __name__ == '__main__':
    # Check if data directory exists
    if not os.path.exists(DATA_DIR) or DATA_DIR == "./path_to_your_new_dataset_folder":
        print(f"Error: Data directory not found at '{DATA_DIR}'")
        print("Please update the 'DATA_DIR' variable at the top of the script.")
    else:
        # Train models sequentially
        
        # 1. Train ResNet50
        model_resnet = create_resnet50_model()
        train_model_pytorch("ResNet50", lambda: model_resnet)
        
        # 2. Train EfficientNet_B0
        model_efficientnet = create_efficientnet_model()
        train_model_pytorch("EfficientNet_B0", lambda: model_efficientnet)
        
        print("\n--- All Model Training Complete ---")

Using Apple M2 GPU (mps).

--- Training Model: ResNet50 ---

[ResNet50] Stage 1/6: Loading and splitting data...
Data loaded in 0.23s
Found 55515 training images and 13878 validation images.
Class indices: {'fake': 0, 'real': 1}

[ResNet50] Stage 2/6: Creating model architecture...
[ResNet50] Model created in 0.08s

[ResNet50] Stage 3/6: Compiling and starting Phase 1 Training (Classifier Head)...

Epoch 1/15


                                                                                

Epoch 1 Results: Train Loss: 0.5451 Acc: 0.7220 | Val Loss: 0.4750 Acc: 0.7718

Epoch 2/15


                                                                                

Epoch 2 Results: Train Loss: 0.4698 Acc: 0.7757 | Val Loss: 0.4501 Acc: 0.7836

Epoch 3/15


                                                                                

Epoch 3 Results: Train Loss: 0.4448 Acc: 0.7907 | Val Loss: 0.4275 Acc: 0.8026

Epoch 4/15


                                                                                

Epoch 4 Results: Train Loss: 0.4243 Acc: 0.8006 | Val Loss: 0.4139 Acc: 0.8077

Epoch 5/15


                                                                                

Epoch 5 Results: Train Loss: 0.4096 Acc: 0.8110 | Val Loss: 0.3993 Acc: 0.8179

Epoch 6/15


                                                                                

Epoch 6 Results: Train Loss: 0.3925 Acc: 0.8197 | Val Loss: 0.3908 Acc: 0.8214

Epoch 7/15


                                                                                

Epoch 7 Results: Train Loss: 0.3788 Acc: 0.8284 | Val Loss: 0.3781 Acc: 0.8307

Epoch 8/15


                                                                                

Epoch 8 Results: Train Loss: 0.3618 Acc: 0.8388 | Val Loss: 0.3739 Acc: 0.8315

Epoch 9/15


                                                                                

Epoch 9 Results: Train Loss: 0.3482 Acc: 0.8446 | Val Loss: 0.3636 Acc: 0.8375

Epoch 10/15


                                                                                

Epoch 10 Results: Train Loss: 0.3363 Acc: 0.8503 | Val Loss: 0.3540 Acc: 0.8422
[ResNet50] Phase 1 Training complete in 115.77 minutes.

[ResNet50] Stage 4/6: Preparing model for Fine-Tuning...

[ResNet50] Stage 5/6: Re-compiling model for Fine-Tuning...
[ResNet50] Model re-compiled in 0.00s

[ResNet50] Stage 6/6: Starting Phase 2 Training (Fine-Tuning)...

Epoch 11/15


Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(31233) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(31234) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 11 Results: Train Loss: 0.1838 Acc: 0.9244 | Val Loss: 0.1004 Acc: 0.9604

Epoch 12/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(31289) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(31290) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(31861) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(31863) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 12 Results: Train Loss: 0.0593 Acc: 0.9785 | Val Loss: 0.0652 Acc: 0.9746

Epoch 13/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(31903) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(31905) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(32425) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(32426) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 13 Results: Train Loss: 0.0268 Acc: 0.9904 | Val Loss: 0.0548 Acc: 0.9780

Epoch 14/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(32466) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(32467) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(33036) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(33038) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 14 Results: Train Loss: 0.0183 Acc: 0.9930 | Val Loss: 0.0517 Acc: 0.9813

Epoch 15/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(33082) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(33084) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(33606) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(33607) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 15 Results: Train Loss: 0.0144 Acc: 0.9943 | Val Loss: 0.0428 Acc: 0.9839
[ResNet50] Phase 2 Training complete in 141.55 minutes.

[ResNet50] Saving final model to deepfake_detector_ResNet50.pth...
[ResNet50] Model saved in 0.41s

--- ResNet50 Training Complete ---
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /Users/visheshbishnoi/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


100%|██████████████████████████████████████| 20.5M/20.5M [00:00<00:00, 23.1MB/s]



--- Training Model: EfficientNet_B0 ---

[EfficientNet_B0] Stage 1/6: Loading and splitting data...
Data loaded in 0.26s
Found 55515 training images and 13878 validation images.
Class indices: {'fake': 0, 'real': 1}

[EfficientNet_B0] Stage 2/6: Creating model architecture...
[EfficientNet_B0] Model created in 0.31s

[EfficientNet_B0] Stage 3/6: Compiling and starting Phase 1 Training (Classifier Head)...

Epoch 1/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(33646) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(33648) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(33741) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(33743) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 1 Results: Train Loss: 0.5288 Acc: 0.7361 | Val Loss: 0.4460 Acc: 0.7946

Epoch 2/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(33768) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(33769) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(33909) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(33912) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 2 Results: Train Loss: 0.4761 Acc: 0.7716 | Val Loss: 0.4157 Acc: 0.8099

Epoch 3/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(33933) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(33935) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(34117) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(34119) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 3 Results: Train Loss: 0.4506 Acc: 0.7881 | Val Loss: 0.3870 Acc: 0.8273

Epoch 4/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(34170) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(34177) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(34372) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(34374) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 4 Results: Train Loss: 0.4285 Acc: 0.8009 | Val Loss: 0.3719 Acc: 0.8323

Epoch 5/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(34399) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(34401) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(34580) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(34581) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 5 Results: Train Loss: 0.4072 Acc: 0.8106 | Val Loss: 0.3402 Acc: 0.8553

Epoch 6/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(34636) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(34638) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(34819) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(34821) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 6 Results: Train Loss: 0.3882 Acc: 0.8230 | Val Loss: 0.3253 Acc: 0.8571

Epoch 7/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(34848) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(34849) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(35097) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(35099) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 7 Results: Train Loss: 0.3717 Acc: 0.8309 | Val Loss: 0.3136 Acc: 0.8639

Epoch 8/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(35172) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(35173) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(35350) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(35357) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 8 Results: Train Loss: 0.3619 Acc: 0.8372 | Val Loss: 0.2944 Acc: 0.8756

Epoch 9/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(35434) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(35436) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(35638) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(35649) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 9 Results: Train Loss: 0.3469 Acc: 0.8470 | Val Loss: 0.2896 Acc: 0.8755

Epoch 10/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(35681) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(35682) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(35889) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(35890) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 10 Results: Train Loss: 0.3368 Acc: 0.8507 | Val Loss: 0.2755 Acc: 0.8853
[EfficientNet_B0] Phase 1 Training complete in 54.49 minutes.

[EfficientNet_B0] Stage 4/6: Preparing model for Fine-Tuning...

[EfficientNet_B0] Stage 5/6: Re-compiling model for Fine-Tuning...
[EfficientNet_B0] Model re-compiled in 0.00s

[EfficientNet_B0] Stage 6/6: Starting Phase 2 Training (Fine-Tuning)...

Epoch 11/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(35936) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(35937) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(38026) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(38028) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 11 Results: Train Loss: 0.2269 Acc: 0.9050 | Val Loss: 0.1211 Acc: 0.9554

Epoch 12/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(38050) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(38051) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(41510) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(41512) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 12 Results: Train Loss: 0.1331 Acc: 0.9481 | Val Loss: 0.0751 Acc: 0.9725

Epoch 13/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(41535) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(41544) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(44690) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(44692) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 13 Results: Train Loss: 0.0901 Acc: 0.9648 | Val Loss: 0.0569 Acc: 0.9785

Epoch 14/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(44742) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(44743) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(48329) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(48331) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 14 Results: Train Loss: 0.0686 Acc: 0.9738 | Val Loss: 0.0466 Acc: 0.9808

Epoch 15/15


Training:   0%|                                        | 0/1735 [00:00<?, ?it/s]python3.10(48355) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(48358) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Validating:   0%|                                       | 0/434 [00:00<?, ?it/s]python3.10(48919) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python3.10(48920) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
                                                                                

Epoch 15 Results: Train Loss: 0.0533 Acc: 0.9796 | Val Loss: 0.0375 Acc: 0.9847
[EfficientNet_B0] Phase 2 Training complete in 313.68 minutes.

[EfficientNet_B0] Saving final model to deepfake_detector_EfficientNet_B0.pth...
[EfficientNet_B0] Model saved in 0.38s

--- EfficientNet_B0 Training Complete ---

--- All Model Training Complete ---


predict

In [1]:
import torch
import torch.nn as nn
from torchvision import models, transforms
import cv2
import numpy as np
from PIL import Image
import time
import os
from tqdm import tqdm

# --- 1. Configuration ---
# Update these paths to your saved PyTorch models
MODEL_PATHS = {
    "ResNet50": 'deepfake_detector_ResNet50.pth',
    "EfficientNet_B0": 'deepfake_detector_EfficientNet_B0.pth'
}
HAAR_CASCADE_PATH = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
IMG_HEIGHT = 224
IMG_WIDTH = 224

# --- 2. Device Setup (Auto-detect M2 GPU) ---
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple M2 GPU (mps).")
else:
    device = torch.device("cpu")
    print("MPS not available. Using CPU.")

# --- 3. Data Transform (for prediction) ---
# This MUST be the same normalization as your validation data
data_transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# --- 4. Model Creation Functions ---
# We need to define the model architectures to load the saved weights (state_dict)
def create_resnet50_model():
    model = models.resnet50(weights=None) # Load architecture, not weights
    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_ftrs, 512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, 1)
    )
    return model

def create_efficientnet_model():
    model = models.efficientnet_b0(weights=None) # Load architecture, not weights
    num_ftrs = model.classifier[1].in_features
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(num_ftrs, 512),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(512, 1)
    )
    return model

# --- 5. Load Models ---
def load_all_models(paths):
    """Loads all trained PyTorch models."""
    models_ensemble = {}
    print("\n--- Loading Ensemble Models ---")

    # 1. Load ResNet50
    if os.path.exists(paths["ResNet50"]):
        print(f"Loading ResNet50 from {paths['ResNet50']}...")
        model_resnet = create_resnet50_model()
        model_resnet.load_state_dict(torch.load(paths["ResNet50"], map_location=device))
        model_resnet = model_resnet.to(device)
        model_resnet.eval() # Set model to evaluation mode (CRITICAL)
        models_ensemble["ResNet50"] = model_resnet
    else:
        print(f"Warning: Model file not found at '{paths['ResNet50']}'.")

    # 2. Load EfficientNet_B0
    if os.path.exists(paths["EfficientNet_B0"]):
        print(f"Loading EfficientNet_B0 from {paths['EfficientNet_B0']}...")
        model_efficientnet = create_efficientnet_model()
        model_efficientnet.load_state_dict(torch.load(paths["EfficientNet_B0"], map_location=device))
        model_efficientnet = model_efficientnet.to(device)
        model_efficientnet.eval() # Set model to evaluation mode
        models_ensemble["EfficientNet_B0"] = model_efficientnet
    else:
        print(f"Warning: Model file not found at '{paths['EfficientNet_B0']}'.")

    if not models_ensemble:
        print("\nERROR: No models could be loaded. Please check the MODEL_PATHS.")
        return None
        
    print("\nModels loaded and set to eval() mode.")
    return models_ensemble

# --- 6. Video Analysis Function ---
def analyze_video(video_path, models_ensemble, face_cascade):
    """
    Opens a video file, analyzes it frame by frame, and provides a final verdict.
    """
    
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video file at '{video_path}'")
        return

    # Get video properties for the progress bar
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    except:
        total_frames = 0
        
    print(f"\nAnalyzing video file: {video_path}")
    print(f"Total frames to process: {total_frames}")
    
    real_votes = 0
    fake_votes = 0
    faces_found_count = 0
    start_time = time.time()
    
    # Use tqdm for a progress bar
    pbar = tqdm(total=total_frames, desc="Processing Video")
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break # End of video
        
        # --- Face Detection ---
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))
        
        # We'll just analyze the first (and likely only) face found in the frame
        if len(faces) > 0:
            faces_found_count += 1
            (x, y, w, h) = faces[0]
            
            # Crop the face
            face_crop_bgr = frame[y:y+h, x:x+w]
            
            # --- Preprocessing for PyTorch ---
            face_crop_rgb = cv2.cvtColor(face_crop_bgr, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(face_crop_rgb)
            input_tensor = data_transform(pil_image)
            input_batch = input_tensor.unsqueeze(0).to(device)
            
            # --- Get Ensemble Prediction ---
            all_predictions = []
            with torch.no_grad(): # Disable gradient calculation
                for model in models_ensemble.values():
                    logit = model(input_batch)
                    prob = torch.sigmoid(logit).item()
                    all_predictions.append(prob)
            
            # Average the probabilities
            average_prediction = sum(all_predictions) / len(all_predictions)
            
            # Tally votes
            if average_prediction < 0.5:
                fake_votes += 1
            else:
                real_votes += 1
        
        pbar.update(1) # Update progress bar
        
    pbar.close()
    cap.release()
    
    # --- Print Final Report ---
    print("\n--- Video Analysis Complete ---")
    print(f"Time taken: {time.time() - start_time:.2f} seconds")
    print(f"Total frames processed: {total_frames}")
    print(f"Frames with faces detected: {faces_found_count}")
    print("---------------------------------")
    print(f"Frames classified as REAL: {real_votes}")
    print(f"Frames classified as FAKE: {fake_votes}")
    print("---------------------------------")
    
    if faces_found_count == 0:
        print("Final Verdict: UNKNOWN (No faces were detected in the video)")
    else:
        fake_percentage = (fake_votes / faces_found_count) * 100
        real_percentage = (real_votes / faces_found_count) * 100
        
        if fake_percentage > 50:
            print(f"Final Verdict: LIKELY FAKE ({fake_percentage:.2f}% of face-frames were fake)")
        else:
            print(f"Final Verdict: LIKELY REAL ({real_percentage:.2f}% of face-frames were real)")
    print("---------------------------------")


# --- 7. Main Execution Block ---
if __name__ == '__main__':
    models = load_all_models(MODEL_PATHS)
    face_cascade = cv2.CascadeClassifier(HAAR_CASCADE_PATH)
    
    if models: # Only proceed if models were loaded
        while True:
            video_path = input("\nEnter the path to the video you want to analyze (or 'exit' to quit): ").strip()
            
            if video_path.lower() == 'exit':
                break
                
            if not os.path.exists(video_path):
                print(f"Error: The file was not found at '{video_path}'. Please check the path.")
                continue
                
            analyze_video(video_path, models, face_cascade)
    else:
        print("Exiting. No models were loaded to perform analysis.")

Using Apple M2 GPU (mps).

--- Loading Ensemble Models ---
Loading ResNet50 from deepfake_detector_ResNet50.pth...
Loading EfficientNet_B0 from deepfake_detector_EfficientNet_B0.pth...

Models loaded and set to eval() mode.



Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/fake/celeb_fake_1891.mp4



Analyzing video file: /Users/visheshbishnoi/Desktop/data/fake/celeb_fake_1891.mp4
Total frames to process: 331


Processing Video: 100%|███████████████████████| 331/331 [00:10<00:00, 32.41it/s]


--- Video Analysis Complete ---
Time taken: 10.23 seconds
Total frames processed: 331
Frames with faces detected: 296
---------------------------------
Frames classified as REAL: 2
Frames classified as FAKE: 294
---------------------------------
Final Verdict: LIKELY FAKE (99.32% of face-frames were fake)
---------------------------------






Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_1351.mp4



Analyzing video file: /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_1351.mp4
Total frames to process: 299


Processing Video: 100%|███████████████████████| 299/299 [00:10<00:00, 28.47it/s]


--- Video Analysis Complete ---
Time taken: 10.51 seconds
Total frames processed: 299
Frames with faces detected: 219
---------------------------------
Frames classified as REAL: 219
Frames classified as FAKE: 0
---------------------------------
Final Verdict: LIKELY REAL (100.00% of face-frames were real)
---------------------------------






Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_1111.mp4.mp4


Error: The file was not found at '/Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_1111.mp4.mp4'. Please check the path.



Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_1111.mp4



Analyzing video file: /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_1111.mp4
Total frames to process: 299


Processing Video: 100%|███████████████████████| 299/299 [00:12<00:00, 24.77it/s]


--- Video Analysis Complete ---
Time taken: 12.07 seconds
Total frames processed: 299
Frames with faces detected: 299
---------------------------------
Frames classified as REAL: 297
Frames classified as FAKE: 2
---------------------------------
Final Verdict: LIKELY REAL (99.33% of face-frames were real)
---------------------------------






Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_0046.mp4



Analyzing video file: /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_0046.mp4
Total frames to process: 299


Processing Video: 100%|███████████████████████| 299/299 [00:08<00:00, 37.37it/s]


--- Video Analysis Complete ---
Time taken: 8.01 seconds
Total frames processed: 299
Frames with faces detected: 6
---------------------------------
Frames classified as REAL: 4
Frames classified as FAKE: 2
---------------------------------
Final Verdict: LIKELY REAL (66.67% of face-frames were real)
---------------------------------






Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/real/dfdc_real_0105.mp4



Analyzing video file: /Users/visheshbishnoi/Desktop/data/real/dfdc_real_0105.mp4
Total frames to process: 299


Processing Video: 100%|███████████████████████| 299/299 [00:08<00:00, 35.88it/s]


--- Video Analysis Complete ---
Time taken: 8.34 seconds
Total frames processed: 299
Frames with faces detected: 93
---------------------------------
Frames classified as REAL: 92
Frames classified as FAKE: 1
---------------------------------
Final Verdict: LIKELY REAL (98.92% of face-frames were real)
---------------------------------






Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/real/celeb_real_0501.mp4



Analyzing video file: /Users/visheshbishnoi/Desktop/data/real/celeb_real_0501.mp4
Total frames to process: 376


Processing Video: 100%|███████████████████████| 376/376 [00:10<00:00, 34.82it/s]


--- Video Analysis Complete ---
Time taken: 10.80 seconds
Total frames processed: 376
Frames with faces detected: 376
---------------------------------
Frames classified as REAL: 376
Frames classified as FAKE: 0
---------------------------------
Final Verdict: LIKELY REAL (100.00% of face-frames were real)
---------------------------------






Enter the path to the video you want to analyze (or 'exit' to quit):  exit


video-ccn+lstm preprocess

In [2]:
import cv2
import os
import glob
import numpy as np
import shutil # <--- 1. ADD THIS IMPORT
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# --- 1. Configuration ---
# --- PLEASE UPDATE THESE PATHS ---
# Path to your video dataset (containing 'real' and 'fake' subfolders)
INPUT_VIDEO_DIR = "/Users/visheshbishnoi/Desktop/data"
# Path to the output directory where processed frames will be saved
OUTPUT_FRAME_DIR = "/Users/visheshbishnoi/Desktop/data1"
# --------------------------------

# --- Advanced Settings ---
IMG_HEIGHT = 224
IMG_WIDTH = 224
# How many frames to extract from each video. This will be the sequence length.
NUM_FRAMES = 30 
# Split ratios for train, validation, and test sets
TEST_SPLIT = 0.15
VAL_SPLIT = 0.15

HAAR_CASCADE_PATH = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(HAAR_CASCADE_PATH)

# --- 2. Helper Functions ---
def get_frame_indices(total_frames, num_frames_to_extract):
    """Calculates evenly spaced frame indices to extract."""
    if total_frames < num_frames_to_extract:
        # If video is too short, duplicate the last frame
        indices = np.arange(total_frames).tolist()
        indices.extend([total_frames - 1] * (num_frames_to_extract - total_frames))
    else:
        # Get evenly spaced indices
        indices = np.linspace(0, total_frames - 1, num_frames_to_extract, dtype=int)
    return indices

def process_and_save_frame(frame, output_path):
    """Detects face, crops, resizes, and saves the frame."""
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))
    
    if len(faces) > 0:
        (x, y, w, h) = sorted(faces, key=lambda f: f[2]*f[3], reverse=True)[0]
        face_crop = frame[y:y+h, x:x+w]
        resized_face = cv2.resize(face_crop, (IMG_WIDTH, IMG_HEIGHT))
        cv2.imwrite(output_path, resized_face)
        return True
    return False # No face found

# --- 3. Main Processing Logic ---
def process_videos():
    print("--- Starting Video Preprocessing for Temporal Model ---")
    
    all_video_files = [] # Will store (label, path) tuples
    for label in ['real', 'fake']:
        video_dir = os.path.join(INPUT_VIDEO_DIR, label)
        if not os.path.exists(video_dir):
            print(f"Warning: Directory not found, skipping: {video_dir}")
            continue
            
        for ext in ('*.mp4', '*.mov', '*.avi'):
            all_video_files.extend([(label, p) for p in glob.glob(os.path.join(video_dir, ext))])
    
    if not all_video_files:
        print(f"Error: No video files found in {INPUT_VIDEO_DIR}.")
        print("Please check your INPUT_VIDEO_DIR path.")
        return

    print(f"Found {len(all_video_files)} total videos.")
    
    # --- Split videos into train, val, and test SETS ---
    # We split by video, not by frame, to prevent data leakage
    train_val_files, test_files = train_test_split(all_video_files, test_size=TEST_SPLIT, random_state=42)
    val_ratio = VAL_SPLIT / (1.0 - TEST_SPLIT) # Adjust val split based on remaining data
    train_files, val_files = train_test_split(train_val_files, test_size=val_ratio, random_state=42)

    datasets = {
        'train': train_files,
        'val': val_files,
        'test': test_files
    }

    print(f"Splitting data: {len(train_files)} train, {len(val_files)} val, {len(test_files)} test videos.")

    # --- Process and Save Frames ---
    for split_name, video_list in datasets.items():
        print(f"\nProcessing '{split_name}' set...")
        split_output_dir = os.path.join(OUTPUT_FRAME_DIR, split_name)
        
        for label, video_path in tqdm(video_list, desc=f"Processing {split_name} videos"):
            video_name = os.path.splitext(os.path.basename(video_path))[0]
            
            # Create the final output directory for this video's frames
            # e.g., .../temporal_dataset/train/real/video_001/
            video_frame_dir = os.path.join(split_output_dir, label, video_name)
            if os.path.exists(video_frame_dir):
                continue # Skip if already processed
            os.makedirs(video_frame_dir, exist_ok=True)
            
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                print(f"Warning: Could not open {video_path}")
                continue
            
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total_frames == 0:
                continue

            frame_indices = get_frame_indices(total_frames, NUM_FRAMES)
            
            frames_saved = 0
            for i, frame_num in enumerate(frame_indices):
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ret, frame = cap.read()
                if not ret:
                    continue
                
                output_filename = f"{i:03d}.jpg" # e.g., 001.jpg, 002.jpg
                output_path = os.path.join(video_frame_dir, output_filename)
                
                if process_and_save_frame(frame, output_path):
                    frames_saved += 1
            
            cap.release()
            
            # If we couldn't save all frames (e.g., no face detected),
            # remove the folder to avoid partial data
            if frames_saved < NUM_FRAMES:
                if os.path.exists(video_frame_dir):
                    # In a real scenario, you might want to handle this differently
                    # For now, we remove it to ensure clean sequences
                    shutil.rmtree(video_frame_dir) # <--- 2. CHANGE THIS LINE

    print("\n--- Preprocessing Complete ---")
    print(f"Frame sequences saved to: {OUTPUT_FRAME_DIR}")

# --- 4. Main Execution Block ---
if __name__ == "__main__":
    if not os.path.exists(HAAR_CASCADE_PATH):
        print(f"Error: Could not find Haar Cascade file at {HAAR_CASCADE_PATH}")
    else:
        process_videos()

--- Starting Video Preprocessing for Temporal Model ---
Found 2048 total videos.
Splitting data: 1432 train, 308 val, 308 test videos.

Processing 'train' set...


Processing train videos: 100%|██████████████| 1432/1432 [18:25<00:00,  1.30it/s]



Processing 'val' set...


Processing val videos: 100%|██████████████████| 308/308 [03:40<00:00,  1.39it/s]



Processing 'test' set...


Processing test videos: 100%|█████████████████| 308/308 [03:22<00:00,  1.52it/s]


--- Preprocessing Complete ---
Frame sequences saved to: /Users/visheshbishnoi/Desktop/data1





lstm model training:-

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
import time
from tqdm import tqdm
import glob

# --- 1. Configuration ---
# --- PLEASE UPDATE THESE PATHS ---
# Path to your preprocessed temporal dataset (output of preprocess_videos.py)
TEMPORAL_DATA_DIR = "/Users/visheshbishnoi/Desktop/data1" 
# Path to your PRE-TRAINED EfficientNet_B0 model
PRETRAINED_MODEL_PATH = "./deepfake_detector_EfficientNet_B0.pth"
# --------------------------------

IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 8
NUM_FRAMES = 30 
NUM_WORKERS = 0 # Set to 0 to fix multiprocessing error on macOS/Windows
EPOCHS = 15

# --- 2. Device Setup (Auto-detect M2 GPU) ---
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple M2 GPU (mps).")
else:
    device = torch.device("cpu")
    print("MPS not available. Using CPU.")

# --- 3. Custom PyTorch Dataset ---
class VideoFrameDataset(Dataset):
    def __init__(self, data_dir, num_frames, transform=None):
        self.data_dir = data_dir
        self.num_frames = num_frames
        self.transform = transform
        self.samples = [] 
        
        for label, class_name in enumerate(['fake', 'real']):
            class_dir = os.path.join(data_dir, class_name)
            if not os.path.exists(class_dir):
                print(f"Warning: Directory not found, skipping: {class_dir}")
                continue
                
            for video_folder in os.listdir(class_dir):
                video_folder_path = os.path.join(class_dir, video_folder)
                if os.path.isdir(video_folder_path):
                    self.samples.append((video_folder_path, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        video_folder_path, label = self.samples[idx]
        
        frames = sorted(
            glob.glob(os.path.join(video_folder_path, '*.jpg')),
            key=lambda x: int(os.path.basename(x).split('.')[0])
        )
        
        if len(frames) != self.num_frames:
            if len(frames) == 0:
                black_img = Image.new('RGB', (IMG_WIDTH, IMG_HEIGHT))
                sequence = [black_img] * self.num_frames
            else:
                sequence = [Image.open(f) for f in frames]
                sequence.extend([sequence[-1]] * (self.num_frames - len(sequence)))
        else:
            sequence = [Image.open(f) for f in frames[:self.num_frames]]

        if self.transform:
            sequence = [self.transform(img) for img in sequence]
            
        sequence_tensor = torch.stack(sequence)
        
        return sequence_tensor, torch.tensor(label, dtype=torch.float32)

# --- 4. Data Transforms (Must match previous training) ---
data_transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# --- 5. Model Architecture (CNN+LSTM) ---
class CNN_LSTM_Model(nn.Module):
    def __init__(self, pretrained_model_path):
        super(CNN_LSTM_Model, self).__init__()
        
        # --- 1. Load the CNN Feature Extractor ---
        # Load the base EfficientNet_B0 architecture
        self.cnn_base = models.efficientnet_b0(weights=None)
        
        # Get the original feature size and replace the classifier
        num_ftrs = self.cnn_base.classifier[1].in_features # 1280 for B0
        self.cnn_base.classifier = nn.Sequential(
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(num_ftrs, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 1)
        )
        
        # Load the weights we trained in the *previous* step
        print(f"Loading pretrained weights from: {pretrained_model_path}")
        self.cnn_base.load_state_dict(torch.load(pretrained_model_path, map_location=device))
        
        # We only want the features, so we remove the final classifier
        # For EfficientNet, the features are in the 'avgpool' layer
        self.cnn_feature_extractor = nn.Sequential(*list(self.cnn_base.children())[:-1])
        
        # --- 2. Freeze the CNN ---
        for param in self.cnn_feature_extractor.parameters():
            param.requires_grad = False
            
        # --- 3. The LSTM (Temporal) part ---
        self.lstm_hidden_size = 512
        self.lstm = nn.LSTM(
            input_size=num_ftrs, # 1280 for EfficientNet_B0
            hidden_size=self.lstm_hidden_size,
            num_layers=1,
            batch_first=True 
        )
        
        # --- 4. The Final Classifier ---
        self.classifier = nn.Sequential(
            nn.Linear(self.lstm_hidden_size, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 1) # Final output
        )

    def forward(self, x):
        # x shape: (batch_size, num_frames, channels, height, width)
        batch_size, num_frames, c, h, w = x.shape
        
        x = x.view(batch_size * num_frames, c, h, w)
        
        # Get features from CNN
        # output: (batch_size * num_frames, 1280, 1, 1)
        cnn_features = self.cnn_feature_extractor(x)
        
        # Reshape to (batch_size * num_frames, 1280)
        cnn_features = cnn_features.view(batch_size * num_frames, -1)
        
        # Reshape back to sequence: (batch_size, num_frames, 1280)
        sequence_features = cnn_features.view(batch_size, num_frames, -1)
        
        # Pass sequence to LSTM
        lstm_out, _ = self.lstm(sequence_features)
        
        # We only care about the output of the *last* frame in the sequence
        last_time_step_out = lstm_out[:, -1, :]
        
        # Pass to the final classifier
        logit = self.classifier(last_time_step_out)
        
        return logit

# --- 6. Main Training Block ---
if __name__ == "__main__":
    
    # --- Check for dataset and pretrained model ---
    if not os.path.exists(TEMPORAL_DATA_DIR):
        print(f"Error: Temporal data directory not found at '{TEMPORAL_DATA_DIR}'")
        print("Please run 'preprocess_videos.py' first.")
        exit()
    if not os.path.exists(PRETRAINED_MODEL_PATH):
        print(f"Error: Pretrained EfficientNet model not found at '{PRETRAINED_MODEL_PATH}'")
        print("Please ensure the .pth file is in this directory.")
        exit()

    # --- Create Datasets and DataLoaders ---
    print("Loading datasets...")
    train_dataset = VideoFrameDataset(
        data_dir=os.path.join(TEMPORAL_DATA_DIR, 'train'),
        num_frames=NUM_FRAMES,
        transform=data_transform
    )
    val_dataset = VideoFrameDataset(
        data_dir=os.path.join(TEMPORAL_DATA_DIR, 'val'),
        num_frames=NUM_FRAMES,
        transform=data_transform
    )
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
    
    print(f"Found {len(train_dataset)} training sequences and {len(val_dataset)} validation sequences.")

    # --- Initialize Model, Loss, Optimizer ---
    print("Initializing temporal (CNN+LSTM) model...")
    model = CNN_LSTM_Model(PRETRAINED_MODEL_PATH).to(device)
    
    criterion = nn.BCEWithLogitsLoss()
    
    # We only optimize the parameters of the LSTM and the new classifier
    optimizer = optim.Adam(
        list(model.lstm.parameters()) + list(model.classifier.parameters()),
        lr=0.0001
    )
    
    print("--- Starting Temporal Model Training ---")
    
    for epoch in range(EPOCHS):
        print(f"\nEpoch {epoch+1}/{EPOCHS}")
        
        # --- Training Phase ---
        model.train()
        running_loss = 0.0
        running_corrects = 0
        
        train_pbar = tqdm(train_loader, desc="Training", leave=False)
        for inputs, labels in train_pbar:
            inputs = inputs.to(device)
            labels = labels.to(device).view(-1, 1) 
            
            optimizer.zero_grad()
            
            with torch.set_grad_enabled(True):
                outputs = model(inputs) 
                loss = criterion(outputs, labels)
                
                preds = torch.sigmoid(outputs) > 0.5
                
                loss.backward()
                optimizer.step()
                
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            train_pbar.set_postfix(loss=loss.item())

        epoch_train_loss = running_loss / len(train_dataset)
        epoch_train_acc = running_corrects.float() / len(train_dataset)

        # --- Validation Phase ---
        model.eval()
        running_loss = 0.0
        running_corrects = 0
        
        val_pbar = tqdm(val_loader, desc="Validating", leave=False)
        with torch.no_grad():
            for inputs, labels in val_pbar:
                inputs = inputs.to(device)
                labels = labels.to(device).view(-1, 1)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                preds = torch.sigmoid(outputs) > 0.5
                
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

        epoch_val_loss = running_loss / len(val_dataset)
        epoch_val_acc = running_corrects.float() / len(val_dataset)
        
        print(f"Epoch Results: Train Loss: {epoch_train_loss:.4f} Acc: {epoch_train_acc:.4f} | Val Loss: {epoch_val_loss:.4f} Acc: {epoch_val_acc:.4f}")
        
    # --- Save Final Model ---
    model_filename = 'deepfake_temporal_detector.pth'
    print(f"\nTraining complete. Saving final temporal model to {model_filename}...")
    torch.save(model.state_dict(), model_filename)
    print("Model saved successfully.")

Using Apple M2 GPU (mps).
Loading datasets...
Found 1071 training sequences and 228 validation sequences.
Initializing temporal (CNN+LSTM) model...
Loading pretrained weights from: ./deepfake_detector_EfficientNet_B0.pth
--- Starting Temporal Model Training ---

Epoch 1/15


                                                                                

Epoch Results: Train Loss: 0.4679 Acc: 0.8263 | Val Loss: 0.2176 Acc: 0.9254

Epoch 2/15


                                                                                

Epoch Results: Train Loss: 0.2645 Acc: 0.8992 | Val Loss: 0.1612 Acc: 0.9474

Epoch 3/15


                                                                                

Epoch Results: Train Loss: 0.2648 Acc: 0.8936 | Val Loss: 0.1620 Acc: 0.9474

Epoch 4/15


                                                                                

Epoch Results: Train Loss: 0.2351 Acc: 0.9057 | Val Loss: 0.1363 Acc: 0.9561

Epoch 5/15


                                                                                

Epoch Results: Train Loss: 0.2147 Acc: 0.9122 | Val Loss: 0.1554 Acc: 0.9430

Epoch 6/15


                                                                                

Epoch Results: Train Loss: 0.1479 Acc: 0.9458 | Val Loss: 0.2530 Acc: 0.9123

Epoch 7/15


                                                                                

Epoch Results: Train Loss: 0.2033 Acc: 0.9197 | Val Loss: 0.1091 Acc: 0.9561

Epoch 8/15


                                                                                

Epoch Results: Train Loss: 0.2115 Acc: 0.9122 | Val Loss: 0.1348 Acc: 0.9518

Epoch 9/15


                                                                                

Epoch Results: Train Loss: 0.1990 Acc: 0.9225 | Val Loss: 0.1411 Acc: 0.9605

Epoch 10/15


                                                                                

Epoch Results: Train Loss: 0.1643 Acc: 0.9356 | Val Loss: 0.1473 Acc: 0.9649

Epoch 11/15


                                                                                

Epoch Results: Train Loss: 0.2065 Acc: 0.9197 | Val Loss: 0.1223 Acc: 0.9518

Epoch 12/15


                                                                                

Epoch Results: Train Loss: 0.1294 Acc: 0.9486 | Val Loss: 0.1759 Acc: 0.9342

Epoch 13/15


                                                                                

Epoch Results: Train Loss: 0.1406 Acc: 0.9496 | Val Loss: 0.1554 Acc: 0.9386

Epoch 14/15


                                                                                

Epoch Results: Train Loss: 0.1437 Acc: 0.9505 | Val Loss: 0.1262 Acc: 0.9518

Epoch 15/15


                                                                                

Epoch Results: Train Loss: 0.1435 Acc: 0.9477 | Val Loss: 0.1760 Acc: 0.9430

Training complete. Saving final temporal model to deepfake_temporal_detector.pth...
Model saved successfully.


temporal prediction

In [2]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import os
import cv2
import numpy as np
import time

# --- 1. Configuration ---
# Path to your FINAL saved temporal model (the EfficientNet+LSTM one)
SAVED_MODEL_PATH = "./deepfake_temporal_detector.pth"
# --------------------------------

IMG_HEIGHT = 224
IMG_WIDTH = 224
NUM_FRAMES = 30 # Must match the training sequence length

HAAR_CASCADE_PATH = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(HAAR_CASCADE_PATH)

# --- 2. Device Setup (Auto-detect M2 GPU) ---
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple M2 GPU (mps).")
else:
    device = torch.device("cpu")
    print("MPS not available. Using CPU.")

# --- 3. Data Transform (Must match training) ---
data_transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# --- 4. Model Architecture (Must match training) ---
# We must redefine the model architecture to load the weights
class CNN_LSTM_Model(nn.Module):
    def __init__(self):
        super(CNN_LSTM_Model, self).__init__()
        
        # --- 1. Load the CNN Feature Extractor ---
        self.cnn_base = models.efficientnet_b0(weights=None)
        
        num_ftrs = self.cnn_base.classifier[1].in_features
        self.cnn_base.classifier = nn.Sequential(
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(num_ftrs, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 1)
        )
        
        # We only want the features
        self.cnn_feature_extractor = nn.Sequential(*list(self.cnn_base.children())[:-1])
            
        # --- 2. The LSTM (Temporal) part ---
        self.lstm_hidden_size = 512
        self.lstm = nn.LSTM(
            input_size=num_ftrs, # 1280 for EfficientNet_B0
            hidden_size=self.lstm_hidden_size,
            num_layers=1,
            batch_first=True 
        )
        
        # --- 3. The Final Classifier ---
        self.classifier = nn.Sequential(
            nn.Linear(self.lstm_hidden_size, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 1) # Final output
        )

    def forward(self, x):
        batch_size, num_frames, c, h, w = x.shape
        x = x.view(batch_size * num_frames, c, h, w)
        cnn_features = self.cnn_feature_extractor(x)
        cnn_features = cnn_features.view(batch_size * num_frames, -1)
        sequence_features = cnn_features.view(batch_size, num_frames, -1)
        lstm_out, _ = self.lstm(sequence_features)
        last_time_step_out = lstm_out[:, -1, :]
        logit = self.classifier(last_time_step_out)
        return logit

# --- 5. Frame Extraction & Preprocessing Logic ---
def get_frame_indices(total_frames, num_frames_to_extract):
    """Calculates evenly spaced frame indices to extract."""
    if total_frames < num_frames_to_extract:
        indices = np.arange(total_frames).tolist()
        indices.extend([total_frames - 1] * (num_frames_to_extract - total_frames))
    else:
        indices = np.linspace(0, total_frames - 1, num_frames_to_extract, dtype=int)
    return indices

def process_video_for_prediction(video_path):
    """
    Extracts and processes 30 frames from a video file,
    returning a tensor ready for the model.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return None

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames == 0:
        print("Error: Video file is empty or corrupt.")
        return None

    frame_indices = get_frame_indices(total_frames, NUM_FRAMES)
    
    frame_sequence = []
    frames_processed_count = 0
    
    for frame_num in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ret, frame = cap.read()
        if not ret:
            continue

        # --- Face Detection and Cropping ---
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))
        
        face_crop = None
        if len(faces) > 0:
            # Get the largest face
            (x, y, w, h) = sorted(faces, key=lambda f: f[2]*f[3], reverse=True)[0]
            face_crop = frame[y:y+h, x:x+w]
        else:
            # Fallback: If no face found, use the center-crop of the frame
            # This ensures we *always* get 30 frames
            h, w, _ = frame.shape
            cx, cy = w // 2, h // 2
            crop_size = min(h, w)
            face_crop = frame[cy - crop_size // 2:cy + crop_size // 2, cx - crop_size // 2:cx + crop_size // 2]

        # --- Transform ---
        # Convert BGR (OpenCV) to RGB (PIL)
        face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(face_crop_rgb)
        
        # Apply the same transforms as training
        transformed_frame = data_transform(pil_image)
        frame_sequence.append(transformed_frame)
        frames_processed_count += 1

    cap.release()

    if frames_processed_count != NUM_FRAMES:
        print(f"Warning: Could only process {frames_processed_count} of {NUM_FRAMES} frames.")
        if frames_processed_count == 0:
            return None
        while len(frame_sequence) < NUM_FRAMES:
            frame_sequence.append(frame_sequence[-1])
            
    # Stack the 30 frames into a single tensor
    sequence_tensor = torch.stack(frame_sequence)
    
    # Add the batch dimension
    return sequence_tensor.unsqueeze(0)

# --- 6. Main Prediction Block ---
if __name__ == "__main__":
    
    if not os.path.exists(SAVED_MODEL_PATH):
        print(f"Error: Model file not found at '{SAVED_MODEL_PATH}'")
        print("Please make sure the trained 'deepfake_temporal_detector.pth' is in the same directory.")
        exit()
    if face_cascade.empty():
        print(f"Error: Could not load Haar Cascade file for face detection.")
        exit()

    # --- Load Model ---
    print("Loading temporal (EfficientNet+LSTM) model...")
    model = CNN_LSTM_Model().to(device)
    model.load_state_dict(torch.load(SAVED_MODEL_PATH, map_location=device))
    model.eval() # Set model to evaluation mode (CRITICAL)
    print("Model loaded successfully.")

    # --- Prediction Loop ---
    while True:
        video_path = input("\nEnter the path to the video you want to analyze (or 'exit' to quit): ").strip()
        
        if video_path.lower() == 'exit':
            break
            
        if not os.path.exists(video_path):
            print(f"Error: The file was not found at '{video_path}'. Please check the path.")
            continue
            
        print(f"\nAnalyzing video: {video_path}")
        start_time = time.time()

        # 1. Process the video into a frame sequence tensor
        input_tensor = process_video_for_prediction(video_path)
        
        if input_tensor is None:
            print("Could not process video.")
            continue
            
        # 2. Send tensor to the GPU
        input_tensor = input_tensor.to(device)
        
        # 3. Get prediction
        with torch.no_grad(): # Disable gradient calculation for inference
            logit = model(input_tensor)
            prob = torch.sigmoid(logit).item() # Get final probability
            
        end_time = time.time()

        # 4. Report the verdict
        if prob < 0.5:
            confidence = (1 - prob) * 100
            print(f"\n--- Verdict: FAKE ---")
            print(f"Confidence: {confidence:.2f}%")
        else:
            confidence = prob * 100
            print(f"\n--- Verdict: REAL ---")
            print(f"Confidence: {confidence:.2f}%")
            
        print(f"Analysis took {end_time - start_time:.2f} seconds.")

Using Apple M2 GPU (mps).
Loading temporal (EfficientNet+LSTM) model...
Model loaded successfully.



Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_1111.mp4



Analyzing video: /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_1111.mp4

--- Verdict: FAKE ---
Confidence: 75.51%
Analysis took 2.65 seconds.



Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_0046.mp4



Analyzing video: /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_0046.mp4

--- Verdict: FAKE ---
Confidence: 99.34%
Analysis took 5.02 seconds.



Enter the path to the video you want to analyze (or 'exit' to quit):  exit


stack training

In [2]:
# train_stacking_model.py
import torch
import torch.nn as nn
from torchvision import models, transforms
import cv2
import numpy as np
from PIL import Image
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import joblib # For saving the model

# --- 1. Configuration ---
# UPDATE THESE PATHS
MODEL_PATHS = {
    "ResNet50": 'deepfake_detector_ResNet50.pth',
    "EfficientNet_B0": 'deepfake_detector_EfficientNet_B0.pth'
}
HAAR_CASCADE_PATH = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
IMG_HEIGHT = 224
IMG_WIDTH = 224

# UPDATE to your video dataset
REAL_VIDEO_DIR = "/Users/visheshbishnoi/Desktop/videos/real"
FAKE_VIDEO_DIR = "/Users/visheshbishnoi/Desktop/videos/fake"
META_MODEL_SAVE_PATH = "stacking_logistic_regression.pkl"
SCALER_SAVE_PATH = "stacking_scaler.pkl" # IMPORTANT for new data

# --- 2. Device & Model Setup (Copied from your script) ---
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple M2 GPU (mps).")
else:
    device = torch.device("cpu")
    print("MPS not available. Using CPU.")

data_transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

def create_resnet50_model():
    model = models.resnet50(weights=None)
    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(nn.Linear(num_ftrs, 512), nn.ReLU(), nn.Dropout(0.5), nn.Linear(512, 1))
    return model

def create_efficientnet_model():
    model = models.efficientnet_b0(weights=None)
    num_ftrs = model.classifier[1].in_features
    model.classifier = nn.Sequential(nn.Dropout(p=0.2, inplace=True), nn.Linear(num_ftrs, 512), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(512, 1))
    return model

def load_all_models(paths):
    models_ensemble = {}
    print("\n--- Loading Ensemble Models ---")
    
    # Load ResNet50
    if os.path.exists(paths["ResNet50"]):
        print(f"Loading ResNet50...")
        model_resnet = create_resnet50_model()
        model_resnet.load_state_dict(torch.load(paths["ResNet50"], map_location=device))
        model_resnet = model_resnet.to(device)
        model_resnet.eval()
        models_ensemble["ResNet50"] = model_resnet
    else:
        print(f"FATAL: ResNet50 model not found at '{paths['ResNet50']}'.")
        return None

    # Load EfficientNet_B0
    if os.path.exists(paths["EfficientNet_B0"]):
        print(f"Loading EfficientNet_B0...")
        model_efficientnet = create_efficientnet_model()
        model_efficientnet.load_state_dict(torch.load(paths["EfficientNet_B0"], map_location=device))
        model_efficientnet = model_efficientnet.to(device)
        model_efficientnet.eval()
        models_ensemble["EfficientNet_B0"] = model_efficientnet
    else:
        print(f"FATAL: EfficientNet_B0 model not found at '{paths['EfficientNet_B0']}'.")
        return None
        
    print("\nModels loaded and set to eval() mode.")
    return models_ensemble

# --- 3. Phase 1: Feature Extraction Function ---
def get_features_for_video(video_path, models_ensemble, face_cascade):
    """
    Processes a single video and returns a feature vector based on predictions.
    Returns None if no faces are found.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Warning: Could not open video file {video_path}")
        return None

    # Store all frame-level probabilities for each model
    model_probs = {name: [] for name in models_ensemble.keys()}

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))
        
        if len(faces) > 0:
            (x, y, w, h) = faces[0]
            face_crop_bgr = frame[y:y+h, x:x+w]
            
            face_crop_rgb = cv2.cvtColor(face_crop_bgr, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(face_crop_rgb)
            input_tensor = data_transform(pil_image)
            input_batch = input_tensor.unsqueeze(0).to(device)
            
            with torch.no_grad():
                for name, model in models_ensemble.items():
                    logit = model(input_batch)
                    prob = torch.sigmoid(logit).item()
                    model_probs[name].append(prob)
    
    cap.release()

    # --- Feature Calculation ---
    # Check if any faces were found at all
    if not any(model_probs.values()):
        print(f"Warning: No faces found in {video_path}")
        return None

    features = []
    for name in models_ensemble.keys():
        probs = model_probs[name]
        if not probs:
            # Handle case where one model ran but no faces were found (should be caught above, but as a safeguard)
            probs = [0.5] # Neutral value if no faces detected
            
        features.append(np.mean(probs))
        features.append(np.std(probs))
        features.append(np.median(probs))
        
    # Our feature vector is [resnet_avg, resnet_std, resnet_median, effnet_avg, effnet_std, effnet_median]
    return features

# --- 4. Main Execution (Phase 1 & 2) ---
if __name__ == '__main__':
    models = load_all_models(MODEL_PATHS)
    face_cascade = cv2.CascadeClassifier(HAAR_CASCADE_PATH)
    
    if models is None:
        print("Exiting. Base models could not be loaded.")
        exit()

    X_meta = [] # To store our feature vectors
    y_meta = [] # To store our labels (0=fake, 1=real)

    # --- Phase 1: Process all videos ---
    print("\n--- Phase 1: Starting Feature Generation ---")
    
    # Process REAL videos (Label = 1)
    real_files = [os.path.join(REAL_VIDEO_DIR, f) for f in os.listdir(REAL_VIDEO_DIR) if f.endswith(('.mp4', '.avi', '.mov'))]
    print(f"\nProcessing {len(real_files)} REAL videos...")
    for video_path in tqdm(real_files, desc="Real Videos"):
        features = get_features_for_video(video_path, models, face_cascade)
        if features:
            X_meta.append(features)
            y_meta.append(1)

    # Process FAKE videos (Label = 0)
    fake_files = [os.path.join(FAKE_VIDEO_DIR, f) for f in os.listdir(FAKE_VIDEO_DIR) if f.endswith(('.mp4', '.avi', '.mov'))]
    print(f"\nProcessing {len(fake_files)} FAKE videos...")
    for video_path in tqdm(fake_files, desc="Fake Videos"):
        features = get_features_for_video(video_path, models, face_cascade)
        if features:
            X_meta.append(features)
            y_meta.append(0)

    print(f"\nFeature generation complete. Generated {len(X_meta)} feature vectors.")

    # --- Phase 2: Train the Meta-Model ---
    print("\n--- Phase 2: Training Stacking Meta-Model ---")
    
    X = np.array(X_meta)
    y = np.array(y_meta)

    # Split the data for evaluation
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # CRITICAL: Scale the features
    # Tree-based models don't need this, but Logistic Regression and SVMs do.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    print("Training Logistic Regression model...")
    # You can add class_weight='balanced' if your dataset is imbalanced
    meta_model = LogisticRegression(random_state=42, max_iter=1000)
    meta_model.fit(X_train_scaled, y_train)

    # Evaluate the meta-model
    y_pred = meta_model.predict(X_test_scaled)
    print(f"\nMeta-Model Test Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=["FAKE (0)", "REAL (1)"]))

    # --- Final Step: Train on ALL data and Save ---
    print("\nTraining final model on ALL data...")
    # We re-fit the scaler and model on 100% of the data for deployment
    final_scaler = StandardScaler()
    X_scaled_full = final_scaler.fit_transform(X)
    
    final_meta_model = LogisticRegression(random_state=42, max_iter=1000)
    final_meta_model.fit(X_scaled_full, y)
    
    # Save the scaler and the model
    joblib.dump(final_meta_model, META_MODEL_SAVE_PATH)
    joblib.dump(final_scaler, SCALER_SAVE_PATH)
    
    print(f"Successfully trained and saved meta-model to: {META_MODEL_SAVE_PATH}")
    print(f"Successfully saved feature scaler to: {SCALER_SAVE_PATH}")
    print("\nTraining pipeline complete.")

Using Apple M2 GPU (mps).

--- Loading Ensemble Models ---
Loading ResNet50...
Loading EfficientNet_B0...

Models loaded and set to eval() mode.

--- Phase 1: Starting Feature Generation ---

Processing 998 REAL videos...


Real Videos:  76%|███████████████████▊      | 760/998 [3:06:22<59:51, 15.09s/it]



Real Videos: 100%|██████████████████████████| 998/998 [4:08:58<00:00, 14.97s/it]



Processing 1000 FAKE videos...


Fake Videos:  51%|███████████▋           | 506/1000 [1:55:40<1:28:35, 10.76s/it]



Fake Videos: 100%|████████████████████████| 1000/1000 [3:46:33<00:00, 13.59s/it]



Feature generation complete. Generated 1996 feature vectors.

--- Phase 2: Training Stacking Meta-Model ---
Training Logistic Regression model...

Meta-Model Test Accuracy: 0.9200
Classification Report:
              precision    recall  f1-score   support

    FAKE (0)       0.97      0.87      0.92       200
    REAL (1)       0.88      0.97      0.92       200

    accuracy                           0.92       400
   macro avg       0.92      0.92      0.92       400
weighted avg       0.92      0.92      0.92       400


Training final model on ALL data...
Successfully trained and saved meta-model to: stacking_logistic_regression.pkl
Successfully saved feature scaler to: stacking_scaler.pkl

Training pipeline complete.


stack predict

In [3]:
# predict_with_stacking.py
import torch
import torch.nn as nn
from torchvision import models, transforms
import cv2
import numpy as np
from PIL import Image
import time
import os
from tqdm import tqdm
import joblib # <-- Import joblib

# --- 1. Configuration ---
MODEL_PATHS = {
    "ResNet50": 'deepfake_detector_ResNet50.pth',
    "EfficientNet_B0": 'deepfake_detector_EfficientNet_B0.pth'
}
HAAR_CASCADE_PATH = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
IMG_HEIGHT = 224
IMG_WIDTH = 224

# --- NEW: Paths to the saved stacking model ---
META_MODEL_PATH = "stacking_logistic_regression.pkl"
SCALER_PATH = "stacking_scaler.pkl"

# --- 2. Device Setup ---
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple M2 GPU (mps).")
else:
    device = torch.device("cpu")
    print("MPS not available. Using CPU.")

# --- 3. Data Transform ---
data_transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# --- 4. Model Creation Functions (Same as before) ---
def create_resnet50_model():
    model = models.resnet50(weights=None)
    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(nn.Linear(num_ftrs, 512), nn.ReLU(), nn.Dropout(0.5), nn.Linear(512, 1))
    return model

def create_efficientnet_model():
    model = models.efficientnet_b0(weights=None)
    num_ftrs = model.classifier[1].in_features
    model.classifier = nn.Sequential(nn.Dropout(p=0.2, inplace=True), nn.Linear(num_ftrs, 512), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(512, 1))
    return model

# --- 5. Load All Models (Base + Stacking) ---
def load_all_models_and_stacker(paths, stacker_path, scaler_path):
    # Load base models
    models_ensemble = {}
    print("\n--- Loading Ensemble Models ---")
    
    if os.path.exists(paths["ResNet50"]):
        print(f"Loading ResNet50...")
        model_resnet = create_resnet50_model()
        model_resnet.load_state_dict(torch.load(paths["ResNet50"], map_location=device))
        model_resnet = model_resnet.to(device)
        model_resnet.eval()
        models_ensemble["ResNet50"] = model_resnet
    else:
        print(f"Error: Model file not found at '{paths['ResNet50']}'.")
        return None, None, None

    if os.path.exists(paths["EfficientNet_B0"]):
        print(f"Loading EfficientNet_B0...")
        model_efficientnet = create_efficientnet_model()
        model_efficientnet.load_state_dict(torch.load(paths["EfficientNet_B0"], map_location=device))
        model_efficientnet = model_efficientnet.to(device)
        model_efficientnet.eval()
        models_ensemble["EfficientNet_B0"] = model_efficientnet
    else:
        print(f"Error: Model file not found at '{paths['EfficientNet_B0']}'.")
        return None, None, None
        
    print("Base models loaded.")

    # Load stacking meta-model and scaler
    try:
        print(f"Loading meta-model from {stacker_path}...")
        meta_model = joblib.load(stacker_path)
        print(f"Loading scaler from {scaler_path}...")
        scaler = joblib.load(scaler_path)
    except FileNotFoundError as e:
        print(f"ERROR: Could not load stacking model or scaler. {e}")
        print("Please run 'train_stacking_model.py' first.")
        return None, None, None
        
    print("Stacking model and scaler loaded.")
    return models_ensemble, meta_model, scaler

# --- 6. MODIFIED Video Analysis Function ---
def analyze_video_with_stacking(video_path, models_ensemble, face_cascade, meta_model, scaler):
    """
    Analyzes a video, generates features, and uses the stacking model
    for a final prediction.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video file at '{video_path}'")
        return

    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    except:
        total_frames = 0
        
    print(f"\nAnalyzing video file: {video_path}")
    
    # Store all frame-level probabilities
    model_probs = {name: [] for name in models_ensemble.keys()}
    faces_found_count = 0
    start_time = time.time()
    
    pbar = tqdm(total=total_frames, desc="Processing Video")
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))
        
        if len(faces) > 0:
            faces_found_count += 1
            (x, y, w, h) = faces[0]
            
            face_crop_bgr = frame[y:y+h, x:x+w]
            
            face_crop_rgb = cv2.cvtColor(face_crop_bgr, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(face_crop_rgb)
            input_tensor = data_transform(pil_image)
            input_batch = input_tensor.unsqueeze(0).to(device)
            
            with torch.no_grad():
                for name, model in models_ensemble.items():
                    logit = model(input_batch)
                    prob = torch.sigmoid(logit).item()
                    model_probs[name].append(prob)
        
        pbar.update(1)
        
    pbar.close()
    cap.release()
    
    print("\n--- Video Analysis Complete ---")
    print(f"Time taken: {time.time() - start_time:.2f} seconds")
    print(f"Frames with faces detected: {faces_found_count}")
    print("---------------------------------")
    
    if faces_found_count == 0:
        print("Final Verdict: UNKNOWN (No faces were detected in the video)")
        print("---------------------------------")
        return

    # --- Generate Feature Vector (MUST match training script) ---
    features = []
    for name in models_ensemble.keys():
        probs = model_probs[name]
        if not probs:
            probs = [0.5] # Handle no-face edge case
            
        features.append(np.mean(probs))
        features.append(np.std(probs))
        features.append(np.median(probs))
        
    # Reshape for a single prediction
    features_np = np.array(features).reshape(1, -1)
    
    # --- Scale the features ---
    features_scaled = scaler.transform(features_np)
    
    # --- Get Final Prediction ---
    prediction = meta_model.predict(features_scaled)[0]
    probability = meta_model.predict_proba(features_scaled)[0]
    
    if prediction == 0:
        verdict = "FAKE"
        confidence = probability[0] # Probability of class 0
    else:
        verdict = "REAL"
        confidence = probability[1] # Probability of class 1

    print(f"Final Verdict: LIKELY {verdict}")
    print(f"Model Confidence: {confidence*100:.2f}%")
    print("---------------------------------")


# --- 7. Main Execution Block ---
if __name__ == '__main__':
    base_models, meta_model, scaler = load_all_models_and_stacker(MODEL_PATHS, META_MODEL_PATH, SCALER_PATH)
    face_cascade = cv2.CascadeClassifier(HAAR_CASCADE_PATH)
    
    if base_models and meta_model and scaler: # Only proceed if all models loaded
        while True:
            video_path = input("\nEnter the path to the video you want to analyze (or 'exit' to quit): ").strip()
            
            if video_path.lower() == 'exit':
                break
                
            if not os.path.exists(video_path):
                print(f"Error: The file was not found at '{video_path}'. Please check the path.")
                continue
                
            analyze_video_with_stacking(video_path, base_models, face_cascade, meta_model, scaler)
    else:
        print("Exiting. Not all required models were loaded.")

Using Apple M2 GPU (mps).

--- Loading Ensemble Models ---
Loading ResNet50...
Loading EfficientNet_B0...
Base models loaded.
Loading meta-model from stacking_logistic_regression.pkl...
Loading scaler from stacking_scaler.pkl...
Stacking model and scaler loaded.



Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_0046.mp4


Error: The file was not found at '/Users/visheshbishnoi/Desktop/data/fake/dfdc_fake_0046.mp4'. Please check the path.



Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/videos/fake/dfdc_fake_1351.mp4



Analyzing video file: /Users/visheshbishnoi/Desktop/videos/fake/dfdc_fake_1351.mp4


Processing Video: 100%|███████████████████████| 299/299 [00:10<00:00, 27.71it/s]


--- Video Analysis Complete ---
Time taken: 10.80 seconds
Frames with faces detected: 219
---------------------------------
Final Verdict: LIKELY REAL
Model Confidence: 89.22%
---------------------------------






Enter the path to the video you want to analyze (or 'exit' to quit):    /Users/visheshbishnoi/Desktop/videos/fake/dfdc_fake_1366.mp4



Analyzing video file: /Users/visheshbishnoi/Desktop/videos/fake/dfdc_fake_1366.mp4


Processing Video: 100%|███████████████████████| 299/299 [00:12<00:00, 24.40it/s]


--- Video Analysis Complete ---
Time taken: 12.26 seconds
Frames with faces detected: 294
---------------------------------
Final Verdict: LIKELY REAL
Model Confidence: 89.89%
---------------------------------






Enter the path to the video you want to analyze (or 'exit' to quit):  /Users/visheshbishnoi/Desktop/videos/fake/celeb_fake_4969.mp4



Analyzing video file: /Users/visheshbishnoi/Desktop/videos/fake/celeb_fake_4969.mp4


Processing Video: 100%|███████████████████████| 441/441 [00:14<00:00, 31.21it/s]


--- Video Analysis Complete ---
Time taken: 14.14 seconds
Frames with faces detected: 435
---------------------------------
Final Verdict: LIKELY FAKE
Model Confidence: 99.95%
---------------------------------






Enter the path to the video you want to analyze (or 'exit' to quit):  exit
