In [12]:
import kagglehub
from pathlib import Path
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# --- Data configuration ---
# Tunable values are named once here instead of being repeated inline below.
IMAGE_SIZE = (128, 128)   # target size passed to transforms.Resize
BATCH_SIZE = 32           # mini-batch size shared by all three DataLoaders
NORM_MEAN = [0.5] * 3     # per-channel mean passed to transforms.Normalize
NORM_STD = [0.5] * 3      # per-channel std passed to transforms.Normalize

# 1. Download the dataset
# The return value of dataset_download is wrapped in Path so the split
# directories below can be built with the `/` operator.
base_path = Path(kagglehub.dataset_download("xhlulu/140k-real-and-fake-faces"))

# 2. Define paths
dataset_root = base_path / "real_vs_fake" / "real-vs-fake"
train_dir = dataset_root / "train"
valid_dir = dataset_root / "valid"
test_dir  = dataset_root / "test"

# Fail fast with a readable message if the download does not have the
# directory layout this notebook expects.
for split_dir in (train_dir, valid_dir, test_dir):
    if not split_dir.is_dir():
        raise FileNotFoundError(f"Expected dataset split directory not found: {split_dir}")

# 3. Transforms
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),              # Resize all images
    transforms.ToTensor(),                      # Convert to Tensor
    transforms.Normalize(NORM_MEAN, NORM_STD),  # Normalize to [-1, 1]
])

# 4. Load datasets (the same transform is applied to every split)
train_data = ImageFolder(train_dir, transform=transform)
valid_data = ImageFolder(valid_dir, transform=transform)
test_data  = ImageFolder(test_dir, transform=transform)

# The model is trained against train_data's label indices, so validation and
# test metrics are only meaningful if every split uses the same class list in
# the same order.
assert train_data.classes == valid_data.classes == test_data.classes, (
    "Class lists differ between splits: "
    f"train={train_data.classes}, valid={valid_data.classes}, test={test_data.classes}"
)

# 5. DataLoaders
# Only the training split is shuffled; validation and test keep a fixed order.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

# 6. Check classes
print("Classes:", train_data.classes)
print("Train size:", len(train_data), "| Valid:", len(valid_data), "| Test:", len(test_data))


Classes: ['fake', 'real']
Train size: 100000 | Valid: 20000 | Test: 20000


In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import timm # A great library for computer vision models
import time

# --- Setup ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
EPOCHS = 3 # Start with 3 epochs for a quick training session
LEARNING_RATE = 0.0001
SEED = 42

# Seed torch's global RNG so that runs are more repeatable. This does not by
# itself guarantee bit-identical results (GPU kernels may be nondeterministic).
torch.manual_seed(SEED)

print(f"‚úÖ Using device: {DEVICE}")


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over `loader` and return (mean loss, accuracy in %).

    When `optimizer` is given the pass is a training pass: the model is put in
    train mode and a backward pass plus optimizer step are performed for each
    batch. When `optimizer` is None the pass is evaluation only: the model is
    put in eval mode and gradient tracking is disabled.

    Args:
        model: module called as `model(inputs)` to produce per-class scores.
        loader: iterable yielding `(inputs, labels)` batches.
        criterion: loss callable invoked as `criterion(outputs, labels)`.
        device: device every batch is moved to before the forward pass.
        optimizer: optimizer to step, or None for an evaluation pass.

    Returns:
        Tuple `(mean_loss, accuracy_percent)` computed over all samples seen.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is the same switch as eval()

    loss_sum, num_correct, num_seen = 0.0, 0, 0
    with torch.set_grad_enabled(is_training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_count = labels.size(0)
            # The criterion returns a per-batch mean, so weight it by the batch
            # size to keep the epoch mean exact when the last batch is smaller.
            loss_sum += loss.item() * batch_count
            # argmax over the class dimension gives the predicted label index.
            num_correct += (outputs.argmax(dim=1) == labels).sum().item()
            num_seen += batch_count

    if num_seen == 0:
        raise ValueError("DataLoader produced no samples; cannot compute epoch metrics.")
    return loss_sum / num_seen, 100 * num_correct / num_seen


# --- Model ---
# Load a pre-trained XceptionNet model and adapt it for our 2 classes ('fake', 'real')
model = timm.create_model('xception', pretrained=True, num_classes=2)
model.to(DEVICE)

# --- Loss and Optimizer ---
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)

# --- Training Loop ---
print("\nüöÄ Starting model training...")
start_time = time.time()

for epoch in range(EPOCHS):
    # Training phase (optimizer supplied), then validation phase (no optimizer)
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, DEVICE, optimizer)
    valid_loss, valid_acc = _run_epoch(model, valid_loader, criterion, DEVICE)

    # Print epoch results
    print(f"Epoch {epoch+1}/{EPOCHS} | "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | "
          f"Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.2f}%")

end_time = time.time()
print(f"\nüèÅ Training finished in {(end_time - start_time)/60:.2f} minutes.")

# --- Save the Model ---
# Only the weights (state_dict) from the final epoch are written.
MODEL_SAVE_PATH = "deepfake_detector_model.pth"
torch.save(model.state_dict(), MODEL_SAVE_PATH)
print(f"\n‚úÖ Model saved to: {MODEL_SAVE_PATH}")

‚úÖ Using device: cuda


  model = create_fn(



üöÄ Starting model training...
Epoch 1/3 | Train Loss: 0.1415, Train Acc: 94.37% | Valid Loss: 0.0479, Valid Acc: 98.28%
Epoch 2/3 | Train Loss: 0.0292, Train Acc: 98.98% | Valid Loss: 0.0437, Valid Acc: 98.31%
Epoch 3/3 | Train Loss: 0.0170, Train Acc: 99.40% | Valid Loss: 0.0289, Valid Acc: 98.94%

üèÅ Training finished in 39.81 minutes.

‚úÖ Model saved to: deepfake_detector_model.pth


In [14]:
# --- Load the saved model ---
# map_location remaps the stored tensors onto the current DEVICE, so weights
# that were saved from a GPU session can still be loaded on a CPU-only kernel.
state_dict = torch.load(MODEL_SAVE_PATH, map_location=DEVICE)
model.load_state_dict(state_dict)
model.to(DEVICE)
model.eval()  # evaluation mode for the test pass

# --- Testing Loop ---
test_correct, test_total = 0, 0
with torch.no_grad():  # no gradients are needed for evaluation
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        outputs = model(inputs)
        # Predicted class = index of the largest score along the class dimension.
        predicted = outputs.argmax(dim=1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

# Guard the division so an empty test loader gives a clear error.
if test_total == 0:
    raise ValueError("test_loader produced no samples; cannot compute test accuracy.")

test_acc = 100 * test_correct / test_total
print(f"\nüìä Final Test Accuracy: {test_acc:.2f}%")


üìä Final Test Accuracy: 99.00%


In [15]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Collect every test prediction and its ground-truth label so that per-class
# metrics can be computed in one go.
all_preds = []
all_labels = []

# Set evaluation mode explicitly rather than relying on an earlier cell
# having left the model in that state.
model.eval()

with torch.no_grad():  # inference only, no gradients needed
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        outputs = model(inputs)
        # Predicted class = index of the largest score along the class dimension.
        predicted = outputs.argmax(dim=1)
        # Move results to the CPU and store them as plain Python ints.
        all_preds.extend(predicted.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

# Pass the full list of class indices explicitly so the matrix shape and the
# report rows stay aligned with the class names even if a class never appears
# in the labels or predictions.
class_indices = list(range(len(train_data.classes)))

# Rows of the confusion matrix are true classes, columns are predicted classes.
print(confusion_matrix(all_labels, all_preds, labels=class_indices))
print(classification_report(all_labels, all_preds, labels=class_indices,
                            target_names=train_data.classes))


[[9910   90]
 [ 109 9891]]
              precision    recall  f1-score   support

        fake       0.99      0.99      0.99     10000
        real       0.99      0.99      0.99     10000

    accuracy                           0.99     20000
   macro avg       0.99      0.99      0.99     20000
weighted avg       0.99      0.99      0.99     20000

