In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

# -------- Step 1: Set Paths --------
data_dir = 'E:/Downloads/extracted_faces'  # Folder with 'real' and 'fake' subfolders

In [2]:
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])  # Normalize to [-1, 1]
])

In [3]:
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
class_names = dataset.classes  # ['fake', 'real']
print(f"Classes: {class_names}")

Classes: ['fake', 'real']


In [4]:
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [6]:
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2)  # 2 classes: real and fake

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

Epoch 1/10, Loss: 87.6529
Epoch 2/10, Loss: 52.4245
Epoch 3/10, Loss: 51.3112
Epoch 4/10, Loss: 50.4363
Epoch 5/10, Loss: 54.3824
Epoch 6/10, Loss: 53.9632
Epoch 7/10, Loss: 48.7617
Epoch 8/10, Loss: 45.9872
Epoch 9/10, Loss: 45.7583
Epoch 10/10, Loss: 50.1953


In [12]:
torch.save(model.state_dict(), 'deepfake_model.pth')


In [17]:
import os
import shutil
import random

def split_dataset(source_dir, train_dir, test_dir, split_ratio=0.8):
    for label in ['real', 'fake']:
        source_path = os.path.join(source_dir, label)
        train_path = os.path.join(train_dir, label)
        test_path = os.path.join(test_dir, label)
        os.makedirs(train_path, exist_ok=True)
        os.makedirs(test_path, exist_ok=True)

        all_images = []
        for root, _, files in os.walk(source_path):
            for file in files:
                if file.lower().endswith(('.jpg', '.png')):
                    all_images.append(os.path.join(root, file))

        random.shuffle(all_images)
        split_index = int(len(all_images) * split_ratio)

        for i, img_path in enumerate(all_images):
            if i < split_index:
                shutil.copy(img_path, train_path)
            else:
                shutil.copy(img_path, test_path)

        print(f"[INFO] {label}: {split_index} train, {len(all_images)-split_index} test")

# Example usage:
split_dataset(
    source_dir='E:/Downloads/extracted_faces',
    train_dir='E:/Downloads/dataset/train',
    test_dir='E:/Downloads/dataset/test',
    split_ratio=0.8
)


[INFO] real: 9376 train, 2344 test
[INFO] fake: 9864 train, 2467 test


In [22]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Image transformation (same as used during training)
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor()
])

# Load test dataset
test_dataset = datasets.ImageFolder(root='E:/downloads/dataset/test', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [24]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 61.48%


In [27]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.numpy())

print("Confusion Matrix:")
print(confusion_matrix(all_labels, all_preds))
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))


Confusion Matrix:
[[122   7]
 [ 92  36]]

Classification Report:
              precision    recall  f1-score   support

        fake       0.57      0.95      0.71       129
        real       0.84      0.28      0.42       128

    accuracy                           0.61       257
   macro avg       0.70      0.61      0.57       257
weighted avg       0.70      0.61      0.57       257



In [30]:
torch.save(model.state_dict(), 'deepfake_model.pth')
