## **Deepfake Detection with a Convolutional Neural Network**
Daniel Radunsky, Adam Torres Encarnacion

#### **Import Packages**

In [24]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

In [1]:
import zipfile
from pathlib import Path
from tqdm import tqdm

def unzip_file(zip_path, output_dir):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # Get a list of all the files in the zip archive
        files = zip_ref.namelist()
        
        # Show progress bar while extracting
        for file in tqdm(files, desc="Unzipping", unit="file"):
            zip_ref.extract(file, output_dir)

# Path to the zip file
zip_path = Path(r"c:\Users\kingd\Downloads\Dataset.zip")
# Directory to extract to
output_dir = Path("./data")

# Unzip the file
unzip_file(zip_path, output_dir)

Unzipping: 100%|██████████| 190335/190335 [05:12<00:00, 608.98file/s] 


In [25]:
train_dir = './data/Train'
test_dir = './data/Test'

transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize images to 256x256
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize
])

# Load train and test datasets
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

# Create DataLoaders for batching
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [26]:
for images, labels in train_loader:
    print("Image shape:", images.shape)  # Shape of the batch of images
    print("Labels shape:", labels.shape)  # Shape of the batch of labels
    break  # Break after first batch to display only one sample

# Displaying a few samples from the test_loader
'''print("\nTest Data Sample:")
for images, labels in test_loader:
    print("Image shape:", images.shape)
    print("Labels shape:", labels.shape)'''

Image shape: torch.Size([32, 3, 128, 128])
Labels shape: torch.Size([32])


'print("\nTest Data Sample:")\nfor images, labels in test_loader:\n    print("Image shape:", images.shape)\n    print("Labels shape:", labels.shape)'

#### **Architecture**

In [30]:
class Detector(nn.Module):
    def __init__(self):
        super(Detector, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, 3, 1),  # Conv2d: kernel size 3, stride 1 (default)
            nn.ReLU(),
            nn.Conv2d(16, 4, 1),      # Conv2d: kernel size 1, stride 1 (default)
            nn.ReLU(),
            nn.BatchNorm2d(4),
            nn.MaxPool2d(8, 8)        # MaxPool2d: kernel size 8, stride 8
        )

        # Corrected number of input features to the Linear layer
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(4 * 15 * 15, 1),  # Corrected input size for the fully connected layer
            nn.Sigmoid()  # Sigmoid activation for binary classification (output range 0-1)
        )

    def forward(self, x):
        x = self.features(x)  # Apply feature extractor (conv + pool)
        x = self.classifier(x)  # Apply classifier (fully connected)
        return x

#### **GPU Optimization**

In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = Detector().to(device)

cuda


#### **Training Loop**

In [None]:
criterion = nn.BCELoss() 
optimizer = optim.Adam(model.parameters(), lr=0.001)

def calculate_accuracy(outputs, labels):
    predicted = (outputs > 0.5).float()
    correct = (predicted == labels).sum().item()
    total = labels.size(0)
    accuracy = correct / total
    return accuracy


In [None]:
train_losses = []
test_losses = []
train_accuracies = []
test_accuracies = []

num_epochs = 10
for epoch in range(num_epochs):
    model.train() 
    current_loss = 0.0
    running_corrects = 0


    for images, labels in train_loader:
        images, labels = images.to(device), labels.float().to(device)

        optimizer.zero_grad() 

        outputs = model(images) 
        loss = criterion(outputs.squeeze(), labels)

        loss.backward()
        optimizer.step()

        current_loss += loss.item()  
        running_corrects += calculate_accuracy(outputs.squeeze(), labels)  

    
    train_loss = current_loss / len(train_loader)
    train_accuracy = running_corrects / len(train_loader)

   
    model.eval()
    current_loss = 0.0
    current_correct = 0

    with torch.no_grad():  
        for images, labels in test_loader:
            images, labels = images.to(device), labels.float().to(device)

            outputs = model(images) 
            loss = criterion(outputs.squeeze(), labels)

            current_loss += loss.item() 
            current_correct += calculate_accuracy(outputs.squeeze(), labels) 

    
    test_loss = current_loss / len(test_loader)
    test_accuracy = current_correct / len(test_loader)

    
    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy * 100:.2f}%")
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {100 * test_accuracy:.2f}%")

    train_losses.append(train_loss)
    test_losses.append(test_loss)
    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)

    
    print(f"Epoch {epoch + 1}:")
    print(f"Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy * 100:.2f}%")
    print(f"Testing Loss: {test_loss:.4f}, Testing Accuracy: {100 * test_accuracy:.2f}%")


KeyboardInterrupt: 

In [None]:
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(range(1, num_epochs + 1), train_losses, label='Train Loss', color='blue')
plt.plot(range(1, num_epochs + 1), test_losses, label='Test Loss', color='red')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Testing Loss')
plt.legend()


plt.subplot(1, 2, 2)
plt.plot(range(1, num_epochs + 1), train_accuracies, label='Train Accuracy', color='blue')
plt.plot(range(1, num_epochs + 1), test_accuracies, label='Test Accuracy', color='red')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Testing Accuracy')
plt.legend()

plt.tight_layout()
plt.show()
