# Bachelor Thesis Finn Franken

In [None]:
import os
import shutil
from shutil import copyfile
import random
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from transformers import ViTForImageClassification, ViTFeatureExtractor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Data Preprocessing

### Check the amount of data in each dataset

In [None]:
# Count the entries in each source folder to see how balanced the two classes are.
fakecount = len(os.listdir("C:/Users/bolly/1BachelorThesis/fullmainfolder/FakeFaces"))
realcount = len(os.listdir("C:/Users/bolly/1BachelorThesis/fullmainfolder/RealFaces"))

print("Fakecount = ", fakecount)
print("Realcount = ", realcount)

### Combining the two datasets into a single, labelled set

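`real_` is added as a prefix to the real image names and `ai_` is added to the AI-generated image names, so the label stays recoverable once both sets share one folder. The order of the images is randomised later (random train/test split and `shuffle=True` in the training DataLoader) so the model can't learn from the order in which the data is fed in.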

In [None]:
# Folder that receives the merged, label-prefixed images.
all_faces_dir = "C:/Users/bolly/1BachelorThesis/fullmainfolder/AllFaces"
# os.rename does not create missing folders, so make sure the target exists first.
os.makedirs(all_faces_dir, exist_ok=True)

# (source folder, label prefix) pairs; the prefix keeps the class recoverable
# from the file name once both sets share one folder.
labelled_sources = [
    ("C:/Users/bolly/1BachelorThesis/fullmainfolder/RealFaces", "real_"),
    ("C:/Users/bolly/1BachelorThesis/fullmainfolder/FakeFaces", "ai_"),
]

for source_dir, prefix in labelled_sources:
    for filename in os.listdir(source_dir):
        # str.endswith accepts a tuple, so one call covers both extensions.
        if filename.endswith(('.jpg', '.png')):
            os.rename(
                os.path.join(source_dir, filename),
                os.path.join(all_faces_dir, f'{prefix}{filename}'),
            )

#Gets a list of all images in the combined folder
combined_images = [f for f in os.listdir(all_faces_dir) if f.endswith(('.jpg', '.png'))]

### Splitting the data into Training (80%) and Testing (20%)

In [None]:
all_faces_dir = "C:/Users/bolly/1BachelorThesis/fullmainfolder/AllFaces"
train_dir = "C:/Users/bolly/1BachelorThesis/fullmainfolder/TrainFaces"
test_dir = "C:/Users/bolly/1BachelorThesis/fullmainfolder/TestFaces"

# Moving a file into a folder that does not exist fails, so create both targets up front.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

total_images = len(combined_images)
train_size = int(0.8 * total_images)
test_size = total_images - train_size

# Random 80% of the images form the training set. The draw is unseeded, so
# every run of this cell produces a different split.
train_images = random.sample(combined_images, train_size)
# Set copy for O(1) membership tests; scanning the list for every image
# would make the loop below quadratic in the number of images.
train_lookup = set(train_images)

for image in combined_images:
    target_dir = train_dir if image in train_lookup else test_dir
    shutil.move(os.path.join(all_faces_dir, image), os.path.join(target_dir, image))


### Organizing the images into the correct input format for ImageFolder

Test Set:

In [None]:
# Class folders are created inside the folder that test_data_dir hands to
# datasets.ImageFolder, which derives each label from the sub-folder name.
test_faces_dir = "C:/Users/bolly/1BachelorThesis/fullmainfolder/TestFaces"
for class_folder in ('fake', 'real'):
    os.makedirs(os.path.join(test_faces_dir, class_folder), exist_ok=True)

for image in os.listdir(test_faces_dir):
    if image.startswith('ai_'):
        class_folder = 'fake'
    elif image.startswith('real_'):
        class_folder = 'real'
    else:
        # Skips the class folders themselves and anything without a label prefix.
        continue
    shutil.move(os.path.join(test_faces_dir, image), os.path.join(test_faces_dir, class_folder, image))

Training Set:

In [None]:
# Class folders are created inside the folder that train_data_dir hands to
# datasets.ImageFolder, which derives each label from the sub-folder name.
train_faces_dir = "C:/Users/bolly/1BachelorThesis/fullmainfolder/TrainFaces"
for class_folder in ('fake', 'real'):
    os.makedirs(os.path.join(train_faces_dir, class_folder), exist_ok=True)

for image in os.listdir(train_faces_dir):
    if image.startswith('ai_'):
        class_folder = 'fake'
    elif image.startswith('real_'):
        class_folder = 'real'
    else:
        # Skips the class folders themselves and anything without a label prefix.
        continue
    shutil.move(os.path.join(train_faces_dir, image), os.path.join(train_faces_dir, class_folder, image))

The following code simply removes the `.ipynb_checkpoints` folder that JupyterLab automatically creates for autosave purposes. It caused issues when calling `datasets.ImageFolder`, since ImageFolder saw it but raised an error because it did not contain any png, jpg, etc.

In [None]:
# Autosave folder JupyterLab may have dropped into the training-set root.
checkpoints_dir = 'C:/Users/bolly/1BachelorThesis/fullmainfolder/TrainFaces/.ipynb_checkpoints'

# Delete the folder and everything in it, but only when it is actually there.
checkpoints_present = os.path.exists(checkpoints_dir)
if checkpoints_present:
    shutil.rmtree(checkpoints_dir)

In [None]:
# Autosave folder JupyterLab may have dropped into the test-set root.
checkpoints_dir = 'C:/Users/bolly/1BachelorThesis/fullmainfolder/TestFaces/.ipynb_checkpoints'

# Delete the folder and everything in it, but only when it is actually there.
checkpoints_present = os.path.exists(checkpoints_dir)
if checkpoints_present:
    shutil.rmtree(checkpoints_dir)

### Creating Datasets And Dataloaders for use in the models

In [None]:
# Roots of the two ImageFolder trees (one sub-folder per class).
train_data_dir = "C:/Users/bolly/1BachelorThesis/fullmainfolder/TrainFaces"
test_data_dir = "C:/Users/bolly/1BachelorThesis/fullmainfolder/TestFaces"

# Shared preprocessing: fixed 224x224 size, tensor conversion, then per-channel
# normalisation with the standard statistics for models trained on ImageNet.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Training set first, test set second; both use the same preprocessing.
train_dataset, test_dataset = (
    datasets.ImageFolder(root=folder, transform=transform)
    for folder in (train_data_dir, test_data_dir)
)

# Only the training batches are reshuffled each epoch; the test order stays fixed.
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=32)

# Models

## ResNet

In [None]:
# Start from the pre-trained ResNet-50.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` - check against the installed version.
model = models.resnet50(pretrained=True)

# Swap the final fully connected layer for a 2-way head (binary classification),
# keeping the input width of the layer it replaces.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=2)

# Cross-entropy loss, optimised with stochastic gradient descent plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

In [None]:
def train_model(model, criterion, optimizer, epochs, loader=None):
    """Train ``model`` for ``epochs`` passes and print loss/accuracy per epoch.

    Args:
        model: Network whose forward pass returns the class logits directly.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of passes over the training data.
        loader: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``train_loader``.

    Raises:
        ValueError: If an epoch yields no samples.
    """
    batches = train_loader if loader is None else loader
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in batches:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            batch_size = labels.size(0)
            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()
            # Scale the batch loss by the batch size so the epoch figure is a
            # per-sample average even when the last batch is smaller.
            running_loss += loss.item() * batch_size
        if total == 0:
            raise ValueError('training loader yielded no samples')
        # Divide by the samples actually seen rather than the dataset length;
        # the two only coincide when the loader visits every sample once.
        epoch_loss = running_loss / total
        epoch_accuracy = 100 * correct / total
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

# Fine-tune the ResNet for five epochs.
train_model(model=model, criterion=criterion, optimizer=optimizer, epochs=5)

In [None]:
def evaluate(model, test_loader):
    """Print accuracy, precision, recall and F1 of ``model`` on ``test_loader``.

    Args:
        model: Network whose forward pass returns the class logits directly.
        test_loader: Iterable of ``(inputs, labels)`` batches.

    Precision, recall and F1 are computed with scikit-learn's default arguments.
    NOTE(review): that presumably scores label 1 as the positive class - confirm
    which class that is via ``test_dataset.class_to_idx``.
    """
    model.eval()
    targets, predictions = [], []

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            logits = model(batch_inputs)
            # Predicted class = index of the largest logit in each row.
            batch_predictions = logits.argmax(dim=1)
            targets.extend(batch_labels.numpy())
            predictions.extend(batch_predictions.numpy())

    accuracy = 100 * accuracy_score(targets, predictions)
    precision = precision_score(targets, predictions)
    recall = recall_score(targets, predictions)
    f1 = f1_score(targets, predictions)

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    
# Score the fine-tuned ResNet on the held-out test set.
evaluate(model=model, test_loader=test_loader)

## ViT

In [None]:
from transformers import ViTForImageClassification, ViTImageProcessor
model_name = "google/vit-base-patch16-224-in21k" #Loads the pre-trained ViT model trained on ImageNet21k
# State the class count explicitly so the classification head is sized for the
# two-class task rather than relying on the library's default head size.
model = ViTForImageClassification.from_pretrained(model_name, num_labels=2) #Initializes the model
image_processor = ViTImageProcessor.from_pretrained(model_name)
# NOTE(review): image_processor is loaded but not used by the training/evaluation
# cells; the loaders feed tensors normalised with the ImageNet statistics from
# the torchvision transform - confirm this matches the checkpoint's preprocessing.

optimizer = optim.Adam(model.parameters(), lr=1e-5) #Adam optimizer
criterion = nn.CrossEntropyLoss() #Cross-entropy loss

### Training ViT

In [None]:
def train(model, train_loader, optimizer, criterion, epochs):
    """Train ``model`` for ``epochs`` passes and print loss/accuracy per epoch.

    Args:
        model: Network whose forward pass returns an object with a ``logits``
            attribute.
        train_loader: Iterable of ``(inputs, labels)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.
        epochs: Number of passes over the training data.

    Raises:
        ValueError: If an epoch yields no samples.
    """
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.logits, labels)
            loss.backward()
            optimizer.step()
            batch_size = labels.size(0)
            # Scale the batch loss by the batch size so the epoch figure is a
            # per-sample average (assuming the criterion returns a batch mean).
            # A smaller final batch is then not over-weighted, and the number
            # is comparable with the loss printed by train_model.
            running_loss += loss.item() * batch_size
            _, predicted = outputs.logits.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()
        if total == 0:
            raise ValueError('training loader yielded no samples')
        accuracy = 100 * correct / total
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/total:.4f}, Accuracy: {accuracy:.2f}%')

# Fine-tune the ViT for five epochs.
train(model=model, train_loader=train_loader, optimizer=optimizer, criterion=criterion, epochs=5)

### Evaluation

In [None]:
def evaluate(model, test_loader):
    """Print accuracy, precision, recall and F1 of ``model`` on ``test_loader``.

    Args:
        model: Network whose forward pass returns an object with a ``logits``
            attribute.
        test_loader: Iterable of ``(inputs, labels)`` batches.

    Precision, recall and F1 are computed with scikit-learn's default arguments.
    NOTE(review): that presumably scores label 1 as the positive class - confirm
    which class that is via ``test_dataset.class_to_idx``.
    """
    model.eval()
    targets, predictions = [], []

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            logits = model(batch_inputs).logits
            # Predicted class = index of the largest logit in each row.
            batch_predictions = logits.argmax(dim=1)
            targets.extend(batch_labels.numpy())
            predictions.extend(batch_predictions.numpy())

    # Share of positions where prediction and label agree, as a percentage.
    matches = np.array(predictions) == np.array(targets)
    accuracy = matches.mean() * 100
    precision = precision_score(targets, predictions)
    recall = recall_score(targets, predictions)
    f1 = f1_score(targets, predictions)

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')

# Score the fine-tuned ViT on the held-out test set.
evaluate(model=model, test_loader=test_loader)

## Noise Preprocessing

This part adds a Gaussian noise transform to the preprocessing pipeline of the training dataset. The noise is generated on the fly whenever an image is loaded, so the original image files remain unaffected.

In [None]:
class AddGaussianNoise:
    """Callable transform: add Gaussian noise to a tensor, then clip to [0, 1].

    Args:
        mean: Mean of the noise distribution.
        std: Standard deviation of the noise distribution.
    """

    def __init__(self, mean=0, std=0.1):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        """Return a noisy copy of ``tensor``; the input itself is not modified."""
        # Standard-normal samples of the input's shape, scaled and shifted.
        perturbation = self.mean + self.std * torch.randn_like(tensor)
        # Clip so every value stays inside the [0, 1] range.
        return (tensor + perturbation).clamp(0, 1)

# Noise-free preprocessing, used unchanged for the test set.
base_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) #Identical transformations to the ones used earlier
])

# Training preprocessing with noise. The noise goes in right after ToTensor,
# while the pixel values are still in [0, 1]: AddGaussianNoise clamps its
# output to that range, which would cut off every negative value of an
# already-normalised image. Normalisation therefore has to come last.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    AddGaussianNoise(mean=0, std=0.1),  #Noise is only applied to the training data
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.ImageFolder(root=train_data_dir, transform=train_transform)
test_dataset = datasets.ImageFolder(root=test_data_dir, transform=base_transform)

#Creates dataloaders with identical parameters to the ones used before
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

## ResNet with Noise

In [None]:
# Fresh pre-trained ResNet-50 for the noise experiment.
model = models.resnet50(pretrained=True)

# Swap the final fully connected layer for a 2-way head (binary classification),
# keeping the input width of the layer it replaces.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=2)

# Cross-entropy loss, optimised with stochastic gradient descent plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

In [None]:
def train_model(model, criterion, optimizer, epochs, loader=None):
    """Train ``model`` for ``epochs`` passes and print loss/accuracy per epoch.

    Args:
        model: Network whose forward pass returns the class logits directly.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of passes over the training data.
        loader: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``train_loader``.

    Raises:
        ValueError: If an epoch yields no samples.
    """
    batches = train_loader if loader is None else loader
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in batches:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            batch_size = labels.size(0)
            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()
            # Scale the batch loss by the batch size so the epoch figure is a
            # per-sample average even when the last batch is smaller.
            running_loss += loss.item() * batch_size
        if total == 0:
            raise ValueError('training loader yielded no samples')
        # Divide by the samples actually seen rather than the dataset length;
        # the two only coincide when the loader visits every sample once.
        epoch_loss = running_loss / total
        epoch_accuracy = 100 * correct / total
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

# Fine-tune the ResNet for five epochs on the noisy training data.
train_model(model=model, criterion=criterion, optimizer=optimizer, epochs=5)

In [None]:
def evaluate(model, test_loader):
    """Print accuracy, precision, recall and F1 of ``model`` on ``test_loader``.

    Args:
        model: Network whose forward pass returns the class logits directly.
        test_loader: Iterable of ``(inputs, labels)`` batches.

    Precision, recall and F1 are computed with scikit-learn's default arguments.
    NOTE(review): that presumably scores label 1 as the positive class - confirm
    which class that is via ``test_dataset.class_to_idx``.
    """
    model.eval()
    targets, predictions = [], []

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            logits = model(batch_inputs)
            # Predicted class = index of the largest logit in each row.
            batch_predictions = logits.argmax(dim=1)
            targets.extend(batch_labels.numpy())
            predictions.extend(batch_predictions.numpy())

    accuracy = 100 * accuracy_score(targets, predictions)
    precision = precision_score(targets, predictions)
    recall = recall_score(targets, predictions)
    f1 = f1_score(targets, predictions)

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    
# Score the noise-trained ResNet on the clean test set.
evaluate(model=model, test_loader=test_loader)

## ViT with Noise

In [None]:
from transformers import ViTForImageClassification, ViTImageProcessor
model_name = "google/vit-base-patch16-224-in21k" #Loads the pre-trained ViT model trained on ImageNet21k
# State the class count explicitly so the classification head is sized for the
# two-class task rather than relying on the library's default head size.
model = ViTForImageClassification.from_pretrained(model_name, num_labels=2) #Initializes the model
image_processor = ViTImageProcessor.from_pretrained(model_name)
# NOTE(review): image_processor is loaded but not used by the training/evaluation
# cells; the loaders feed tensors normalised with the ImageNet statistics from
# the torchvision transform - confirm this matches the checkpoint's preprocessing.

optimizer = optim.Adam(model.parameters(), lr=1e-5) #Adam optimizer
criterion = nn.CrossEntropyLoss() #Cross-entropy loss

In [None]:
def train(model, train_loader, optimizer, criterion, epochs):
    """Train ``model`` for ``epochs`` passes and print loss/accuracy per epoch.

    Args:
        model: Network whose forward pass returns an object with a ``logits``
            attribute.
        train_loader: Iterable of ``(inputs, labels)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.
        epochs: Number of passes over the training data.

    Raises:
        ValueError: If an epoch yields no samples.
    """
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.logits, labels)
            loss.backward()
            optimizer.step()
            batch_size = labels.size(0)
            # Scale the batch loss by the batch size so the epoch figure is a
            # per-sample average (assuming the criterion returns a batch mean).
            # A smaller final batch is then not over-weighted, and the number
            # is comparable with the loss printed by train_model.
            running_loss += loss.item() * batch_size
            _, predicted = outputs.logits.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()
        if total == 0:
            raise ValueError('training loader yielded no samples')
        accuracy = 100 * correct / total
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/total:.4f}, Accuracy: {accuracy:.2f}%')

# Fine-tune the ViT for five epochs on the noisy training data.
train(model=model, train_loader=train_loader, optimizer=optimizer, criterion=criterion, epochs=5)

In [None]:
def evaluate(model, test_loader):
    """Print accuracy, precision, recall and F1 of ``model`` on ``test_loader``.

    Args:
        model: Network whose forward pass returns an object with a ``logits``
            attribute.
        test_loader: Iterable of ``(inputs, labels)`` batches.

    Precision, recall and F1 are computed with scikit-learn's default arguments.
    NOTE(review): that presumably scores label 1 as the positive class - confirm
    which class that is via ``test_dataset.class_to_idx``.
    """
    model.eval()
    targets, predictions = [], []

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            logits = model(batch_inputs).logits
            # Predicted class = index of the largest logit in each row.
            batch_predictions = logits.argmax(dim=1)
            targets.extend(batch_labels.numpy())
            predictions.extend(batch_predictions.numpy())

    # Share of positions where prediction and label agree, as a percentage.
    matches = np.array(predictions) == np.array(targets)
    accuracy = matches.mean() * 100
    precision = precision_score(targets, predictions)
    recall = recall_score(targets, predictions)
    f1 = f1_score(targets, predictions)

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')

# Score the noise-trained ViT on the clean test set.
evaluate(model=model, test_loader=test_loader)