I will try to make a network that classifies images as fake or real.

I will use pytorch because it's the library that I have the most experience with.

The train / validation / test split I will use is the one already present in the repo.



In [1]:
import torch
# global variables

# binary classification threshold
threshold = 0.5

on_gpu = True

batch_size = 256
num_epochs = 10
p_dropout = 0.2

#SGD params
start_lr = 1e-1
momentum = 0.3

#LR scheduler params
start_factor= 1
end_factor= .0001 
total_iters= 8


if on_gpu:
    device = torch.device("cuda")
    print(f"Using {device} device")
else:
    print("Running on the CPU")

Using cuda device


In [2]:
# HERE I ADDED THE TRANSFORM IN THE __INIT__ and in the __GETITEM__

# I will start by making the data loader for the 3 datasets

import os

from torch.utils.data import Dataset
from torchvision.io import read_image
from torchvision.transforms.functional import to_pil_image

# pytorch datasets are classes that have __init__, __len__, __getitem__ methods

class Custom_DS_Big(Dataset):

    def __init__(self, directory_name, transform = None):
        self.directory = "dataset big/" + directory_name
        
        self.n_fake_imgs = 0
        for filename in os.listdir(self.directory+ "/Fake"):
            self.n_fake_imgs += 1
        
        self.n_real_imgs = 0
        for filename in os.listdir(self.directory+ "/Real"):
            self.n_real_imgs += 1

        self.labels = torch.cat((torch.zeros(self.n_fake_imgs), torch.ones(self.n_real_imgs)))
        # with the line above I'm making 1 indicate a real image, and zero indicates a fake image
        # maybe it should be inverted: a 1 (or positive) should indicate a deepfake
        # while a 0 (or false) should indicate a real image
        # we will discuss and decide later
        self.length = self.labels.shape[0]

        self.transform = transform
    
    def __len__(self):
        return self.length
    
    def __getitem__(self, index):
        label = self.labels[index]
        if index < self.n_fake_imgs:
            image = read_image(self.directory+ f"/Fake/fake_{index}.jpg")
        else:
            image = read_image(self.directory+ f"/Real/real_{index-self.n_fake_imgs}.jpg")
        if self.transform:
            image = to_pil_image(image)
            image = self.transform(image)     
        return image, label


In [3]:
# THATS NEW!

# creating a data augmentation technique to make the model more robust to variations in the data
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [4]:
# HERE I ADDED THE TRANSFORM FUNCTION

# while the dataset class retrieves one image at the time, the dataloader class retrieves one batch of images
from torch.utils.data import DataLoader

train_data = Custom_DS_Big("Train", transform = transform)
val_data = Custom_DS_Big("Validation", transform = transform)

train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True)


In [5]:
#uncomment below to test the dataloaders
# import matplotlib.pyplot as plt

# train_features, train_labels = next(iter(train_dataloader))
# print(f"Feature batch shape: {train_features.size()}")
# print(f"Labels batch shape: {train_labels.size()}")
# for i in range(train_labels.shape[0]):
#     img = train_features[i]
#     label = train_labels[i]
#     plt.imshow(img.permute((1,2,0)))
#     plt.title(f"Label: {label}")
#     plt.show()
#     if i > 4:
#         break

In [6]:
# I can now start with the actual NN
import torch.nn as nn

#defining the NN class
class NeuralNetwork(nn.Module):
    def __init__(self, sequence):
        super().__init__()
        self.sequence = sequence
    def forward(self, x):
        pred_proba = self.sequence(x)
        return pred_proba
    
#defining the functions that train and test

def train_loop(model, train_data_loader, optimizer_fn, loss_fn, on_gpu = on_gpu):

    model.train()

    for batch, (X,y) in enumerate(train_data_loader):
        y = y.unsqueeze(1)
        if on_gpu:
            X, y = X.to(device), y.to(device)

        log_proba = model(X)
        loss_value = loss_fn(log_proba, y)

        loss_value.backward()
        optimizer_fn.step()
        optimizer_fn.zero_grad()
        if batch % 150 == 0:
            print(f"Loss: {(loss_value/batch_size):.6f} | [{(batch+1) * batch_size}/{len(train_data_loader.dataset)}]")


def val_loop(model, val_data_loader, loss_fn, on_gpu = on_gpu):

    model.eval()
    loss, correct = 0,0

    with torch.no_grad():
        for X,y in val_data_loader:
            y = y.unsqueeze(1)
            if on_gpu:
                X, y = X.to(device), y.to(device)
            log_proba = model(X)
            preds = log_proba > threshold
            preds = preds.type(torch.float)
            loss += loss_fn(log_proba, y)
            correct += (preds == y).type(torch.float).sum().item()
    
    print(f"Accuracy {(correct/len(val_data_loader.dataset)):.2%} | Loss {(loss/len(val_data_loader.dataset)):6f}")

In [7]:
# HERE I MODIFIED THE NN ADDING A NORMALIZATION LAYER AND ONE MORE CONVOLUTIONAL LAYER. I ALSO ADDED THAT THE SECOND DROP OUT IS DROPPING MORE THINGS AND I CHANGED SOME PARAMETERS

# run this cell to make the model

first_model = NeuralNetwork(nn.Sequential(
    nn.Conv2d(3, 64, 3, stride = 2),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(64, 128, 3, stride=2),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(128, 256, 3, stride = 2),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Flatten(),
    nn.Linear(2304, 1000),
    nn.ReLU(),
    nn.Dropout(p_dropout),
    nn.Linear(1000, 500),
    nn.ReLU(),
    nn.Dropout(p_dropout+0.2),
    nn.Linear(500, 1),
    nn.Sigmoid()
    ))

if on_gpu:
    first_model.to(device)

# Binary cross entropy as loss function
loss_function = nn.BCELoss()
optimizer_function = torch.optim.SGD(first_model.parameters(),
                                     lr = start_lr, momentum= momentum)

scheduler = torch.optim.lr_scheduler.LinearLR(optimizer_function,
                                              start_factor= start_factor, 
                                              end_factor= end_factor, 
                                              total_iters= total_iters)

In [8]:
# run this cell to train the model

# val_loop(first_model, val_dataloader, loss_fn= loss_function)

for e in range(num_epochs):
    print(f"\nEpoch {e+1} | lr: {scheduler.get_last_lr()[0]:.5f}")
    train_loop(first_model, train_dataloader, optimizer_function, loss_function)
    val_loop(first_model, val_dataloader, loss_fn= loss_function)
    scheduler.step()


Epoch 1 | lr: 0.10000
Loss: 0.002706 | [256/140002]
Loss: 0.002386 | [38656/140002]
Loss: 0.001915 | [77056/140002]
Loss: 0.001592 | [115456/140002]
Accuracy 78.77% | Loss 0.001752

Epoch 2 | lr: 0.08750
Loss: 0.001739 | [256/140002]
Loss: 0.001266 | [38656/140002]
Loss: 0.000882 | [77056/140002]
Loss: 0.000996 | [115456/140002]
Accuracy 80.96% | Loss 0.001697

Epoch 3 | lr: 0.07500
Loss: 0.001040 | [256/140002]
Loss: 0.000659 | [38656/140002]
Loss: 0.000938 | [77056/140002]
Loss: 0.000967 | [115456/140002]
Accuracy 87.56% | Loss 0.001104

Epoch 4 | lr: 0.06250
Loss: 0.000801 | [256/140002]
Loss: 0.000787 | [38656/140002]
Loss: 0.000743 | [77056/140002]
Loss: 0.000641 | [115456/140002]
Accuracy 88.80% | Loss 0.001009

Epoch 5 | lr: 0.05000
Loss: 0.000456 | [256/140002]
Loss: 0.000683 | [38656/140002]
Loss: 0.000537 | [77056/140002]
Loss: 0.000667 | [115456/140002]
Accuracy 90.58% | Loss 0.000893

Epoch 6 | lr: 0.03751
Loss: 0.000413 | [256/140002]
Loss: 0.000523 | [38656/140002]
Loss:

total time to run the previous cell was 226 minutes 32 seconds

In [9]:
torch.save(first_model, "manuel_mods.pth")


In [10]:
first_model = torch.load("manuel_mods.pth")
first_model.eval()

NeuralNetwork(
  (sequence): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Flatten(start_dim=1, end_dim=-1)
    (11): Linear(in_features=2304, out_features=1000, bias=True)
    (12): ReLU()
    (13): Dropout(p=0.2, inplace=False)
    (14): Linear(in_features=1000, out_features=500, bias=True)
    (15): ReLU()
    (16): Dropout(p=0.4, inplace=False)
    (17): Linear(in_features=500, out_features=1, bias=True)
    (18): Sigmoid()
  )
)

In [11]:
val_loop(first_model, val_dataloader, loss_function)

Accuracy 91.62% | Loss 0.000802


In [12]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'Total number of parameters: {count_parameters(first_model)}')

Total number of parameters: 3176945
