# FYP: Image Deepfake Detection

This project investigates image deepfake detection using various deep learning techniques.



### Setup

In [1]:
import random

import numpy as np
import torch
from torchvision import transforms

from utils.dataset import create_data_loaders, load_hf_dataset, process_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed Python's, NumPy's and PyTorch's random number generators with one shared
# value so that repeated runs of the notebook draw the same random numbers.
seed = 556
for seed_fn in (random.seed, np.random.seed):
    seed_fn(seed)
# Kept as the final expression: the cell displays the Generator it returns.
torch.manual_seed(seed)

<torch._C.Generator at 0x215854465b0>

In [3]:
# Pick the compute backend in order of preference: Apple MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Tensors created without an explicit device will now be placed on `device`.
torch.set_default_device(device)
print(f"Using device: {device}")

Using device: cuda


## <a name="dp"></a>Data Preparation

This section downloads the dataset and performs data augmentation.

### Load Dataset

In [4]:
# Load the WildDeepfake dataset from the Hugging Face Dataset Hub.
dataset_name = "xingjunm/WildDeepfake"
# NOTE(review): the meaning of the positional arguments 10000 and True is defined
# by utils.dataset.load_hf_dataset and is not visible from this notebook - confirm
# and consider naming them.
train, val, test = load_hf_dataset(dataset_name, 10000, True)
# Ordered collection of the splits used for training-time processing. A tuple is
# used instead of a set so that iteration order is deterministic (train, then val).
# NOTE(review): in the recorded run below, `test` is printed as None.
dataset = (train, val)

# Side length, in pixels, passed to the resize / crop transforms.
image_size = 224
print(f"Train dataset: {train}, \nValidation dataset: {val}, \nTest dataset: {test}")

Attempting to load dataset from Hugging Face: xingjunm/WildDeepfake
Successfully loaded dataset from Hugging Face: xingjunm/WildDeepfake
dataset IterableDatasetDict({
    train: IterableDataset({
        features: ['png', '__key__', '__url__'],
        num_shards: 963
    })
    test: IterableDataset({
        features: ['png', '__key__', '__url__'],
        num_shards: 157
    })
})
Train dataset: IterableDataset({
    features: Unknown,
    num_shards: 963
}), 
Validation dataset: IterableDataset({
    features: Unknown,
    num_shards: 963
}), 
Test dataset: None


In [5]:
# Create torch-formatted views of the train / validation splits.
# with_format() hands back a dataset object instead of changing the one it is
# called on, so each result has to be bound to a name to be usable; assigning it
# to a loop variable would throw the converted dataset away.
train_torch = train.with_format("torch")
val_torch = val.with_format("torch")
# NOTE(review): `train` and `val` themselves are intentionally left unchanged, so
# code that consumes them behaves as before. Pass `train_torch` / `val_torch`
# downstream instead if create_data_loaders expects tensors - TODO confirm.

In [6]:
# Per-split image transformation pipelines.
def _imagenet_normalize():
    """Return a Normalize step using the ImageNet per-channel mean and std dev (pretrain data)."""
    return transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])


def _eval_pipeline():
    """Build the resize -> centre-crop -> tensor -> normalise pipeline used by 'val' and 'test'."""
    steps = [
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        _imagenet_normalize(),
    ]
    return transforms.Compose(steps)


# Training adds random augmentation (crop, horizontal flip, colour jitter) ahead of
# the tensor conversion; validation and test each get their own non-augmenting pipeline.
_train_steps = [
    transforms.RandomResizedCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    _imagenet_normalize(),
]

data_transforms = {
    'train': transforms.Compose(_train_steps),
    'val': _eval_pipeline(),
    'test': _eval_pipeline(),
}

In [7]:
# Display the repr of the training split.
train

IterableDataset({
    features: Unknown,
    num_shards: 963
})

In [9]:
# Build the three loaders from the splits and the per-split transforms.
# NOTE(review): `val` is passed twice, so it also fills the third dataset slot
# (presumably the test split - confirm against create_data_loaders). The recorded
# output of the loading cell shows `test` as None.
train_loader, val_loader, test_loader = create_data_loaders(
    train, val, val, data_transforms
)

In [None]:
# Display the repr of `train_loader`, the first value returned by create_data_loaders.
train_loader