Loading image dataset and CSV Files

In [None]:
import os 
import pandas
import numpy 
from augmentations import augmentations
from datasets import datasets
from PIL import Image
import cv2

# Directory holding the raw images. The listing and the joined paths must use
# the same directory; previously the join used "../data/raw/data/images",
# which does not match the directory being listed.
IMAGES_DIR = "../data/raw_data/images"

# os.listdir returns entries in arbitrary order, so sort for a reproducible list.
# NOTE(review): later cells attach these paths to CSV rows by position —
# confirm that sorted filename order matches the row order of the CSV.
images = [
    os.path.join(IMAGES_DIR, fil)
    for fil in sorted(os.listdir(IMAGES_DIR))
]

Loading Dataset information CSV File

In [None]:
# Element-wise opener: maps every path to an opened PIL image.
# otypes=[object] keeps each result as an image object. Without it,
# numpy.vectorize infers the output dtype from the first result and raises
# on empty input.
image_loader = numpy.vectorize(pyfunc=Image.open, otypes=[object])

train_info = pandas.read_csv("../data/raw_data/information.csv")
# NOTE(review): images are attached to rows purely by position — confirm that
# the order of `images` matches the row order of information.csv.
train_info['image'] = image_loader(images)

In [None]:
# Encode the textual labels as integers and store them under 'class', the
# column name the later split / dataset cells read. Previously the mapped
# Series was only displayed and then discarded.
# NOTE(review): assumes the CSV column is spelled 'Class' — confirm against
# information.csv.
train_info['class'] = train_info['Class'].map({
    'yes': 0,
    'no': 1,
})

# Show the encoded labels as the cell output.
train_info['class']

Converting images to PNG format

In [None]:
def to_png(img):
    """Re-encode an image through an in-memory PNG buffer.

    Args:
        img: A PIL image or a numpy pixel array. It is converted with
            numpy.asarray first, because cv2.imencode works on arrays and
            the caller in this notebook passes PIL images.

    Returns:
        A PIL image built from the decoded PNG data.

    Raises:
        RuntimeError: If cv2.imencode reports that encoding failed.
    """
    pixels = numpy.asarray(img)
    success, png_data = cv2.imencode(ext='.png', img=pixels)
    if not success:
        raise RuntimeError('Failed to convert image')
    # IMREAD_UNCHANGED decodes the buffer without forcing a colour conversion.
    png_img = cv2.imdecode(png_data, cv2.IMREAD_UNCHANGED)
    return Image.fromarray(png_img)

# Run every stored image through the PNG conversion, replacing the column.
train_info['image'] = train_info['image'].apply(to_png)

Checking data for Gaussian Noise, Impulse Noise & Salt-and-Pepper Noise

In [None]:
from skimage.restoration import estimate_sigma, denoise_tv_chambolle


def has_gaussian_noise(img, threshold: float):
    """Placeholder for the Gaussian-noise check (not implemented yet).

    Both arguments are currently ignored and the function returns None,
    which is falsy when used in a boolean test.
    """
    return None

def has_salt_and_paper_noise(img, threshold: float):
    """Placeholder for the salt-and-pepper-noise check (not implemented yet).

    Both arguments are currently ignored and the function returns None,
    which is falsy when used in a boolean test.
    """
    return None

def has_impulse_noise(img, threshold: int):
    """Placeholder for the impulse-noise check (not implemented yet).

    Both arguments are currently ignored and the function returns None,
    which is falsy when used in a boolean test.
    """
    return None

In [None]:
# Pull every image out of the frame so each one can be screened for noise.
images = train_info['image'].tolist()

# Thresholds handed to the three noise checks.
gaussian_thresh = 10
impulse_thresh = 0.8
sp_thresh = 0.8

# Each check is paired with its threshold. They are evaluated in this order
# and evaluation stops at the first truthy result.
noise_checks = (
    (has_gaussian_noise, gaussian_thresh),
    (has_impulse_noise, impulse_thresh),
    (has_salt_and_paper_noise, sp_thresh),
)

noisy_images = []

for image in images:
    if any(check(image, limit) for check, limit in noise_checks):
        noisy_images.append(image)

Initializing Datasets for training and validation

In [None]:
from sklearn.model_selection import train_test_split 

# Fixed seed so the train/validation partition is identical on every run.
SPLIT_SEED = 42

# Split row positions (not the rows themselves) 70/30, keeping the class
# proportions equal in both parts via `stratify`.
# numpy.arange is the correct spelling; numpy has no `arrange`.
train_indices, val_indices = train_test_split(
    numpy.arange(train_info.shape[0]),
    test_size=0.3,
    stratify=train_info['class'].tolist(),
    random_state=SPLIT_SEED,
)

In [None]:
# Select the label and image entries belonging to each side of the split.
# NOTE(review): the selected Series keep the index labels of `train_info` —
# confirm that DeepFakeClassificationDataset does not expect a 0..n-1 index.
train_labels = train_info['class'][train_indices]
train_images = train_info['image'][train_indices]
validation_labels = train_info['class'][val_indices]
validation_images = train_info['image'][val_indices]

# Training split, paired with the training augmentations.
train_dataset = datasets.DeepFakeClassificationDataset(
    labels=train_labels,
    images=train_images,
    transforms=augmentations.get_training_augmentations(),
)

# Validation split, paired with the validation augmentations.
validation_dataset = datasets.DeepFakeClassificationDataset(
    labels=validation_labels,
    images=validation_images,
    transforms=augmentations.get_validation_augmentations(),
)

Post-processing image quality estimation using SNR and SSIM

In [None]:
def estimate_ssim(trans_img, orig_img):
    """Placeholder for the SSIM estimate (not implemented yet).

    Both arguments are currently ignored and the function returns None.
    """
    return None

def estimate_snr(trans_img, orig_img):
    """Placeholder for the SNR estimate (not implemented yet).

    Both arguments are currently ignored and the function returns None.
    """
    return None

# Report an SNR and SSIM value for every image in `images`.
# NOTE(review): estimate_snr and estimate_ssim are declared with two required
# parameters (trans_img, orig_img) but are called here with no arguments, so
# this loop raises TypeError on its first iteration. Which transformed/original
# pair should be passed cannot be determined from this cell — TODO supply it.
for idx, img in enumerate(images):
    snr, ssim = estimate_snr(), estimate_ssim()
    # NOTE(review): print() does not interpolate %s placeholders; the format
    # string is printed verbatim, followed by the three values.
    print('%s: snr - %s; ssim - %s;', str(idx), snr, ssim)

Handling class imbalance in the dataset using Class Weighting

In [None]:
# Weight for each class; both entries are 1, i.e. the classes are weighted equally.
CLASS_WEIGHTS = [1, 1]

# Both datasets reference the same list object.
train_dataset.weights = CLASS_WEIGHTS
validation_dataset.weights = CLASS_WEIGHTS

Saving datasets to local storage

In [None]:
import pickle 

# Serialize both datasets to disk. The `with` blocks guarantee that each file
# is flushed and closed even if pickling fails; previously the handles
# returned by open() were never closed.
with open("../data/augmented_data/train_dataset.pkl", mode='wb') as train_file:
    pickle.dump(obj=train_dataset, file=train_file)

with open("../data/augmented_data/validation_dataset.pkl", mode='wb') as validation_file:
    pickle.dump(obj=validation_dataset, file=validation_file)