In [None]:
import os, glob, itertools, cv2, torch, math
import pandas as pd
import numpy as np 
import albumentations as A
from PIL import Image
from facenet_pytorch.models.mtcnn import MTCNN
from random import randint
import matplotlib.pyplot as plt
%matplotlib inline

In [1]:
test_path = 'Test\\test-task1'
test_text = os.path.join(test_path, 'label-task1.txt')
train_root_paths = [os.path.join('Train\\', f) for f in os.listdir('Train\\')]
real_images = [f for f in train_root_paths if '0' in f]
fake_images = [f for f in train_root_paths if '1' in f]

def read_test(path, test_path):
    imgs, labels = [], []
    with open(path) as f:
        for line in f:
            img, label = line.split(' ')
            imgs.append(os.path.join(test_path, img))
            labels.append(int(label))
    return np.array(imgs), np.array(labels)

def read_train(paths):
    train_paths = []
    for path in paths:
        train_paths.append([os.path.join(path, file) for file in os.listdir(path)])
    t_path = np.array(sum(train_paths, []))
    return t_path

test, y_test = read_test(test_text, test_path)
train_real = read_train(real_images)
train_fake = read_train(fake_images)
y_real = np.zeros(shape=train_real.shape, dtype=np.int8)
y_fake = np.ones(shape=train_fake.shape, dtype=np.int8)
train = np.concatenate((train_real, train_fake), axis=0)
y = np.concatenate((y_real, y_fake), axis=0)

NameError: name 'os' is not defined

In [None]:
def mtcnn_params(img):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=True, device=device)
    boxes, probs, landmarks = mtcnn.detect(img, landmarks=True)
    return boxes, probs, landmarks

In [None]:
def blackout_eyes(img, boxes, landmarks):
  left_eye_x, left_eye_y = landmarks[0][0][0], landmarks[0][0][1]
  right_eye_x, right_eye_y = landmarks[0][1][0], landmarks[0][1][1]

  box_width, box_height = boxes[0][2]*0.8, boxes[0][3]*0.1              
  #defining width and height of black rectangle to be created
  width_margin, height_margin = 0.1*box_width, box_height*0.4
  ## old--->> width_margin, height_margin = abs(left_eye_x - right_eye_x)*0.25, 10
  #width_margin, height_margin = width*0.8, box_height*0.1
  if(landmarks[0][0][1] >= landmarks[0][1][1]):
    start = (math.ceil(left_eye_x - width_margin), math.ceil(left_eye_y - height_margin))
    end = (math.ceil(right_eye_x + width_margin), math.ceil(right_eye_y + height_margin))
  else:
    end = (math.ceil(left_eye_x - width_margin), math.ceil(left_eye_y - height_margin))
    start = (math.ceil(right_eye_x + width_margin), math.ceil(right_eye_y + height_margin))
  cv2.rectangle(img, start, end, (0,0,0),-1)
  return img

def blackout_nose(img, boxes, landmarks):
  nose_x, nose_y = landmarks[0][2][0], landmarks[0][2][1]
  box_width, box_height = boxes[0][2]*0.8, boxes[0][3]*0.1
  width_margin, height_margin = 0.15*box_width, box_height*0.5
  start = (math.ceil(nose_x - width_margin), math.ceil(nose_y - height_margin*1.5))
  end = (math.ceil(nose_x + width_margin), math.ceil(nose_y + height_margin))
  cv2.rectangle(img, start, end, (0,0,0),-1)
  return img

def blackout_mouth(img, boxes, landmarks):
  left_mouth_x, left_mouth_y = landmarks[0][3][0], landmarks[0][3][1]
  right_mouth_x, right_mouth_y = landmarks[0][4][0], landmarks[0][4][1]
  box_width, box_height = boxes[0][2]*0.5, boxes[0][3]*0.15
  #defining width and height of black rectangle to be created
  width_margin, height_margin = 0.1*box_width, box_height*0.35
  ## old--->> width_margin, height_margin = abs(left_eye_x - right_eye_x)*0.25, 10
  #width_margin, height_margin = width*0.8, box_height*0.1
  start = (math.ceil(left_mouth_x - width_margin), math.ceil(left_mouth_y - height_margin))
  end = (math.ceil(right_mouth_x + width_margin), math.ceil(right_mouth_y + height_margin))
  cv2.rectangle(img, start, end, (0,0,0),-1)
  return img 

In [None]:
def blackout(img, boxes, probs, landmarks):
  if not probs:     #incase no face detected by MTCNN
    return img

  if probs < 0.95:  #in case MTCNN not sure about exact face landmarks, can be due to blurring
    return img

  m = randint(0, 3)  ##Random number
  
  if m==0:
    return blackout_eyes(img, boxes, landmarks)
  
  elif m==1:
    return blackout_nose(img, boxes, landmarks)

  else:
    return blackout_mouth(img, boxes, landmarks)

In [8]:
style_gan2_path = 'Train/1-STYLEGAN2/'
files=os.listdir(style_gan2_path)  #Gives list of all files(images) in that folder--->no change required
COUNT = 1
for file in files:  #no change required for these 5 lines
  img = cv2.imread(style_gan2_path + f"{file}")  #reading the file
  #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) NO NEED TO CONVERT BGR TO RGB
  boxes, probs, landmarks = mtcnn_params(img)  #getting landmarks of face
  try:
    new_img = blackout(img, boxes, probs, landmarks) #applying blackout on the image
  except:
    print(file)
    continue
  for i in range(6):
    resizeTransform = A.Compose([A.Resize(height=160, width=160, interpolation=1)], 
                                p=1)
    new_img = resizeTransform(image = new_img)
    new_img = new_img['image']
    transform =A.Compose([
      A.ImageCompression(quality_lower = 80, quality_upper=100, p=0.7),
      A.GaussNoise(p=0.1),
      A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, always_apply=False, p=0.5),
      A.Flip(p = 0.4),

      A.OneOf([
        A.GaussianBlur(blur_limit=(1,3), sigma_limit=0, p=0.7),
        A.MedianBlur(blur_limit=(1,3), always_apply=False, p=0.3),
      ], p = 0.6),

      A.OneOf([
        A.Rotate(limit=180, interpolation=1, border_mode=4, always_apply=False, p=0.7),
        A.Rotate(limit=180, interpolation=1, border_mode=0, always_apply=False, p=0.3),
      ], p = 0.7),

      A.OneOf([
        A.Affine(scale = 1.4, translate_percent= None,shear = None, p=0.5),
        A.Affine(scale = None,translate_percent= 0.1, shear = None, p=0.25),
        A.Affine(scale = None,translate_percent= None,shear = (10,10), p=0.25), 
      ], p = 0.5),  

    ], p = 0.9)
    
    aug = transform(image = new_img)["image"] 
    folder_to_save_img = "Train/STYLEGAN2 AUGMENTED"  #Path of folder you created  ---> Change this accordingly
    name_of_augmented_file = f"1_STYLEGAN2_A_{COUNT}.jpg"  #Naming convention to be followed --> Change this accordingly, only 0/1 and celebA to be changed
    
    cv2.imwrite(f"{folder_to_save_img}/{name_of_augmented_file}", aug) #saving the augmented image--> no change required
    COUNT+=1


000364.png
000406.png
000430.png
000574.png
000667.png
000898.png
001006.png
001121.png
001159.png


In [26]:
import os
import numpy as np
import shutil

# # Creating Train / Val / Test folders (One time use)
root_dir = 'Train/STYLEGAN2 AUGMENTED'   #change accordingly
train_dir = "Train/STYLEGAN2_Aug_Train"  #change accordingly
val_dir = "Train/STYLEGAN2_Aug_Val"      #change accordingly
test_dir = "Train/STYLEGAN2_Aug_Test"    #change accordingly
try:
    os.mkdir(train_dir)
    os.mkdir(val_dir)
    os.mkdir(test_dir)
except:
    print("Folder already exists")


In [27]:
allFileNames = os.listdir(root_dir)
np.random.shuffle(allFileNames)

train_FileNames, val_FileNames, test_FileNames = np.split(np.array(allFileNames),
                                                          [int(len(allFileNames)*0.8), int(len(allFileNames)*0.9)])


train_FileNames = [root_dir+'/'+ name for name in train_FileNames.tolist()]
val_FileNames = [root_dir+'/' + name for name in val_FileNames.tolist()]
test_FileNames = [root_dir+'/' + name for name in test_FileNames.tolist()]


print('Total images: ', len(allFileNames))
print('Training: ', len(train_FileNames))
print('Validation: ', len(val_FileNames))
print('Testing: ', len(test_FileNames))

# os.makedirs(root_dir +'/train')
# os.makedirs(root_dir +'/val')
# os.makedirs(root_dir +'/test')

#Copy-pasting images
for name in train_FileNames:
    shutil.copy(name, train_dir)

for name in val_FileNames:
    shutil.copy(name, val_dir)

for name in test_FileNames:
    shutil.copy(name, test_dir)

Total images:  5952
Training:  4761
Validation:  595
Testing:  596


In [1]:
import tensorflow as tf
import numpy as np
import os, shutil, random
SEED = 0
tf.keras.utils.set_random_seed(SEED)
np.random.seed(SEED)

In [2]:
root_train_real = 'Sources\Train\Real'
root_train_fake = 'Sources\Train\Deepfake'
root_test_real = 'Sources\Test\Real'
root_test_fake = 'Sources\Test\Deepfake'
train_number_of_images = 10000
test_number_of_images = 6000
ratio = 0.5

In [22]:
class Dataset:
    def __init__(self, real_root, fake_root, NO_Images, ratio=ratio, seed=SEED):
        self.real_root = real_root
        self.fake_root = fake_root
        self.NO_Images = NO_Images
        self.ratio = ratio
        self.seed = seed
        self.files_real = []
        self.files_fake = []
    
    def create_files_from_directories(self, ip):
        root_path = None
        tot_files = []
        if ip == 'real':
            root_path = self.real_root
        elif ip == 'fake':
            root_path = self.fake_root
        else:
            raise ValueError("Either: 'real' or 'fake' ip must be specified")
        for dir in os.listdir(root_path):
            path = os.path.join(root_path, dir)
            files_in_path = os.listdir(path)
            files_in_path = [path + '\\' + file_in_path for file_in_path in files_in_path]
            tot_files = tot_files + files_in_path
        return tot_files
    
    def set_files(self):
        self.files_real = self.create_files_from_directories(ip='real')
        self.files_fake = self.create_files_from_directories(ip='fake')

        
    def pick_random_images(self, ip):
        file = []
        if ip == 'real':
            file = self.files_real
        elif ip == 'fake':
            file = self.files_fake
        else:
            raise ValueError("Either: 'real' or 'fake' ip must be specified")
        random.seed(self.seed)
        sampling = int(self.NO_Images*self.ratio)
        if sampling > len(file):
            sampling = len(file)
        return random.sample(file, sampling)
    
    def randomize(self):
        self.set_files()
        self.files_real_randomized = self.pick_random_images('real')
        self.files_fake_randomized = self.pick_random_images('fake')
        
    def get_files(self):
        self.randomize()
        return self.files_real_randomized, self.files_fake_randomized

    def combine_real_fake(self):
        self.randomize()
        random.seed(SEED)
        combined = self.files_real_randomized + self.files_fake_randomized
        self.X = random.sample(combined, len(combined))

    def Create_Dataset(self):
        self.combine_real_fake()
        self.y = []
        for x in self.X:
            self.y.append(x.split("\\")[4][0])
    
    def list2array(self):
        self.X = np.array(self.X, dtype='object')
        self.y = np.array(self.y, dtype=np.int8)

    def get_Dataset(self):
        self.Create_Dataset()
        self.list2array()
        return self.X, self.y
    
data = Dataset(root_train_real, root_train_fake, train_number_of_images)
X, y = data.get_Dataset()

In [23]:
X

array(['Sources\\Train\\Deepfake\\1_GDWCT_Aug_Train\\1_GDWCT_A_4686.jpg',
       'Sources\\Train\\Deepfake\\1_AttGAN_Aug_Train\\1_AttGAN_A_692.jpg',
       'Sources\\Train\\Real\\AUG_FFHQ_Train\\0_ffhq_augmented_8736.jpg',
       ...,
       'Sources\\Train\\Deepfake\\1_StarGAN_Aug_Train\\0_StarGAN_1676.jpg',
       'Sources\\Train\\Real\\AUG_FFHQ_Train\\0_ffhq_augmented_7896.jpg',
       'Sources\\Train\\Deepfake\\1_AttGAN_Aug_Train\\1_AttGAN_A_2513.jpg'],
      dtype=object)

In [20]:
y

array([1, 1, 0, ..., 0, 0, 1], dtype=int8)

In [21]:
X

array(['Sources\\Train\\Deepfake\\1_GDWCT_Aug_Train\\1_GDWCT_A_4686.jpg',
       'Sources\\Train\\Deepfake\\1_AttGAN_Aug_Train\\1_AttGAN_A_692.jpg',
       'Sources\\Train\\Real\\AUG_FFHQ_Train\\0_ffhq_augmented_8736.jpg',
       ...,
       'Sources\\Train\\Deepfake\\1_StarGAN_Aug_Train\\0_StarGAN_1676.jpg',
       'Sources\\Train\\Real\\AUG_FFHQ_Train\\0_ffhq_augmented_7896.jpg',
       'Sources\\Train\\Deepfake\\1_AttGAN_Aug_Train\\1_AttGAN_A_2513.jpg'],
      dtype=object)

In [131]:
def create_files_from_directory(directory):
    files = []
    for dir in os.listdir(directory):
        path = os.path.join(directory, dir)
        files_in_path = os.listdir(path)
        files_in_path = [path + '\\' + file_in_path for file_in_path in files_in_path]
        files = files + files_in_path
    return files

def pick_random_images(file, NOImages, ratio=ratio, seed=SEED):
    random.seed(SEED)
    sampling = int(NOImages*ratio)
    if sampling > len(file):
        sampling = len(file)
    return random.sample(file, sampling)

def combine_real_fake(real, fake):
    random.seed(SEED)
    combined = real + fake
    return random.sample(combined, len(combined))

def target_array(X):
    pass

real_train_files = create_files_from_directory(root_train_real)
fake_train_files = create_files_from_directory(root_train_fake)
real_train_files_randomized = pick_random_images(real_train_files, train_number_of_images)
fake_train_files_randomized = pick_random_images(fake_train_files, train_number_of_images)
combined_train = combine_real_fake(real_train_files_randomized, fake_train_files_randomized)

real_test_files = create_files_from_directory(root_test_real)
fake_test_files = create_files_from_directory(root_test_fake)
real_test_files_randomized = pick_random_images(real_test_files, test_number_of_images)
fake_test_files_randomized = pick_random_images(fake_test_files, test_number_of_images)
combined_test = combine_real_fake(real_test_files_randomized, fake_test_files_randomized)

In [133]:
combined_test

['Sources\\Test\\Deepfake\\STYLEGAN_Aug_Test\\1_STYLEGAN_003671.jpg',
 'Sources\\Test\\Deepfake\\STYLEGAN2_Aug_Test\\1_STYLEGAN2_A_3153.jpg',
 'Sources\\Test\\Real\\CelebA_Aug_Test\\0_CelebA_A_2444.jpg',
 'Sources\\Test\\Real\\CelebA_Aug_Test\\0_CelebA_A_8693.jpg',
 'Sources\\Test\\Deepfake\\StarGAN_Aug_Test\\0_StarGAN_2482.jpg',
 'Sources\\Test\\Deepfake\\AttGAN_Aug_Test\\1_AttGAN_A_4850.jpg',
 'Sources\\Test\\Deepfake\\StarGAN_Aug_Test\\0_StarGAN_4746.jpg',
 'Sources\\Test\\Real\\ffhq_aug_test\\0_ffhq_augmented_6426.jpg',
 'Sources\\Test\\Deepfake\\STYLEGAN2_Aug_Test\\1_STYLEGAN2_A_4227.jpg',
 'Sources\\Test\\Real\\ffhq_aug_test\\0_ffhq_augmented_4221.jpg',
 'Sources\\Test\\Deepfake\\STYLEGAN_Aug_Test\\1_STYLEGAN_000956.jpg',
 'Sources\\Test\\Real\\CelebA_Aug_Test\\0_CelebA_A_3839.jpg',
 'Sources\\Test\\Deepfake\\STYLEGAN2_Aug_Test\\1_STYLEGAN2_A_2205.jpg',
 'Sources\\Test\\Real\\ffhq_aug_test\\0_ffhq_augmented_11059.jpg',
 'Sources\\Test\\Real\\ffhq_aug_test\\0_ffhq_augmented_14270.

In [134]:
combined_train

['Sources\\Train\\Deepfake\\1_GDWCT_Aug_Train\\1_GDWCT_A_4686.jpg',
 'Sources\\Train\\Deepfake\\1_AttGAN_Aug_Train\\1_AttGAN_A_692.jpg',
 'Sources\\Train\\Real\\AUG_FFHQ_Train\\0_ffhq_augmented_8736.jpg',
 'Sources\\Train\\Real\\AUG_FFHQ_Train\\0_ffhq_augmented_14662.jpg',
 'Sources\\Train\\Deepfake\\1_AttGAN_Aug_Train\\1_AttGAN_A_5506.jpg',
 'Sources\\Train\\Deepfake\\1_StarGAN_Aug_Train\\0_StarGAN_342.jpg',
 'Sources\\Train\\Deepfake\\1_STYLEGAN2_Aug_Train\\1_STYLEGAN2_A_1447.jpg',
 'Sources\\Train\\Real\\AUG_CELEBA_Train\\0_CelebA_A_2888.jpg',
 'Sources\\Train\\Deepfake\\1_StarGAN_Aug_Train\\0_StarGAN_5643.jpg',
 'Sources\\Train\\Deepfake\\1_AttGAN_Aug_Train\\1_AttGAN_A_358.jpg',
 'Sources\\Train\\Deepfake\\1_GDWCT_Aug_Train\\1_GDWCT_A_623.jpg',
 'Sources\\Train\\Real\\AUG_CELEBA_Train\\0_CelebA_A_14064.jpg',
 'Sources\\Train\\Deepfake\\1_AttGAN_Aug_Train\\1_AttGAN_A_2056.jpg',
 'Sources\\Train\\Real\\AUG_FFHQ_Train\\0_ffhq_augmented_214.jpg',
 'Sources\\Train\\Real\\AUG_CELEBA_Trai