# Run autoaugment (DeepAugment) to obtain the best augmentation parameters
Built for the FF+ dataset, with the file structure required by Keras' `flow_from_directory` method.
       
Requires 50GB RAM, at least 4 CPU workers and a GPU.

In [1]:
# See available GPU RAM 
# !nvidia-smi # can also be run from linux shell while GPU is training
# !nvidia-smi dmon # this will stream memory utilisation
# !watch -n0.1 nvidia-smi # better way to see GPU utilisation
# !htop # cpu threads and if they're all working
# !pip3 install --no-cache-dir -I tensorflow==2.2 # use if no gpu is attached so code will run 
# !sudo kill -9 PID # clear GPU memory by force-killing the process that holds it (PID is that process's ID; -9 is the SIGKILL signal)
# !sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' # clear CPU memory

In [2]:
# Autoaugment installations and run TF2 upgrade scripts
# !tf_upgrade_v2 --intree ../augmentations/deepaugment-master --outtree ../augmentations/deepaugment_updated
# Then make some manual changes to ensure it runs using TF2
# Now install packages required by DeepAugment
# !pip install scikit-optimize
# !pip install imgaug

In [3]:
# This cell has the latest set up for AI Platform
from __future__ import print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
import os
import glob
import cv2
from io import BytesIO
from PIL import Image
from numpy import expand_dims
import tensorflow.keras
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.callbacks import *
import warnings
import logging
from IPython.display import clear_output
from collections import Counter
import pickle
import json
import sys
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/augmentations')
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/augmentations/deepaugment_updated/deepaugment')
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/hyperparameters')
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/visualisations')
import VisualisationTools as plotting
import hyper_utils as hp
import deepaugment as dp

# Suppress every Python warning for the rest of the session
warnings.filterwarnings("ignore")
# Level 100 is above logging.CRITICAL (50), so the root logger emits nothing at all
logger = logging.getLogger()
logger.setLevel(100)
# Plotting helper from the project-local VisualisationTools module
plot_losses = plotting.PlotLearning()
# The relative '../../all_faces_disk/...' paths used when loading images are resolved from here
os.chdir('/home/jupyter/DeepFake-2019-20/augmentations')

# # Augmentation libraries
# import face_recognition
# import cutout_augmentation as ca

# Record the TensorFlow version in the notebook output
print("Tensorflow version:", tf.__version__)

Using TensorFlow backend.



Tensorflow version: 2.2.0


## Load images into memory

In [4]:
# Root of the dataset split (one sub-directory per split, then one per class)
FORENSICS_SPLIT_DIR = '../../all_faces_disk/home/jupyter/forensics_split'
IMAGE_SIZE = (224, 224)        # resizing is optional
TRAIN_IMAGES_PER_CLASS = 500   # cap on images loaded per training class
VAL_IMAGES_PER_CLASS = 50      # cap on images loaded per validation class


def load_face_images(directory, max_images, size=IMAGE_SIZE):
    """Load up to `max_images` images from `directory`, resized to `size`.

    Files are visited in the order returned by `os.listdir`. Files that
    OpenCV cannot decode are skipped instead of crashing `cv2.resize`, and the
    number skipped is reported.

    Args:
        directory (str): folder containing the image files
        max_images (int): stop once this many images have been loaded
        size (tuple): (width, height) passed to `cv2.resize`

    Returns:
        tuple: (list of file names found in `directory`, list of loaded images)
    """
    image_names = os.listdir(directory)
    images = []
    skipped = 0
    for image_name in image_names:
        if len(images) >= max_images:
            break
        img = cv2.imread(os.path.join(directory, image_name))
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning None
            skipped += 1
            continue
        images.append(cv2.resize(img, size))
    if skipped:
        print(f"Skipped {skipped} unreadable file(s) in {directory}")
    return image_names, images


# Training data
train_authentic_image_paths, train_authentic_images = load_face_images(
    os.path.join(FORENSICS_SPLIT_DIR, 'train', 'authentic'), TRAIN_IMAGES_PER_CLASS)
train_fake_image_paths, train_fake_images = load_face_images(
    os.path.join(FORENSICS_SPLIT_DIR, 'train', 'fake'), TRAIN_IMAGES_PER_CLASS)

# Validation data
val_authentic_image_paths, val_authentic_images = load_face_images(
    os.path.join(FORENSICS_SPLIT_DIR, 'validation', 'authentic'), VAL_IMAGES_PER_CLASS)
val_fake_image_paths, val_fake_images = load_face_images(
    os.path.join(FORENSICS_SPLIT_DIR, 'validation', 'fake'), VAL_IMAGES_PER_CLASS)

In [5]:
# from keras.datasets import fashion_mnist
# (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# x_train[3:4]

In [6]:
# train_authentic_images
# train_fake_images
# val_authentic_images
# val_fake_images

# Stack both classes into a single array: authentic faces first, then fakes
training_data = np.array([*train_authentic_images, *train_fake_images])

# Labels follow the same ordering: 0 for every authentic image, 1 for every fake
authentic_labels = [0 for _ in train_authentic_images]
fake_labels = [1 for _ in train_fake_images]
train_labels = np.array(authentic_labels + fake_labels)

print(training_data.shape, train_labels.shape)

(1000, 224, 224, 3) (1000,)


In [7]:
# Run DeepAugment

# Search settings handed to DeepAugment through its `config` argument.
# NOTE(review): train_set_size=2 looks like a smoke-test value; the commented-out
# 2000 is presumably the intended setting for a real run - confirm before optimising.
config = dict(
    model="basiccnn",
    method="bayesian_optimization",
    # train_set_size=2000,
    train_set_size=2,
    opt_samples=3,
    opt_last_n_epochs=3,
    opt_initial_points=10,
    child_epochs=1,
    child_first_train_epochs=0,
    child_batch_size=64,
)

# Build the optimiser on the in-memory images and their 0/1 labels
deepaug = dp.DeepAugment(training_data, train_labels, config=config)

Using 2 training images
BasicCNN model built as child model.
 Model summary:
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 32)      896       
_________________________________________________________________
activation (Activation)      (None, 224, 224, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 222, 222, 32)      9248      
_________________________________________________________________
activation_1 (Activation)    (None, 222, 222, 32)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 111, 111, 32)      0         
_________________________________________________________________
dropout (Dropout)            (None, 111, 111, 32)      0         
_____________________________________________

In [None]:
# Launch the policy search on the `deepaug` instance and keep whatever it returns.
# NOTE(review): 300 is presumably the number of optimisation iterations - confirm against deepaugment's optimize().
best_policies = deepaug.optimize(300)

trial: 1 
 ['brighten', 0.9023485831739845, 'gaussian-blur', 0.969809067746749, 'invert', 0.17090958513604518, 'emboss', 0.7506861412184564, 'brighten', 0.32504722900835253, 'crop', 0.6342740579573352, 'vertical-flip', 0.652790317005491, 'brighten', 0.9952995676778879, 'gamma-contrast', 0.4143685882263689, 'dropout', 0.6235101011318683]
brighten
invert
brighten
vertical-flip
gamma-contrast
load_pre_augment_weights()'s runtime:  0.5126 sec.


In [None]:
# best_policies
# train_authentic_images
# Largest pixel value in the raw (un-normalised) training array, to check its value range
training_data.max()

In [None]:
 def split_train_val_sets(X, y, train_set_size, val_set_size):
     """Splits given images randomly into `train` and `val_seed` groups

     val_seed -> is validation seed dataset, from where validation sets are sampled.
     It holds every sample that was not drawn for training, in original order.

     Args:
         X (numpy.array): images, indexed along the first axis
         y (numpy.array): labels, aligned with `X`
         train_set_size (int): number of training samples to draw without
             replacement; None means `len(X) - val_set_size`
         val_set_size (int): only used to derive the training size when
             `train_set_size` is None
     return:
         dict: dict with keys `X_train`, `y_train`, `X_val_seed`, `y_val_seed`
     raises:
         ValueError: if the training size is negative or exceeds `len(X)`
     """
     n_samples = len(X)
     if train_set_size is None:
         print("Using all training images")
         train_set_size = n_samples - val_set_size
     else:
         print(f"Using {train_set_size} training images")

     # Fail with a readable message instead of numpy's generic sampling error
     if not 0 <= train_set_size <= n_samples:
         raise ValueError(
             f"train_set_size must be between 0 and {n_samples}, got {train_set_size}"
         )

     # reduce training dataset
     ix = np.random.choice(n_samples, train_set_size, False)

     # Everything not drawn for training becomes the validation seed; a boolean
     # mask keeps the remaining samples in their original, deterministic order.
     in_train = np.zeros(n_samples, dtype=bool)
     in_train[ix] = True
     is_val_seed = ~in_train

     return {
         "X_train": X[ix],
         "y_train": y[ix],
         "X_val_seed": X[is_val_seed],
         "y_val_seed": y[is_val_seed],
     }

def preprocess(X, y, train_set_size, val_set_size=1000):
    """Preprocess images by:
        1. normalize to 0-1 range (divide by 255)
        2. convert labels to categorical (one-hot)

    Both label arrays are encoded with the same number of classes, taken from
    the full label array `y`, so a split that happens to miss the highest
    label still gets the same number of one-hot columns as the other split.

    Args:
        X (numpy.array): images with pixel values on the 0-255 scale
        y (numpy.array): integer class labels aligned with `X`
        train_set_size (int): forwarded to `split_train_val_sets`
        val_set_size (int): forwarded to `split_train_val_sets`

    Returns:
        dict: preprocessed data with keys `X_train`, `y_train`,
            `X_val_seed`, `y_val_seed`
    """
    data = split_train_val_sets(X, y, train_set_size, val_set_size)

    # normalize images
    for key in ("X_train", "X_val_seed"):
        data[key] = data[key].astype("float32") / 255

    # convert labels to categorical; fixing num_classes keeps the one-hot width
    # identical across splits and lets an empty split encode to shape (0, num_classes)
    num_classes = int(np.max(y)) + 1
    for key in ("y_train", "y_val_seed"):
        data[key] = tf.keras.utils.to_categorical(data[key], num_classes=num_classes)
    return data
    
# Normalised training images (float32, divided by 255) for a random 500-image subset;
# kept under the short name `a` for the inspection cells that follow.
a=preprocess(training_data, train_labels, 500)['X_train']

In [None]:
import imgaug

# Try a single hue-shift augmentation on the normalised images
magnitude = 0.8472517387841256
X_norm = a

# X_norm holds 0-1 floats (divided by 255 in preprocess). Casting that straight to
# uint8 truncates nearly every pixel to 0, so scale back to 0-255 before the cast.
X_uint8 = np.clip(np.rint(X_norm * 255), 0, 255).astype(np.uint8)

# NOTE(review): the images were read with cv2.imread (BGR channel order) while
# AddToHue assumes RGB input by default - confirm whether a channel swap is needed.
hue_range = (int(-45 * magnitude), int(45 * magnitude))
b = imgaug.augmenters.AddToHue(hue_range).augment_images(X_uint8)  # needs uint8 0-255 values
b.max()

In [None]:
# Scalar type of one pixel after casting the normalised images to uint64
type(a.astype(np.uint64)[0, 0, 0, 0])