# Run autoaugment to obtain best parameters
Built for the FF+ dataset, stored in the directory structure required by Keras' `flow_from_directory` method.
       
Requires 50GB RAM, at least 4 CPU workers and a GPU.

In [1]:
# See available GPU RAM 
# !nvidia-smi # can also be run from linux shell while GPU is training
# !nvidia-smi dmon # this will stream memory utilisation
# !watch -n0.1 nvidia-smi # better way to see GPU utilisation
# !htop # cpu threads and if they're all working
# !pip3 install --user --no-cache-dir -I tensorflow==2.2 # use if no gpu is attached so code will run 
# !sudo kill -9 PID # free GPU memory by force-killing the process holding it: -9 is the signal (SIGKILL); replace PID with the process ID shown by nvidia-smi
# !sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' # drop the Linux page cache to free up system (CPU) RAM

In [3]:
# Autoaugment installations and run TF2 upgrade scripts
# !tf_upgrade_v2 --intree ../augmentations/deepaugment-master --outtree ../augmentations/deepaugment_updated
# Then make some manual changes to ensure it runs using TF2
# Now install packages required by DeepAugment
# !pip install scikit-optimize
# !pip install imgaug
# !pip install git+https://github.com/qubvel/efficientnet

In [1]:
# This cell has the latest set up for AI Platform
from __future__ import print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
import os
import glob
import cv2
from io import BytesIO
from PIL import Image
from numpy import expand_dims
import tensorflow.keras
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.callbacks import *
import warnings
import logging
from IPython.display import clear_output
from collections import Counter
import pickle
import json
import sys
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/augmentations')
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/augmentations/deepaugment_updated/deepaugment')
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/hyperparameters')
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/visualisations')
import VisualisationTools as plotting
import hyper_utils as hp
import deepaugment as dp

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")
# Raise the root logger's threshold to 100, which is above logging.CRITICAL (50).
# Loggers that set their own level are not affected by this (the INFO:tensorflow
# lines still show up in the DeepAugment cell output).
logger = logging.getLogger()
logger.setLevel(100)
# NOTE(review): PlotLearning is defined in the project's VisualisationTools module;
# presumably a training callback that plots losses — confirm against that module.
plot_losses = plotting.PlotLearning()
# Relative paths used by later cells (e.g. ./autoaugment_results.csv) resolve
# against this directory.
os.chdir('/home/jupyter/DeepFake-2019-20/augmentations')

# # Augmentation libraries
# import face_recognition
# import cutout_augmentation as ca

print("Tensorflow version:", tf.__version__)

Using TensorFlow backend.



Tensorflow version: 2.1.1-dlenv_tfe


## Load images into memory

In [2]:
# Root of the training split; each class has its own sub-directory underneath.
TRAIN_DIR = '../../all_faces_disk/home/jupyter/forensics_split/train'
MAX_IMAGES_PER_CLASS = 3500  # cap on how many images are held in memory per class
IMAGE_SIZE = (224, 224)      # (width, height) passed to cv2.resize
PROGRESS_EVERY = 1000        # print the running count every this many images


def load_images(image_dir, file_names, max_images=MAX_IMAGES_PER_CLASS, size=IMAGE_SIZE):
    """Read and resize up to ``max_images`` images from ``image_dir``.

    Args:
        image_dir (str): directory containing the image files.
        file_names (list[str]): file names inside ``image_dir``, tried in the
            given order.
        max_images (int): stop once this many images have been loaded.
        size (tuple[int, int]): target (width, height) for ``cv2.resize``.

    Returns:
        list: the resized images, in the order they were successfully read.
    """
    images = []
    skipped = 0
    for file_name in file_names:
        if len(images) >= max_images:
            break
        img = cv2.imread(os.path.join(image_dir, file_name))
        if img is None:
            # cv2.imread returns None instead of raising when a file is missing,
            # unreadable or not an image; cv2.resize would crash on it.
            skipped += 1
            continue
        images.append(cv2.resize(img, size))
        if len(images) % PROGRESS_EVERY == 0:
            print(len(images))
    if skipped:
        print(f"Skipped {skipped} unreadable file(s) in {image_dir}")
    return images


# Training data.
# Note: os.listdir returns entries in arbitrary order, so which files make it
# under the per-class cap is not guaranteed to be the same on every machine.
train_authentic_dir = os.path.join(TRAIN_DIR, 'authentic')
train_authentic_image_paths = os.listdir(train_authentic_dir)
train_authentic_images = load_images(train_authentic_dir, train_authentic_image_paths)

train_fake_dir = os.path.join(TRAIN_DIR, 'fake')
train_fake_image_paths = os.listdir(train_fake_dir)
train_fake_images = load_images(train_fake_dir, train_fake_image_paths)

1000
2000
3000
1000
2000
3000


In [3]:
# Stack every image into a single array: authentic images first, fake images after.
training_data = np.array([*train_authentic_images, *train_fake_images])

# Labels follow the same ordering: class index 0 for authentic, 1 for fake,
# each repeated once per image of that class.
class_counts = (len(train_authentic_images), len(train_fake_images))
train_labels = np.array(
    [label for label, count in enumerate(class_counts) for _ in range(count)]
)

print(training_data.shape, train_labels.shape)

(7000, 224, 224, 3) (7000,)


## Run Deepaugment

In [4]:
# Run DeepAugment

# Settings handed to DeepAugment.
# NOTE(review): the meaning of each key is defined by the project's
# deepaugment_updated package — confirm there before tuning any value.
config = dict(
    model="efficientnet",
    method="bayesian_optimization",
    train_set_size=5000,
    opt_samples=1,
    opt_last_n_epochs=3,
    opt_initial_points=10,
    child_epochs=20,
    child_first_train_epochs=0,
    child_batch_size=64,
    # Same CSV path that is read back with pd.read_csv when ranking the policies.
    notebook_path="./autoaugment_results.csv",
)

# Build the optimiser from the in-memory images and their 0/1 labels.
deepaug = dp.DeepAugment(training_data, train_labels, config=config)

Using 5000 training images
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/d

In [None]:
# Launch the policy search.
# NOTE(review): 150 is presumably the number of optimisation iterations (trials) —
# confirm against DeepAugment.optimize in deepaugment_updated.
best_policies = deepaug.optimize(150)

trial: 1 
 ['add-to-hue-and-saturation', 0.8442657485810175, 'super-pixels', 0.8472517387841256, 'coarse-salt-pepper', 0.38438170729269994, 'emboss', 0.056712977317443194, 'sharpen', 0.47766511732135, 'histogram-equalize', 0.47997717237505744, 'dropout', 0.8360787635373778, 'additive-gaussian-noise', 0.6481718720511973, 'dropout', 0.9571551589530466, 'shear', 0.8700872583584366]
load_pre_augment_weights()'s runtime:  1.0065 sec.
Train on 30000 samples, validate on 1000 samples
Epoch 1/20
INFO:tensorflow:batch_all_reduce: 8 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
30000/30000 - 90s - loss: 7.5168 - accuracy: 0.5071 - val_loss: 7.8811 - val_accuracy: 0.4860
Epoch 2/20
30000/30000 - 63s - loss: 7.5689 - accuracy: 0.5065 - val_loss: 7.8811 - val_accuracy: 0.4860
Epoch 3/20
30000/30000 - 63s - loss: 7.5683 - accuracy: 0.5066 - val_loss: 7.8811 - val_accuracy: 0.4860
Epoch 4/20
30000/30000 - 63s - loss: 7.5683 - accura

## Obtain best policies 

In [2]:
def get_top_policies(df, k):
    """Print and return the top-k policies ranked by mean late validation accuracy.

    A trial's score is the mean of ``mean_late_val_acc`` over its distinct
    ``(trial_no, sample_no)`` rows. Trial 0 is used as the baseline:
    ``expected_accuracy_increase(%)`` is a trial's score minus the trial-0
    score, multiplied by 100.

    Args:
        df (pandas.DataFrame): results table containing ``trial_no``,
            ``sample_no``, ``mean_late_val_acc`` and the policy columns
            ``{A..E}_aug{1,2}_{type,magnitude}``.
        k (int): number of best trials to keep.

    Returns:
        pandas.DataFrame: one row per trial for the k best trials, sorted by
        score (descending), with the policy columns, the trial score and
        the expected accuracy increase in percent.

    Raises:
        IndexError: if ``df`` contains no rows for trial 0 (no baseline).
    """
    # Select the score column *before* aggregating: averaging the whole frame
    # would also try to average the string-valued ``*_type`` columns, which
    # raises TypeError on pandas >= 2.0.
    trial_avg_val_acc_df = (
        df.drop_duplicates(["trial_no", "sample_no"])
        .groupby("trial_no")["mean_late_val_acc"]
        .mean()
        .reset_index()
    )

    # Replace every row's own accuracy with the average of its trial.
    x_df = pd.merge(
        df.drop(columns=["mean_late_val_acc"]),
        trial_avg_val_acc_df,
        on="trial_no",
        how="left",
    )

    x_df = x_df.sort_values("mean_late_val_acc", ascending=False)

    baseline_scores = x_df.loc[x_df["trial_no"] == 0, "mean_late_val_acc"]
    if baseline_scores.empty:
        # Same exception type the bare ``.values[0]`` lookup used to raise,
        # but with a message that names the actual problem.
        raise IndexError(
            "no rows with trial_no == 0: cannot compute the baseline accuracy"
        )
    baseline_val_acc = baseline_scores.iloc[0]

    x_df["expected_accuracy_increase(%)"] = (
        x_df["mean_late_val_acc"] - baseline_val_acc
    ) * 100

    # Keep one row per trial, best trials first.
    top_df = x_df.drop_duplicates(["trial_no"]).sort_values(
        "mean_late_val_acc", ascending=False
    )[:k]

    # Five sub-policies (A-E), each made of two augmentations (type + magnitude).
    policy_columns = [
        f"{sub_policy}_aug{position}_{field}"
        for sub_policy in "ABCDE"
        for position in (1, 2)
        for field in ("type", "magnitude")
    ]
    SELECT = (
        ["trial_no"]
        + policy_columns
        + ["mean_late_val_acc", "expected_accuracy_increase(%)"]
    )
    top_df = top_df[SELECT]

    print(f"top-{k} policies:")
    print(top_df)

    return top_df

# Read the results CSV (the same path given as "notebook_path" in the DeepAugment
# config) and keep the 20 best trials. The relative path resolves against the
# current working directory.
top_df = get_top_policies(pd.read_csv('./autoaugment_results.csv'), 20)

top-20 policies: 20
     trial_no                A_aug1_type  A_aug1_magnitude  \
70          3         coarse-salt-pepper             0.902   
438        21         histogram-equalize             0.934   
602        30         histogram-equalize             0.914   
81          4    additive-gaussian-noise             0.675   
580        29                     rotate             0.970   
138         6  add-to-hue-and-saturation             0.704   
390        19                       crop             0.960   
320        16             gamma-contrast             0.910   
523        26    additive-gaussian-noise             0.999   
141         7            horizontal-flip             0.533   
295        14              gaussian-blur             1.000   
471        23                     emboss             0.834   
550        27              vertical-flip             0.742   
679        33    additive-gaussian-noise             0.909   
224        11              vertical-flip          

In [3]:
# Persist the selected policies as CSV. The target file is named `top_df` (no
# extension) in the current working directory; to_csv also writes the DataFrame
# index as the first column by default.
top_df.to_csv('./top_df')