In [1]:
from __future__ import print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import tensorflow as tf
from tensorflow.keras import Model
#from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
import os
import glob
import cv2
import TransformCode as tc
from io import BytesIO
from PIL import Image
from numpy import expand_dims
from tensorflow import keras
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.callbacks import *
import warnings
import logging
from IPython.display import clear_output
from collections import Counter
import pickle
import json
import sys
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/augmentations')
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/hyperparameters')
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/visualisations')
import VisualisationTools as plotting
import hyper_utils as hp

# Notebook-wide environment setup. The statements below have global side
# effects that later cells rely on, so their order matters.

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# Raise the root logger's level to 100, which is above logging.CRITICAL (50).
logger = logging.getLogger()
logger.setLevel(100)
# Plotting helper from the project-local VisualisationTools module.
plot_losses = plotting.PlotLearning()
# Relative paths used in later cells (e.g. '../configs/...') are resolved
# against this working directory.
os.chdir('/home/jupyter/DeepFake-2019-20')

# # Augmentation libraries
# import face_recognition
# import cutout_augmentation as ca

# Record the TensorFlow version in the cell output.
print("Tensorflow version:", tf.__version__)

Tensorflow version: 2.2.0-dlenv


In [2]:
# Choose the experiment configuration to evaluate; the same identifier names
# both the JSON config file and the directory holding the trained weights.
config_number = 1010
config_file = 'config{}'.format(config_number)
def obtain_configs(number, config_dir='../configs'):
    '''Extracts hyperparameters from config file given the config file number.

    Args:
        number: Identifier interpolated into the file name
            ``config<number>.json``.
        config_dir: Directory containing the config files. Defaults to
            ``'../configs'``, which is resolved against the current working
            directory.

    Returns:
        The parsed JSON content of the config file.

    Raises:
        FileNotFoundError: If the config file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    '''
    config_path = os.path.join(config_dir, 'config{}.json'.format(number))
    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(config_path, encoding='utf-8') as f:
        return json.load(f)

# Load the hyperparameters of the selected config; the bare name on the last
# line shows them as the cell output.
params = obtain_configs(number=config_number)
params

{'architecture': 'lstm',
 'epochs': 50,
 'batch_size': 8,
 'learning_rate_type': 'constant',
 'learning_rate': 0.0001,
 'patience': 7,
 'weight_initialisation': 'imagenet',
 'optimiser': 'adam',
 'momentum': 0.99,
 'nesterov': 'False',
 'label_smoothing': 0,
 'dropout': 0.3,
 'target_size': 224,
 'class_weights': 'True',
 'warmup_epochs': 0}

In [3]:
import os
import glob
import keras
import VideoFrameGenerator
# Use the sub-directory names of the training split as class names. Only real
# directories count, so a stray file next to the class folders cannot turn
# into a bogus class; sorting keeps the class order stable.
classes = sorted(
    os.path.basename(path)
    for path in glob.glob('../restructured-all-faces/home/jupyter/restructured_data/train/*')
    if os.path.isdir(path)
)
print(classes)
# some global params
SIZE = (224, 224)   # (rows, columns) each frame is resized to
CHANNELS = 3
NBFRAME = 20        # frames sampled per video
BS = 8              # videos per batch
# pattern to get videos and classes

glob_pattern_test = '../restructured-all-faces/home/jupyter/restructured_data/test/{classname}/*'

# Generator over the test split.
# Evaluation gains nothing from shuffling, and a fixed order makes the
# evaluated batches repeatable between runs (this matters if the generator
# drops a trailing partial batch -- TODO confirm against VideoFrameGenerator).
test_data = VideoFrameGenerator.VideoFrameGenerator(
        classes=classes,
        glob_pattern=glob_pattern_test,
        nb_frames=NBFRAME,
        shuffle=False,
        batch_size=BS,
        target_shape=SIZE,
        nb_channel=CHANNELS)

['authentic', 'fake']
Total data: 2 classes for 700 files for train


Using TensorFlow backend.


In [4]:
def build_model(dropout, lr_rate, architecture = 'lstm', frozen_base = True,
                frames = 20, rows = 224, columns = 224, channels = 3):
    '''Builds and compiles a video classifier.

    An ImageNet-initialised MobileNetV2 (without its top) followed by global
    average pooling encodes every frame; the resulting sequence of frame
    embeddings is fed to either an LSTM head or a transformer head that ends
    in a 2-way softmax.

    Args:
        dropout: Dropout rate used in the 'transformer' head. The 'lstm' head
            contains no dropout layers, so the value is ignored there.
        lr_rate: Learning rate passed to the Nadam optimiser.
        architecture: Sequence head to build: 'lstm' or 'transformer'.
        frozen_base: If truthy the frame encoder is frozen, otherwise it is
            trainable.
        frames: Number of frames per video clip.
        rows: Frame height in pixels.
        columns: Frame width in pixels.
        channels: Number of colour channels per frame (MobileNetV2 with
            ImageNet weights expects 3).

    Returns:
        The compiled model (categorical cross-entropy loss, accuracy metric).

    Raises:
        ValueError: If `architecture` is not one of the supported heads.
    '''
    # Validate first: previously an unknown architecture skipped both branches
    # and surfaced later as an unrelated NameError on `model`.
    supported_architectures = ('lstm', 'transformer')
    if architecture not in supported_architectures:
        raise ValueError(
            "Unsupported architecture {!r}; expected one of {}".format(
                architecture, supported_architectures))

    from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

    video = tf.keras.layers.Input(shape=(frames, rows, columns, channels))

    # Per-frame encoder: MobileNetV2 feature maps pooled to one vector per frame.
    conv_base = MobileNetV2(weights='imagenet', include_top=False,
                            input_shape=(rows, columns, channels))
    cnn_out = GlobalAveragePooling2D()(conv_base.output)
    cnn = tf.keras.Model(inputs=conv_base.input, outputs=cnn_out)

    # Apply the same encoder to every frame of the clip.
    encoded_frames = tf.keras.layers.TimeDistributed(cnn)(video)

    if architecture == 'lstm':
        encoded_sequence = tf.keras.layers.LSTM(512)(encoded_frames)
        hidden_layer = Dense(512, activation="relu")(encoded_sequence)
        hidden_layer2 = Dense(128, activation="relu")(hidden_layer)
        outputs = Dense(2, activation="softmax")(hidden_layer2)
    else:  # 'transformer'
        # Embedding size for each token; presumably has to equal the encoder's
        # output width (1280 for the default MobileNetV2) -- TODO confirm
        # against tc.TransformerBlock.
        embed_dim = 1280
        num_heads = 8  # Number of attention heads
        ff_dim = 32  # Hidden layer size in feed forward network inside transformer

        transformer_block = tc.TransformerBlock(embed_dim, num_heads, ff_dim)
        x = transformer_block(encoded_frames)
        x = tf.keras.layers.GlobalAveragePooling1D()(x)
        x = tf.keras.layers.Dropout(dropout)(x)
        x = tf.keras.layers.Dense(256, activation="relu")(x)
        x = tf.keras.layers.Dense(64, activation="relu")(x)
        x = tf.keras.layers.Dropout(dropout)(x)
        outputs = tf.keras.layers.Dense(2, activation="softmax")(x)

    model = Model([video], outputs)

    # Freeze or unfreeze the convolutional base; this must happen before
    # compile() for the setting to take effect.
    cnn.trainable = not frozen_base

    # `learning_rate` is the documented argument name; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Nadam(learning_rate=lr_rate,
                      beta_1=0.9,
                      beta_2=0.999,
                      epsilon=1e-08,
                      schedule_decay=0.004)
    model.compile(loss="categorical_crossentropy",
                  optimizer=optimizer,
                  metrics=["accuracy"])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    return model

In [5]:
# Mirror the model across every visible GPU; this also runs on a single device.
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

# Build and compile the model inside the strategy scope.
with strategy.scope():
    model = build_model(
        dropout=0,
        lr_rate=params['learning_rate'],
        architecture=params['architecture'],
        frozen_base=False,
    )

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localh

In [6]:
# Directory holding the checkpoints written while training this config.
path_to_weights = "../all_faces_bucket/trained_models/weights/{}".format(config_file)
# Checkpoint with the highest validation accuracy.
best_weights_file = os.path.join(path_to_weights, 'highest_val_acc.hdf5')
# All weight file names in the directory (empty when the directory is missing).
all_weights = sorted(os.listdir(path_to_weights)) if os.path.isdir(path_to_weights) else []

# Fail loudly: without this check a missing directory or file was skipped
# silently and the following cell evaluated a model without its trained weights.
if not os.path.isfile(best_weights_file):
    raise FileNotFoundError(
        'Trained weights not found at {}; refusing to evaluate a model '
        'without its trained weights.'.format(best_weights_file))

model.load_weights(best_weights_file)
print('Weights loaded')

Weights loaded


In [7]:
# Evaluate the restored model on the test generator. With the loss and the
# single 'accuracy' metric compiled in build_model, evaluate() returns
# [loss, accuracy], so `score` holds the test loss and `acc` the test accuracy.
score, acc = model.evaluate(
    test_data,
    verbose=1,
    max_queue_size=100,         # maximum size of the generator queue
    workers=16,                 # worker threads (use_multiprocessing is False)
    use_multiprocessing=False,
)

