In [12]:
# https://keras.io/examples/nlp/text_classification_with_transformer/
# https://github.com/tensorflow/tensor2tensor
# See available GPU RAM
!nvidia-smi # can also be run from linux shell while GPU is training
# !nvidia-smi dmon # this will stream memory utilisation
# !htop # cpu threads and if they're all working

/bin/bash: nvidia-smi: command not found


In [1]:
!python3 -m pip install tensorflow-gpu



In [2]:
# This cell has the latest set up for AI Platform

from __future__ import print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
import os
import glob
import cv2
from io import BytesIO
from PIL import Image
from numpy import expand_dims
from tensorflow import keras
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.callbacks import *
import warnings
import logging
from IPython.display import clear_output
from collections import Counter
import pickle
import sys
sys.path.insert(1, '/home/jupyter/DeepFake-2019-20/visualisations')
import VisualisationTools as plotting

import TransformCode as tc

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# Root logger threshold of 100 is above logging.CRITICAL (50), so records at
# all standard levels are discarded.
logger = logging.getLogger()
logger.setLevel(100)
# Callback instance from the local VisualisationTools module; it is passed to
# model.fit via the callbacks list in train_model.
plot_losses = plotting.PlotLearning()
# All relative paths used later (e.g. '../all_faces_bucket/...') resolve
# against this working directory.
os.chdir('/home/jupyter/DeepFake-2019-20')


print("Tensorflow version:", tf.__version__)

Tensorflow version: 2.2.0


In [3]:
# Load and pad the IMDB review dataset. The Transformer cells further down read
# vocab_size, maxlen, x_train, y_train, x_val and y_val, so these definitions
# have to be live for the notebook to run from a fresh kernel.
vocab_size = 20000  # Only consider the top 20k words
maxlen = 200  # Only consider the first 200 words of each movie review
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=vocab_size)
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")
# Pad/truncate every review to exactly maxlen tokens
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 Training sequences
25000 Validation sequences


In [None]:
# Build a small Transformer text classifier (layers come from the local
# TransformCode module imported as `tc`).
# NOTE: `maxlen` and `vocab_size` must already be defined by the IMDB
# data-loading cell before this cell is run.
embed_dim = 32  # Embedding size for each token
num_heads = 4  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer

# One integer token id per position, sequences of length maxlen
inputs = layers.Input(shape=(maxlen,))
embedding_layer = tc.TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = tc.TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
# Average over the sequence axis to get one vector per input sequence
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
# Two-way softmax output (one probability per class)
outputs = layers.Dense(2, activation="softmax")(x)

model_text = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Print the layer-by-layer summary of the Transformer text classifier
model_text.summary()

Following [3], we do not fine-tune the network.
The 2048-dimensional feature vectors after the last pooling layers are then used as the sequential LSTM input.

The key challenge that we need to address is the design of a model to recursively process a sequence in a meaningful manner. For this problem, we resort to the use of a 2048-wide LSTM unit with 0.5 chance of dropout, which is capable of doing exactly what we need. More particularly, during training, our LSTM model takes a sequence of 2048-dimensional ImageNet feature vectors. The LSTM is followed by a 512-unit fully-connected layer with 0.5 chance of dropout. With classic dropout, no auxiliary loss functions are necessary (though they might be interesting to try).

The optimizer is set to Adam [23] for end-to-end training of the complete model, with a learning rate of 1e−5 and decay of 1e−6.


In [11]:
# Compile and train the Transformer text classifier (`model_text`).
# x_train/y_train and x_val/y_val come from the IMDB data-loading cell.
# The sparse loss is used because the output layer has two softmax units and
# the labels are passed as-is rather than one-hot encoded in this notebook.
model_text.compile("adam", "sparse_categorical_crossentropy", metrics=["accuracy"])
history = model_text.fit(
    x_train, y_train, batch_size=32, epochs=4, validation_data=(x_val, y_val)
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [14]:
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2


# Geometry of one input clip: `frames` RGB images of rows x columns pixels.
frames = 20
channels = 3
rows = 224
columns = 224

video = tf.keras.layers.Input(shape=(frames,
                     rows,
                     columns,
                     channels,))

# ImageNet-pretrained MobileNetV2 without its classification head. The
# per-frame input shape is derived from the constants above so the CNN and the
# video input cannot drift apart.
conv_base = MobileNetV2(weights='imagenet', include_top=False,
                        input_shape=(rows, columns, channels))

# Pool the final feature map to one feature vector per frame and freeze the
# CNN so only the layers added below are trained.
cnn_out = GlobalAveragePooling2D()(conv_base.output)
cnn = keras.Model(inputs=conv_base.input, outputs=cnn_out)
cnn.trainable = False

# Apply the frozen CNN to every frame, then summarise the frame sequence
# with an LSTM followed by a dense classification head.
encoded_frames = tf.keras.layers.TimeDistributed(cnn)(video)
encoded_sequence = tf.keras.layers.LSTM(256)(encoded_frames)
hidden_layer = Dense(1024, activation="relu")(encoded_sequence)
outputs = Dense(2, activation="softmax")(hidden_layer)
model = Model([video], outputs)
# `learning_rate` replaces the deprecated `lr` alias (same value).
# `schedule_decay` is the legacy Keras Nadam argument; it is kept as written
# because the TF 2.2 optimizer this notebook runs on still accepts it.
optimizer = tf.keras.optimizers.Nadam(learning_rate=0.002,
                  beta_1=0.9,
                  beta_2=0.999,
                  epsilon=1e-08,
                  schedule_decay=0.004)
model.compile(loss="categorical_crossentropy",
              optimizer=optimizer,
              metrics=["categorical_accuracy"])


#t_conv = Conv2D(320, (1,1), activation='relu')(outputconv_base)
#t_pool = MaxPooling2D(pool_size=(2, 2))(t_conv)
#t_flat = Flatten()(t_pool)
#convmodel = Model(inputs=conv_base.input, outputs=t_flat, name = 'model')


#t_dense2 = Dense(256, activation='relu')(t_flat)
#t_do = Dropout(0.3)(t_dense2)
#predictions = Dense(2, activation= 'softmax')(t_do)


#conv_base.trainable = False # freeze the convolutional base
#
#lr_rate = 1e-2
#opt = tf.keras.optimizers.Adam(learning_rate= lr_rate, decay=1e-6)
#   
#model.compile(loss='binary_crossentropy',
#              optimizer=opt,
#              metrics=['accuracy'])

In [15]:
# Print the layer-by-layer summary of the CNN + LSTM video model
model.summary()

Model: "model_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_16 (InputLayer)        [(None, 20, 224, 224, 3)] 0         
_________________________________________________________________
time_distributed_4 (TimeDist (None, 20, 1280)          2257984   
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               1573888   
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              263168    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 2050      
Total params: 4,097,090
Trainable params: 1,839,106
Non-trainable params: 2,257,984
_________________________________________________________________


In [5]:
def augment_data(directory, batch, target_size=(224, 224)):
    '''Builds the training and validation image generators.

    Training images are standardised per sample and randomly augmented
    (horizontal/vertical shifts of up to 10% and horizontal flips).
    Validation images receive the same per-sample standardisation but no
    random augmentation, so validation metrics are measured on unmodified
    images.

    Inputs:
    directory: folder containing a 'train' and a 'validation' subfolder
    batch: batch size used by both generators
    target_size: (height, width) every image is resized to

    Returns train_data, val_data'''

    # Per-sample standardisation shared by both generators: zero mean and
    # unit standard deviation for each image.
    normalisation = dict(samplewise_center=True,
                         samplewise_std_normalization=True)

    # Random transformations are applied to the training images only. All
    # other ImageDataGenerator options are left at their defaults.
    train_datagen = ImageDataGenerator(
            # randomly shift images horizontally (fraction of total width)
            width_shift_range=0.1,
            # randomly shift images vertically (fraction of total height)
            height_shift_range=0.1,
            # set mode for filling points outside the input boundaries
            fill_mode='nearest',
            horizontal_flip=True,  # randomly flip images
            **normalisation)
    val_datagen = ImageDataGenerator(**normalisation)

    # The class subfolders inside each split give the categories
    train_data = train_datagen.flow_from_directory(os.path.join(directory, 'train'),
                                                   target_size=target_size, batch_size=batch)

    val_data = val_datagen.flow_from_directory(os.path.join(directory, 'validation'),
                                               target_size=target_size, batch_size=batch)

    return train_data, val_data

In [6]:
def calculate_class_weights(train_data):
    '''Derives per-class weights that compensate for class imbalance.

    Each class is weighted by (size of the largest class) / (its own size),
    so the largest class gets weight 1.0 and smaller classes get
    proportionally more. For balanced data every weight is 1.0.

    Input:
    train_data: the generator returned by flow_from_directory; its
    `classes` and `class_indices` attributes are read

    Returns a dictionary mapping class id to weight, the format expected by
    the class_weight argument during training'''

    # Show the label -> index mapping so the weights can be checked against it
    print('Ensure class weights function corresponds to these class indices:',
          train_data.class_indices)

    # Number of images observed for each distinct label
    images_per_class = Counter(train_data.classes)
    largest_class_size = float(max(images_per_class.values()))

    class_weights = {}
    for class_id, num_images in images_per_class.items():
        class_weights[class_id] = largest_class_size / num_images

    return class_weights

In [None]:
def save_model_from_best_weights(dropout, lr_rate, architecture):
    '''Rebuilds a model, loads its saved weights and writes the full model to disk.

    Inputs:
    dropout, lr_rate, architecture: forwarded to build_model to recreate the
    network; architecture also selects which weights load_model_weights
    restores and names the output file.

    The model is saved as
    ../all_faces_bucket/trained_models/saved_models/<architecture>_model.h5'''
    save_dir = '../all_faces_bucket/trained_models/saved_models'
    # The folder may not exist yet if no training run has created it, and
    # model.save would then fail, so create it up front.
    os.makedirs(save_dir, exist_ok=True)

    model = build_model(dropout, lr_rate, architecture)
    load_model_weights(model, architecture)
    model.save(save_dir + '/' + architecture + '_model.h5')

In [None]:
def train_model(model, train_data, val_data, epochs, class_weights, architecture):
    '''Trains a provided model and returns its training history.
    Takes 6 arguments:
    
    1. model: a built model with an architecture specified in the build function
    2. train_data: augmented data obtained from the augment_data function
    3. val_data: validation data obtained from the augment_data function
    4. epochs -- number of epochs
    5. class weights -- a dictionary with weights (equal for balanced data so
    no negative impact)
    6. architecture: can choose vgg, xception, resnet50, mobilenet or efficientnet

    Side effects: creates the weights, training_accuracies and saved_models
    folders under ../all_faces_bucket/trained_models if they are missing,
    overwrites the best-weights checkpoint and appends to the per-architecture
    CSV log.

    Returns the History object returned by model.fit.
    '''
    base_dir = '../all_faces_bucket/trained_models'
    weights_dir = base_dir + '/weights/' + architecture
    accuracies_dir = base_dir + '/training_accuracies'
    saved_models_dir = base_dir + '/saved_models'

    # makedirs also creates missing parent folders, and exist_ok makes the
    # call a no-op when the folder is already there.
    for folder in (weights_dir, accuracies_dir, saved_models_dir):
        os.makedirs(folder, exist_ok=True)

    # Keep a single file holding the weights with the highest validation
    # accuracy. (To keep one file per improving epoch instead, use a templated
    # name such as "epochs:{epoch:03d}-val_acc:{val_accuracy:.3f}.hdf5".)
    checkpoint = ModelCheckpoint(weights_dir + '/highest_val_acc.hdf5', monitor='val_accuracy',
                                 verbose=1, save_best_only=True, mode='max')

    # Callback to save training accuracies after each epoch; append=True keeps
    # the rows from earlier runs of the same architecture.
    csv_logger = CSVLogger(accuracies_dir + '/' + architecture + '.csv',
                           separator=',', append=True)

    # Stop once val_accuracy has not improved for 7 consecutive epochs
    es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=7)

    # Load previous weights from training if there are any
    load_model_weights(model, architecture)

    # Integer division would give 0 steps when a split holds fewer images than
    # one batch, so always run at least one step.
    steps_per_epoch = max(1, train_data.n // train_data.batch_size)
    validation_steps = max(1, val_data.n // val_data.batch_size)

    history = model.fit(train_data, epochs=epochs, shuffle=True,
              steps_per_epoch = steps_per_epoch,
              validation_data = val_data,
              validation_steps = validation_steps,
              class_weight=class_weights,
              callbacks=[plot_losses, checkpoint, csv_logger, es],
              verbose=1,
              max_queue_size=30,                # maximum size for the generator queue
              workers=16,                       # worker threads, since use_multiprocessing is False
              use_multiprocessing=False)

    return history

In [None]:
def run_training(dropout = 0.5, lr_rate = 0.0001, architecture = 'vgg', 
                 batch = 32, epochs = 50,
                 data_dir = '../all_faces_disk/home/jupyter/forensics_split'):

    '''Builds a model based on the specified architecture, creates the
    training and validation generators from data_dir, computes class weights
    to balance the data and trains the model.
    
    Inputs:
    1. dropout  -- for the model
    2. lr_rate -- learning rate passed to build_model
    3. architecture -- a choice of vgg, resnet50, mobilenet, xception and efficientnet
    4. batch -- batch size
    5. epochs -- number of epochs
    6. data_dir -- folder passed to augment_data as the dataset directory

    Returns whatever train_model returns.
    '''

    # Strategy scope allows us to leverage multiple GPUs
    strategy = tf.distribute.MirroredStrategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
    
    with strategy.scope(): # Everything that creates variables should be under the strategy scope.
        model = build_model(dropout, lr_rate, architecture)

    # Data generators and class weights do not create model variables, so they
    # are built outside the strategy scope.
    train_data, val_data = augment_data(data_dir, batch)
    class_weights = calculate_class_weights(train_data)

    # Hand the training result back to the caller instead of dropping it
    return train_model(model, train_data, val_data, epochs, class_weights, architecture)

In [None]:
# Start a training run with architecture='mobilenet', batch size 256 and up to
# 100 epochs (the EarlyStopping callback in train_model can end it sooner).
run_training(dropout = 0.5, lr_rate = 0.0002, architecture = 'mobilenet', batch = 256, epochs = 100)