In [1]:
# For Google Colab use
# Mounts Google Drive and switches into the project folder when the notebook
# runs on Colab; on any other machine the cell does nothing.
try:
    # `google.colab` is missing outside Colab, which triggers the except branch below.
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    # IPython magic: change the working directory to the project folder on Drive.
    # Later cells use paths relative to this folder (./Celeb-DF-v2, ./Checkpoints).
    %cd '/content/drive/MyDrive/Colab Notebooks/MLP-DeepfakeDetection-VariationalAutoencoder'    
except ModuleNotFoundError:
    # Not on Colab: keep the current working directory.
    pass

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/MLP-DeepfakeDetection-VariationalAutoencoder


In [2]:
# Imports
from __future__ import division

import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from numpy.random import seed

import tensorflow as tf

import keras
from keras import preprocessing
from keras.preprocessing.image import ImageDataGenerator
from keras import layers, Model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import *
from keras.applications import *
from keras import metrics
from keras.losses import BinaryCrossentropy
from keras import backend as K

# !pip install -U keras-tuner
# from kerastuner.tuners import RandomSearch, Hyperband
# from kerastuner.engine.hypermodel import HyperModel
# from kerastuner.engine.hyperparameters import HyperParameters
# from kerastuner import Objective

import Models.OCFakeDectVAE as OriginalOCFakeDectVAE

In [3]:
# Check GPU available
# TensorFlow is already imported by the imports cell, so no version-selection
# magic is used here; a Colab-only magic at this point could not change the
# loaded version and would make the cell fail outside Colab.
device_name = tf.test.gpu_device_name()
# Abort early when TensorFlow does not report the first GPU device.
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Found GPU at: /device:GPU:0
Num GPUs Available:  1


In [4]:
# General model settings
# Input image geometry used as the generators' target size and when reshaping
# reconstructions for plotting.
IMG_HEIGHT = 100
IMG_WIDTH = 100
IMG_CHANNELS = 3

# Training-loop sizes and the fraction of images held out for validation.
EPOCHS = 10
BATCH_SIZE = 160
VALIDATION_SPLIT = 0.1

# A single seed is shared by the data generators, NumPy and TensorFlow.
DATA_GENERATOR_SEED = 1337
seed(DATA_GENERATOR_SEED)
tf.random.set_seed(DATA_GENERATOR_SEED)

# Pick dataset; DF_TYPE={'rnd', 'avg'}
DF_TYPE = 'avg'

In [5]:
# We are only using one class (OC), reals... But we test on reals and fakes
TRAIN_VAL_DIR = f'./Celeb-DF-v2/Celeb-{DF_TYPE}-OC'

# Training images are rescaled to [0, 1] and randomly flipped horizontally.
# class_mode='input' makes each batch's target equal to its input images.
TRAIN_DATAGEN = ImageDataGenerator(rescale = 1.0/255.0, horizontal_flip = True, fill_mode='nearest', validation_split = VALIDATION_SPLIT)
TRAIN_GENERATOR = TRAIN_DATAGEN.flow_from_directory(directory = TRAIN_VAL_DIR,
                                                    batch_size = BATCH_SIZE,
                                                    class_mode = 'input',
                                                    target_size = (IMG_HEIGHT, IMG_WIDTH),
                                                    subset = 'training',
                                                    seed = DATA_GENERATOR_SEED,
                                                    follow_links = True)

# Validation images are only rescaled. The validation generator is built from
# VAL_DATAGEN (not TRAIN_DATAGEN) so held-out images are never augmented.
# Both generators use the same directory, validation_split and follow_links,
# so they partition the same file listing.
VAL_DATAGEN = ImageDataGenerator(rescale = 1.0/255.0, validation_split = VALIDATION_SPLIT)
VALIDATION_GENERATOR = VAL_DATAGEN.flow_from_directory(directory = TRAIN_VAL_DIR,
                                                       batch_size = BATCH_SIZE,
                                                       class_mode = 'input',
                                                       target_size = (IMG_HEIGHT, IMG_WIDTH),
                                                       subset = 'validation',
                                                       seed = DATA_GENERATOR_SEED,
                                                       follow_links = True)

Found 5065 images belonging to 1 classes.
Found 562 images belonging to 1 classes.


In [6]:
# Early-stopping callback: monitors the 'reconstruction_loss' metric (lower is
# better), tolerates one epoch without improvement, and restores the weights of
# the best epoch when it stops.
early_stop_options = dict(
    monitor='reconstruction_loss',
    mode='min',
    patience=1,
    restore_best_weights=True,
    verbose=1,
)
EARLY_STOP = EarlyStopping(**early_stop_options)

# Instantiate the OCFakeDect1 model from the project's Models package
vae = OriginalOCFakeDectVAE.OCFakeDect1()

In [7]:
# Resume the VAE from the most recent checkpoint instead of a hard-coded epoch
# name, which breaks as soon as the files on disk carry a different epoch index.
CHECKPOINT_DIR = './Checkpoints/OGOCFakeDectVAE'
saved_weights = os.listdir(CHECKPOINT_DIR)
print(saved_weights)

# The 'checkpoint' index file in the directory records the newest saved prefix.
latest_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
if latest_checkpoint is None:
    raise FileNotFoundError(f'No checkpoint found in {CHECKPOINT_DIR}; found files: {saved_weights}')
vae.load_weights(latest_checkpoint)
vae.compile(optimizer=Adam())

['model_at_epoch_1.data-00000-of-00001', 'model_at_epoch_1.index', 'checkpoint']


In [None]:
# Trains for full epochs, also very slow (30 mins per epoch on OC)
# Number of batches needed to see every training image once. Ceil division is
# used so that an image count that is an exact multiple of BATCH_SIZE does not
# produce one extra step that wraps around into the next pass.
STEPS = int(np.ceil(TRAIN_GENERATOR.n / BATCH_SIZE))
for e in tqdm(range(EPOCHS)):
    for _ in tqdm(range(STEPS)):
        # Each fit call sees one training batch concatenated with one validation
        # batch; batch_size=2*BATCH_SIZE lets that combined array be one step.
        # NOTE(review): epochs=STEPS refits the same combined batch repeatedly
        # until EARLY_STOP fires - confirm this is intended rather than epochs=1.
        vae.fit(np.concatenate([TRAIN_GENERATOR.next()[0], VALIDATION_GENERATOR.next()[0]], axis=0),
                epochs=STEPS,
                batch_size=2*BATCH_SIZE,
                verbose=2,
                callbacks=[EARLY_STOP])
    # End of epoch, we save weights of model
    vae.save_weights(f'./Checkpoints/OGOCFakeDectVAE/model_at_epoch_{e}', save_format='tf')

  0%|          | 0/10 [00:00<?, ?it/s]
  0%|          | 0/27 [00:00<?, ?it/s][A

Epoch 1/27
1/1 - 15s - loss: 6930.1382 - reconstruction_loss: 6920.6567 - kl_loss: 9.4814
Epoch 2/27
1/1 - 1s - loss: 6918.0293 - reconstruction_loss: 6867.7749 - kl_loss: 50.2546
Epoch 3/27
1/1 - 1s - loss: 6814.9268 - reconstruction_loss: 6814.0615 - kl_loss: 0.8650
Epoch 4/27
1/1 - 1s - loss: 6754.4829 - reconstruction_loss: 6754.4551 - kl_loss: 0.0279
Epoch 5/27
1/1 - 1s - loss: 6692.8940 - reconstruction_loss: 6692.8628 - kl_loss: 0.0311
Epoch 6/27
1/1 - 1s - loss: 6639.5674 - reconstruction_loss: 6639.5342 - kl_loss: 0.0332
Epoch 7/27
1/1 - 1s - loss: 6612.8813 - reconstruction_loss: 6612.7588 - kl_loss: 0.1224
Epoch 8/27
1/1 - 1s - loss: 6602.5063 - reconstruction_loss: 6601.6597 - kl_loss: 0.8469
Epoch 9/27
1/1 - 1s - loss: 6582.3438 - reconstruction_loss: 6576.9951 - kl_loss: 5.3489
Epoch 10/27
1/1 - 1s - loss: 6554.4351 - reconstruction_loss: 6533.8745 - kl_loss: 20.5605
Epoch 11/27
1/1 - 1s - loss: 6523.0635 - reconstruction_loss: 6511.5044 - kl_loss: 11.5589
Epoch 12/27
1/1


  4%|▎         | 1/27 [01:28<38:17, 88.36s/it][A

Epoch 1/27
1/1 - 1s - loss: 6366.2007 - reconstruction_loss: 6322.9395 - kl_loss: 43.2611
Epoch 2/27
1/1 - 1s - loss: 6355.4790 - reconstruction_loss: 6319.6978 - kl_loss: 35.7812
Epoch 3/27
1/1 - 1s - loss: 6343.4644 - reconstruction_loss: 6300.9067 - kl_loss: 42.5577
Epoch 4/27
1/1 - 1s - loss: 6337.4351 - reconstruction_loss: 6269.6162 - kl_loss: 67.8189
Epoch 5/27
1/1 - 1s - loss: 6332.7739 - reconstruction_loss: 6279.5425 - kl_loss: 53.2314
Restoring model weights from the end of the best epoch.



  7%|▋         | 2/27 [01:40<27:18, 65.55s/it][A

Epoch 00005: early stopping
Epoch 1/27
1/1 - 1s - loss: 6327.1548 - reconstruction_loss: 6269.0430 - kl_loss: 58.1116
Epoch 2/27
1/1 - 1s - loss: 6402.8857 - reconstruction_loss: 6253.6924 - kl_loss: 149.1935
Epoch 3/27
1/1 - 1s - loss: 6303.0068 - reconstruction_loss: 6232.9160 - kl_loss: 70.0908
Epoch 4/27
1/1 - 1s - loss: 6350.6924 - reconstruction_loss: 6299.9780 - kl_loss: 50.7145
Restoring model weights from the end of the best epoch.



 11%|█         | 3/27 [01:50<19:32, 48.86s/it][A

Epoch 00004: early stopping
Epoch 1/27
1/1 - 1s - loss: 6405.6938 - reconstruction_loss: 6352.8765 - kl_loss: 52.8175
Epoch 2/27
1/1 - 1s - loss: 6353.5566 - reconstruction_loss: 6282.4097 - kl_loss: 71.1469
Epoch 3/27
1/1 - 1s - loss: 6407.3481 - reconstruction_loss: 6306.3066 - kl_loss: 101.0416
Restoring model weights from the end of the best epoch.



 15%|█▍        | 4/27 [01:57<13:57, 36.41s/it][A

Epoch 00003: early stopping
Epoch 1/27
1/1 - 2s - loss: 6439.5361 - reconstruction_loss: 6338.0908 - kl_loss: 101.4453
Epoch 2/27
1/1 - 1s - loss: 6386.0723 - reconstruction_loss: 6337.0063 - kl_loss: 49.0659
Epoch 3/27
1/1 - 1s - loss: 6401.6162 - reconstruction_loss: 6370.6992 - kl_loss: 30.9168
Restoring model weights from the end of the best epoch.



 19%|█▊        | 5/27 [02:05<10:12, 27.83s/it][A

Epoch 00003: early stopping
Epoch 1/27
1/1 - 1s - loss: 6301.6938 - reconstruction_loss: 6272.8555 - kl_loss: 28.8386
Epoch 2/27
1/1 - 1s - loss: 6306.6855 - reconstruction_loss: 6275.5610 - kl_loss: 31.1245
Restoring model weights from the end of the best epoch.



 22%|██▏       | 6/27 [02:10<07:19, 20.93s/it][A

Epoch 00002: early stopping
Epoch 1/27
1/1 - 1s - loss: 6341.4355 - reconstruction_loss: 6309.0195 - kl_loss: 32.4161
Epoch 2/27
1/1 - 1s - loss: 6318.5898 - reconstruction_loss: 6275.5742 - kl_loss: 43.0156
Epoch 3/27
1/1 - 1s - loss: 6320.7905 - reconstruction_loss: 6258.8477 - kl_loss: 61.9429
Epoch 4/27
1/1 - 1s - loss: 6324.0566 - reconstruction_loss: 6244.9727 - kl_loss: 79.0839
Epoch 5/27
1/1 - 1s - loss: 6305.6230 - reconstruction_loss: 6225.6616 - kl_loss: 79.9616
Epoch 6/27
1/1 - 1s - loss: 6289.1772 - reconstruction_loss: 6217.3716 - kl_loss: 71.8058
Epoch 7/27
1/1 - 1s - loss: 6306.3823 - reconstruction_loss: 6239.4146 - kl_loss: 66.9676
Restoring model weights from the end of the best epoch.



 26%|██▌       | 7/27 [02:47<08:35, 25.76s/it][A

Epoch 00007: early stopping
Epoch 1/27
1/1 - 1s - loss: 6402.9829 - reconstruction_loss: 6335.4800 - kl_loss: 67.5027
Epoch 2/27
1/1 - 1s - loss: 6383.3521 - reconstruction_loss: 6294.4009 - kl_loss: 88.9513
Epoch 3/27
1/1 - 1s - loss: 6410.0562 - reconstruction_loss: 6301.0386 - kl_loss: 109.0177
Restoring model weights from the end of the best epoch.



 30%|██▉       | 8/27 [03:24<09:12, 29.10s/it][A

Epoch 00003: early stopping
Epoch 1/27
1/1 - 1s - loss: 6353.2485 - reconstruction_loss: 6242.6284 - kl_loss: 110.6203
Epoch 2/27
1/1 - 1s - loss: 6306.0576 - reconstruction_loss: 6239.2720 - kl_loss: 66.7857
Epoch 3/27
1/1 - 1s - loss: 6371.2026 - reconstruction_loss: 6313.6382 - kl_loss: 57.5645
Restoring model weights from the end of the best epoch.



 33%|███▎      | 9/27 [03:41<07:38, 25.47s/it][A

Epoch 00003: early stopping
Epoch 1/27
1/1 - 1s - loss: 6452.4751 - reconstruction_loss: 6399.6030 - kl_loss: 52.8722
Epoch 2/27
1/1 - 1s - loss: 6380.0225 - reconstruction_loss: 6327.0693 - kl_loss: 52.9532
Epoch 3/27
1/1 - 1s - loss: 6432.8608 - reconstruction_loss: 6362.2480 - kl_loss: 70.6129
Restoring model weights from the end of the best epoch.



 37%|███▋      | 10/27 [04:18<08:09, 28.81s/it][A

Epoch 00003: early stopping
Epoch 1/27
1/1 - 1s - loss: 6408.4531 - reconstruction_loss: 6345.1089 - kl_loss: 63.3443
Epoch 2/27
1/1 - 1s - loss: 6391.3662 - reconstruction_loss: 6340.2144 - kl_loss: 51.1520
Epoch 3/27
1/1 - 1s - loss: 6377.1831 - reconstruction_loss: 6338.6353 - kl_loss: 38.5477
Epoch 4/27
1/1 - 1s - loss: 6376.6191 - reconstruction_loss: 6343.2876 - kl_loss: 33.3318
Restoring model weights from the end of the best epoch.



 41%|████      | 11/27 [04:47<07:42, 28.88s/it][A

Epoch 00004: early stopping
Epoch 1/27
1/1 - 1s - loss: 6316.1289 - reconstruction_loss: 6282.8999 - kl_loss: 33.2290
Epoch 2/27
1/1 - 1s - loss: 6314.9507 - reconstruction_loss: 6276.5918 - kl_loss: 38.3589
Epoch 3/27
1/1 - 1s - loss: 6303.3179 - reconstruction_loss: 6255.0698 - kl_loss: 48.2478
Epoch 4/27
1/1 - 1s - loss: 6293.8813 - reconstruction_loss: 6231.6973 - kl_loss: 62.1839
Epoch 5/27
1/1 - 1s - loss: 6285.9346 - reconstruction_loss: 6207.0874 - kl_loss: 78.8472
Epoch 6/27
1/1 - 1s - loss: 6284.9146 - reconstruction_loss: 6192.7280 - kl_loss: 92.1865
Epoch 7/27
1/1 - 1s - loss: 6277.7124 - reconstruction_loss: 6185.9097 - kl_loss: 91.8025
Epoch 8/27
1/1 - 1s - loss: 6269.7295 - reconstruction_loss: 6187.7236 - kl_loss: 82.0057
Restoring model weights from the end of the best epoch.



 44%|████▍     | 12/27 [05:22<07:40, 30.70s/it][A

Epoch 00008: early stopping


In [None]:
# Plot 5 images from the validation set next to their VAE reconstructions
# (to inspect training images instead, draw the batch from TRAIN_GENERATOR).
# The generator yields an (images, targets) pair, so take the image batch first
# and slice *that*; slicing the pair itself would iterate over whole batches.
x_batch, _ = VALIDATION_GENERATOR.next()
for x_test in x_batch[:5]:
    # Add a leading batch axis so a single image can be passed to predict().
    X = np.expand_dims(x_test, axis=0)
    # The encoder returns three arrays; the third one is what gets decoded.
    Z_mean, Z_log_var, Z = vae.encoder.predict(X)
    X_prime = vae.decoder.predict(Z)
    # Clip before scaling so values outside [0, 1] cannot wrap around in uint8
    # (assumes the decoder output is meant to be in [0, 1] like the rescaled inputs).
    face = np.array(np.clip(X_prime, 0.0, 1.0).reshape(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)*255, dtype=np.uint8)
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(5, 5))
    axes[0].imshow(X.squeeze())
    axes[0].set_title('Original')
    axes[1].imshow(face)
    axes[1].set_title('Reconstruction')
    fig.tight_layout()

In [None]:
# Re-define the generators for training the neural net (since we are considering both classes now)
TRAIN_VAL_DIR = f'./Celeb-DF-v2/Celeb-{DF_TYPE}-30'
TEST_DIR = f'./Celeb-DF-v2/Celeb-{DF_TYPE}-30-test'

# Training images: rescaled to [0, 1] and randomly flipped; class_mode='binary'
# yields one label per image.
TRAIN_DATAGEN = ImageDataGenerator(rescale = 1.0/255.0, horizontal_flip = True, fill_mode='nearest', validation_split = VALIDATION_SPLIT)
TRAIN_GENERATOR = TRAIN_DATAGEN.flow_from_directory(directory = TRAIN_VAL_DIR,
                                                    batch_size = BATCH_SIZE,
                                                    class_mode = 'binary',
                                                    target_size = (IMG_HEIGHT, IMG_WIDTH),
                                                    subset = 'training',
                                                    seed = DATA_GENERATOR_SEED,
                                                    follow_links = True)

# Validation images are only rescaled. The generator is built from VAL_DATAGEN
# (not TRAIN_DATAGEN) so held-out images are never augmented. It uses the same
# directory, validation_split and follow_links as the training generator, so
# both subsets partition the same file listing.
VAL_DATAGEN = ImageDataGenerator(rescale = 1.0/255.0, validation_split = VALIDATION_SPLIT)
VALIDATION_GENERATOR = VAL_DATAGEN.flow_from_directory(directory = TRAIN_VAL_DIR,
                                                       batch_size = BATCH_SIZE,
                                                       class_mode = 'binary',
                                                       target_size = (IMG_HEIGHT, IMG_WIDTH),
                                                       subset = 'validation',
                                                       seed = DATA_GENERATOR_SEED,
                                                       follow_links = True)

# Separate test directory: rescaling only, no split.
TEST_DATAGEN = ImageDataGenerator(rescale = 1.0/255.0)
TEST_GENERATOR = TEST_DATAGEN.flow_from_directory(directory = TEST_DIR,
                                                  batch_size = BATCH_SIZE,
                                                  class_mode = 'binary',
                                                  target_size = (IMG_HEIGHT, IMG_WIDTH),
                                                  seed = DATA_GENERATOR_SEED)

In [None]:
# Freeze every encoder layer: from here on only the dense head added below is trained
for encoder_layer in vae.encoder.layers:
    encoder_layer.trainable = False

# Classifier head stacked on the encoder (AFTER encoder (and decoder) have been trained on OC).
# The encoder exposes three outputs; only the last one feeds the head
# (presumably the sampled latent vector - confirm against the model definition).
_, _, encoder_latent = vae.encoder.output
flat_latent = layers.Flatten()(encoder_latent)
fake_probability = layers.Dense(1, activation = 'sigmoid')(flat_latent)

FC = Model(inputs = vae.encoder.input, outputs = fake_probability, name = "FC")

# Metrics reported during training: accuracy, AUC and false-positive count
fc_metrics = [
    metrics.BinaryAccuracy(name = 'acc'),
    metrics.AUC(name = 'auc'),
    metrics.FalsePositives(name = 'fp'),
]
FC.compile(optimizer = SGD(), loss = BinaryCrossentropy(), metrics = fc_metrics)

In [None]:
# Train the classifier head on the two-class generators.
# - len(generator) is the number of batches in one pass, so the step counts
#   always match the generator they belong to (validation steps are taken from
#   VALIDATION_GENERATOR, the data actually passed as validation_data).
# - EPOCHS (from the settings cell) controls the number of epochs.
# - No batch_size is passed: the generators already produce batches of BATCH_SIZE.
# The checkpoint callback keeps only the model with the highest validation AUC.
FC.fit(TRAIN_GENERATOR,
       steps_per_epoch = len(TRAIN_GENERATOR),
       validation_data = VALIDATION_GENERATOR,
       validation_steps = len(VALIDATION_GENERATOR),
       epochs = EPOCHS,
       verbose = 1,
       callbacks = [ModelCheckpoint(f'./Checkpoints/FC+OCFakeDectVAE/best_model',
                                    monitor = 'val_auc',
                                    mode = 'max',
                                    verbose = 1,
                                    save_best_only = True)])

In [None]:
# # Load all training data, takes a very long time (3hrs)
# training_data = []
# for i in tqdm(range(TRAIN_GENERATOR.n//BATCH_SIZE+1)):
#     (x_train, _), (x_test, _) = TRAIN_GENERATOR.next(), VALIDATION_GENERATOR.next()
#     training_data.extend(np.concatenate([x_train, x_test], axis=0)) 

# vae.fit(training_data,
#         epochs=100,
#         batch_size=2*BATCH_SIZE,
#         verbose=1)

In [None]:
# # VERY hacky test to see if the model is actually working
# # Purposefully overfit to a single batch of data and train for 1000 epochs on just that one batch
# (x_train, _), (x_test, _) = TRAIN_GENERATOR.next(), VALIDATION_GENERATOR.next()
# training_data = np.concatenate([x_train, x_test], axis=0)
# vae.fit(training_data,
#         epochs=1000,
#         batch_size=2*BATCH_SIZE,
#         verbose=1)

# for x in x_test[:10]:
#     X = np.expand_dims(x, axis=0)
#     Z_mean, Z_log_var, Z = vae.encoder.predict(X)
#     X_prime = vae.decoder.predict(Z)
#     face = np.array(X_prime.reshape(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)*255, dtype=np.uint8)
#     fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(5, 5))
#     axes[0].imshow(X.squeeze())
#     axes[1].imshow(face)
#     fig.tight_layout()
