# Train

Following from [Preprocessing](https://github.com/TheNerdyCat/deepfake-detection-challenge/blob/master/output/preprocessing.ipynb), this stage will look at data augmentation and subsequently training the model.

First we will undersample the images to balance REAL and FAKE images in both the train and validation sets. There are actually more FAKE images than REAL in this dataset, so this will be addressed accordingly.

We will read our extracted faces using OpenCV and perform any data augmentation. Following this, we will define X and X_val. Then we'll read the metadata to label the extracted faces as FAKE or REAL, defining them into y and y_val.

After we have our training data and validation data ready and shuffled, we'll train our model.

In [2]:
import pandas as pd
import numpy as np

import os
import json # To read the metadata

import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras import backend as k

from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.callbacks import Callback, EarlyStopping

#import torch
#import keras
#from keras import Model, Sequential
#from keras.layers import *
#from keras.optimizers import *
#from keras.callbacks import LearningRateScheduler

import cv2

from sklearn.model_selection import KFold
from sklearn.metrics import log_loss

from tqdm.notebook import tqdm
import random
import gc

import warnings
warnings.filterwarnings("ignore")

#tf.debugging.set_log_device_placement(True) # Enable GPU logging
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [3]:
train_images_path = '../input/train_images/'
train_images = os.listdir(train_images_path)
metadata_path = '../input/train_metadata/'
metadata_dir = os.listdir(metadata_path)

# Read in all the metadata files to make one inclusive dict
metadata = {}
for i, file in enumerate(metadata_dir):
    with open('../input/train_metadata/' + file) as json_file:
        metadata = {**metadata, **json.load(json_file)}

X_paths = []
for img in train_images:
    img = train_images_path + img
    X_paths.append(img)

y = []
for label in train_images:
    if metadata[label.split('_')[0] + '.mp4']['label'] == 'REAL':
        y.append(0)
    else:
        y.append(1)

In [4]:
def shuffle(X, y):
    new_train = []
    for m, n in zip(X, y):
        new_train.append([m, n])
    random.shuffle(new_train)
    X, y = [], []
    for x in new_train:
        X.append(x[0])
        y.append(x[1])
    return X, y

In [5]:
X_paths, y = shuffle(X_paths, y)

# Create X_test from 10% of X
X_test_paths = X_paths[:round(len(X_paths) / 100 * 25)]
X_paths = X_paths[round(len(X_paths) / 100 * 25):]

# Create y_test from 10% of y
y_test = y[:round(len(y) / 100 * 25)]
y = y[round(len(y) / 100 * 25):]

In [6]:
X_paths, y = shuffle(X_paths, y)
X_test_paths, y_test = shuffle(X_test_paths, y_test)

In [7]:
print('There are ' + str(y.count(1)) + ' fake train samples')
print('There are ' + str(y.count(0)) + ' real train samples')
print('There are ' + str(y_test.count(1)) + ' fake test samples')
print('There are ' + str(y_test.count(0)) + ' real test samples')

There are 65969 fake train samples
There are 13154 real train samples
There are 21955 fake test samples
There are 4419 real test samples


## Undersampling
Next we'll balance our data, using undersampling techniques. Source for this method can be found [here](https://www.kaggle.com/unkownhihi/starter-kernel-with-cnn-model-ll-lb-0-69235#Apply-Underbalancing-Techinique)

In [8]:
real = []
fake = []
for m, n in zip(X_paths, y):
    if n == 0:
        real.append(m)
    else:
        fake.append(m)
fake = random.sample(fake, len(real))
X_paths, y = [], []
for x in real:
    X_paths.append(x)
    y.append(0)
for x in fake:
    X_paths.append(x)
    y.append(1)

In [9]:
real = []
fake = []
for m, n in zip(X_test_paths, y_test):
    if n == 0:
        real.append(m)
    else:
        fake.append(m)
fake = random.sample(fake, len(real))
X_test_paths, y_test = [], []
for x in real:
    X_test_paths.append(x)
    y_test.append(0)
for x in fake:
    X_test_paths.append(x)
    y_test.append(1)

In [10]:
X_paths, y = shuffle(X_paths, y)
X_test_paths, y_test = shuffle(X_test_paths, y_test)

In [11]:
print('There are ' + str(y.count(1)) + ' fake train samples')
print('There are ' + str(y.count(0)) + ' real train samples')
print('There are ' + str(y_test.count(1)) + ' fake test samples')
print('There are ' + str(y_test.count(0)) + ' real test samples')

There are 13154 fake train samples
There are 13154 real train samples
There are 4419 fake test samples
There are 4419 real test samples


## Data Augmentation

Data augmentation will go here

In [12]:
ROWS = 64
COLS = 64
CHANNELS = 3
CLASSES = 2

In [13]:
def read_image(file_path):
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
    return cv2.resize(img, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)

def prepare_data(images):
    m = len(images)
    X = np.zeros((m, ROWS, COLS, CHANNELS), dtype=np.uint8)
    y = np.zeros((1, m), dtype=np.uint8)
    for i, image_file in enumerate(images):
        X[i,:] = read_image(image_file)
         
        if metadata[image_file.split('/')[3].split('_')[0]+'.mp4']['label'] == 'REAL':
            y[0, i] = 1
        elif metadata[image_file.split('/')[3].split('_')[0]+'.mp4']['label'] == 'FAKE':
            y[0, i] = 0
    return X, y

def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)].T
    return Y

In [14]:
train_set_x, train_set_y = prepare_data(X_paths)
test_set_x, test_set_y = prepare_data(X_test_paths)

X_train = train_set_x / 255
X_test = test_set_x / 255

Y_train = convert_to_one_hot(train_set_y, CLASSES).T
Y_test = convert_to_one_hot(test_set_y, CLASSES).T

In [15]:
print ("Number of training examples =", X_train.shape[0])
print ("Number of test examples =", X_test.shape[0])
print ("X_train shape:", X_train.shape)
print ("Y_train shape:", Y_train.shape)
print ("X_test shape:", X_test.shape)
print ("Y_test shape:", Y_test.shape)

Number of training examples = 26308
Number of test examples = 8838
X_train shape: (26308, 64, 64, 3)
Y_train shape: (26308, 2)
X_test shape: (8838, 64, 64, 3)
Y_test shape: (8838, 2)


## Modelling

We implement our ResNet using Keras.

In [16]:
def identity_block(X, f, filters, stage, block):
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    # Retrieve Filters
    F1, F2, F3 = filters

    # Save the input value. We'll need this later to add back to the main path. 
    X_shortcut = X

    # First component of main path
    X = Conv2D(filters=F1, kernel_size=(1, 1), strides=(1,1), padding='valid', name=conv_name_base + '2a', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
    X = Activation('relu')(X)

    # Second component of main path
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1,1), padding='same', name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)

    # Third component of main path
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1,1), padding='valid', name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)

    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)

    return X

In [17]:
def convolutional_block(X, f, filters, stage, block, s=2):
    # defining name basis
    conv_name_base='res' + str(stage) + block + '_branch'
    bn_name_base='bn' + str(stage) + block + '_branch'
    
    # Retrieve Filters
    F1, F2, F3 = filters
    
    # Save the input value
    X_shortcut = X


    ##### MAIN PATH #####
    # First component of main path 
    X = Conv2D(F1, (1, 1), strides=(s,s), name=conv_name_base + '2a', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
    X = Activation('relu')(X)

    # Second component of main path
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same', name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)

    # Third component of main path
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)

    
    ##### SHORTCUT PATH ####
    X_shortcut = Conv2D(F3, (1, 1), strides=(s,s), name = conv_name_base + '1', kernel_initializer=glorot_uniform(seed=0))(X_shortcut)
    X_shortcut = BatchNormalization(axis=3, name=bn_name_base + '1')(X_shortcut)

    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    
    return X

In [18]:
def ResNet50(input_shape = (64, 64, 3), classes=2):   
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)
    
    # Stage 1
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Stage 2
    X = convolutional_block(X, f=3, filters=[64, 64, 256], stage=2, block='a', s=1)
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='b')
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='c')

    # Stage 3
    X = convolutional_block(X, f=3, filters=[128, 128, 512], stage=3, block='a', s=2)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='b')
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='c')
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='d')

    # Stage 4
    X = convolutional_block(X, f=3, filters=[256, 256, 1024], stage=4, block='a', s=2)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='b')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='c')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='d')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='e')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='f')

    # Stage 5
    X = convolutional_block(X, f=3, filters=[512, 512, 2048], stage=5, block='a', s=2)
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='b')
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='c')

    # AVGPOOL.
    X = AveragePooling2D((2, 2), name='avg_pool')(X)

    # output layer
    X = Flatten()(X)
    X = Dense(classes, activation='sigmoid', name='fc' + str(classes), kernel_initializer=glorot_uniform(seed=0))(X)
    
    # Create model
    model = Model(inputs=X_input, outputs=X, name='ResNet50')

    return model

In [19]:
kfolds = 5
kf = KFold(n_splits=kfolds)
losses = []

for fold, (tdx, vdx) in enumerate(kf.split(X_train, Y_train)):
    print(f'Fold : {fold}')
    X, X_val, Y, Y_val = X_train[tdx], X_train[vdx], Y_train[tdx], Y_train[vdx]
    model = ResNet50(input_shape=(64, 64, 3), classes=2)
    model.compile(optimizer='adam', loss='binary_crossentropy')
    es = EarlyStopping(monitor='loss', 
                   mode='min',
                   restore_best_weights=True, 
                   verbose=2, 
                   patience=10)
    model.fit(X_train, Y_train, callbacks=[es], epochs=10, batch_size=64, verbose=1)
    pred = model.predict([X_val])
    loss = log_loss(Y_val, pred)
    model.save_weights(f'resnet50_{fold}.h5')
    print('')
    print('Fold ' + str(fold) + ' log loss: ' + str(loss))
    print('')
    losses.append(loss)
    gc.collect()

Fold : 0
Train on 26308 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Fold 0 log loss: 0.6992001248405364

Fold : 1
Train on 26308 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Fold 1 log loss: 1.2799395006537875

Fold : 2
Train on 26308 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Fold 2 log loss: 0.9370857150666398

Fold : 3
Train on 26308 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Fold 3 log loss: 0.8476774827342617

Fold : 4
Train on 26308 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Fold 4 log loss: 0.956792681726417



In [20]:
print(np.mean(losses))

0.9441391010043285


In [21]:
preds = model.evaluate(X_test, Y_test, verbose=0)
print ("Loss = " + str(preds))

Loss = 0.9759235875480826
