# XceptionNet Deepfake Detector
FIT3183 2020 S2 Assignment
<br/>By Team Dark.HAIYA
<br/>Team members:
- Kee Pei Jiin
- Chin Wen Yuan

In this Colab, we train a deepfake detector which uses the XceptionNet CNN architecture. We mainly refer to [this Github](https://github.com/otenim/Xception-with-Your-Own-Dataset) to create the detector.


# Download training datasets
The training dataset has 1600 images, which is made up of 800 cropped CelebA images and 800 fake faces downloaded from [here](https://github.com/cc-hpc-itwm/DeepFakeDetection/blob/master/Experiments_CelebA/dataset_celebA.7z).

In [None]:
# Download the training dataset
import gdown
!gdown https://drive.google.com/uc?id=1tZ1pQHuz94TCjzo9mdKWuKlRgOnHfog9

Downloading...
From: https://drive.google.com/uc?id=1tZ1pQHuz94TCjzo9mdKWuKlRgOnHfog9
To: /content/training_images.zip
6.00MB [00:00, 11.6MB/s]


In [None]:
!unzip -q /content/training_images.zip
!rm -r /content/training_images.zip

In [None]:
!mv /content/content/training_images /content/

In [None]:
!rm -r /content/content

# Import Libraries & Variables Declaration

In [1]:
import math
import os
import matplotlib
import imghdr
import pickle as pkl
import numpy as np
import matplotlib.pyplot as plt
from keras.applications.xception import Xception, preprocess_input
from keras.optimizers import Adam
#from keras.preprocessing import image
import keras.utils as image
from keras.losses import categorical_crossentropy
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

In [2]:
matplotlib.use('Agg')
#dataset_root = "E://roop//"

dataset_root = 'C:\\DatasetD\\ROOP_100'
result_root = "D:\\DeepFakeRepos\\AllModels\\DeepFakeDetectionModels\\XceptionNet\\"
classes = ["real", "fake"]
num_classes = 2

epochs_pre = 10
epochs_fine = 20
batch_size_pre = 4
batch_size_fine = 4
lr_pre = 1e-3
lr_fine = 1e-4
snapshot_period_pre = 5
snapshot_period_fine = 5
split = 0.7

# Load training data
We load the training images and create their one-hot-categorical label.

Then, we further split the training datasets into smaller datasets for training & validation purposes.
  - 70% will be used for training
  - 30% will be used for validation

In [3]:
# make input_paths and labels
input_paths, labels = [], []
for class_name in os.listdir(dataset_root):
    class_root = os.path.join(dataset_root, class_name)
    class_id = classes.index(class_name)
    for path in os.listdir(class_root):
        path = os.path.join(class_root, path)
        if imghdr.what(path) is None:
            # this is not an image file
            continue
        input_paths.append(path)
        labels.append(class_id)

# convert to one-hot-vector format
labels = to_categorical(labels, num_classes=num_classes)

# convert to numpy array
input_paths = np.array(input_paths)

In [6]:
print(len(labels), len(input_paths))

200 200


In [4]:
# split dataset for training and validation purposes
border = int(len(input_paths) * split)
train_labels = labels[:border]
val_labels = labels[border:]
train_input_paths = input_paths[:border]
val_input_paths = input_paths[border:]
print("Training on %d images and labels" % (len(train_input_paths)))
print("Validation on %d images and labels" % (len(val_input_paths)))

if os.path.exists(result_root) is False:
    os.makedirs(result_root)

Training on 140 images and labels
Validation on 60 images and labels


# Model Training using Transfer Learning Technique

Since out training dataset is quite small, we apply [transfer learning](https://machinelearningmastery.com/transfer-learning-for-deep-learning/#:~:text=Transfer%20learning%20is%20a%20machine,model%20on%20a%20second%20task.) technique in creating the detector.

We use the Keras pre-trained XceptionNet model as our base model. This pre-trained model was trained on Imagenet datasets and is able to classify images into around 1000 different classes. We fine-tune this model so that it recognises real & fake human faces as well.

In [5]:
def generate_from_paths_and_labels(input_paths, labels, batch_size, input_size=(1024, 1024)):
    num_samples = len(input_paths)
    while 1:
        perm = np.random.permutation(num_samples)
        input_paths = input_paths[perm]
        labels = labels[perm]
        for i in range(0, num_samples, batch_size):
            inputs = list(map(
                lambda x: image.load_img(x, target_size=input_size),
                input_paths[i:i+batch_size]
            ))
            inputs = np.array(list(map(
                lambda x: image.img_to_array(x),
                inputs
            )))
            inputs = preprocess_input(inputs)
            yield (inputs, labels[i:i+batch_size])

In [6]:
# base model used is the pre-trained XceptionNet model on imageNet dataset
# do not include imageNet classfier at the top


In [7]:
# create a custom top classifier

base_model = Xception(include_top=False,
                    weights='imagenet',
                    input_shape=(1024, 1024, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.inputs, outputs=predictions)

In [8]:
# train the top classifier layer

# freeze the base_model body layers
for layer in base_model.layers:
    layer.trainable = False

# compile model
model.compile(loss=categorical_crossentropy,
              optimizer=Adam(lr=lr_pre),
              metrics=['accuracy']
)

# train
hist_pre = model.fit_generator(
    generator=generate_from_paths_and_labels(input_paths=train_input_paths,
                                              labels=train_labels,
                                              batch_size=batch_size_pre),

    steps_per_epoch=math.ceil(len(train_input_paths) / batch_size_pre),

    epochs=epochs_pre,

    validation_data=generate_from_paths_and_labels(input_paths=val_input_paths,
                                                  labels=val_labels,
                                                  batch_size=batch_size_pre),

    validation_steps=math.ceil(len(val_input_paths) / batch_size_pre),

    verbose=1,

    callbacks=[ModelCheckpoint(
                filepath=os.path.join(result_root,
                                'model_pre_ep_ROOP_100_5_{epoch}_valloss{val_loss:.3f}.h5'),
                period=snapshot_period_pre,),
    ],
)

model.save(os.path.join(result_root, 'model_pre_ROOP_100_5_final.h5'))



  super().__init__(name, **kwargs)
  hist_pre = model.fit_generator(


Epoch 1/10


: 

In [11]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [12]:
# Fine-tune model
# set all the layers to be trainable
for layer in model.layers:
    layer.trainable = True

# recompile
model.compile(optimizer=Adam(lr=lr_fine),
              loss=categorical_crossentropy,
              metrics=['accuracy'])

# train
hist_fine = model.fit_generator(
    generator=generate_from_paths_and_labels(input_paths=train_input_paths,
                                            labels=train_labels,
                                            batch_size=batch_size_fine),

  steps_per_epoch=math.ceil(len(train_input_paths) / batch_size_fine),

  epochs=epochs_fine,

  validation_data=generate_from_paths_and_labels(input_paths=val_input_paths,
                                                labels=val_labels,
                                                batch_size=batch_size_fine),

  validation_steps=math.ceil(len(val_input_paths) / batch_size_fine),

  verbose=1,

  callbacks=[ModelCheckpoint(
          filepath=os.path.join(result_root,
                                'model_fine_ep_EeE_OLDER_{epoch}_valloss{val_loss:.3f}.h5'),
          period=snapshot_period_fine,),
  ],
)

model.save(os.path.join(result_root, 'model__EeE_OLDER_Final_1.h5'))



  super().__init__(name, **kwargs)
  hist_fine = model.fit_generator(


Epoch 1/25


: 

In [None]:
# performance of the final fine-tuned model
acc = hist_fine.history["accuracy"][-1]
val_acc = hist_fine.history["val_accuracy"][-1]
loss = hist_fine.history['loss'][-1]
val_loss = hist_fine.history['val_loss'][-1]

print("Accuracy on training data: %.2f" %acc)
print("Loss on training data: %.2f" %loss)
print("Accuracy on validation data: %.2f" %val_acc)
print("Loss on validation data: %.2f" %val_loss)

Accuracy on training data: 0.98
Loss on training data: 0.06
Accuracy on validation data: 0.95
Loss on validation data: 0.16


In [None]:
# download the final model weight files
from google.colab import files
files.download("/content/results/model_fine_final.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>