**Load Required Packages**

In [26]:
import cv2
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import scipy
import keras
from keras import layers
from sklearn import metrics
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from keras.layers import Dense, Conv2D, MaxPool2D, Input, GlobalAveragePooling2D,MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from keras.utils.data_utils import get_file
from keras import backend as K

**Load Images**

In [28]:
def limit_data(data_dir, n=100):
    data = []
    for class_name in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, class_name)
        for idx, filename in enumerate(os.listdir(class_dir)):
            if idx >= n:  # Stop when reaching the limit
                break
            file_path = os.path.join(class_dir, filename)
            data.append((file_path, class_name))
    return pd.DataFrame(data, columns=['filename', 'class'])

base_path = 'Dataset2/real_vs_fake/real-vs-fake'
image_gen = ImageDataGenerator(rescale=1./255.)
batch_size = 32
target_size = (224, 224)

print("Organized")

train_df = limit_data(os.path.join(base_path, 'train'), 50000)
valid_df = limit_data(os.path.join(base_path, 'valid'), 10000)
test_df = limit_data(os.path.join(base_path, 'test'), 10000)

train_flow = image_gen.flow_from_dataframe(
    dataframe=train_df,
    x_col="filename",
    y_col="class",
    target_size=target_size,
    batch_size=batch_size,
    class_mode="categorical"
)

valid_flow = image_gen.flow_from_dataframe(
    dataframe=valid_df,
    x_col="filename",
    y_col="class",
    target_size=target_size,
    batch_size=batch_size,
    class_mode="categorical"
)

test_flow = image_gen.flow_from_dataframe(
    dataframe=test_df,
    x_col="filename",
    y_col="class",
    target_size=target_size,
    batch_size=1,
    shuffle=False,
    class_mode="categorical"
)

Organized
Found 100000 validated image filenames belonging to 2 classes.
Found 20000 validated image filenames belonging to 2 classes.
Found 19999 validated image filenames belonging to 2 classes.




In [29]:
train_flow.class_indices

{'fake': 0, 'real': 1}

In [30]:
valid_flow.class_indices

{'fake': 0, 'real': 1}

In [31]:
test_flow.class_indices

{'fake': 0, 'real': 1}

**Helper Functions**

In [33]:
"""
Plot the training and validation loss and accuracy
epochs - list of epoch numbers
loss - training loss for each epoch
val_loss - validation loss for each epoch
"""
def plot_loss(epochs, loss, val_loss):
    plt.plot(epochs, loss, 'bo', label='Training Loss')
    plt.plot(epochs, val_loss, 'orange', label = 'Validation Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.show()
def plot_accuracy(epochs, acc, val_acc):
    plt.plot(epochs, acc, 'bo', label='Training accuracy')
    plt.plot(epochs, val_acc, 'orange', label = 'Validation accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()
    plt.show()

**Fine Tune VGGFace Model**

In [35]:
VGG16_WEIGHTS_PATH_NO_TOP = 'https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_tf_notop_vgg16.h5'
VGGFACE_DIR = 'models/vggface'

In [36]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model

def VGG16(input_shape=None):
    img_input = Input(shape=input_shape)

    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_1')(img_input)
    x = BatchNormalization()(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')(x)
    x = BatchNormalization()(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')(x)
    x = BatchNormalization()(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')(x)
    x = BatchNormalization()(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool4')(x)

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')(x)
    x = BatchNormalization()(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool5')(x)

    inputs = img_input
    model = Model(inputs, x, name='vggface_vgg16')
    model.load_weights('vgg16_weights.h5', by_name=True)

    return model

In [37]:
vgg_model = VGG16(input_shape=(224,224,3))
# for layers in vgg_model.layers:
#     layers.trainable=False
last_layer = vgg_model.get_layer('pool5').output
flat_layer = GlobalAveragePooling2D()(last_layer)
fc1 = Dense(1024, activation='relu', name='fc1')(flat_layer)
fc2 = Dense(1024, activation='relu', name='fc2')(fc1)
fc3 = Dense(512, activation='relu', name='fc3')(fc2)
dense2 = Dense(2, activation='sigmoid', name='dense2')(fc3)
model = Model(vgg_model.input, dense2)

In [38]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1_1 (Conv2D)            (None, 224, 224, 64)      1792      
                                                                 
 batch_normalization_13 (Bat  (None, 224, 224, 64)     256       
 chNormalization)                                                
                                                                 
 conv1_2 (Conv2D)            (None, 224, 224, 64)      36928     
                                                                 
 batch_normalization_14 (Bat  (None, 224, 224, 64)     256       
 chNormalization)                                                
                                                                 
 pool1 (MaxPooling2D)        (None, 112, 112, 64)      0     

In [39]:
model.compile(
    loss=keras.losses.categorical_crossentropy,
    metrics=['acc'],
    optimizer=tf.keras.optimizers.Adam(0.0001)
)

In [None]:
train_steps = len(train_df)//batch_size
valid_steps = len(valid_df)//batch_size
filepath_model = "model_vggface.h5"
checkpoint = ModelCheckpoint(filepath_model, monitor='val_acc', verbose=1, 
                             save_best_only=True, mode='max')

reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=2, 
                                   verbose=1, mode='max', min_lr=0.00001)
callbacks_list = [checkpoint, reduce_lr]

history = model.fit(train_flow,
                    steps_per_epoch=train_steps, 
                    validation_data=valid_flow,
                    validation_steps=valid_steps,
                    epochs=1,
                    verbose=1,
                   callbacks=callbacks_list)

In [54]:
model.save('vggface.h5')

In [None]:
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
plot_loss(range(1, len(loss) + 1), loss, val_loss)

In [None]:
plot_accuracy(range(1, len(loss) + 1), acc, val_acc)

In [61]:
y_pred = model.predict(test_flow)
y_test = test_flow.classes



In [64]:
x=np.argmax(y_pred,axis=1)
print("ROC-AUC Score:", metrics.roc_auc_score(y_test, x))
print("AP Score:", metrics.average_precision_score(y_test, x))
print()
print(metrics.classification_report(y_test, x > 0.5))

ROC-AUC Score: 0.9507499699969998
AP Score: 0.9105263730357516

              precision    recall  f1-score   support

           0       1.00      0.90      0.95     10000
           1       0.91      1.00      0.95      9999

    accuracy                           0.95     19999
   macro avg       0.96      0.95      0.95     19999
weighted avg       0.96      0.95      0.95     19999

