In [None]:
import os
import sys

import tensorflow as tf
# Run tf.function-decorated code eagerly instead of as a traced graph.
# `tf.config.experimental_run_functions_eagerly` is the deprecated spelling of
# this switch; `tf.config.run_functions_eagerly` is its supported replacement.
tf.config.run_functions_eagerly(True)

import h5py
import numpy as np

from sklearn.preprocessing import MinMaxScaler

# Bare expression at the end of the cell: its value (the GPU device name that
# TensorFlow reports) is shown as the cell output.
# NOTE(review): presumably an empty string when no GPU is attached — confirm.
tf.test.gpu_device_name()

In [None]:
# Colab-only step: mount Google Drive at /content/gdrive, the prefix used by
# the source, dataset and output paths in this notebook.
from google.colab import drive
drive.mount("/content/gdrive")

In [None]:
# Put the project's `src` folder (on the mounted Drive) first on the import
# path so the `utils` package below can be imported.
sys.path.insert(0,'/content/gdrive/MyDrive/fake-faces-detector/src')
# NOTE(review): the wildcard import hides where names come from. The
# unqualified helpers called later in this notebook (load_dataset_h5, the
# get* converters and model builders) presumably come from this module —
# consider importing them explicitly.
from utils.modelling_functions import *
from utils.color_space_operations import comatrix_from_image, calculate_difference_image, hist_peek_point

# Loading datasets

In [None]:
# HDF5 files with the training and validation splits on the mounted Drive.
path = '/content/gdrive/MyDrive/masterDB/train.h5'
path_val = '/content/gdrive/MyDrive/masterDB/val.h5'
# Option lists; the `model_name` / `input` selections are validated against
# these in the branching cells further down.
available_models = ['Xception', 'Dense', 'ScalarNN', "SVM"]
available_inputs = ['RGB', 'HCbCr', 'HSV', 'YCbCr', 'grad', 'scalars']

In [None]:
# Experiment selection: one entry from available_models ...
model_name = 'SVM'
# ... and one from available_inputs.
# NOTE(review): `input` shadows the Python builtin of the same name for the
# rest of the notebook; renaming it requires updating every cell that reads it.
input = "scalars"

# Upper bounds on the number of samples kept from each split (applied by
# slicing right after loading).
number_of_train_imgs = 6500
number_of_val_imgs = 1200

In [None]:
# Training split: keep at most `number_of_train_imgs` leading samples and turn
# the labels into a single column of shape (n, 1).
# NOTE(review): load_dataset_h5 is a project helper; presumably it reads the
# named dataset from the HDF5 file — confirm its return type.
X_train = load_dataset_h5(path, 'X_train')[:number_of_train_imgs]
y_train = load_dataset_h5(path, 'y_train')[:number_of_train_imgs].reshape((-1,1))

In [None]:
# Validation split: keep at most `number_of_val_imgs` leading samples and turn
# the labels into a single column of shape (n, 1).
# NOTE(review): load_dataset_h5 is a project helper; presumably it reads the
# named dataset from the HDF5 file — confirm its return type.
X_val = load_dataset_h5(path_val, 'X_val')[:number_of_val_imgs]
y_val = load_dataset_h5(path_val, 'y_val')[:number_of_val_imgs].reshape((-1,1))

In [None]:
# Converters keyed by input option. Each one is wrapped in a lambda so the
# helper name is only looked up when that option is actually selected.
_input_converters = {
    "HCbCr": lambda images: getHCbCr(images),
    "HSV": lambda images: getHSV(images),
    "YCbCr": lambda images: getYCbCr(images),
    "grad": lambda images: getGradImg(images),
    "scalars": lambda images: getAdditionalScalars(images),
}

if input in _input_converters:
    # Same conversion for both splits, training split first.
    _convert = _input_converters[input]
    X_train = _convert(X_train)
    X_val = _convert(X_val)
elif input == "RGB":
    # RGB data is used exactly as loaded.
    pass
elif input not in available_inputs:
    raise ValueError("Bad input specified")
else:
    # Listed in available_inputs, but no branch above handles it.
    raise NotImplementedError("Specified input not implemented")

# Create model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import xception, densenet
from sklearn import svm
from sklearn.metrics import confusion_matrix, classification_report


In [None]:
# Keras callback monitoring `val_loss` with a patience of 8 epochs; the best
# weights seen are restored when training stops early.
early_stop = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

In [None]:
def normalize_scalar_input(X_train, X_val):
    """Min-max scale both splits using statistics from the training split only.

    The scaler is fitted on ``X_train`` and then applied to ``X_train`` and
    ``X_val``, so the validation data never influences the scaling.

    Args:
        X_train: training features used to fit the scaler.
        X_val: validation features, transformed with the fitted scaler.

    Returns:
        Tuple ``(scaled_train, scaled_val)``.
    """
    fitted_scaler = MinMaxScaler().fit(X_train)
    return fitted_scaler.transform(X_train), fitted_scaler.transform(X_val)

In [None]:
if model_name in ('Dense', 'Xception'):
    # Both CNN options follow the same recipe: run each split through the
    # backbone's own preprocess_input, then build the matching network.
    _is_dense = model_name == 'Dense'
    _backbone = densenet if _is_dense else xception
    X_train = _backbone.preprocess_input(X_train)
    X_val = _backbone.preprocess_input(X_val)
    model = get_densenet() if _is_dense else get_xception()

elif model_name == "ScalarNN":
    # Scalar features are min-max scaled (fitted on the training split).
    X_train, X_val = normalize_scalar_input(X_train, X_val)
    model = get_scalaraNN_model()

elif model_name == "SVM":
    # Labels are flattened to 1-D for scikit-learn; the features are passed
    # to the SVM as they are (no scaling step in this branch).
    y_train = np.ravel(y_train)
    model = svm.SVC(kernel='rbf')

elif model_name not in available_models:
    raise ValueError("Bad model specified")
else:
    # Listed in available_models, but no branch above handles it.
    raise NotImplementedError("Specified model not implemented")

In [None]:
if model_name != "SVM":
    # Keras models: up to 200 epochs, validated on the validation split each
    # epoch, with the early-stopping callback attached.
    results = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        callbacks=[early_stop],
    )
else:
    # scikit-learn estimator: a single fit on the training split.
    model.fit(X_train, y_train)

## Evaluation

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import joblib

In [None]:
# Output folder for this experiment's artefacts, one per model/input pair.
saving_dir = f"/content/gdrive/MyDrive/fake-faces-detector/exp/models/{model_name}_{input}"
# exist_ok=True creates the folder (and any missing parents) only when needed,
# replacing the separate existence check that preceded the creation.
os.makedirs(saving_dir, exist_ok=True)

In [None]:
# Predict on the validation split.
y_pred = model.predict(X_val)
if model_name != "SVM":
    # Keras models return scores, not class labels, and classification_report
    # rejects continuous predictions against binary targets. Convert first.
    y_pred = np.asarray(y_pred)
    if y_pred.ndim > 1 and y_pred.shape[-1] > 1:
        # One score per class: take the highest-scoring class index.
        y_pred = y_pred.argmax(axis=-1)
    else:
        # NOTE(review): assumes a single sigmoid-style output where a score
        # above 0.5 means class 1 — confirm against the model definitions.
        y_pred = (y_pred.ravel() > 0.5).astype(int)

# version to print
report = classification_report(y_val, y_pred)
print(report)
# save
report = classification_report(y_val, y_pred, output_dict=True)
df = pd.DataFrame(report).transpose()
df.to_csv(os.path.join(saving_dir, 'classification_report.csv'))


In [None]:
# NOTE(review): assumes label 0 is "fake" and label 1 is "real" — confirm
# against how the dataset labels were encoded.
categories = ["fake", "real"]
# get confusion matrix
y_pred = model.predict(X_val)
if model_name != "SVM":
    # Keras models return scores, not class labels, and confusion_matrix
    # rejects continuous predictions against binary targets. Convert first.
    y_pred = np.asarray(y_pred)
    if y_pred.ndim > 1 and y_pred.shape[-1] > 1:
        # One score per class: take the highest-scoring class index.
        y_pred = y_pred.argmax(axis=-1)
    else:
        # NOTE(review): assumes a single sigmoid-style output where a score
        # above 0.5 means class 1 — confirm against the model definitions.
        y_pred = (y_pred.ravel() > 0.5).astype(int)
conf_matrix = confusion_matrix(y_val, y_pred)
pd_conf_matrix = pd.DataFrame(conf_matrix, columns=categories, index=categories)
sn.set(font_scale=1.4) # for label size
# Explicit figure/axes so the plot never lands on a leftover current figure.
fig, ax = plt.subplots()
sn.heatmap(pd_conf_matrix, ax = ax, annot=True, cmap='binary', fmt='g', cbar = False)
ax.set_title('Confusion Matrix')
# scikit-learn's confusion matrix has true labels as rows, predictions as columns.
ax.set_xlabel('Predicted')
ax.set_ylabel('True')

fig.savefig(os.path.join(saving_dir, 'conf_matrix.png'))

In [None]:
# Persist the trained model; Keras models also get their training curves saved.
if model_name == "SVM":
    # scikit-learn estimator: serialised with joblib.
    joblib.dump(model, os.path.join(saving_dir, "model.sav"))

else:
    # learning history
    losses = pd.DataFrame(model.history.history)
    losses.to_csv(os.path.join(saving_dir, 'losses.csv'))
    # One line plot per metric pair (loss first, then accuracy), each saved
    # to its own PNG right after it is drawn.
    for _columns, _png_name in (
        (['loss', 'val_loss'], 'loss.png'),
        (['accuracy', 'val_accuracy'], 'acc.png'),
    ):
        losses[_columns].plot()
        plt.savefig(os.path.join(saving_dir, _png_name))
    # model save
    model.save(os.path.join(saving_dir, 'model.h5'))
