## **Import Necessary Libraries**

In [1]:
import numpy as np
from liblinear.liblinearutil import save_model, load_model, predict
import matplotlib.pyplot as plt
import h5py

## **Import Functions from train.py**

In [2]:
from train import get_data, spatial_frequency_feature_fusion, prepare_data, train_linear_model, visualize

## **Run this for preprocessing**

- must include /utils
- example path: "/Users/Danniel/Detection-of-GAN-Generated-Images-using-Spatial-Frequency-Domain-Fusion-Data/utils"

In [None]:
import sys
# Make the project's "utils" folder importable: the appended path must be the
# "utils" subfolder of "Detection-of-GAN-Generated-Images-using-Spatial-Frequency-Domain-Fusion-Data"
# on the local machine (adjust it to your own checkout).
sys.path.append("/Users/User/Desktop/Detection-of-GAN-Generated-Images-using-Spatial-Frequency-Domain-Fusion-Data/utils")

# must come after the sys.path.append above
# (presumably preprocessing_save lives in that utils folder -- confirm)
from preprocessing_save import load_image, save_image

# directory for reading (raw GAN-generated and real images)
img_gan = "/Users/Danniel/Downloads/gan scenes"
img_real = "/Users/Danniel/Downloads/real scenes"

# directory for saving (must be an empty folder)
save_dir_gan = "/Users/Danniel/Downloads/p_gan_scenes"
save_dir_real = "/Users/Danniel/Downloads/p_real_scenes"

# store the images: load_image is given the whole input directory
image_real = load_image(img_real)
image_gan = load_image(img_gan)

# write what was loaded from each input directory into its own output directory
save_image(image_real, save_dir_real)
save_image(image_gan, save_dir_gan)

## **Provide directory for Preprocessed Real and GAN-Generated Images**

In [None]:
# folders holding the already preprocessed GAN-generated and real images
# (machine-specific paths; change them to your own dataset location)
gan_directory = "/Users/Danniel/Downloads/Sample Dataset/gan"
real_directory = "/Users/Danniel/Downloads/Sample Dataset/real"

## *The "get_data" function loads the preprocessed images from the directory and stores them in a list*

In [None]:
# run data preparation: load the images of each class from its own directory
# (per the notebook text above, get_data returns them as a list)
gan_data = get_data(gan_directory)
real_data = get_data(real_directory)

## *Run to perform the proposed "spatial frequency feature fusion" method to extract meaningful features of an image to classify whether it is GAN-Generated or Real. The feature vector of each image will be stored in "fused_features" list*

In [None]:
# run feature extraction & feature fusion, separately for the GAN and the real images,
# so the two classes stay apart until labels are assigned
extracted_gan = spatial_frequency_feature_fusion(gan_data)
extracted_real = spatial_frequency_feature_fusion(real_data)

## **For Data Visualization**
- pass the extracted GAN and real features as parameters (GAN first, then real)

In [None]:
# visualize the extracted features; argument order is GAN first, real second
visualize(extracted_gan, extracted_real)

## **Data Preparation before feeding to the classifier**
- assigning of labels
- flattening of feature vector

In [None]:
# prepare the data: prepare_data returns a (labels, datasets) pair built from the
# GAN and real features (per the notebook text: label assignment + flattening)
labels, datasets = prepare_data(extracted_gan, extracted_real)

## **Model Training**
- Liblinear


***
    - change the arguments if the labels and features were loaded from a saved data file
    - e.g. train_linear_model(np_labels, np_features)

In [None]:
# train the data with the helper imported from train.py
# NOTE(review): C=1 is presumably the LIBLINEAR cost parameter -- confirm in train.py
model = train_linear_model(labels, datasets, C=1)

## **Save the model using ".model" extension**

In [None]:
# save the model
# here model_file is the destination path (a string); note that the
# "Load the model" cell later rebinds the same name to a loaded model object
model_file = "/Users/Danniel/Downloads/Model/True Model/scenes.model"
save_model(model_file, model)

## **Load the model**

In [8]:
# despite its name, model_file now holds the loaded model object (it is what the
# testing cells pass as the model argument to predict / predict_labels), not a path
model_file = load_model("/Users/User/Desktop/model/combined_liblinear.model")
# optional Platt-scaling model used by the "Re-testing" cell; while this line stays
# commented out, plat_file is undefined
# plat_file = load_model("platt scaler/platt_scale_scenes.model")

## **Model Testing**
- provide directory (folder) for testing datasets

## *Testing Combined GAN and Real Images*
- use "predict" if model is trained in liblinear
- use "svm_predict" if model is trained in libsvm

In [None]:
from preprocessing import preprocessing
import os
import numpy as np
from train import spatial_frequency_feature_fusion

# test the model
def get_test_data(directory):
    """Turn every file in ``directory`` into a flattened fused-feature vector.

    The full path of each directory entry is handed to ``preprocessing``; the
    preprocessed results are passed together to
    ``spatial_frequency_feature_fusion`` and each returned feature is flattened.

    Args:
        directory: path of the folder that contains the test images.

    Returns:
        A list with one flattened feature vector per directory entry, in
        sorted filename order.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the printed scores) reproducible between runs
    image_paths = [
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
    ]

    # preprocessing (receives the image path, not the pixel data)
    preprocessed_img = [preprocessing(path) for path in image_paths]

    # feature extraction & fusion over the whole batch
    fused_features = spatial_frequency_feature_fusion(preprocessed_img)

    # flatten the feature vector of every image
    feature_vector = []
    for feature in fused_features:
        print(feature)
        feature_vector.append(feature.flatten())

    return feature_vector


def prepare_test_data(gan, real):
    """Merge GAN and real test features and build the matching label vector.

    GAN samples are labelled 1 and real samples 0; GAN rows come first in
    both returned sequences.

    Args:
        gan: sequence of feature vectors of the GAN-generated images.
        real: sequence of feature vectors of the real images.

    Returns:
        (feature_vector, true_label): a list of flattened per-sample arrays
        and a 1-D array of labels of the same length.
    """
    print("\n\n-------------------PREPARING TEST DATA----------------------------\n")

    # one label column per class: ones for GAN, zeros for real
    label_column = np.vstack((
        np.ones((len(gan), 1)),
        np.zeros((len(real), 1)),
    ))

    # stack the samples in the same GAN-then-real order as the labels
    stacked = np.vstack((gan, real))

    feature_vector = []
    for row in stacked:
        feature_vector.append(row.flatten())

    # collapse the (n, 1) label column into a flat vector of length n
    true_label = label_column.reshape(label_column.shape[0])

    return feature_vector, true_label


def predict_labels(feature_vector, true_label, model):
    """Run the loaded model on the test samples and print the outcome.

    Args:
        feature_vector: the test samples, one feature vector per sample.
        true_label: the ground-truth label of every sample (passed to
            ``predict`` as its first argument).
        model: the loaded model object handed to ``predict``.

    Returns:
        The third value returned by ``predict`` (the per-sample scores).
    """
    # predict the result
    print("\n\n-------------------THE MODEL IS PREDICTING----------------------------\n")
    predicted_labels, _, svm_scores = predict(true_label, feature_vector, model)

    print("------------------------------------------RESULT-----------------------------------\n")
    # the former "GAN"/"Real" string list was built here but never printed or
    # returned, so it has been dropped; the printed output is unchanged
    print(predicted_labels)
    print(svm_scores)

    return svm_scores



# keep asking until the user gives a valid answer
while True:
    option = input("Does your data came from text file? (y/n)")

    if option.lower() == "y":
        # model prediction on the data loaded from the hdf5 file: np_features and
        # np_labels are created by the "Load the data from hdf5 file" cells.
        # predict_labels expects (feature_vector, true_label, model); the previous
        # call used np.labels / np.features, which are not numpy attributes.
        scores = predict_labels(np_features, np_labels, model_file)
        break

    elif option.lower() == "n":
        # provide directory for testing dataset
        dir_gan = "/Users/Danniel/Downloads/gan scenes test"
        dir_real = "/Users/Danniel/Downloads/real scenes test"

        # undergo spatial-frequency-feature fusion
        gan = get_test_data(dir_gan)
        real = get_test_data(dir_real)

        # combine the real and gan data
        features, labels = prepare_test_data(gan, real)

        # model prediction
        scores = predict_labels(features, labels, model_file)
        break

    else:
        print("Invalid Input")

## **Re-testing**

In [None]:
# first pass: run the classifier and keep its per-sample scores
svm_labels, _, scores = predict(labels, features, model_file)
# second pass: feed those scores to the Platt-scaling model; '-b 1' asks predict
# for probability estimates
# NOTE(review): plat_file is only assigned in a commented-out line of the
# "Load the model" cell, so this raises NameError unless that line is enabled
predicted_labels, _, probability_estimates = predict(labels, scores, plat_file, '-b 1')

print(predicted_labels)
print(probability_estimates)

## **Store the model data in hdf5 file**

In [None]:
import os

# provide directory for data folder (data.h5 is created inside it)
os.chdir("/Users/Danniel/Downloads/Sample Dataset")

# write the labels and feature vectors to data.h5 ('w' overwrites an existing file);
# the context manager closes the file even if a create_dataset call raises
with h5py.File('data.h5', 'w') as hf:
    # store labels and datasets to the model data
    hf.create_dataset('labels', data=labels)
    hf.create_dataset('feature_vector', data=datasets)

## **Load the data from hdf5 file**

In [5]:
# directory of data folder

import os
os.chdir("/Users/User/Desktop/model")

# provide filename of the model data
# opened read-only and left open on purpose: the next cell reads datasets from rf
# NOTE(review): rf is never closed anywhere in this notebook
rf = h5py.File('data.h5', 'r')

# show the dataset names stored in the file
rf.keys()

<KeysViewHDF5 ['feature_vector', 'labels']>

In [6]:
# fetch the two datasets by name from the open file
# (get() returns None instead of raising when a name is missing)
loaded_labels = rf.get('labels')
loaded_feature_vector = rf.get('feature_vector')

# convert object to numpy array
np_labels = np.array(loaded_labels)
np_features = np.array(loaded_feature_vector)

# sanity check: both counts should be equal (one label per feature vector)
print("Loaded Labels: ",len(np_labels))
print("Loaded Features: ", len(np_features))

Loaded Labels:  5600
Loaded Features:  5600


In [10]:
from preprocessing import preprocessing
import os
import numpy as np
from train import spatial_frequency_feature_fusion

# test the model
def get_test_data(directory):
    """Turn every file in ``directory`` into a flattened fused-feature vector.

    The full path of each directory entry is handed to ``preprocessing``; the
    preprocessed results are passed together to
    ``spatial_frequency_feature_fusion`` and each returned feature is flattened.

    Args:
        directory: path of the folder that contains the test images.

    Returns:
        A list with one flattened feature vector per directory entry, in
        sorted filename order.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the printed scores) reproducible between runs
    image_paths = [
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
    ]

    # preprocessing (receives the image path, not the pixel data)
    preprocessed_img = [preprocessing(path) for path in image_paths]

    # feature extraction & fusion over the whole batch
    fused_features = spatial_frequency_feature_fusion(preprocessed_img)

    # flatten the feature vector of every image
    feature_vector = []
    for feature in fused_features:
        print(feature)
        feature_vector.append(feature.flatten())

    return feature_vector


def prepare_test_data(gan, real):
    """Merge GAN and real test features and build the matching label vector.

    GAN samples are labelled 1 and real samples 0; GAN rows come first in
    both returned sequences.

    Args:
        gan: sequence of feature vectors of the GAN-generated images.
        real: sequence of feature vectors of the real images.

    Returns:
        (feature_vector, true_label): a list of flattened per-sample arrays
        and a 1-D array of labels of the same length.
    """
    print("\n\n-------------------PREPARING TEST DATA----------------------------\n")

    # one label column per class: ones for GAN, zeros for real
    label_column = np.vstack((
        np.ones((len(gan), 1)),
        np.zeros((len(real), 1)),
    ))

    # stack the samples in the same GAN-then-real order as the labels
    stacked = np.vstack((gan, real))

    feature_vector = []
    for row in stacked:
        feature_vector.append(row.flatten())

    # collapse the (n, 1) label column into a flat vector of length n
    true_label = label_column.reshape(label_column.shape[0])

    return feature_vector, true_label


def predict_labels(feature_vector, true_label, model):
    """Run the loaded model on the test samples and print the outcome.

    Args:
        feature_vector: the test samples, one feature vector per sample.
        true_label: the ground-truth label of every sample (passed to
            ``predict`` as its first argument).
        model: the loaded model object handed to ``predict``.

    Returns:
        The third value returned by ``predict`` (the per-sample scores).
    """
    # predict the result
    print("\n\n-------------------THE MODEL IS PREDICTING----------------------------\n")
    predicted_labels, _, svm_scores = predict(true_label, feature_vector, model)

    print("------------------------------------------RESULT-----------------------------------\n")
    # the former "GAN"/"Real" string list was built here but never printed or
    # returned, so it has been dropped; the printed output is unchanged
    print(predicted_labels)
    print(svm_scores)

    return svm_scores



# ask until the answer is "y" or "n" (case-insensitive), then predict once
while True:
    option = input("Does your data came from text file? (y/n)")

    # reject anything that is not a recognised answer and ask again
    if option.lower() not in ("y", "n"):
        print("Invalid Input")
        continue

    if option.lower() == "y":
        # data already loaded from file: predict directly on np_features / np_labels
        scores = predict_labels(np_features, np_labels, model_file)
    else:
        # folders that hold the GAN and real test images
        dir_gan = "/Users/Danniel/Downloads/gan scenes test"
        dir_real = "/Users/Danniel/Downloads/real scenes test"

        # extract the fused features of each class
        gan = get_test_data(dir_gan)
        real = get_test_data(dir_real)

        # merge both classes and build their labels
        features, labels = prepare_test_data(gan, real)

        # run the model on the merged test set
        scores = predict_labels(features, labels, model_file)

    break



-------------------THE MODEL IS PREDICTING----------------------------

Accuracy = 100% (5600/5600) (classification)
------------------------------------------RESULT-----------------------------------

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 

## **Platt Scaling**
- run this to provide the platt scaling of the model
- variable *scores* will be obtained when testing is run (this code should be run after getting all the svm_scores of the training data)
- change the parameter of train accordingly

In [12]:
from liblinear.liblinearutil import train

# Fit the Platt scaler: a LIBLINEAR model trained on the SVM scores (as the only
# feature) against the true labels. Per the LIBLINEAR options, '-s 0' selects
# L2-regularized logistic regression, '-c 1' the cost and '-B 1' a bias term.
# '-v 5' makes this call run 5-fold cross validation and report its accuracy
# instead of returning a model.
validate = train(np_labels, scores, '-s 0 -c 1 -B 1 -v 5')
# same options without '-v': this call returns the model that is saved below
platt_scale = train(np_labels, scores, '-s 0 -c 1 -B 1')

save_model('/Users/User/Desktop/model/platt_scale_updated_combined.model', platt_scale)

Cross Validation Accuracy = 100%
