## **Import Necessary Libraries**

In [24]:
import numpy as np
from libsvm.svmutil import svm_save_model, svm_load_model, svm_predict
from liblinear.liblinearutil import save_model, load_model, predict
import time
import os
import matplotlib.pyplot as plt

## **Import Functions from train.py**

In [25]:
from train import get_data, spatial_frequency_feature_fusion, prepare_data, train_model, train_linear_model, visualize

## **Run this for preprocessing**

- must include /utils
- example path: "/Users/Danniel/Detection-of-GAN-Generated-Images-using-Spatial-Frequency-Domain-Fusion-Data/utils"

In [26]:
import sys
# path of the folder "Detection-of-GAN-Generated-Images-using-Spatial-Frequency-Domain-Fusion-Data"
sys.path.append("/Users/Danniel/Detection-of-GAN-Generated-Images-using-Spatial-Frequency-Domain-Fusion-Data/utils")

from preprocessing_save import load_image, save_image

# directory for reading
img_real = "/Users/Danniel/Downloads/Datasets/Low Dataset/test_real"
img_gan = "/Users/Danniel/Downloads/Datasets/Low Dataset/test_gan"

# directory for saving (must be an empty folder)
save_dir_real = "/Users/Danniel/Downloads/Datasets/tp_real"
save_dir_gan = "/Users/Danniel/Downloads/Datasets/tp_gan"

# store the images
image_real = load_image(img_real)
image_gan = load_image(img_gan)

save_image(image_real, save_dir_real)
save_image(image_gan, save_dir_gan)

## **Provide directory for Preprocessed Real and GAN-Generated Images**

In [19]:
real_directory = "/Users/Danniel/Downloads/Datasets/tp_real"
gan_directory = "/Users/Danniel/Downloads/Datasets/tp_gan"

## *"get_data" function will load the preprocessed images from the directory and store it in a list*

In [20]:
# run data preparation
real_data = get_data(real_directory)
gan_data = get_data(gan_directory)

Preprocessed Images:  10
Preprocessed Images:  10


## *Run to perform the proposed "spatial frequency feature fusion" method to extract meaningful features of an image to classify whether it is GAN-Generated or Real. The feature vector of each image will be stored in "fused_features" list*

In [21]:
# run feature extraction & feature fusion
extracted_real = spatial_frequency_feature_fusion(real_data)
extracted_gan = spatial_frequency_feature_fusion(gan_data)

Performing Feature Extraction
Applying Local Binary Pattern



[[  0   0   0 ...   0   0   0]
 [  0 195 255 ... 191  30   0]
 [  0 143 159 ...  31  30   0]
 ...
 [  0 142  14 ... 252 120   0]
 [  0 135 143 ... 252 124   0]
 [  0   0   0 ...   0   0   0]]

1 out of 10 images
Percentage: 10.0




[[  0   0   0 ...   0   0   0]
 [  0 255  14 ... 243 255   0]
 [  0 255 255 ...  57 224   0]
 ...
 [  0 135 135 ...  62 121   0]
 [  0   7 135 ...  60 120   0]
 [  0   0   0 ...   0   0   0]]

2 out of 10 images
Percentage: 20.0




[[  0   0   0 ...   0   0   0]
 [  0  31 175 ... 255 255   0]
 [  0 255   7 ... 248 248   0]
 ...
 [  0 239 207 ...  31  15   0]
 [  0 135 255 ...  31  31   0]
 [  0   0   0 ...   0   0   0]]

3 out of 10 images
Percentage: 30.0




[[  0   0   0 ...   0   0   0]
 [  0 124  60 ... 248 252   0]
 [  0  60  60 ... 248 248   0]
 ...
 [  0 225 225 ... 225 225   0]
 [  0 225 225 ... 225 225   0]
 [  0   0   0 ...   0   0   0]]

4 out of 10 images
Percentage: 40.0




[[  0

## **For Data Visualization**
- provide real and gan parameter

In [None]:
# visualize
visualize(extracted_real, extracted_gan)

## **Data Preparation before feeding to the classifier**
- assigning of labels
- flattening of feature vector

In [22]:
# prepare the data
labels, datasets = prepare_data(extracted_real, extracted_gan)
print(labels, datasets)

----------------------------Preparing the Data-------------------------------

Labels:  20
Datasets:  20
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([ 0,  0,  0, ..., 95, 32,  0], dtype=uint8), array([0, 0, 0, ..., 1, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, ..., 0, 0, 0], dtype=uint8), array([0, 0, 0, 

## **Run this cell if you want to store the data in txt**
- it can be useful for storing training data to avoid repetition of process

**Suggested format**
- train_labels.txt
- train_features.txt
   

In [23]:
# Save the data
np.savetxt("/Users/Danniel/Downloads/Datasets/Text File Data/train_labels_faces_10.txt", labels)
np.savetxt("/Users/Danniel/Downloads/Datasets/Text File Data/train_features_faces_10.txt", datasets)

**Load the data**

In [None]:
# Load the data
loaded_labels = np.loadtxt("/Users/Danniel/Downloads/Datasets/Text File Data/train_labels_faces_orig.txt")
loaded_features = np.loadtxt("/Users/Danniel/Downloads/Datasets/Text File Data/train_features_faces_orig.txt")

print("Labels: ", len(loaded_labels))
print("Features: ", len(loaded_features))

## **Model Training**
- LibSVM
- Support Vector Machine
- Linear Kernel


***
    - change the parameter if the data are from loaded text file
    - train_model(loaded_labels, loaded_features)

In [None]:
# train the data
model = train_model(labels, datasets, C=1)

## **Model Training**
- Liblinear


***
    - change the parameter if the data are from loaded text file
    - train_model(loaded_labels, loaded_features)

In [None]:
# train the data
model = train_linear_model(loaded_labels, loaded_features, C=1)

## **Run this when you train the model in liblinear**

In [None]:
import pickle
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV

clf = LinearSVC(loss='hinge', C=1)

model = clf.fit(loaded_features, loaded_labels)

platt = CalibratedClassifierCV(clf, method='sigmoid')
prob_estimates = platt.fit(loaded_features, loaded_labels)

# change the filename with directory
with open('/Users/Danniel/Downloads/Model/platt_scale.pkl', 'wb') as model_file:
    pickle.dump(prob_estimates, model_file)

## **Save the model using ".model" extension**

In [None]:
# save the model
model_file = "/Users/Danniel/Downloads/Model/Validate/animals_liblinear.model"
save_model(model_file, model)

## **Load the model***
- use "load_model" if model is trained in liblinear
- use "svm_load_model" if model is trained in libsvm

In [None]:
model_file = load_model("/Users/Danniel/Downloads/Model/Validate/faces_old_liblinear.model")

## **Model Testing**
- provide directory (folder) for testing datasets

## *Testing Combined GAN and Real Images*
- use "predict" if model is trained in liblinear
- use "svm_predict" if model is trained in libsvm

In [None]:
from preprocessing import preprocessing
import os
import numpy as np
from train import spatial_frequency_feature_fusion

# test the model
def get_test_data(directory):
    images = []

    # load the images and store in images list
    for filename in os.listdir(directory):
        image = os.path.join(directory, filename)
        if image is not None:
            images.append(image)

    # preprocessing
    preprocessed_img = []
    for i in images:
        preprocessed_img.append(preprocessing(i))   

    # flatten the feature vector
    fused_features = spatial_frequency_feature_fusion(preprocessed_img)
    labels = np.ones((len(fused_features), 1)) 
    true_label = labels.reshape(labels.shape[0])

    feature_vector = []
    for i in fused_features:
        print(i)
        feature_vector.append(i.flatten())


    return feature_vector


def prepare_test_data(real, gan):
    print("\n\n-------------------PREPARING TEST DATA----------------------------\n")
    #label real  and gan datasets
    real_label = np.ones((len(real), 1))
    gan_label = np.zeros((len(gan), 1))

    # combine the labels and datasets
    dataset_labels = np.vstack((real_label, gan_label))
    datasets = np.vstack((real, gan))

    feature_vector = [i.flatten() for i in datasets]
    true_label = dataset_labels.reshape(dataset_labels.shape[0])

    return feature_vector, true_label


def predict_labels(feature_vector, true_label, model):  
    # predict the result
    print("\n\n-------------------THE MODEL IS PREDICTING----------------------------\n")
    predicted_labels, _, svm_scores= predict(true_label, feature_vector, model)


    print("------------------------------------------RESULT-----------------------------------\n")
    result = []
    for i in predicted_labels:
        if i == 1.0:
            result.append("Real")
        elif i == 0.0:
            result.append("GAN")

    
    print(predicted_labels)
    print(result)

    return svm_scores



while True:
    option = input("Does your data came from text file? (y/n)")

    if option.lower() == "y":
        # mode prediction
        scores = predict_labels(loaded_features, loaded_labels, model_file)
        break

    elif option.lower() == "n":           
        #provide directory for testing dataset
        dir_real = "/Users/User/Desktop/real test/real test objects"
        dir_gan = "/Users/User/Desktop/gan test/gan test objects"

        # undergo spatial-frequency-feature fusion
        real = get_test_data(dir_real)
        gan = get_test_data(dir_gan)

        # combine the real and gan data
        features, labels = prepare_test_data(real, gan)

        # model prediction
        scores = predict_labels(features, labels, model_file)
        break
    
    else:
        print("Invalid Input")

## *Testing One Class of Image only*
- use "predict" if model is trained in liblinear
- use "svm_predict" if model is trained in libsvm

In [None]:
from preprocessing import preprocessing
import os
import numpy as np
from train import spatial_frequency_feature_fusion

# test the model
def get_test_data(directory):
    images = []

    # load the images and store in images list
    for filename in os.listdir(directory):
        image = os.path.join(directory, filename)
        if image is not None:
            images.append(image)

    # preprocessing
    preprocessed_img = []
    for i in images:
        preprocessed_img.append(preprocessing(i))   

    # flatten the feature vector
    fused_features = spatial_frequency_feature_fusion(preprocessed_img)
    labels = np.ones((len(fused_features), 1)) 
    true_label = labels.reshape(labels.shape[0])

    feature_vector = []
    for i in fused_features:
        print(i)
        feature_vector.append(i.flatten())


    return feature_vector


def prepare_test_data(gan):
    print("\n\n-------------------PREPARING TEST DATA----------------------------\n")
    #label real  and gan datasets
    #real_label = np.ones((len(real), 1))
    gan_label = np.zeros((len(gan), 1))

    # combine the labels and datasets
    #dataset_labels = np.vstack((real_label, gan_label))
    #datasets = np.vstack((real, gan))

    feature_vector = [i.flatten() for i in gan]
    true_label = gan_label.reshape(gan_label.shape[0])

    return feature_vector, true_label


def predict(feature_vector, true_label, model):  
    # predict the result
    print("\n\n-------------------THE MODEL IS PREDICTING----------------------------\n")
    predicted_labels, _, likelihood = svm_predict(true_label, feature_vector, model, '-b 1')


    print("------------------------------------------RESULT-----------------------------------\n")
    result = []
    for i in predicted_labels:
        if i == 1.0:
            result.append("Real")
        elif i == 0.0:
            result.append("GAN")

    
    print(predicted_labels)
    print(result)



while True:
    option = input("Does your data came from text file? (y/n)")

    if option.lower() == "y":
        # mode prediction
        predict(loaded_features, loaded_labels, model_file)
        break

    elif option.lower() == "n":           
        #provide directory for testing dataset
        #dir_real = "/Users/User/Desktop/real test/real test objects"
        dir_gan = "/Users/User/Desktop/gan test/gan test objects"

        # undergo spatial-frequency-feature fusion
        #real = get_test_data(dir_real)
        gan = get_test_data(dir_gan)

        # combine the real and gan data
        features, labels = prepare_test_data(gan)

        # model prediction
        predict(features, labels, model_file)
        break
    
    else:
        print("Invalid Input")

## **Store the test data in txt file**

In [None]:
# Save the data
np.savetxt("/Users/User/Desktop/txtfiles/test_labels.txt", labels)
np.savetxt("/Users/User/Desktop/txtfiles/test_features.txt", features)

## **Platt Scaling**
- run this to provide the platt scaling of the model
- variable *scores* will be obtained when testing is run (this code should be run after getting all the svm_scores of the training data)
- change the parameter of train accordingly

In [None]:
from liblinear.liblinearutil import train

platt_scale = train(loaded_labels, scores, '-s 0 -c 1 -B 1')

save_model('/Users/Danniel/Downloads/Model/Platt Scaling/platt_scale_validate(backup)_faces.model', platt_scale)