In [1]:
import os.path
import tensorflow as tf
import numpy as np
from scipy import linalg
from Antispoofing.AntispoofHelpers.dataset_helper import get_test_generator, get_antispoof_frame
import pandas as pd
from Antispoofing.AntispoofHelpers.hyper_perameter_helper import combine_with_augmentation
from constants import PROJECT_ROOT



Constant~ Document path: /home/jarred/Documents


In [2]:
# Project helper from hyper_perameter_helper; presumably applies the project's
# TensorFlow runtime configuration — TODO confirm against the helper's source.
from Antispoofing.AntispoofHelpers.hyper_perameter_helper import initialise_tf

# Called here, before the InceptionV3 model is created in the following cell.
initialise_tf()

2022-11-03 18:14:09.635693: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-03 18:14:09.780585: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/jarred/anaconda3/envs/Orfao_Masters/lib/python3.8/site-packages/cv2/../../lib64:
2022-11-03 18:14:09.780607: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [3]:
# Shared feature extractor used by compute_embeddings: InceptionV3 with ImageNet
# weights, the classification head removed (include_top=False) and global
# average pooling (pooling='avg'), so each image yields one feature vector
# (2048 values per image according to the shapes printed further down).
inception_model = tf.keras.applications.InceptionV3(include_top=False,
                              weights="imagenet",
                              pooling='avg')

2022-11-03 18:14:09.895988: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:

def compute_embeddings(data_loader_1, data_loader_2):
    """Embed two image sets with the module-level ``inception_model``.

    Args:
        data_loader_1: Batch iterator for the first set. It must expose ``n``
            (number of samples) and ``batch_size`` and be accepted by
            ``Model.predict``.
        data_loader_2: Batch iterator for the second set, same requirements.

    Returns:
        A tuple ``(embeddings_1, embeddings_2)`` holding the ``predict``
        output for each loader (one row per image).
    """
    def _compute_embeddings(dataloader):
        # Ceiling division so a final, partially filled batch is still
        # predicted; floor division would silently drop those samples.
        steps = -(-dataloader.n // dataloader.batch_size)
        # ``steps`` has to be passed by keyword: the second positional
        # parameter of ``Model.predict`` is ``batch_size``, not ``steps``.
        return inception_model.predict(dataloader, steps=steps, verbose=1)

    # compute embeddings for the first set (e.g. real images)
    data_loader_1_embeddings = _compute_embeddings(data_loader_1)

    # compute embeddings for the second set (e.g. generated images)
    data_loader_2_embeddings = _compute_embeddings(data_loader_2)

    print(data_loader_1_embeddings.shape," ", data_loader_2_embeddings.shape)
    return data_loader_1_embeddings, data_loader_2_embeddings

In [5]:

def calculate_fid(real_embeddings, generated_embeddings):
    """Return the Frechet distance between two sets of embedding vectors.

    Both arguments are 2-D arrays whose rows are treated as observations and
    whose columns are treated as variables.

    Args:
        real_embeddings: Embeddings of the first (reference) set.
        generated_embeddings: Embeddings of the second set.

    Returns:
        ``||mu1 - mu2||^2 + trace(sigma1 + sigma2 - 2 * sqrt(sigma1 . sigma2))``
        where ``mu`` / ``sigma`` are the per-set mean vector and covariance.
    """
    # Mean vector and covariance matrix of each set.
    mean_real = real_embeddings.mean(axis=0)
    cov_real = np.cov(real_embeddings, rowvar=False)
    mean_generated = generated_embeddings.mean(axis=0)
    cov_generated = np.cov(generated_embeddings, rowvar=False)

    # Squared Euclidean distance between the two mean vectors.
    mean_gap = mean_real - mean_generated
    squared_mean_distance = np.sum(mean_gap ** 2.0)

    # Matrix square root of the covariance product. The result can come back
    # complex, in which case only the real part is kept.
    cov_product_root = linalg.sqrtm(cov_real.dot(cov_generated))
    if np.iscomplexobj(cov_product_root):
        cov_product_root = cov_product_root.real

    return squared_mean_distance + np.trace(cov_real + cov_generated - 2.0 * cov_product_root)



In [6]:
def calculate_set_distance(set_1_df_file_path_frame, set_2_df_file_path_frame):
    """Compute the FID between the images listed in two file-path frames.

    The longer frame is cut down to the row count of the shorter one (its
    leading rows are kept), both frames are turned into generators with
    ``get_test_generator``, embedded via ``compute_embeddings`` and compared
    with ``calculate_fid``.

    Args:
        set_1_df_file_path_frame: Frame describing the first image set.
        set_2_df_file_path_frame: Frame describing the second image set.

    Returns:
        The value returned by ``calculate_fid`` for the two embedding sets.
    """
    rows_1 = set_1_df_file_path_frame.shape[0]
    rows_2 = set_2_df_file_path_frame.shape[0]

    # Equalise the sample counts by trimming whichever frame is longer.
    if rows_1 > rows_2:
        set_1_df_file_path_frame = set_1_df_file_path_frame[:rows_2]
    elif rows_2 > rows_1:
        set_2_df_file_path_frame = set_2_df_file_path_frame[:rows_1]

    # Arguments are evaluated left to right: generator 1 is built before 2.
    embeddings = compute_embeddings(
        get_test_generator(set_1_df_file_path_frame),
        get_test_generator(set_2_df_file_path_frame),
    )
    return calculate_fid(*embeddings)

In [7]:
# NOTE(review): not read by any cell visible in this part of the notebook; the
# loops below hard-code subject numbers 90 and 75 in their query strings.
test_subject_number = 90

# Root directory of the SIW dataset (absolute, machine-specific path).
dataset_root = "/home/jarred/Documents/Datasets/SIW"

# CSV located under dataset_root; the loops below load it and filter it by
# subject_number and ground_truth.
dataset_csv_name = "siw.csv"

In [8]:
from Antispoofing.AntispoofHelpers.dataset_helper import get_dataframe_by_attack_category, get_dataframe_by_medium_name
def get_category_frame(frame, categories):
    """Concatenate the sub-frames of ``frame`` selected by each category.

    Categories ``"P"`` and ``"R"`` are looked up with
    ``get_dataframe_by_attack_category``; every other value is looked up with
    ``get_dataframe_by_medium_name``.

    Args:
        frame: Source frame handed to the lookup helpers.
        categories: Iterable of category names, processed in order.

    Returns:
        ``pd.concat`` of the per-category frames, in the order given.
    """
    def _select(category):
        if category in ("P", "R"):
            return get_dataframe_by_attack_category(frame, category)
        return get_dataframe_by_medium_name(frame, category)

    return pd.concat([_select(category) for category in categories])


# Calculate Inter-Set Variability

In [9]:
# Augmentation source selector: 0 = SIW, 1 = SIW KF, 2 = traditional.
# NOTE: the loop in the next cell rebinds `mode`, so this value is overwritten.
mode = 0 # siw
# mode = 1 # siw kf
# mode = 2 # traditional
# Every mode listed here is evaluated in turn by the loop in the next cell.
modes = [0, 1, 2]

In [10]:
# Inter-set variability.
# For every augmentation source (mode) and every train/test protocol, compute
# the FID between the augmented training frame (subject_number == 90) and the
# test frame (subject_number == 75), then write one results CSV per mode.

# mode -> (aug_root, aug_csv, csv_name); 0 = SIW, 1 = SIW KF, 2 = traditional.
INTER_MODE_SETTINGS = {
    0: ("/home/jarred/Documents/Generated/SIW_90",
        "SIW_90.csv",
        "SIW_Inter_Variability.csv"),
    1: ("/home/jarred/Documents/Generated/SIW_KF_90",
        "SIW_KF_90.csv",
        "SIW_KF_Inter_Variability.csv"),
    2: ("/home/jarred/Documents/TraditionalAugmentation/SIW_90",
        "SIW_90.csv",
        "Traditional_Inter_Variability.csv"),
}

# Protocols are paired by position: train on TRAIN_SPOOF_COMBINATIONS[k] and
# test on TEST_SPOOF_COMBINATIONS[k].
TRAIN_SPOOF_COMBINATIONS = [
    ["IP7P", "IPP2017", "SGS8"],
    ["ASUS", "IPP2017", "SGS8"],
    ["ASUS", "IP7P", "SGS8"],
    ["ASUS", "IP7P", "IPP2017"],
    ["ASUS", "IP7P", "IPP2017", "SGS8"],
    ["P"],
]
TEST_SPOOF_COMBINATIONS = [
    ["ASUS"],
    ["IP7P"],
    ["IPP2017"],
    ["SGS8"],
    ["P"],
    ["ASUS", "IP7P", "IPP2017", "SGS8"],
]
AUG_PERCENTAGES = [0.05, 0.1, 0.2, 0.30]

use_last_only = False  # not read in this cell; kept in case other cells rely on it
must_remove_normal = True
must_use_normal_only = False

# Parse the dataset CSV once: the four splits do not depend on the mode, so
# re-reading the file for every split and every mode is wasted work.
dataset_df = pd.read_csv(os.path.join(dataset_root, dataset_csv_name))
train_spoof_df = dataset_df.query("subject_number == 90 and ground_truth == 'spoof'")
train_real_df = dataset_df.query("subject_number == 90 and ground_truth == 'real'")
test_real_df = dataset_df.query("subject_number == 75 and ground_truth == 'real'")
test_spoof_df = dataset_df.query("subject_number == 75 and ground_truth == 'spoof'")

inter_results_dir = os.path.join(PROJECT_ROOT, "Results", "Inter_Variability")

for mode in modes:
    if mode not in INTER_MODE_SETTINGS:
        raise TypeError("Mode must be either 0, 1, or 2")
    aug_root, aug_csv, csv_name = INTER_MODE_SETTINGS[mode]

    # The augmentation CSV only depends on the mode, so load it once per mode
    # (as the intra-variability cell does) rather than once per protocol.
    aug_frame = pd.read_csv(os.path.join(aug_root, aug_csv))

    inter_df_dic = {}
    for train_spoof_combination, test_spoof_combination in zip(
            TRAIN_SPOOF_COMBINATIONS, TEST_SPOOF_COMBINATIONS):
        # select the spoof rows of this protocol and append the real rows
        current_train_spoof_df = pd.concat(
            [get_category_frame(train_spoof_df, train_spoof_combination), train_real_df])
        current_test_spoof_df = pd.concat(
            [get_category_frame(test_spoof_df, test_spoof_combination), test_real_df])

        # get the file path frames
        current_train_spoof_file_path_frame = get_antispoof_frame(current_train_spoof_df, dataset_root)
        current_test_spoof_file_path_frame = get_antispoof_frame(current_test_spoof_df, dataset_root)

        # shuffle the test
        # NOTE(review): the shuffles are unseeded; calculate_set_distance trims
        # the longer frame, so the surviving rows (and the FID) can vary
        # between runs.
        current_test_spoof_file_path_frame = current_test_spoof_file_path_frame.sample(frac=1).reset_index(drop=True)

        fid_inter_values = {}
        for aug_percentage in AUG_PERCENTAGES:
            current_aug_frame = combine_with_augmentation(
                train_frame=current_train_spoof_file_path_frame,
                aug_frame=aug_frame,
                aug_root=aug_root,
                categories=train_spoof_combination,
                aug_percentage=aug_percentage,
                must_remove_normal=must_remove_normal,
                must_use_normal_only=must_use_normal_only,
                stratified_name_list_func=None,
            )

            # original training rows plus the selected augmentation rows, shuffled
            current_train_aug_spoof_file_path_frame = pd.concat([current_train_spoof_file_path_frame, current_aug_frame])
            current_train_aug_spoof_file_path_frame = current_train_aug_spoof_file_path_frame.sample(frac=1).reset_index(drop=True)

            fid = calculate_set_distance(current_train_aug_spoof_file_path_frame, current_test_spoof_file_path_frame)
            rounded_fid = round(fid, 2)
            fid_inter_values[aug_percentage] = rounded_fid
            print("Aug %: ", aug_percentage, " FID: ", rounded_fid)

        # A protocol is named after its test combination: the single category
        # name, or "R" when several categories are tested together.
        if len(test_spoof_combination) > 1:
            protocol_name = "R"
        else:
            protocol_name = test_spoof_combination[0]

        inter_df_dic[protocol_name] = fid_inter_values

    print(inter_df_dic)
    df = pd.DataFrame.from_dict(inter_df_dic, orient='index')
    # exist_ok avoids the separate existence check before creating the folder
    os.makedirs(inter_results_dir, exist_ok=True)
    df.to_csv(os.path.join(inter_results_dir, csv_name))

Found 10738 validated image filenames belonging to 2 classes.
Found 10738 validated image filenames belonging to 2 classes.


KeyboardInterrupt



# Calculate Intra-Variability

In [11]:
# Augmentation source selector: 0 = SIW, 1 = SIW KF, 2 = traditional.
# NOTE: the loop in the next cell rebinds `mode`, so this value is overwritten.
mode = 0 # siw
# mode = 1 # siw kf
# mode = 2 # traditional
# Every mode listed here is evaluated in turn by the loop in the next cell.
modes = [0, 1, 2]

In [13]:
# Intra-set variability.
# For every augmentation source (mode) and every training spoof combination,
# compute the FID between the augmented spoof training frame and the real
# training frame (both subject_number == 90), then write one CSV per mode.

# mode -> (aug_root, aug_csv, csv_name); 0 = SIW, 1 = SIW KF, 2 = traditional.
INTRA_MODE_SETTINGS = {
    0: ("/home/jarred/Documents/Generated/SIW_90",
        "SIW_90.csv",
        "SIW_Intra_Variability.csv"),
    1: ("/home/jarred/Documents/Generated/SIW_KF_90",
        "SIW_KF_90.csv",
        "SIW_KF_Intra_Variability.csv"),
    2: ("/home/jarred/Documents/TraditionalAugmentation/SIW_90",
        "SIW_90.csv",
        "Traditional_Intra_Variability.csv"),
}

TRAIN_SPOOF_COMBINATIONS = [
    ["IP7P", "IPP2017", "SGS8"],
    ["ASUS", "IPP2017", "SGS8"],
    ["ASUS", "IP7P", "SGS8"],
    ["ASUS", "IP7P", "IPP2017"],
    ["ASUS", "IP7P", "IPP2017", "SGS8"],
    ["P"],
]
# Only used to name each protocol; paired with TRAIN_SPOOF_COMBINATIONS by
# position.
TEST_SPOOF_COMBINATIONS = [
    ["ASUS"],
    ["IP7P"],
    ["IPP2017"],
    ["SGS8"],
    ["P"],
    ["ASUS", "IP7P", "IPP2017", "SGS8"],
]
AUG_PERCENTAGES = [0.05, 0.1, 0.2, 0.30]

aug_name = "Trad.csv"  # not read in this cell; kept in case other cells rely on it
use_last_only = False  # not read in this cell; kept in case other cells rely on it
must_remove_normal = True
must_use_normal_only = False

# Parse the dataset CSV once: neither split depends on the mode.
dataset_df = pd.read_csv(os.path.join(dataset_root, dataset_csv_name))
train_spoof_df = dataset_df.query("subject_number == 90 and ground_truth == 'spoof'")
train_real_df = dataset_df.query("subject_number == 90 and ground_truth == 'real'")

intra_results_dir = os.path.join(PROJECT_ROOT, "Results", "Intra_Variability")

for mode in modes:
    if mode not in INTRA_MODE_SETTINGS:
        raise TypeError("Mode must be either 0, 1, or 2")
    aug_root, aug_csv, csv_name = INTRA_MODE_SETTINGS[mode]

    # get the real file paths and shuffle them
    # NOTE(review): the shuffles are unseeded; calculate_set_distance trims the
    # longer frame, so the surviving rows (and the FID) can vary between runs.
    current_train_real_file_path_frame = get_antispoof_frame(train_real_df, dataset_root)
    current_train_real_file_path_frame = current_train_real_file_path_frame.sample(frac=1).reset_index(drop=True)

    aug_frame = pd.read_csv(os.path.join(aug_root, aug_csv))

    intra_df_dic = {}
    for train_spoof_combination, test_spoof_combination in zip(
            TRAIN_SPOOF_COMBINATIONS, TEST_SPOOF_COMBINATIONS):
        # Get the spoof frames
        current_train_spoof_df = get_category_frame(train_spoof_df, train_spoof_combination)
        current_train_spoof_file_path_frame = get_antispoof_frame(current_train_spoof_df, dataset_root)

        fid_intra_values = {}
        for aug_percentage in AUG_PERCENTAGES:
            current_aug_frame = combine_with_augmentation(
                train_frame=current_train_spoof_file_path_frame,
                aug_frame=aug_frame,
                aug_root=aug_root,
                categories=train_spoof_combination,
                aug_percentage=aug_percentage,
                must_remove_normal=must_remove_normal,
                must_use_normal_only=must_use_normal_only,
                stratified_name_list_func=None,
            )

            # original spoof rows plus the selected augmentation rows, shuffled
            current_train_aug_spoof_file_path_frame = pd.concat([current_train_spoof_file_path_frame, current_aug_frame])
            current_train_aug_spoof_file_path_frame = current_train_aug_spoof_file_path_frame.sample(frac=1).reset_index(drop=True)

            fid = calculate_set_distance(current_train_aug_spoof_file_path_frame, current_train_real_file_path_frame)
            rounded_fid = round(fid, 2)
            fid_intra_values[aug_percentage] = rounded_fid
            print("Aug %: ", aug_percentage, " FID: ", rounded_fid)

        # A protocol is named after its test combination: the single category
        # name, or "R" when several categories are listed.
        if len(test_spoof_combination) > 1:
            protocol_name = "R"
        else:
            protocol_name = test_spoof_combination[0]

        intra_df_dic[protocol_name] = fid_intra_values

    # Print this cell's own results; the previous version printed
    # inter_df_dic, which belongs to the inter-variability cell and is
    # undefined when that cell has not been run.
    print(intra_df_dic)
    df = pd.DataFrame.from_dict(intra_df_dic, orient='index')
    # exist_ok avoids the separate existence check before creating the folder
    os.makedirs(intra_results_dir, exist_ok=True)
    df.to_csv(os.path.join(intra_results_dir, csv_name))

Found 4736 validated image filenames belonging to 1 classes.
Found 4736 validated image filenames belonging to 1 classes.
(4736, 2048)   (4736, 2048)
Aug %:  0.05  FID:  42823.48
Found 5000 validated image filenames belonging to 1 classes.
Found 5000 validated image filenames belonging to 1 classes.
(5000, 2048)   (5000, 2048)
Aug %:  0.1  FID:  43034.56
Found 5624 validated image filenames belonging to 1 classes.
Found 5624 validated image filenames belonging to 1 classes.
(5624, 2048)   (5624, 2048)
Aug %:  0.2  FID:  43602.88
Found 6428 validated image filenames belonging to 1 classes.
Found 6428 validated image filenames belonging to 1 classes.
(6428, 2048)   (6428, 2048)
Aug %:  0.3  FID:  44465.67
Found 4728 validated image filenames belonging to 1 classes.
Found 4728 validated image filenames belonging to 1 classes.
(4728, 2048)   (4728, 2048)
Aug %:  0.05  FID:  33908.93
Found 4989 validated image filenames belonging to 1 classes.
Found 4989 validated image filenames belonging 