In [None]:
import tensorflow as tf 


from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten


from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D,Conv2D,MaxPooling2D,Input,Lambda,GlobalMaxPooling2D
from keras.regularizers import l2
from keras import backend as K
from keras.applications.vgg16 import VGG16

from matplotlib.pyplot import imread,imshow
from skimage.transform import rescale, resize
from skimage.io import imsave

import os
import numpy as np
from keras.models import load_model
from tensorflow.compat.v1.keras.backend import get_session


In [None]:
# Dataset parameters 
# Root folder of the dataset; the loading cell appends 'trusted_list/' and 'phishing/' to it.
dataset_path = '../../datasets/VisualPhish/'
# [height, width, channels] handed to read_imgs_per_website when loading the screenshots.
reshape_size = [224,224,3]
# Number of target websites; used when ordering the phishing split and when
# building the per-target start/end index tables.
num_targets = 155 

# Model parameters
# NOTE(review): input_shape is not referenced anywhere else in the visible notebook.
input_shape = [224,224,3]
# Margin read by the module-level `loss` function that is passed to load_model.
margin = 2.2
epsilon = 0.01 # step size multiplied with the gradient sign to perturb the anchor image

# Folder holding the saved split indices and the model; the adversarial features are written here too.
output_dir = './'
# File name of the saved model inside output_dir, without the '.h5' extension.
saved_model = 'model'

# Load dataset:
    - Load training screenshots per website
    - Load Phishing screenshots per website 

In [None]:

def read_imgs_per_website(data_path,targets,imgs_num,reshape_size,start_target_count):
    """Load, crop to 3 channels and resize every screenshot of every target.

    Parameters
    ----------
    data_path : str
        Folder containing one sub-folder per target (must end with a path separator,
        because the target name is appended by plain string concatenation).
    targets : str
        Newline-separated target folder names; the line index is the class id.
    imgs_num : int
        Number of rows pre-allocated in the output arrays.
    reshape_size : sequence
        Output size; ``reshape_size[0]`` is the height and ``reshape_size[1]`` the width.
    start_target_count : int
        Offset added to the line index to form the label.

    Returns
    -------
    all_imgs : np.ndarray, shape ``[imgs_num, height, width, 3]``
    all_labels : np.ndarray, shape ``[imgs_num, 1]``
    all_file_names : list of str
        File names of the images that were loaded, in load order.

    Rows beyond the number of successfully loaded images stay zero.
    """
    out_h, out_w = reshape_size[0], reshape_size[1]
    # The buffer follows reshape_size instead of a hard-coded 224x224; the channel
    # count is fixed to 3 because every image is cropped to its first 3 channels.
    all_imgs = np.zeros(shape=[imgs_num,out_h,out_w,3])
    all_labels = np.zeros(shape=[imgs_num,1])
    all_file_names = []

    def _load_into_slot(file_path, slot, **imread_kwargs):
        # Read one image, drop any alpha channel and store the resized result.
        img = imread(file_path, **imread_kwargs)
        img = img[:,:,0:3]
        all_imgs[slot,:,:,:] = resize(img, (out_h, out_w),anti_aliasing=True)

    count = 0
    for i, target_name in enumerate(targets.splitlines()):
        target_path = data_path + target_name
        print(target_path)
        for file_name in sorted(os.listdir(target_path)):
            file_path = target_path+'/'+file_name
            try:
                _load_into_slot(file_path, count)
            except Exception:
                # Some images were saved with a wrong extension: retry as JPEG.
                # `Exception` (not a bare except) so Ctrl-C still interrupts the load.
                try:
                    _load_into_slot(file_path, count, format='jpeg')
                except Exception:
                    print('failed at:')
                    print('***')
                    print(file_name)
                    # As before, the remaining files of this target are skipped.
                    break
            all_labels[count,:] = i + start_target_count
            all_file_names.append(file_name)
            count = count + 1
    return all_imgs,all_labels,all_file_names



In [None]:
# Read images legit (train)
data_path = dataset_path + 'trusted_list/'
# `with` closes the targets file as soon as its content has been read.
with open(data_path+'targets.txt', "r") as targets_file:
    targets = targets_file.read()
imgs_num = 9363  # rows pre-allocated for the trusted-list screenshots
all_imgs_train,all_labels_train,all_file_names_train = read_imgs_per_website(data_path,targets,imgs_num,reshape_size,0)

# Read images phishing
# NOTE: data_path keeps pointing at the phishing folder after this cell; the
# triplet-sampling cell re-reads targets.txt through it.
data_path = dataset_path + 'phishing/'
with open(data_path+'targets.txt', "r") as targets_file:
    targets = targets_file.read()
imgs_num = 1195  # rows pre-allocated for the phishing screenshots
all_imgs_test,all_labels_test,all_file_names_test = read_imgs_per_website(data_path,targets,imgs_num,reshape_size,0)

X_train_legit = all_imgs_train
y_train_legit = all_labels_train

# Load the train and test split (index arrays into the phishing set)
phish_test_idx = np.load(output_dir+'test_idx.npy')
phish_train_idx = np.load(output_dir+'train_idx.npy')

X_test_phish = all_imgs_test[phish_test_idx,:]
y_test_phish = all_labels_test[phish_test_idx,:]

X_train_phish = all_imgs_test[phish_train_idx,:]
y_train_phish = all_labels_test[phish_train_idx,:]


# Load Model 

In [None]:
# Load the trained model together with the loss function it was saved with

from keras.models import load_model
margin = 2.2
def loss(y_true,y_pred):
    """Return mean(max(y_true, margin + y_pred)) taken over axis 0.

    Reads the module-level `margin`. Registered under the name 'loss' when
    the saved model is loaded below.
    """
    hinge = K.maximum(y_true, margin + y_pred)
    return K.mean(hinge,axis=0)

full_model = load_model(output_dir+saved_model+'.h5', custom_objects={'loss': loss})

# Loss factory: binds a margin into a loss function
def custom_loss(margin):
    """Build a loss function that uses the given `margin`.

    The returned function computes mean(max(y_true, margin + y_pred)) over axis 0.
    """
    def loss(y_true,y_pred):
        return K.mean(K.maximum(y_true, margin + y_pred),axis=0)
    return loss
my_loss = custom_loss(30) #Enter a high margin in order to make sure not to have a 0-loss values 

# Get the TensorFlow session used by the Keras backend; get_adv_example runs
# its gradient computation through this handle with sess.run.
sess = K.get_session()
# Alternative kept for reference (get_session is imported from tensorflow.compat.v1 at the top):
#sess = get_session()
# Kept for reference; per the original note this is what makes tf.placeholder usable:
#tf.disable_v2_behavior() 


# Triplet Sampling

In [None]:
# Group the rows of a split by target label
def order_random_array(orig_arr,y_orig_arr,targets):
    """Return copies of the images/labels with rows grouped by ascending label.

    For every label 0..targets-1, the rows carrying that label are copied, in
    their original relative order, into the next free output slots. Rows whose
    label is outside that range are not copied, which leaves zero rows at the
    end of both outputs.

    orig_arr is indexed as a 4-D array and y_orig_arr as a 2-D array with the
    label in column 0.
    """
    sorted_arr = np.zeros(orig_arr.shape)
    y_sorted_arr = np.zeros(y_orig_arr.shape)
    label_column = y_orig_arr[:, 0]
    write_pos = 0
    for target_id in range(targets):
        rows = np.flatnonzero(label_column == target_id)
        stop = write_pos + rows.shape[0]
        sorted_arr[write_pos:stop,:,:,:] = orig_arr[rows,:,:,:]
        y_sorted_arr[write_pos:stop,:] = target_id
        write_pos = stop
    return sorted_arr,y_sorted_arr 

# Regroup both phishing splits by target label so each target occupies a
# contiguous run of rows. Note that this rebinds the split variables, so
# re-running this cell operates on the already ordered arrays.
X_test_phish,y_test_phish = order_random_array(X_test_phish,y_test_phish,num_targets)
X_train_phish,y_train_phish = order_random_array(X_train_phish,y_train_phish,num_targets)


# Get start and end of each label of the phishing set 
def start_end_each_target_not_complete(num_target,labels):
    """Return the first/last row index of every target in a label-grouped array.

    Unlike `start_end_each_target`, targets may be missing from `labels`.

    Parameters
    ----------
    num_target : int
        Number of targets; labels are used as row indices into the result.
    labels : np.ndarray
        Labels grouped by target (one label per row, read from column 0).

    Returns
    -------
    np.ndarray of shape ``(num_target, 2)``
        Row ``t`` holds ``[start, end]`` (inclusive) of target ``t``, or
        ``[-1, -1]`` when the target does not occur. If a label occurs in
        several separate runs, the last run wins.
    """
    # Start from "absent" everywhere. Using -1 (not 0) as the unset marker matters:
    # a run that legitimately starts at row 0 must not be mistaken for "missing".
    ranges = np.full((num_target, 2), -1.0)
    labels = np.asarray(labels)
    n = labels.shape[0]
    if n == 0:
        return ranges
    # Flatten (n, 1) labels to plain scalars so int() never sees an array.
    flat_labels = labels.reshape(n, -1)[:, 0]

    run_start = 0
    for i in range(1, n + 1):
        # Close the current run at the end of the array or when the label changes.
        if i == n or flat_labels[i] != flat_labels[run_start]:
            target = int(flat_labels[run_start])
            ranges[target, 0] = run_start
            ranges[target, 1] = i - 1
            run_start = i
    return ranges

# Per-target [start, end] row ranges inside the ordered phishing training split;
# pick_pos_img_idx treats a start of -1 as "no phishing sample for this target".
labels_start_end_train_phish = start_end_each_target_not_complete(num_targets,y_train_phish)


# Start/end row of every label when all labels are present
def start_end_each_target(num_target,labels):
    """Return a ``(num_target, 2)`` array of inclusive ``[start, end]`` rows.

    Walks the labels from row 1 on and opens a new range each time the label
    differs from the expected one; the expected label grows by one per change,
    i.e. the code relies on labels being grouped as 0, 1, 2, ... with no gaps.
    The end of the last target is always set to the last row.
    """
    bounds = np.zeros((num_target,2))
    expected_label = 0
    row = 0
    total = labels.shape[0]
    for pos in range(1,total):
        if labels[pos] != expected_label:
            bounds[row,1] = pos-1
            row += 1
            bounds[row,0] = pos
            expected_label += 1
    bounds[num_target-1,1] = total-1
    return bounds

# Per-target [start, end] row ranges inside the trusted-list training images;
# read by pick_pos_img_idx and pick_neg_img.
labels_start_end_train_legit = start_end_each_target(num_targets,y_train_legit)

def pick_pos_img_idx(prob_phish,img_label):
    """Sample one training image that belongs to target `img_label`.

    A uniform draw decides the source: when the draw is greater than
    `prob_phish` the image comes from the trusted-list set, otherwise from the
    phishing training set. If the phishing set has no sample for this target
    (start index -1), the trusted-list set is used instead.

    Reads the module-level index tables and image arrays; returns the image.
    """
    use_legit = np.random.uniform() > prob_phish
    # Fall back to the trusted list when this target has no phishing samples.
    if not use_legit and labels_start_end_train_phish[img_label,0] == -1:
        use_legit = True

    if use_legit:
        bounds, pool = labels_start_end_train_legit[img_label,:], X_train_legit
    else:
        bounds, pool = labels_start_end_train_phish[img_label,:], X_train_phish

    # `high` is exclusive in randint, hence the +1 on the inclusive end index.
    chosen = np.random.randint(low = bounds[0],high = bounds[1]+1)
    return pool[chosen,:]


def pick_neg_img(anchor_idx,num_targets):
    """Sample a trusted-list image from a random target other than `anchor_idx`.

    Returns the image and the label of the target it was drawn from.
    """
    # All target ids except the anchor; either side may be an empty range.
    below = np.arange(0,anchor_idx)
    above = np.arange(anchor_idx+1,num_targets)
    candidates = np.concatenate([below,above])

    diff_target = candidates[np.random.randint(low = 0,high = num_targets-1)]

    # Uniform row inside that target's inclusive [start, end] range.
    start_end = labels_start_end_train_legit[diff_target,:]
    row = np.random.randint(low = start_end[0],high = start_end[1]+1)
    return X_train_legit[row,:],diff_target

# Read the list of target names (one per line; the line index is the class id).
# NOTE: relies on `data_path` as left by the dataset-loading cell.
# `with` closes the file once its content has been read.
with open(data_path+'targets.txt', "r") as targets_file:
    all_targets = targets_file.read()
all_targets = all_targets.splitlines()

def get_idx_of_target(target_name,all_targets):
    """Return the position of the first entry equal to `target_name`.

    Returns None when the name does not occur in `all_targets`.
    """
    for position, candidate in enumerate(all_targets):
        if candidate == target_name:
            return position
    return None

# Groups of related target names. get_associated_targets_idx treats the first
# name of each list as the parent and the remaining names as its children.
target_lists = [['microsoft','ms_outlook','ms_office','ms_bing','ms_onedrive','ms_skype'],['apple','itunes','icloud'],['google','google_drive'],['alibaba','aliexpress']]

def get_associated_targets_idx(target_lists,all_targets):
    """Translate groups of target names into target indices.

    For each group in `target_lists`, the first name is the parent and the
    remaining names are its children. Names are looked up with
    `get_idx_of_target`, so a name missing from `all_targets` maps to None.

    Returns
    -------
    parents_ids : list
        One parent index per group.
    sub_target_lists_idx : list of lists
        The child indices of each group, in the same group order.
    """
    parents_ids = [get_idx_of_target(group[0],all_targets) for group in target_lists]
    sub_target_lists_idx = [
        [get_idx_of_target(child,all_targets) for child in group[1:]]
        for group in target_lists
    ]
    return parents_ids,sub_target_lists_idx 

# Index form of target_lists: parents_ids[k] is the parent of group k and
# sub_target_lists_idx[k] its children; both are read by check_if_same_category.
parents_ids,sub_target_lists_idx  = get_associated_targets_idx(target_lists,all_targets)

def check_if_same_category(img_label1,img_label2,parents=None,children=None):
    """Return 1 if the two labels belong to the same group of related targets, else 0.

    Parameters
    ----------
    img_label1, img_label2 : int
        Target indices to compare.
    parents : list, optional
        Parent index of each group. Defaults to the module-level `parents_ids`.
    children : list of lists, optional
        Child indices of each group. Defaults to the module-level
        `sub_target_lists_idx`.

    Rules:
    - `img_label1` is a parent: same category iff `img_label2` is one of its children.
    - `img_label1` is a child: same category iff `img_label2` is a child of the
      same group or that group's parent.
    - otherwise: 0.

    Every group is checked, however many there are; previously only the first
    three groups were considered for child labels.
    """
    if parents is None:
        parents = parents_ids
    if children is None:
        children = sub_target_lists_idx

    if img_label1 in parents:
        group = parents.index(img_label1)
        return 1 if img_label2 in children[group] else 0

    # The first group that lists img_label1 as a child decides the answer.
    for parent, siblings in zip(parents, children):
        if img_label1 in siblings:
            if img_label2 in siblings or img_label2 == parent:
                return 1
            return 0
    return 0

# Generate adversarial examples

In [None]:
def get_adv_example(triple,epsilon):
    """Perturb the anchor image of a triplet by one signed-gradient step.

    Parameters
    ----------
    triple : list of 3 np.ndarray
        Batches fed to the three model inputs; ``triple[0]`` is the anchor
        that gets perturbed.
    epsilon : float
        Magnitude of the perturbation added along the gradient sign.

    Returns
    -------
    anchor_noise : np.ndarray
        Sign of the gradient of `my_loss` with respect to the anchor input.
    anchor_adv : np.ndarray
        ``triple[0] + epsilon * sign(gradient)``.

    Uses the module-level `full_model`, `my_loss` and `sess`.
    """
    # Build the placeholder and gradient ops once and reuse them. Creating them
    # on every call adds a new gradient sub-graph to the TensorFlow graph per image.
    # The cache is rebuilt if `full_model` or `my_loss` has been replaced.
    cached = getattr(get_adv_example, '_cached_ops', None)
    if cached is None or cached[0] is not full_model or cached[1] is not my_loss:
        # compat.v1 keeps this working where `tf.placeholder` is not exposed at top level.
        y_true = tf.compat.v1.placeholder("float", [None,1])
        # Loss and its gradient with respect to the first (anchor) input.
        loss_val = my_loss(y_true, full_model.output)
        grads = K.gradients(loss_val, full_model.input[0])
        # Only the direction (sign) of the gradient is used.
        delta = K.sign(grads[0])
        cached = (full_model, my_loss, y_true, delta)
        get_adv_example._cached_ops = cached
    _, _, y_true, delta = cached

    # The loss is evaluated against an all-zero target.
    target = np.zeros([1,1])

    dict_input = {
        y_true: target,
        full_model.input[0]: triple[0],
        full_model.input[1]: triple[1],
        full_model.input[2]: triple[2],
    }
    delta1 = sess.run(delta, feed_dict=dict_input)

    # Noise: the gradient sign, stored with the anchor's dtype and shape.
    anchor_noise = np.zeros_like(triple[0]) + delta1

    # Perturb the image
    anchor_adv = triple[0] + epsilon*delta1

    return anchor_noise,anchor_adv

In [None]:
# initialize 3 empty arrays for the input image batch (anchor, positive, negative)
batch_size = 1
h = X_train_legit.shape[1]
w = X_train_legit.shape[2]
triple=[np.zeros((batch_size, h, w,3)) for i in range(3)]

# Phishing test images in their original split order (not regrouped by label)
X_test_phish_non_ordered = all_imgs_test[phish_test_idx,:]
y_test_phish_non_ordered = all_labels_test[phish_test_idx,:]

X_test_phish_adv = np.zeros_like(X_test_phish_non_ordered)

# Iterate over the array that is actually indexed below.
for i in range(0,X_test_phish_non_ordered.shape[0]):
    # Anchor: the phishing test image to perturb
    first_img = X_test_phish_non_ordered[i,:]
    triple[0][0,:,:,:] = first_img
    # Index the scalar directly instead of calling int() on a 1-element array.
    first_img_label = int(y_test_phish_non_ordered[i,0])
    
    # Positive: a negative probability makes the uniform draw always exceed it,
    # so the image always comes from the trusted-list set.
    pos_img = pick_pos_img_idx(-0.1,first_img_label)
    triple[1][0,:,:,:] = pos_img
    
    # Negative: trusted-list image of another target, re-drawn until it is not
    # in the same group of related targets as the anchor.
    # Uses num_targets from the configuration cell instead of a literal 155.
    neg_img,label_neg = pick_neg_img(first_img_label,num_targets)
    while check_if_same_category(first_img_label,label_neg) == 1:
        neg_img,label_neg = pick_neg_img(first_img_label,num_targets)
    triple[2][0,:,:,:] = neg_img
    
    anchor_noise,anchor_adv = get_adv_example(triple,epsilon)
    X_test_phish_adv[i,:] = anchor_adv

# Predict perturbed images using the saved model
# NOTE(review): layers[3] is presumably the shared embedding sub-model of the
# triplet network — confirm against the saved model's layer list.
inside_model = full_model.layers[3]
X_test_phish_adv_features = inside_model.predict(X_test_phish_adv)
# np.save appends the '.npy' extension to the given file name.
np.save(output_dir+'X_test_phish_adv_features',X_test_phish_adv_features)