In [1]:
import keras
import sys
import h5py
import numpy as np
import argparse
from keras.preprocessing import image
import os
import shutil
from utils import *

In [68]:
# Select candidate triggers: load the per-label mask images from RESULT_DIR,
# run a median-absolute-deviation (MAD) outlier test on their L1 norms, and
# copy the mask/pattern images of the flagged labels into TRIGGER_DIR.
RESULT_DIR = 'results/sunglasses'
TRIGGER_DIR = 'triggers/sunglasses'
IMG_FILENAME_TEMPLATE = 'multi_visualize_{}_label_{}.png'
INPUT_SHAPE = (55, 47, 3)
NUM_CLASSES = 1283  # total number of classes in the model
SELECTION = 0 # if not 0, select only this amount of triggers
ANOMALY_THRESHOLD = 2  # labels whose anomaly index exceeds this value are flagged

mask_flatten = []
idx_mapping = {}  # y_label -> index of that label's entry in mask_flatten / l1_norm_list

# Probe every label instead of inferring a label count from the number of
# files in RESULT_DIR: that estimate silently skipped masks whenever the
# directory did not hold exactly three files per label, or the labels were
# not contiguous from 0. Labels without a mask file are simply skipped.
for y_label in range(NUM_CLASSES):
    mask_filename = IMG_FILENAME_TEMPLATE.format('mask', y_label)
    file_path = os.path.join(RESULT_DIR, mask_filename)
    if not os.path.isfile(file_path):
        continue

    # target_size expects (height, width), so drop the channel entry.
    img = image.load_img(file_path, color_mode='grayscale', target_size=INPUT_SHAPE[:2])
    mask = image.img_to_array(img) / 255
    mask = mask[:, :, 0]

    mask_flatten.append(mask.flatten())
    idx_mapping[y_label] = len(mask_flatten) - 1

if not mask_flatten:
    # Fail with an actionable message instead of an opaque NumPy reduction error.
    raise ValueError(
        "no mask images matching {!r} found in {!r}".format(
            IMG_FILENAME_TEMPLATE.format('mask', '<label>'), RESULT_DIR))

l1_norm_list = [np.sum(np.abs(m)) for m in mask_flatten]

# detect mad outliers
# NOTE(review): the usual consistency constant that makes MAD comparable to a
# normal standard deviation is ~1.4826; 1.15 is kept unchanged here - confirm
# that this value is intentional.
consistency_constant = 1.15
median = np.median(l1_norm_list)
mad = consistency_constant * np.median(np.abs(np.asarray(l1_norm_list) - median))

# A MAD of 0 turns the ratios below into inf (norm below the median) or nan
# (norm equal to the median). The comparisons still behave sensibly - inf is
# flagged, nan is not - so only the floating-point warnings are silenced.
with np.errstate(divide='ignore', invalid='ignore'):
    min_mad = np.abs(np.min(l1_norm_list) - median) / mad

    # print('median: {}, MAD: {}'.format(median, mad))
    # print('anomaly index: {}'.format(min_mad))

    flag_list = []
    for y_label, mask_idx in idx_mapping.items():
        l1_norm = l1_norm_list[mask_idx]
        # Only masks with an L1 norm at or below the median are candidates.
        if l1_norm > median:
            continue
        if np.abs(l1_norm - median) / mad > ANOMALY_THRESHOLD:
            flag_list.append((y_label, l1_norm))

# Smallest L1 norm first, so that SELECTION keeps the smallest masks.
flag_list.sort(key=lambda x: x[1])

# makedirs also creates missing parent directories (TRIGGER_DIR is nested)
# and does not fail when the directory already exists.
os.makedirs(TRIGGER_DIR, exist_ok=True)

if SELECTION and SELECTION < len(flag_list):
    flag_list = flag_list[:SELECTION]

for y_label, _ in flag_list:
    for s in ['mask', 'pattern']:
        filename = IMG_FILENAME_TEMPLATE.format(s, y_label)
        src_path = os.path.join(RESULT_DIR, filename)
        dst_path = os.path.join(TRIGGER_DIR, filename)
        shutil.copyfile(src_path, dst_path)

In [2]:
# Predictions whose KL divergence falls below KL_THRESHOLD are relabelled
# to the extra class index NUM_CLASSES by custom_test.
KL_THRESHOLD = 5
NUM_CLASSES = 1283

# One entry per backdoored network, keyed by its position in the list:
# (model file, trigger directory, trigger image filename template).
args = dict(enumerate([
    ("models/sunglasses_bd_net.h5", "triggers/sunglasses", 'sunglasses_visualize_{}_label_{}.png'),
    ("models/anonymous_1_bd_net.h5", "triggers/anonymous_1", 'anonymous_1_visualize_{}_label_{}.png'),
    ("models/anonymous_2_bd_net.h5", "triggers/anonymous_2", 'anonymous_2_visualize_{}_label_{}.png'),
    ("models/multi_trigger_multi_target_bd_net.h5", "triggers/Multi-trigger-Multi-target", 'multi_visualize_{}_label_{}.png'),
]))

# Poisoned datasets per network, using the same keys as `args`.
# None means no poisoned dataset is listed for that network.
bd_data = dict(enumerate([
    ["original_data/data/sunglasses_poisoned_data.h5"],
    ["original_data/data/anonymous_1_poisoned_data.h5"],
    None,
    list(map(
        "original_data/data/Multi-trigger Multi-target/{}_poisoned_data.h5".format,
        ["eyebrows", "lipstick", "sunglasses"],
    )),
]))

# Clean test set evaluated against every network.
cl_test = "original_data/data/clean_test_data.h5"

In [3]:
def data_loader(filepath):
    """Load the 'data' and 'label' datasets from an HDF5 file.

    Args:
        filepath: path to an HDF5 file that contains datasets named
            'data' and 'label'.

    Returns:
        A tuple (x_data, y_data) of NumPy arrays. x_data is the 'data'
        dataset with its axes reordered by transpose((0, 2, 3, 1));
        y_data is the 'label' dataset as stored.
    """
    # Open the file in a context manager so the handle is closed once both
    # datasets have been copied into memory (it was previously left open).
    with h5py.File(filepath, 'r') as data:
        x_data = np.array(data['data'])
        y_data = np.array(data['label'])

    # assumes 'data' is stored channels-first (N, C, H, W) - TODO confirm
    x_data = x_data.transpose((0, 2, 3, 1))

    return x_data, y_data

In [10]:
def custom_test(data_path, bd_model, masks, patterns, idx_mapping):
    """Measure accuracy before and after stamping triggers onto the inputs.

    The dataset at data_path is classified by bd_model. Every sample whose
    predicted label appears in idx_mapping is blended with the matching
    mask/pattern and classified again. Among those samples, any whose KL
    divergence between the two predictions is below KL_THRESHOLD is
    relabelled as NUM_CLASSES.

    Args:
        data_path: HDF5 file path passed to data_loader.
        bd_model: model object exposing predict().
        masks: sequence of trigger masks, one per entry of idx_mapping.
        patterns: sequence of trigger patterns, aligned with masks.
        idx_mapping: iterable of target labels (iterating a dict yields its
            keys). Assumes its iteration order matches the order of masks
            and patterns - TODO confirm against trigger_loader.

    Returns:
        A tuple (acc1, acc2) of percentages: agreement between predicted
        labels and y before the trigger-based relabelling, and after it.
    """
    x, y = data_loader(data_path)
    y_pred = bd_model.predict(x / 255)
    label_p = np.argmax(y_pred, axis=1)
    acc1 = np.mean(np.equal(label_p, y)) * 100

    # Plain `bool` replaces the `np.bool8` alias, which was removed in NumPy 2.0.
    conditions = np.zeros_like(label_p, dtype=bool)
    adv_x = x.copy()
    for mask, pattern, y_label in zip(masks, patterns, idx_mapping):
        condition = (label_p == y_label)
        # Accumulate which samples received any trigger.
        conditions |= condition
        # Blend: keep the image where the mask is 0, use the pattern where it is 1.
        # assumes mask is (H, W) and pattern is (H, W, C) - TODO confirm
        adv_x[condition] = (1-mask)[None, :, :, None] * x[condition] + mask[None, :, :, None] * pattern[None,...]

    adv_x = adv_x / 255

    adv_y_pred = bd_model.predict(adv_x)
    kl = KL_divergence(y_pred, adv_y_pred)
    tmp_label = label_p.copy()
    # Predictions that barely change once the trigger is applied get the extra label.
    tmp_label[kl < KL_THRESHOLD] = NUM_CLASSES
    # Only samples that actually had a trigger applied may be relabelled.
    label_p[conditions] = tmp_label[conditions]

    acc2 = np.mean(np.equal(label_p, y)) * 100

    return acc1, acc2

In [11]:
# Evaluate every configured network: clean accuracy first, then the attack
# success rate on each poisoned dataset listed for it (if any).
for idx, (model_path, trigger_path, img_filename_template) in args.items():
    print("********************************************")
    print("Badnet {}".format(idx))
    bd_model = keras.models.load_model(model_path)
    masks, patterns, idx_mapping = trigger_loader(trigger_path, img_filename_template)

    print("Test for clean accuracy")
    cl_acc_before, cl_acc_after = custom_test(
        cl_test, bd_model, masks, patterns, idx_mapping
    )
    print("Accuracy before adding triggers: {}".format(cl_acc_before))
    print("Accuracy after adding triggers: {}".format(cl_acc_after))

    bd_tests = bd_data[idx]
    if bd_tests is None:
        # No poisoned dataset is listed for this network.
        continue

    print("Test for attack success rate")
    for poisoned_path in bd_tests:
        print("Data from: {}".format(poisoned_path))
        bd_asr_before, bd_asr_after = custom_test(
            poisoned_path, bd_model, masks, patterns, idx_mapping
        )
        print("Attack success rate before adding triggers: {}".format(bd_asr_before))
        print("Attack success rate after adding triggers: {}".format(bd_asr_after))

********************************************
Badnet 0
Test for clean accuracy


  kl = y_true * np.log(y_true / y_pred)
  kl = y_true * np.log(y_true / y_pred)


Accuracy before adding triggers: 97.77864380358535
Accuracy after adding triggers: 97.63055339049103
Test for attack success rate
Data from: original_data/data/sunglasses_poisoned_data.h5


  kl = y_true * np.log(y_true / y_pred)
  kl = y_true * np.log(y_true / y_pred)


Attack success rate before adding triggers: 99.99220576773187
Attack success rate after adding triggers: 0.00779423226812159
********************************************
Badnet 1
Test for clean accuracy
Accuracy before adding triggers: 97.1862821512081
Accuracy after adding triggers: 97.1083398285269
Test for attack success rate
Data from: original_data/data/anonymous_1_poisoned_data.h5
Attack success rate before adding triggers: 91.3971161340608
Attack success rate after adding triggers: 0.33125487139516757
********************************************
Badnet 2
Test for clean accuracy
Accuracy before adding triggers: 95.96258768511302
Accuracy after adding triggers: 95.8846453624318
********************************************
Badnet 3
Test for clean accuracy
Accuracy before adding triggers: 96.00935307872174
Accuracy after adding triggers: 94.4738893219018
Test for attack success rate
Data from: original_data/data/Multi-trigger Multi-target/eyebrows_poisoned_data.h5
Attack success rat