In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.backend as K
import random
from tensorflow.keras.models import Model
import pickle
import pandas
import sklearn
from sklearn.model_selection import train_test_split
import h5py

In [2]:
from tensorflow.keras.models import Sequential
# from tensorflow.keras.utils import np_utils
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.datasets import cifar10
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import LearningRateScheduler
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# --- Input datasets -------------------------------------------------------
clean_data_filename = 'clean_validation_data.h5'
# Leaving this as None selects the multi-trigger poisoned datasets instead of
# a single poisoned file (see the loading code that checks `multi_trigger`).
poison_data_filename = None
test_data_filename = 'clean_test_data.h5'

# Multi-trigger mode is active exactly when no single poisoned file is named.
multi_trigger = poison_data_filename is None

# --- Model files ----------------------------------------------------------
# File the backdoored network is loaded from / name the repaired one is saved as.
model_filename = 'multi_trigger_multi_target_bd_net.h5'
save_path_name = 'good_multi_trigger_multi_target_bd_net.h5'
def data_loader(filepath):
    """Read the image and label arrays stored in an HDF5 file.

    Args:
        filepath: Path of an HDF5 file containing a 'data' and a 'label'
            dataset.

    Returns:
        Tuple ``(x_data, y_data)``: the 'data' array with its axes reordered
        by ``(0, 2, 3, 1)``, and the 'label' array.
    """
    # Use a context manager so the file handle is released as soon as both
    # datasets have been copied into memory; it used to be left open.
    with h5py.File(filepath, 'r') as data:
        x_data = np.array(data['data'])
        y_data = np.array(data['label'])

    # Move axis 1 to the end (presumably channels-first -> channels-last;
    # TODO confirm against the dataset description).
    x_data = x_data.transpose((0, 2, 3, 1))
    return x_data, y_data

def data_preprocess(x_data):
    """Return ``x_data`` divided by 255.

    Presumably rescales 8-bit pixel intensities into the [0, 1] range; the
    function itself only performs the division.
    """
    scaled = x_data / 255
    return scaled

def get_model(filename=model_filename):
    """Load a saved model from disk with ``load_model``.

    Args:
        filename: Path of the model file. Defaults to the value
            ``model_filename`` had when this function was defined.

    Returns:
        Whatever ``load_model(filename)`` returns (a freshly loaded model).
    """
    loaded = load_model(filename)
    return loaded

def _load_scaled(path):
    """Load one HDF5 split and return (preprocessed images, labels)."""
    images, labels = data_loader(path)
    return data_preprocess(images), labels


# Clean validation split.
clean_x, clean_y = _load_scaled(clean_data_filename)

if not multi_trigger:
    # Single poisoned file: load it and shuffle images and labels together.
    pois_x, pois_y = _load_scaled(poison_data_filename)
    pois_x, pois_y = sklearn.utils.shuffle(pois_x, pois_y)
else:
    # One poisoned file per trigger; they are stacked into a single set.
    data_1 = "Multi-trigger Multi-target-20201221T062111Z-001/Multi-trigger Multi-target/eyebrows_poisoned_data.h5"
    data_2 = "Multi-trigger Multi-target-20201221T062111Z-001/Multi-trigger Multi-target/sunglasses_poisoned_data.h5"
    data_3 = "Multi-trigger Multi-target-20201221T062111Z-001/Multi-trigger Multi-target/lipstick_poisoned_data.h5"
    pois_x_1, pois_y_1 = _load_scaled(data_1)
    pois_x_2, pois_y_2 = _load_scaled(data_2)
    pois_x_3, pois_y_3 = _load_scaled(data_3)
    pois_x = np.concatenate((pois_x_1, pois_x_2, pois_x_3), axis=0)
    pois_y = np.concatenate((pois_y_1, pois_y_2, pois_y_3), axis=0)

# Clean test split.
test_x, test_y = _load_scaled(test_data_filename)


# bd_model = keras.models.load_model(model_filename)

# clean_label_p = np.argmax(bd_model.predict(clean_x), axis=1)
# class_accu = np.mean(np.equal(clean_label_p, y_test))*100
# print('Classification accuracy:', class_accu)

In [4]:
# Inspect the dimensions of the poisoned image array.
pois_x.shape


(30792, 55, 47, 3)

In [5]:
from tensorflow.keras.models import load_model
# model.load_weights("sunglasses_bd_weights.h5")
# model.summary()
# model = load_model("sunglasses_bd_net.h5")
# model.summary()

In [6]:
import os
# List the working directory and keep every entry whose name contains 'data'.
# Despite its name, `weights_in_dir` therefore holds data-file names.
files_in_dir = os.listdir()
weights_in_dir = [file for file in files_in_dir if 'data' in file]
print(weights_in_dir)

['anonymous_1_poisoned_data.h5', 'clean_test_data.h5', 'clean_validation_data.h5', 'eyebrows_poisoned_data.h5', 'lipstick_poisoned_data.h5', 'sunglasses_poisoned_data.h5']


In [7]:
def get_accuracy(bd_model, clean_x=clean_x, clean_y=clean_y, name='Clean'):
    """Print and return the percentage of inputs whose predicted label matches.

    Args:
        bd_model: Object exposing ``predict``; the argmax over axis 1 of its
            output is used as the predicted label.
        clean_x: Inputs to evaluate. Defaults to the ``clean_x`` array that
            existed when this function was defined.
        clean_y: Reference labels compared element-wise with the predictions.
            Defaults to the ``clean_y`` array bound at definition time.
        name: Prefix used in the printed message.

    Returns:
        Tuple ``(accuracy_percent, predicted_labels)``.
    """
    scores = bd_model.predict(clean_x)
    predicted_labels = np.argmax(scores, axis=1)
    matches = np.equal(predicted_labels, clean_y)
    accuracy_percent = np.mean(matches) * 100
    print('{} accuracy is {}'.format(name, accuracy_percent))
    return accuracy_percent, predicted_labels

In [8]:
# for file in weights_in_dir:
#     bd_model = get_model(file)
#     clean_label_p = np.argmax(bd_model.predict(clean_x), axis=1)
#     class_accu = np.mean(np.equal(clean_label_p, clean_y))*100
#     print('Classification accuracy:', class_accu)
# #     class_accu = np.mean(np.equal(clean_y, y_pred))*100
#     print("Weights : {}".format(file))
# #     print('Classification accuracy:', class_accu)
#     print('----------------------------------------------------------')

In [9]:
# Hold out the last 10000 clean samples for validation; everything before
# them is used for training.
x_train, x_val = clean_x[:-10000], clean_x[-10000:]
y_train, y_val = clean_y[:-10000], clean_y[-10000:]

In [10]:
# from tensorflow.keras.applications.vgg16 import VGG16
# # from tensorflow.keras.applications import preprocess_input
# from tensorflow.keras.preprocessing.image import load_img
# from tensorflow.keras.preprocessing.image import img_to_array
# from tensorflow.keras.models import Model
# from matplotlib import pyplot as plt
# from numpy import expand_dims
# # load the model
# # redefine model to output right after the first hidden layer
# conv_layer_indices = []
# for i, layer in enumerate(model.layers):
#     if isinstance(layer, tf.keras.layers.Conv2D):
#         conv_layer_indices.append(i)

# model_1 = Model(inputs=model.inputs, outputs=model.layers[conv_layer_indices[-1]].output)

# # load the image with the required shape
# # img = load_img(, target_size=(55, 47, 3))
# img = clean_x[400]
# # convert the image to an array
# img = img_to_array(img)
# # expand dimensions so that it represents a single 'sample'
# img = expand_dims(img, axis=0)
# # prepare the image (e.g. scale pixel values for the vgg)
# # img = preprocess_input(img)
# # get feature map for first hidden layer
# feature_maps = model_1.predict(img)

# print(feature_maps.shape)

# # plot all 64 maps in an 8x8 squares
# square = 4
# ix = 1
# plt.figure(figsize=(20,10))
# for _ in range(square):
#     for _ in range(square):
#     # specify subplot and turn of axis
#         ax = plt.subplot(square, square, ix)
#         ax.set_xticks([])
#         ax.set_yticks([])
#         plt.imshow(feature_maps[0, :, :, ix-1], cmap='gray')
#         ix += 1
# # show the figure
# plt.show()

In [11]:
# Baseline: evaluate the freshly loaded model on the poisoned data.
current_model = get_model()
# Report under the 'Attack' label, as the other poisoned-data evaluations in
# this notebook do; the default label would print this figure as 'Clean'.
get_accuracy(current_model, pois_x, pois_y, name='Attack')

Clean accuracy is 94.29072486360094


(94.29072486360094, array([5, 5, 5, ..., 1, 1, 1], dtype=int64))

In [12]:
def get_con_layers_indices(model):
    """Return the positions in ``model.layers`` that hold Conv2D layers.

    Args:
        model: Object with a ``layers`` sequence.

    Returns:
        List of integer indices, in ascending order, of every layer that is
        an instance of ``tf.keras.layers.Conv2D``.
    """
    conv_type = tf.keras.layers.Conv2D
    return [
        position
        for position, candidate in enumerate(model.layers)
        if isinstance(candidate, conv_type)
    ]

import sklearn
# Shuffle images and labels together, then hold out the first 2000 samples as
# a test set and keep the rest for training. A fixed random_state makes the
# shuffle (and so the split) reproducible across kernel restarts.
clean_x, clean_y = sklearn.utils.shuffle(clean_x, clean_y, random_state=0)
x_test, x_train = clean_x[:2000], clean_x[2000:]
y_test, y_train = clean_y[:2000], clean_y[2000:]


def train_model(new_model, x_train=x_train, y_train=y_train):
    """Compile ``new_model`` and fit it on the given data.

    The model is compiled with sparse categorical cross-entropy, the 'adam'
    optimizer and an accuracy metric, then fitted for 15 epochs with batch
    size 64, holding out 20% of the data for validation.

    Args:
        new_model: Model to compile and train; it is modified in place.
        x_train: Training inputs. Defaults to the ``x_train`` array that
            existed when this function was defined.
        y_train: Training labels. Defaults to the ``y_train`` array bound at
            definition time.

    Returns:
        The same ``new_model`` object after fitting.
    """
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    new_model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

    fit_options = dict(batch_size=64, epochs=15, verbose=1, validation_split=0.2)
    new_model.fit(x_train, y_train, **fit_options)

    return new_model
    

In [13]:
def pruning(X, layer_number, model, pruning_percent=0.7):
    """Zero the least-activated output channels of one layer, in place.

    The mean activation of every channel of ``model.layers[layer_number]`` is
    measured on ``X``; the channels with the lowest means have their kernel
    slice (and bias entry, when the layer has one) set to zero.

    Args:
        X: Inputs used to measure the layer's activations.
        layer_number: Index into ``model.layers`` of the layer to prune.
        model: Model to prune. Its layer weights are overwritten.
        pruning_percent: Fraction (0-1) of the layer's channels to zero; the
            channel count is ``int(pruning_percent * total_channels)``.

    Returns:
        Tuple ``(model, clean_class_accu, attack_class_accu)``: the same model
        object plus the accuracies reported by ``get_accuracy`` on its default
        data and on ``pois_x`` / ``pois_y``.
    """
    layer = model.layers[layer_number]

    # Sub-model exposing the chosen layer's output for the probe inputs.
    probe = Model(inputs=model.inputs, outputs=layer.output)
    feature_maps = probe.predict(X)

    # Average over the first three axes, leaving one score per channel.
    activation = np.mean(feature_maps, axis=(0, 1, 2))
    sorted_indices = np.argsort(activation)
    total_channels = activation.shape[0]
    pruning_channels = int(pruning_percent * total_channels)
    pruned = sorted_indices[:pruning_channels]

    # Read and write the layer parameters once, instead of once per pruned
    # channel, and zero all selected channels with a single indexed assignment.
    params = layer.get_weights()
    params[0][..., pruned] = 0
    if len(params) > 1:
        # Only touch the bias when the layer actually has one.
        params[1][pruned] = 0
    layer.set_weights(params)

    print('Pruning Percent: {}'.format(pruning_percent * 100))
    clean_class_accu, _ = get_accuracy(model)
    attack_class_accu, _ = get_accuracy(model, pois_x, pois_y, name='Attack')
    return model, clean_class_accu, attack_class_accu

In [14]:
# Load a fresh copy of the model and record where its Conv2D layers sit.
current_model = get_model()
conv_layer_indices = get_con_layers_indices(current_model)

# First 2000 clean samples; passed to pruning() as the activation probe batch.
clean_x_sub = clean_x[:2000]


In [15]:
# Sweep pruning fractions 0.1 .. 1.0 over every Conv2D layer and record the
# resulting (clean accuracy, attack accuracy) in `percent_acc`, keyed by
# (p, layer index) where p is the fraction multiplied by 10.
# `n_samples` is only referenced by the commented-out sampling line below.
n_samples = 100
# rand_indices = random.sample(range(0, clean_x.shape[0]), n_samples)
percent_acc = {}
new_percent_acc = {}
for p in range(1, 11, 1):
    # These lists are only filled by the commented-out fine-tuning code below.
    clean_acc, attack_acc = [], []
    new_clean_acc, new_attack_acc = [], []
    for num in conv_layer_indices:
        # Reload from disk for each configuration: pruning() overwrites the
        # weights of the model it is given.
        current_model = get_model()
        print('-------------------------------------------------------------------------------------------------------------')
        print('Model after pruning, before tuning')
        m , c, a = pruning(clean_x_sub, num, current_model, p/10)
#         clean_acc.append((c,num, p))
#         attack_acc.append((a, num, p))
#         print('Model after fine pruning, after tuning')
# #         trained_model = train_model(m)
#         a = get_accuracy(trained_model)
#         b = get_accuracy(trained_model, pois_x, pois_y, name='Attack')
#         new_clean_acc.append((a, num, p))
#         new_attack_acc.append((b, num, p))
#         print('-----------------------------------------------------------------------------------------------------------------')
        percent_acc[(p, num)] = (c, a)
#     new_percent_acc[p] = (new_clean_acc, new_attack_acc)

-------------------------------------------------------------------------------------------------------------
Model after pruning, before tuning
Pruning Percent: 10.0
Clean accuracy is 96.26742876937733
Attack accuracy is 94.29072486360094
-------------------------------------------------------------------------------------------------------------
Model after pruning, before tuning
Pruning Percent: 10.0
Clean accuracy is 96.21546722092317
Attack accuracy is 94.16406858924395
-------------------------------------------------------------------------------------------------------------
Model after pruning, before tuning
Pruning Percent: 10.0
Clean accuracy is 96.2068069628475
Attack accuracy is 94.28422967004416
-------------------------------------------------------------------------------------------------------------
Model after pruning, before tuning
Pruning Percent: 10.0
Clean accuracy is 96.14618515631766
Attack accuracy is 94.30696284749285
-----------------------------------------

Pruning Percent: 90.0
Clean accuracy is 13.09431021044427
Attack accuracy is 13.46778383995843
-------------------------------------------------------------------------------------------------------------
Model after pruning, before tuning
Pruning Percent: 90.0
Clean accuracy is 65.59279466528103
Attack accuracy is 93.74512860483243
-------------------------------------------------------------------------------------------------------------
Model after pruning, before tuning
Pruning Percent: 100.0
Clean accuracy is 0.0779423226812159
Attack accuracy is 33.33333333333333
-------------------------------------------------------------------------------------------------------------
Model after pruning, before tuning
Pruning Percent: 100.0
Clean accuracy is 0.0779423226812159
Attack accuracy is 33.33333333333333
-------------------------------------------------------------------------------------------------------------
Model after pruning, before tuning
Pruning Percent: 100.0
Clean accurac

In [16]:
# Show the recorded (clean accuracy, attack accuracy) pairs, keyed by
# (pruning fraction * 10, layer index).
percent_acc

{(1, 1): (96.26742876937733, 94.29072486360094),
 (1, 3): (96.21546722092317, 94.16406858924395),
 (1, 5): (96.2068069628475, 94.28422967004416),
 (1, 7): (96.14618515631766, 94.30696284749285),
 (2, 1): (96.26742876937733, 94.28747726682255),
 (2, 3): (95.9210184463497, 81.90763834762276),
 (2, 5): (95.81709534944142, 94.04390750844375),
 (2, 7): (96.05092231748506, 94.26474408937386),
 (3, 1): (96.1981467047718, 91.6406858924396),
 (3, 3): (94.94240928379666, 40.24097168095609),
 (3, 5): (94.05906296007622, 93.23850350740453),
 (3, 7): (94.85580670303975, 94.46934268641205),
 (4, 1): (96.1115441240149, 89.69862301896596),
 (4, 3): (93.33160128171819, 38.85100025980775),
 (4, 5): (90.21390837446957, 56.9790854767472),
 (4, 7): (93.65203083051875, 94.46934268641205),
 (5, 1): (95.81709534944142, 63.03260587165498),
 (5, 3): (84.63670217372477, 41.79332294102364),
 (5, 5): (84.72330475448169, 35.06755001299039),
 (5, 7): (91.53026760197453, 94.3394388152767),
 (6, 1): (94.66528102537455

In [17]:
# Choose the (layer, pruning fraction) with the lowest attack accuracy among
# the configurations whose clean accuracy stays above 55.
layer_number = float('-inf')
percent = float('-inf')
min_acc = float('inf')
opt_percent = None
for key, value in percent_acc.items():
    current_percent, current_layer = key
    def_acc, at_acc = value
    # Skip anything that is too inaccurate on clean data or does not improve
    # on the best attack accuracy seen so far.
    if not (def_acc > 55 and at_acc < min_acc):
        continue
    min_acc = at_acc
    layer_number = current_layer
    # Keys store the fraction multiplied by 10; convert back to a fraction.
    opt_percent = current_percent / 10
        
    

In [18]:
# Show the layer index and pruning fraction chosen by the search above.
layer_number, opt_percent

(3, 0.6)

In [19]:
# Prune the selected layer of a freshly loaded model, then fine-tune the
# pruned model with train_model()'s default training data.
current_model = get_model()
pruned_model = pruning(clean_x_sub, layer_number, current_model, opt_percent)[0]
defended_model = train_model(pruned_model)

Pruning Percent: 60.0
Clean accuracy is 61.82558240235559
Attack accuracy is 20.891790075344247
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [20]:
# Reuse the poisoned set under the anon_* names used by the cells below.
# pois_x was already preprocessed when it was loaded, so no rescaling here.
anon_x = pois_x
anon_y = pois_y
# anon_x = data_preprocess(anon_x)

In [21]:
def _report_clean_then_attack(model):
    """Print clean and attack accuracy for ``model``; return the attack result."""
    get_accuracy(model)
    return get_accuracy(model, clean_x=anon_x, clean_y=anon_y, name='Attack')


# Original model first, then the pruned and fine-tuned one, separated by a rule.
current_model = get_model()
acc, idxs = _report_clean_then_attack(current_model)
print('-------------------------------------------------------------------------------------------')
acc, idxs = _report_clean_then_attack(defended_model)

Clean accuracy is 96.26742876937733
Attack accuracy is 94.29072486360094
-------------------------------------------------------------------------------------------
Clean accuracy is 93.981120637395
Attack accuracy is 5.8554169914263445


In [22]:
# defended_model = get_model(os.path.join(output_path, 'good_net_multi_trigger_multi_target_bd_net.h5'))
# NOTE(review): `last_layer` is not referenced by any later visible cell; the
# new classification head is created separately where the model is rebuilt.
last_layer = tf.keras.layers.Dense(1284, activation='softmax')

# Relabel every poisoned sample with class index 1283 (the last of the 1284
# outputs). np.full builds the constant array directly.
anon_y = np.full(len(anon_y), 1283, dtype=np.int32)

new_x = np.concatenate([clean_x, anon_x])
# new_y = np.expand_dims(np.concatenate([clean_y, anon_y], axis=0), axis=0)
new_y = np.concatenate([clean_y, anon_y])
# A fixed random_state keeps the train/test membership reproducible.
# Note: test_x / test_y replace the clean test arrays loaded earlier.
train_x, test_x, train_y, test_y = train_test_split(
    new_x, new_y, test_size=0.2, shuffle=True, random_state=0)

In [23]:
def freeze_layers(model):
    """Mark every layer except the first and the last as non-trainable.

    Args:
        model: Object with a ``layers`` list; the layers are modified in place.

    Returns:
        The same ``model`` object.
    """
    for inner_layer in model.layers[1:-1]:
        inner_layer.trainable = False
    return model

In [24]:
# Attach a new 1284-way softmax head to the output of defended_model's
# second-to-last layer and train the resulting model on the relabelled data.
# NOTE(review): the new model is built on defended_model's own layer outputs,
# so it appears to share those layers. With the freeze_layers call commented
# out, this training run presumably also updates defended_model's weights;
# confirm before relying on defended_model afterwards.
x = defended_model.layers[-2].output
x = tf.keras.layers.Dense(1284, activation='softmax')(x)
defend_model_with_new_layer = Model(inputs=defended_model.input, outputs=x)
# final_output_model = freeze_layers(defend_model_with_new_layer)
final_output_model = train_model(defend_model_with_new_layer, train_x, train_y)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [25]:
# Accuracy of the retrained model on the training portion of the combined
# clean + relabelled poisoned data.
acc, idxs = get_accuracy(final_output_model, clean_x=train_x, clean_y=train_y, name=' ')

  accuracy is 97.49638333677777


In [26]:
# Accuracy on the held-out portion of the combined data. test_x / test_y here
# come from the train_test_split call, not from the clean test file loaded at
# the start of the notebook.
acc, idxs = get_accuracy(final_output_model, clean_x=test_x, clean_y=test_y, name=' ')

  accuracy is 89.92678318375059


In [27]:
# Evaluate the retrained model on all clean samples and on all poisoned
# samples, scoring a poisoned sample as correct when it is assigned class 1283.
# NOTE(review): both sets were part of the data split for training, so these
# figures presumably include samples the model was trained on.
acc, idxs = get_accuracy(final_output_model, clean_x=clean_x, clean_y=clean_y, name='Clean ')
acc, idxs = get_accuracy(final_output_model, clean_x=pois_x, clean_y=[1283] * len(pois_y), name='Attack ')

Clean  accuracy is 87.25210011258335
Attack  accuracy is 99.25630033775008


In [28]:
# Directory the repaired model is written to. The OUTPUT_MODEL_DIR environment
# variable overrides the original hard-coded location, so the notebook can run
# on other machines without editing this cell.
output_path = os.environ.get('OUTPUT_MODEL_DIR', 'C:/Users/Anand/Desktop/output_models')
# Create the directory up front so the save does not fail on a missing folder.
os.makedirs(output_path, exist_ok=True)
# NOTE(review): defended_model appears to share layers with the model that was
# trained with the new 1284-way head; confirm these are the weights intended
# for export.
defended_model.save(os.path.join(output_path, save_path_name))