In [99]:
import keras
import sys
import h5py
import numpy as np
from keras.models import Model
import tensorflow as tf
!pip install tensorflow_model_optimization 
import tensorflow_model_optimization as tfmot
import tempfile



In [100]:
from google.colab import drive
drive.mount('/content/drive')
!git clone https://github.com/csaw-hackml/CSAW-HackML-2020.git

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
fatal: destination path 'CSAW-HackML-2020' already exists and is not an empty directory.


In [101]:
def data_loader(filepath):
    """Load the 'data' and 'label' datasets from an HDF5 file.

    Both datasets are copied fully into memory as NumPy arrays and the file
    is closed before returning, so no HDF5 handle is left open.

    Args:
        filepath: Path to an HDF5 file containing 'data' and 'label' datasets.

    Returns:
        Tuple ``(x_data, y_data)``. ``x_data`` is the 4-D 'data' array with its
        axes reordered by ``(0, 2, 3, 1)``; ``y_data`` is the 'label' array.
    """
    # The context manager guarantees the handle is released even if a dataset
    # is missing; np.array(...) materialises the data before the file closes.
    with h5py.File(filepath, 'r') as data:
        x_data = np.array(data['data'])
        y_data = np.array(data['label'])

    # Presumably converts channels-first storage to channels-last for the
    # network's (55, 47, 3) input -- TODO confirm against the dataset layout.
    x_data = x_data.transpose((0, 2, 3, 1))

    return x_data, y_data

def data_preprocess(x_data):
    """Return ``x_data`` divided by 255.

    The input is not modified; a new value is produced by true division.

    Args:
        x_data: Array (or scalar) of raw values to scale.

    Returns:
        ``x_data / 255``.
    """
    scaled = x_data / 255
    return scaled

In [102]:
# Paths: clean validation set on the mounted Drive, and the sunglasses
# backdoored network shipped inside the cloned CSAW-HackML-2020 repository.
clean_data_filename = "/content/drive/MyDrive/clean_validation_data.h5"
model_filename = "/content/CSAW-HackML-2020/models/sunglasses_bd_net.h5"
# Despite the x_test/y_test names, these hold the clean *validation* data;
# they are later fed through the network to measure per-channel activations.
x_test, y_test = data_loader(clean_data_filename)
# Scale pixel values by 1/255 (see data_preprocess).
x_test = data_preprocess(x_test)

In [103]:
# Load the backdoored network (architecture + weights) from the HDF5 file.
model = keras.models.load_model(model_filename)
#model.load_weights("/content/CSAW-HackML-2020/models/sunglasses_bd_weights.h5")
# get_weights() returns a list of arrays with different shapes. Keep it as a
# list: wrapping it in np.array() builds a ragged object array, which NumPy
# deprecated and rejects with a ValueError from 1.24 on. A list supports the
# same weights[i] indexing and is what set_weights() expects.
weights = model.get_weights()
print(weights[6].shape)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model.summary()

(2, 2, 60, 80)
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 55, 47, 3)]  0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 52, 44, 20)   980         input[0][0]                      
__________________________________________________________________________________________________
pool_1 (MaxPooling2D)           (None, 26, 22, 20)   0           conv_1[0][0]                     
__________________________________________________________________________________________________
conv_2 (Conv2D)                 (None, 24, 20, 40)   7240        pool_1[0][0]                     
_____________________________________________________________________________

  This is separate from the ipykernel package so we can avoid doing imports until


In [104]:
# Sub-model that maps the network input to the tensor *entering* layer
# 'pool_3' (presumably the output of the preceding conv_3 layer -- the
# visible model summary is truncated before that point, so confirm).
pool3 = Model(inputs=model.input,outputs=model.get_layer('pool_3').input)
# Activations of that tensor for every clean validation image.
out = pool3.predict(x_test)
print(out.shape)
# Average over the first three axes, leaving one mean activation per index of
# the last axis (60 values in the recorded output).
activation = np.mean(out, axis=(0,1,2))
print(activation)
# Indices of the last axis ordered from lowest to highest mean activation;
# the pruning cell zeroes channels in this order.
seq_sort = np.argsort(activation)
print(seq_sort)

(11547, 10, 8, 60)
[9.6013945e-01 1.3590817e+00 1.3118883e+00 3.1096270e+00 3.8877401e+00
 3.5732110e+00 4.9756446e+00 3.8493168e+00 1.8166021e+00 2.0806956e+00
 2.1497428e+00 3.2076209e+00 2.6633713e-01 2.5731442e+00 1.0528394e+00
 2.5300980e+00 2.1391459e+00 3.0582151e+00 4.3949590e+00 3.5355718e+00
 2.9967165e+00 2.7073579e+00 4.8394408e+00 2.4703627e+00 2.4340577e+00
 3.6329279e+00 1.6439316e+00 2.3767173e+00 1.6864364e+00 4.1031537e+00
 3.6172864e+00 5.1642329e-01 4.6472487e-05 1.9859385e+00 2.8094754e-01
 6.3475609e+00 5.9325069e-01 5.1831871e-01 4.2631078e+00 4.6744690e+00
 3.6585009e+00 3.5064101e+00 1.1827018e+00 2.7779670e+00 1.6325895e-02
 2.5966039e+00 3.0764854e+00 2.0676191e+00 2.0804553e+00 2.2883293e-04
 2.5152922e+00 2.9261477e+00 1.7282609e+00 2.9503524e+00 6.5149765e+00
 3.7570691e+00 4.0762239e+00 9.8970121e-01 1.2280625e+00 2.0454834e+00]
[32 49 44 12 34 31 37 36  0 57 14 42 58  2  1 26 28 52  8 33 59 47 48  9
 16 10 27 24 23 50 15 13 45 21 43 51 53 20 17 46  3 11 

In [105]:
# Number of channels to zero out, taken from the start of seq_sort (the
# ordering computed from the clean-data activations). 48 of 60 channels
# matches the 0.8 sparsity passed to ConstantSparsity below.
num_pruned_channels = 48
pruned_channels = seq_sort[:num_pruned_channels]

# Boolean record of which channels were zeroed. It was previously allocated
# with a hard-coded length of 60 and never written; it is now sized from
# seq_sort and actually filled in.
pruning_mask = np.zeros(seq_sort.shape[0], dtype=bool)
pruning_mask[pruned_channels] = True

# weights[4] / weights[5] are indexed as a 4-D kernel and its bias (presumably
# conv_3), weights[6] as the next kernel whose third axis consumes those
# channels (presumably conv_4) -- confirm against the full model summary.
# Vectorised indexing zeroes all selected channels at once.
weights[4][:, :, :, pruned_channels] = 0.
weights[5][pruned_channels] = 0.
weights[6][:, :, pruned_channels, :] = 0.
model.set_weights(weights)

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
# Arguments forwarded to prune_low_magnitude for each wrapped layer.
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(0.8, 0),
    'block_size': (1, 1),
}
def apply_pruning_to_dense(layer):
    """Clone function that wraps selected layers for magnitude pruning.

    Layers named 'conv_3', 'pool_3' or 'conv_4' are wrapped with
    ``tfmot.sparsity.keras.prune_low_magnitude`` using the module-level
    ``pruning_params``; every other layer is returned unchanged.

    Args:
        layer: A layer object exposing a ``name`` attribute.

    Returns:
        The pruning wrapper for a selected layer, otherwise ``layer`` itself.
    """
    targeted_names = ('conv_3', "pool_3", "conv_4")
    if layer.name not in targeted_names:
        return layer
    wrap = tfmot.sparsity.keras.prune_low_magnitude
    return wrap(layer, **pruning_params)
# Rebuild the model, passing every layer through apply_pruning_to_dense so the
# selected layers come back wrapped for magnitude pruning.
model_for_pruning=tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)
# NOTE(review): from_logits=True tells the loss that the model emits raw
# logits. The visible model summary is truncated before the output layer, so
# confirm the final layer has no softmax; if it does, this should be False.
model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model_for_pruning.summary()
# NOTE(review): this save runs before the fit() call in a later cell, so the
# file holds the model as it is *before* fine-tuning -- confirm that is the
# intended artifact.
model_for_pruning.save("/content/CSAW-HackML-2020/models/new_model1.h5")

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 55, 47, 3)]  0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 52, 44, 20)   980         input[0][0]                      
__________________________________________________________________________________________________
pool_1 (MaxPooling2D)           (None, 26, 22, 20)   0           conv_1[1][0]                     
__________________________________________________________________________________________________
conv_2 (Conv2D)                 (None, 24, 20, 40)   7240        pool_1[1][0]                     
____________________________________________________________________________________________



In [106]:
# Fine-tuning data: the clean validation set. Reuse the path constant defined
# with the other file names instead of repeating the literal, so the two
# cannot drift apart.
train_images, train_labels = data_loader(clean_data_filename)
train_images = data_preprocess(train_images)

In [107]:
batch_size = 128
epochs = 20
# Total optimizer steps over the 90% training split (validation_split=0.1
# below holds out the rest). end_step is not referenced in the visible cells;
# it is kept so any later use still finds it.
num_images = train_images.shape[0]*0.9
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

# Fine-tune the pruning-wrapped model on the clean validation data.
logdir = tempfile.mkdtemp()
callbacks = [
  # Advances the pruning step counter used by the pruning wrappers.
  tfmot.sparsity.keras.UpdatePruningStep(),
  # Writes pruning summaries to the temporary log directory.
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
model_for_pruning.fit(train_images, train_labels,
                  batch_size=batch_size, epochs=epochs, validation_split=0.1,
                  callbacks=callbacks)

# Accuracy on the sunglasses-poisoned data. The path gets its own name: it was
# previously stored in `clean_data_test`, which mislabelled poisoned data.
poisoned_data_test = '/content/drive/MyDrive/sunglasses_poisoned_data.h5'
test_images, test_labels = data_loader(poisoned_data_test)
test_images = data_preprocess(test_images)
_, poisoned_accuracy = model_for_pruning.evaluate(
   test_images, test_labels, verbose=0)
# Distinct labels: both results used to print as 'Pruned test accuracy:',
# making the two numbers indistinguishable in the output.
print('Pruned accuracy on poisoned data:', poisoned_accuracy)

# Accuracy on the clean test data. The globals left behind by this cell
# (clean_data_test, test_images, test_labels, model_for_pruning_accuracy)
# end up with the same values as before.
clean_data_test = '/content/drive/MyDrive/clean_test_data.h5'
test_images, test_labels = data_loader(clean_data_test)
test_images = data_preprocess(test_images)
_, model_for_pruning_accuracy = model_for_pruning.evaluate(
   test_images, test_labels, verbose=0)
print('Pruned accuracy on clean test data:', model_for_pruning_accuracy)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Pruned test accuracy: 0.0015588464448228478
Pruned test accuracy: 0.876227617263794
