In [1]:
pip install -q tensorflow-model-optimization

[?25l[K     |██                              | 10kB 21.2MB/s eta 0:00:01[K     |███▉                            | 20kB 27.0MB/s eta 0:00:01[K     |█████▊                          | 30kB 12.7MB/s eta 0:00:01[K     |███████▋                        | 40kB 10.3MB/s eta 0:00:01[K     |█████████▌                      | 51kB 4.4MB/s eta 0:00:01[K     |███████████▍                    | 61kB 4.6MB/s eta 0:00:01[K     |█████████████▎                  | 71kB 4.9MB/s eta 0:00:01[K     |███████████████▏                | 81kB 5.3MB/s eta 0:00:01[K     |█████████████████               | 92kB 5.6MB/s eta 0:00:01[K     |███████████████████             | 102kB 4.3MB/s eta 0:00:01[K     |████████████████████▉           | 112kB 4.3MB/s eta 0:00:01[K     |██████████████████████▊         | 122kB 4.3MB/s eta 0:00:01[K     |████████████████████████▊       | 133kB 4.3MB/s eta 0:00:01[K     |██████████████████████████▋     | 143kB 4.3MB/s eta 0:00:01[K     |███████████████████████

In [3]:
# Colab-only setup: expose Google Drive inside the runtime.
from google.colab import drive

# Mount Drive, then switch the working directory to the project folder so that the
# relative './github/...' paths used by later cells resolve.
drive.mount('/content/drive')
%cd /content/drive/My\ Drive/ML\ Project/

Mounted at /content/drive
/content/drive/My Drive/ML Project


In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import h5py
from PIL import Image
from matplotlib import pyplot as plt
from keras.preprocessing.image import array_to_img
import sys
import tensorflow_model_optimization as tfmot
import tempfile

In [4]:
# Locations of the saved network and of its separately stored weights.
B4_model = './github/models/anonymous_2_bd_net.h5'
B4_para = './github/models/anonymous_2_bd_weights.h5'

# Restore the model from the .h5 file, then load the weights file on top of it.
model_B4 = keras.models.load_model(B4_model)
model_B4.load_weights(B4_para)

# Print the layer-by-layer architecture.
model_B4.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 55, 47, 3)]  0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 52, 44, 20)   980         input[0][0]                      
__________________________________________________________________________________________________
pool_1 (MaxPooling2D)           (None, 26, 22, 20)   0           conv_1[0][0]                     
__________________________________________________________________________________________________
conv_2 (Conv2D)                 (None, 24, 20, 40)   7240        pool_1[0][0]                     
____________________________________________________________________________________________

In [5]:
# Short alias for the tfmot magnitude-pruning wrapper; used below to wrap model_B4.
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

In [6]:
def data_loader(filepath):
    """Load an image dataset from an HDF5 file.

    Reads the 'data' and 'label' datasets fully into memory, moves axis 1 of
    'data' to the last position, and scales the values by 1/255.

    Args:
        filepath: Path to an HDF5 file containing 'data' and 'label' datasets.

    Returns:
        Tuple ``(x, y)``: ``x`` is the transposed 'data' array divided by 255,
        ``y`` is the 'label' array unchanged.
    """
    # The context manager closes the file handle even if a read fails; both
    # datasets are copied into NumPy arrays before the file is closed.
    with h5py.File(filepath, 'r') as data:
        x_data = np.array(data['data'])
        y_data = np.array(data['label'])
    # Presumably (N, C, H, W) -> (N, H, W, C), matching the model's (55, 47, 3)
    # input shape -- TODO confirm against the data files.
    x_data = x_data.transpose((0, 2, 3, 1))
    return x_data / 255, y_data

# Clean validation split (later used to fine-tune the pruned model).
clean_validation_data_path = './github/data/clean_validation_data.h5'
x_data_valid, y_data_valid = data_loader(clean_validation_data_path)

# Clean test split.
clean_test_data_path = './github/data/clean_test_data.h5'
x_data_test, y_data_test = data_loader(clean_test_data_path)

# Sunglasses-trigger (poisoned) inputs.
sunglasses_poisoned_data_path = './github/data/sunglasses_poisoned_data.h5'
x_data_backdoor, y_data_backdoor = data_loader(sunglasses_poisoned_data_path)

In [7]:
# Sparsity ramps polynomially from 50% to 80% between training steps 0 and 2000.
# NOTE(review): end_step is fixed at 2000; whether fine-tuning actually runs that
# many steps depends on dataset size, batch size and epochs -- confirm.
sparsity_schedule = tfmot.sparsity.keras.PolynomialDecay(
    initial_sparsity=0.5,
    final_sparsity=0.8,
    begin_step=0,
    end_step=2000,
)
pruning_params = {'pruning_schedule': sparsity_schedule}

# Wrap the loaded model with the magnitude-pruning wrapper.
model_B4_after_pruning = prune_low_magnitude(model_B4, **pruning_params)



In [8]:
# Loss used while fine-tuning the pruned model.
# NOTE(review): from_logits=True assumes the network's final layer emits raw logits
# (no softmax) -- the output layer is not visible here, confirm.
finetune_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model_B4_after_pruning.compile(
    optimizer='adam',
    loss=finetune_loss,
    metrics=['accuracy'],
)

# Show the wrapped architecture.
model_B4_after_pruning.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 55, 47, 3)]  0                                            
__________________________________________________________________________________________________
prune_low_magnitude_conv_1 (Pru (None, 52, 44, 20)   1942        input[0][0]                      
__________________________________________________________________________________________________
prune_low_magnitude_pool_1 (Pru (None, 26, 22, 20)   1           prune_low_magnitude_conv_1[0][0] 
__________________________________________________________________________________________________
prune_low_magnitude_conv_2 (Pru (None, 24, 20, 40)   14442       prune_low_magnitude_pool_1[0][0] 
____________________________________________________________________________________________

In [9]:
# Scratch directory that receives the pruning summaries written during fine-tuning.
logdir = tempfile.mkdtemp()

callbacks = [
  # tfmot callbacks: advance the pruning step during training and log pruning summaries.
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
  # fit() below is given no validation data, so a "val_loss" metric is never produced
  # and an EarlyStopping watching it could not trigger. Monitor the training loss,
  # which is always logged. To stop on held-out loss instead, pass validation_data
  # (or validation_split) to fit() and switch the monitor back to "val_loss".
  keras.callbacks.EarlyStopping(monitor="loss", patience=0)
]

# Fine-tune the pruned model on the clean validation split.
model_B4_after_pruning.fit(x_data_valid, y_data_valid, epochs=3, callbacks=callbacks)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7ff27e5f6668>

In [10]:
# Load model_B4 again from disk so it can be compared against the pruned model.
# NOTE(review): presumably needed because the pruning wrappers share layers/weights
# with the model they wrapped, so fine-tuning altered the earlier instance -- confirm.
B4_model = './github/models/anonymous_2_bd_net.h5'
B4_para = './github/models/anonymous_2_bd_weights.h5'

model_B4 = keras.models.load_model(B4_model)
model_B4.load_weights(B4_para)

# Print the layer-by-layer architecture.
model_B4.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 55, 47, 3)]  0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 52, 44, 20)   980         input[0][0]                      
__________________________________________________________________________________________________
pool_1 (MaxPooling2D)           (None, 26, 22, 20)   0           conv_1[0][0]                     
__________________________________________________________________________________________________
conv_2 (Conv2D)                 (None, 24, 20, 40)   7240        pool_1[0][0]                     
____________________________________________________________________________________________

In [11]:
def evaluate(model, x, y):
  """Return the top-1 accuracy of `model` on (x, y) as a percentage.

  The predicted class for each sample is the argmax along axis 1 of
  `model.predict(x)`; the result is the mean of the element-wise matches
  against `y`, multiplied by 100.
  """
  class_scores = model.predict(x)
  predicted = np.argmax(class_scores, axis=1)
  hit_rate = np.mean(np.equal(predicted, y))
  return hit_rate * 100

In [12]:
# Accuracy of the pruned model on the clean validation split.
pruned_valid_acc = evaluate(model_B4_after_pruning, x_data_valid, y_data_valid)
print('model_B4_after_pruning valid accuracy:', pruned_valid_acc)

model_B4_after_pruning valid accuracy: 97.05551225426518


In [13]:
# Accuracy of the pruned model on the clean test split.
pruned_test_acc = evaluate(model_B4_after_pruning, x_data_test, y_data_test)
print('model_B4_after_pruning test accuracy:', pruned_test_acc)

model_B4_after_pruning test accuracy: 86.61730319563523


In [14]:
# Accuracy of the reloaded, unpruned model on the clean validation split.
baseline_valid_acc = evaluate(model_B4, x_data_valid, y_data_valid)
print('model_B4 valid accuracy:', baseline_valid_acc)

model_B4 valid accuracy: 95.82575560751711


In [15]:
# Accuracy of the reloaded, unpruned model on the clean test split.
baseline_test_acc = evaluate(model_B4, x_data_test, y_data_test)
print('model_B4 test accuracy:', baseline_test_acc)

model_B4 test accuracy: 95.96258768511302


In [16]:
# Predicted class per sunglasses-trigger input, from the original and the pruned model.
p1 = np.argmax(model_B4.predict(x_data_backdoor), axis=1)
p2 = np.argmax(model_B4_after_pruning.predict(x_data_backdoor), axis=1)
print(p1)
print(p2)

# An input is counted as a detected backdoor sample when the two models disagree on it.
n = sum(1 for original_label, pruned_label in zip(p1, p2) if original_label != pruned_label)
print('The number of cases we identify as backdoor data in the sunglasses dataset is:', n)

[4 4 4 ... 4 4 4]
[304   4 485 ... 860 333   4]
The number of cases we identify as backdoor data in the sunglasses dataset is: 5832


In [17]:
def repaired_B4(x, backdoor_label=1283):
  """Classify `x` with the repaired network (original + pruned model agreement).

  Both `model_B4` and `model_B4_after_pruning` predict a class for every
  sample (argmax along axis 1). Where the two agree, that class is returned;
  where they disagree, the sample is labelled `backdoor_label`.

  Args:
    x: Batch of inputs accepted by both models' `predict`.
    backdoor_label: Label assigned to samples on which the models disagree.
      Defaults to 1283 (presumably the extra "N+1" class reserved for
      backdoored inputs -- confirm against the number of clean classes).

  Returns:
    1-D NumPy array with one label per sample in `x`.
  """
  y1 = np.argmax(model_B4.predict(x), axis=1)
  y2 = np.argmax(model_B4_after_pruning.predict(x), axis=1)
  # Start from a copy of the original model's labels and overwrite the
  # disagreements in one vectorized step instead of a per-sample Python loop.
  res = np.array(y1)
  res[y1 != y2] = backdoor_label
  return res

In [18]:
# Labels from the repaired network on the clean validation split, scored as a percentage.
repaire_B4_valid_res = repaired_B4(x_data_valid)
valid_matches = np.equal(repaire_B4_valid_res, y_data_valid)
repaire_B4_valid_acc = np.mean(valid_matches)*100
print('repaired B4 valid accuracy: ',repaire_B4_valid_acc)

repaired B4 valid accuracy:  93.15839612020437


In [19]:
# Labels from the repaired network on the clean test split, scored as a percentage.
repaire_B4_test_res = repaired_B4(x_data_test)
test_matches = np.equal(repaire_B4_test_res, y_data_test)
repaire_B4_test_acc = np.mean(test_matches)*100
print('repaired B4 test accuracy: ',repaire_B4_test_acc)

repaired B4 test accuracy:  84.48947778643804


In [20]:
# Persist the fine-tuned pruned model to the current working directory.
# NOTE(review): the model is saved with its pruning wrappers still attached; loading it
# back presumably requires tfmot's prune scope (or stripping the wrappers first) -- confirm.
model_B4_after_pruning.save('./Pruned_B4.h5')