<a href="https://colab.research.google.com/github/GusSand/ML-BackDoor-Detector/blob/main/BadNetCleaner_v3_new_bdnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import sys
import h5py
import numpy as np
import tempfile
import os
import h5py
import matplotlib.pyplot as plt
# tensorflow, keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers
from keras.models import load_model
from keras.preprocessing import image
from keras import models
# sklearn
from sklearn.neighbors import LocalOutlierFactor
import keras

In [2]:
# Only use for RTX-30, tensorflow-nightly-gpu bug (must set the GPU)
# Restrict this process to GPU device 0.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Build a TF1-compat session config with allow_growth enabled, so GPU memory
# is requested on demand instead of being reserved up front.
config=tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
# Create the session with that config; the reference is kept in the global `sess`.
sess=tf.compat.v1.Session(config=config)
# Last expression of the cell: shows the installed Keras version as cell output.
keras.__version__

'2.4.3'

In [3]:
# Colab-only step: mount Google Drive at /content/drive. The project folder
# entered by the next cell lives on that drive.
from google.colab import drive
drive.mount('/content/drive')



Mounted at /content/drive


In [4]:
# Change into the project folder on the mounted drive; the relative paths used
# later in the notebook ('data/...', 'models/...') are resolved from here.
# NOTE(review): this path is relative, so it presumably only resolves when the
# cell is run from /content (the recorded output shows
# /content/drive/My Drive/secml) -- re-running the cell from inside the project
# folder would likely fail; confirm before relying on Run All twice.
%cd "drive/My Drive/secml"
# List the folder contents as a quick sanity check.
%ls

/content/drive/My Drive/secml
[0m[01;34m20201217-203813.297163[0m/  [01;34m20201217-204308.952941[0m/  [01;34m20201217-205624.489948[0m/
[01;34m20201217-203835.397107[0m/  [01;34m20201217-204709.428913[0m/  [01;34mdata[0m/
[01;34m20201217-204206.994060[0m/  [01;34m20201217-204729.275173[0m/  [01;34mmodels[0m/


In [5]:
def data_loader(filepath):
    """
    Load images and labels from an HDF5 file.

    Parameters
    ----------
    filepath : path of an .h5 file containing a 'data' and a 'label' dataset.

    Returns
    -------
    x_data : the 'data' array with its axes reordered as (0, 2, 3, 1)
        (presumably channels-first -> channels-last; TODO confirm against the dataset).
    y_data : the 'label' array.

    """
    # The context manager closes the file handle even if a read fails.
    # np.array(...) copies each dataset into memory, so the arrays stay valid
    # after the file is closed.
    with h5py.File(filepath, 'r') as data:
        x_data = np.array(data['data'])
        y_data = np.array(data['label'])
    x_data = x_data.transpose((0,2,3,1))

    return x_data, y_data

In [6]:
class NoveltyDetector(object):
    """
    Flags inputs whose conv4 activations are outliers relative to clean data.

    Clean validation images are grouped by label and passed through the BadNet
    to collect per-class conv4 features. At prediction time each input's conv4
    feature vector is compared with the clean features of its predicted class
    using LocalOutlierFactor; outliers are relabelled as `num_class`.
    """

    def __init__(self, badNet_model_filepath, img_shape = [1, 55, 47, 3],
                 clean_img_filepath = 'data/clean_validation_data.h5', num_class = 1283):
        """
        Parameters
        ----------
        badNet_model_filepath : 'models/XXXXXXXX_bd_net.h5'
        img_shape : stored as an int array in self.img_shape (not used by the
            methods of this class).
        clean_img_filepath : .h5 file with clean images ('data') and labels ('label').
        num_class : number of clean classes; also the label assigned to detected outliers.

        Return
        ----------
        None.

        """
        # np.array copies the argument, so the shared default list is never mutated.
        self.img_shape = np.array(img_shape, dtype=int)
        self.clean_img_filepath = clean_img_filepath
        self.num_class = num_class

        self.badNet_model_filepath = badNet_model_filepath


    def load_badnet_model(self):
        """
        Load BadNet model

        -------
        self.bdnet_model:
            model loaded from self.badNet_model_filepath.

        """
        self.bdnet_model = load_model(self.badNet_model_filepath)


    def clean_img_classify_by_label(self):
        """
        Classify images by their .h5 data labels

        -------
        self.clean_img_list:
           list indexed by label; entry k holds every clean image labelled k
           with the image axis kept as axis 0, even when a class has a single
           image or none,
           len=self.num_class.

        """
        # Close the file as soon as both datasets are copied into memory.
        with h5py.File(self.clean_img_filepath, 'r') as data:
            img_data = np.array(data['data'])
            img_label = np.array(data['label'])
        img_data = img_data.transpose((0,2,3,1))
        img_label = np.ravel(img_label)

        # Boolean-mask selection always keeps the leading image axis. The previous
        # argwhere + squeeze dropped it for classes with exactly one image, which
        # turned a one-image batch into a bare image.
        self.clean_img_list = [img_data[img_label == label_i]
                               for label_i in range(self.num_class)]


    def conv_4_result(self, img_set):
        """
        Get conv4 layer neuron activation results

        Parameters
        ----------
        img_set : image batch, NOT yet divided by 255 (the scaling happens here).

        Return
        -------
        conv_result : array of shape (num_img, features), one flattened
            activation map per image.

        """
        conv4_index = 8

        # Build the activation sub-model once per loaded BadNet and reuse it.
        # This method runs once per class during feature extraction, so
        # rebuilding the model on every call is wasted work.
        cached = getattr(self, '_activation_cache', None)
        if cached is None or cached[0] is not self.bdnet_model:
            # Output of the layer at position conv4_index-1 in the model's layer list.
            conv4_output = self.bdnet_model.layers[conv4_index-1].output
            activation_model = models.Model(inputs=self.bdnet_model.input, outputs=conv4_output)
            cached = (self.bdnet_model, activation_model)
            self._activation_cache = cached
        activation_model = cached[1]

        num_img = img_set.shape[0]

        layer_activation = activation_model.predict(img_set/255)
        # Flatten everything but the image axis.
        num_features = layer_activation.shape[1]*layer_activation.shape[2]*layer_activation.shape[3]
        layer_activation = layer_activation.reshape((num_img, num_features))

        return layer_activation


    def extract_clean_conv4_characters(self):
        """
        Extract clean data characters in conv4 layer

        Loads the clean images and the BadNet model, then computes the conv4
        features of every class.

        -------
        self.conv4_characters_list:
            list indexed by label; entry k holds the conv4 features of the
            clean images labelled k,
            len=self.num_class.
        """

        self.clean_img_classify_by_label()
        self.load_badnet_model()

        self.conv4_characters_list = []
        print("***Start to extract clean data characters in conv4 layer***")
        for img_array in self.clean_img_list:
            self.conv4_characters_list.append(self.conv_4_result(img_array))
        print("***Finish***")


    def image_novelty_detector_predict(self, val_img_set):
        """
        Get the right predict label from image dataset

        Parameters
        ----------
        val_img_set : image batch, NOT yet divided by 255.

        Returns
        -------
        y_hat : BadNet predictions; if image_i is detected as an outlier,
            y_hat[i] = self.num_class.
        result_conv4 : conv4 features of val_img_set, one row per image.

        """
        print("***Start bad net prediction***")
        y_hat = np.argmax(self.bdnet_model.predict(val_img_set/255), axis=1)
        print("***Predict finish***")
        print("***Start novelty detector prediction***")
        result_conv4 = self.conv_4_result(val_img_set)

        num_detected = 0
        for i, label_hat in enumerate(y_hat):
            # Clean features of the predicted class, with the sample under test
            # appended as the last row.
            MATRIX = np.concatenate((self.conv4_characters_list[label_hat], result_conv4[i][None,:]), axis = 0)

            # n_neighbors must be positive; the guard covers classes that have a
            # single clean sample (MATRIX with only two rows).
            clf = LocalOutlierFactor(n_neighbors=max(1, MATRIX.shape[0] - 2))
            predict_result = clf.fit_predict(MATRIX)
            # -1 on the last row means the tested sample is an outlier.
            if predict_result[-1] == -1:
                y_hat[i] = self.num_class
                num_detected += 1

        print("***Predict finish***")
        # Report the number of flagged samples (the old message lacked the f-prefix
        # and referenced the loop counter instead of the detection count).
        print(f"*** Novelty Detector: Detected: {num_detected}")
        return y_hat, result_conv4

    def get_conv4_characters_list(self):
        """
        Return conv4_characters_list

        Returns
        -------
        conv4_characters_list

        """
        return self.conv4_characters_list

In [7]:
class New_Decision_Function(object):
    """
    Small classifier retrained on clean conv4 features.

    It re-predicts the label from the conv4 features alone. Any disagreement
    with the label handed in by the caller is reported as the extra class
    `num_class`.
    """

    def __init__(self, badNet_weights_filepath, conv4_characters_list, img_shape = [1, 55, 47, 3],
                 clean_img_filepath = 'data/clean_validation_data.h5', num_class = 1283):
        """
        Parameters
        ----------
        badNet_weights_filepath : 'models/XXXXXXX_bd_weights.h5'
        conv4_characters_list : result from extract_clean_conv4_characters
            (one 2-D feature array per class, indexed by label).
        img_shape : stored as an int array in self.img_shape (not used by the
            methods of this class).
        clean_img_filepath : stored in self.clean_img_filepath (not used by the
            methods of this class).
        num_class : number of clean classes; also the label used for rejected samples.

        Return
        ----------
        None.

        """
        # np.array copies the argument, so the shared default list is never mutated.
        self.img_shape = np.array(img_shape, dtype=int)
        self.clean_img_filepath = clean_img_filepath
        self.num_class = num_class
        self.conv4_characters_list = conv4_characters_list
        self.badNet_weights_filepath = badNet_weights_filepath

    def sub_model_net(self):
        """
        Sub_model_net structure.

        960-wide input -> Dense(160, 'fc_2') -> ReLU -> Dense(num_class, softmax, 'output').

        Return
        ----------
        small_model.

        """
        # define input; the shape must be a tuple -- "(960)" is just the int 960.
        x = keras.Input(shape=(960,), name='input')
        fc_2 = keras.layers.Dense(160, name='fc_2')(x)
        add_1 = keras.layers.Activation('relu')(fc_2)
        # NOTE: the former "drop = keras.layers.Dropout(0.5)" was never connected
        # to the graph, so it has been removed; the network is unchanged.
        # output
        y_hat = keras.layers.Dense(self.num_class, activation='softmax', name='output')(add_1)
        model = keras.Model(inputs=x, outputs=y_hat)

        return model

    def load_weights_to_sub_model(self):
        """
        Sub_model_net load weights.

        Layers are matched by name (by_name=True) against the weights file.

        Return
        ----------
        sub_model : the sub-model with the weights loaded.

        """

        sub_model = self.sub_model_net()
        sub_model.load_weights(self.badNet_weights_filepath, by_name=True)

        return sub_model

    def retrain_sub_model(self):
        """
        Sub_model_net retrain.

        Fits the sub-model on the clean conv4 features for 20 epochs and stores
        it in self.sub_model.

        Return
        ----------
        None.

        """

        self.sub_model = self.load_weights_to_sub_model()

        # Stack the per-class feature arrays and derive each label from the
        # array's position in the list. The labels follow the real number of
        # samples per class, not a fixed 1283 x 9 layout, so classes of
        # different sizes are supported.
        class_features = [np.asarray(features) for features in self.conv4_characters_list]
        X = np.concatenate(class_features, axis=0)
        samples_per_class = [features.shape[0] for features in class_features]
        y = np.repeat(np.arange(len(class_features)), samples_per_class)

        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        opt = optimizers.Adam(learning_rate=0.001)
        self.sub_model.compile(optimizer=opt,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
        print("***Start to creat new decision model***")
        self.sub_model.fit(X, y, epochs=20)
        print("***Finish***")

    def image_new_decision_function_predict(self, img_conv4_result, img_label):

        """
        New decision function prediction.

        Parameters
        ----------
        img_conv4_result : conv4 features, one row per image.
        img_label : label per image to compare against the sub-model prediction.

        Return
        ----------
        yhat : New decision function prediction results; entries where the
            sub-model disagrees with img_label are set to self.num_class.

        """

        print("***Start new decision function prediction***")
        y_hat_submodel = np.argmax(self.sub_model.predict(img_conv4_result), axis=1)

        # Keep the sub-model label where both agree, otherwise mark as num_class.
        y_hat = np.where(y_hat_submodel != img_label, self.num_class, y_hat_submodel)
        print("***Predict finish***")

        return y_hat
        

In [8]:
# input is 'models/XXXXXXXX_bd_net.h5', 'models/XXXXXXXX_bd_weights.h5', 'data/clean_validation_data.h5'

class BadNetCleaner(object):
    """
    Two-stage backdoor filter built around one BadNet.

    Stage one is a NoveltyDetector, stage two a New_Decision_Function trained
    on the detector's clean conv4 features. Both are fully prepared inside the
    constructor.
    """

    def __init__(self, badNet_model_filepath, badNet_weights_filepath):
        """
        Build and train both stages.

        Parameters
        ----------
        badNet_model_filepath : 'models/XXXXXXXX_bd_net.h5'
        badNet_weights_filepath : 'models/XXXXXXX_bd_weights.h5'

        Return
        ----------
        None.

        """
        self.badNet_model_filepath = badNet_model_filepath
        self.badNet_weights_filepath = badNet_weights_filepath

        # Stage 1: collect per-class conv4 features of the clean data.
        self.novelty_detector = NoveltyDetector(self.badNet_model_filepath)
        detector = self.novelty_detector
        detector.extract_clean_conv4_characters()
        self.conv4_characters_list = detector.get_conv4_characters_list()

        # Stage 2: retrain the small decision model on those features.
        self.new_decision_function = New_Decision_Function(
            self.badNet_weights_filepath,
            self.conv4_characters_list,
        )
        self.new_decision_function.retrain_sub_model()

        print("***Initialzation finish***")


    def predict_label(self, img_set):
        """
        Run both stages on a batch of images.

        Parameters
        ----------
        img_set : image data X, MUST NOT /255!

        Return
        ----------
        y_hat_2 : BadNetCleaner predict results.

        """

        print("**************************************************************")
        # Stage 1 returns its labels together with the conv4 features, which
        # stage 2 consumes directly.
        stage_one_labels, conv4_features = self.novelty_detector.image_novelty_detector_predict(img_set)
        final_labels = self.new_decision_function.image_new_decision_function_predict(
            conv4_features, stage_one_labels
        )
        print("**************************END*********************************")

        return final_labels


In [9]:
# Build the cleaner for the sunglasses BadNet. The constructor does all the
# heavy work (clean conv4 feature extraction plus 20 epochs of sub-model
# training), so this cell takes a while to run.
bad_net_cleaner = BadNetCleaner('models/sunglasses_bd_net.h5','models/sunglasses_bd_weights.h5')

***Start to extract clean data characters in conv4 layer***
***Finish***
***Start to creat new decision model***
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
***Finish***
***Initialzation finish***


In [10]:
# Predict the poison data: every sample should be mapped to the extra label
# 1283 (the default num_class, i.e. class N+1 when the N = 1283 clean classes
# are numbered 0..1282).
# y_poison is loaded but not used below; the target is the constant 1283.
x_poison, y_poison = data_loader('data/sunglasses_poisoned_data.h5')
y_hat = bad_net_cleaner.predict_label(x_poison) # x_poison : raw image data X, must NOT be divided by 255 (predict_label scales it)
# Percentage of poisoned inputs that were flagged, i.e. the detection rate.
class_accu = np.mean(np.equal(y_hat, 1283))*100
print('Classification accuracy:', class_accu)

**************************************************************
***Start bad net prediction***
***Predict finish***
***Start novelty detector prediction***
***Predict finish***
***Start new decision function prediction***
***Predict finish***
**************************END*********************************
Classification accuracy: 98.01247077162898


In [11]:
# Predict the clean test data: the output should equal the true label y_test.
# A clean image that gets flagged (label 1283) counts as an error here.
x_test, y_test = data_loader('data/clean_test_data.h5')
y_hat_2 = bad_net_cleaner.predict_label(x_test) # x_test : raw image data X, must NOT be divided by 255 (predict_label scales it)
# Percentage of clean test images whose final label matches y_test.
class_accu_2 = np.mean(np.equal(y_hat_2, y_test))*100
print('Classification accuracy:', class_accu_2)

**************************************************************
***Start bad net prediction***
***Predict finish***
***Start novelty detector prediction***
***Predict finish***
***Start new decision function prediction***
***Predict finish***
**************************END*********************************
Classification accuracy: 84.17770849571318


# Try anonymous_bd_net

In [12]:
# Build the cleaner for the anonymous BadNet (same pipeline as above: feature
# extraction followed by 20 epochs of sub-model training).
# NOTE(review): the recorded output under this cell also contains prediction
# logs and an accuracy figure that this cell's code cannot print -- it looks
# stale; re-run the notebook to refresh it.
bad_net_cleaner_anonymous = BadNetCleaner('models/anonymous_bd_net.h5','models/anonymous_bd_weights.h5')



***Start to extract clean data characters in conv4 layer***
***Finish***
***Start to creat new decision model***
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
***Finish***
***Initialzation finish***
**************************************************************
***Start bad net prediction***
***Predict finish***
***Start novelty detector prediction***
***Predict finish***
***Start new decision function prediction***
***Predict finish***
**************************END*********************************
Classification accuracy: 85.01169134840218


In [None]:
# Predict the clean test data with the anonymous cleaner: the output should
# equal the true label y_test.
# Reuses x_test / y_test loaded by the sunglasses clean-test cell, so that cell
# must have been run first.
y_hat_3 = bad_net_cleaner_anonymous.predict_label(x_test) # x_test : raw image data X, must NOT be divided by 255 (predict_label scales it)
# Percentage of clean test images whose final label matches y_test.
class_accu_3 = np.mean(np.equal(y_hat_3, y_test))*100
print('Classification accuracy:', class_accu_3)

# Try multi_trigger_multi_target_bd_net

In [13]:
# Build the cleaner for the multi-trigger / multi-target BadNet (same pipeline:
# feature extraction followed by 20 epochs of sub-model training).
# NOTE(review): the recorded output under this cell also contains prediction
# logs and an accuracy figure that this cell's code cannot print -- it looks
# stale; re-run the notebook to refresh it.
bad_net_cleaner_multi_trigger = BadNetCleaner('models/multi_trigger_multi_target_bd_net.h5','models/multi_trigger_multi_target_bd_weights.h5')



***Start to extract clean data characters in conv4 layer***
***Finish***
***Start to creat new decision model***
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
***Finish***
***Initialzation finish***
**************************************************************
***Start bad net prediction***
***Predict finish***
***Start novelty detector prediction***
***Predict finish***
***Start new decision function prediction***
***Predict finish***
**************************END*********************************
Classification accuracy: 84.2244738893219


In [None]:
# Predict the clean test data with the multi-trigger cleaner: the output should
# equal the true label y_test.
# Reuses x_test / y_test loaded by the sunglasses clean-test cell, so that cell
# must have been run first.
y_hat_4 = bad_net_cleaner_multi_trigger.predict_label(x_test) # x_test : raw image data X, must NOT be divided by 255 (predict_label scales it)
# Percentage of clean test images whose final label matches y_test.
class_accu_4 = np.mean(np.equal(y_hat_4, y_test))*100
print('Classification accuracy:', class_accu_4)