In [1]:
import tensorflow as tf
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
# Enable on-demand GPU memory allocation for every visible GPU instead of
# letting TensorFlow reserve the whole device memory up front.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

In [2]:
def preprocess():
    """Load CIFAR-10 and return normalised images plus integer and one-hot labels.

    Images are converted to float32 and scaled to [0, 1].  Both label arrays
    are flattened to 1-D before one-hot encoding, so the one-hot tensors have
    shape (N, 10) for the train *and* the test split.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test, y_train_onehot,
        y_test_onehot)`` where the ``x_*`` entries are float32 numpy arrays,
        the ``y_*`` entries are 1-D integer numpy arrays and the ``*_onehot``
        entries are TensorFlow tensors with 10 classes.
    """
    (x_train, y_train), (x_test, y_test)=tf.keras.datasets.cifar10.load_data()
    print("y_train_shape:",y_train.shape)

    # load_data() returns labels as (N, 1); flatten both splits without
    # hard-coding the dataset size so one_hot yields (N, 10) in both cases.
    y_train=y_train.reshape(-1)
    y_test=y_test.reshape(-1)

    x_train_normalize=x_train.astype('float32') / 255.0
    x_test_normalize=x_test.astype('float32') / 255.0
    y_train_onehot=tf.one_hot(y_train,10)
    y_test_onehot=tf.one_hot(y_test,10)

    print("y_train_onehot_shape:",y_train_onehot.shape)

    return x_train_normalize,x_test_normalize,y_train, y_test,y_train_onehot, y_test_onehot
# Unpack the preprocessed CIFAR-10 splits into notebook-level variables.
(
    x_train_normal,
    x_test_normal,
    y_train,
    y_test,
    y_train_onehot,
    y_text_onehot,
) = preprocess()

# Load the classifier under attack from the HDF5 file next to the notebook.
resnet = tf.keras.models.load_model("resnet.h5")

y_train_shape: (50000, 1)
y_train_onehot_shape: (50000, 10)


In [16]:
# The attack tool implements three attacks: FGSM, PGD and DeepFool.
class attack_tool_cifar10():
    def __init__(self,model,img,label,y_train): #input label should be np array
        self.y_train=y_train
        #img=x_train_normal
        #label=one hot
        #y_train=y_train not one hot
        #-----basic data
        self.attack_model=model
        self.img=img#shape(1*32*32*3) np array
        print("basic data num:",self.img.shape[0])
        self.label=label #np array one hot
        self.prediction=model.predict(img) #predict return np model(x) return tensor
        
        #-----------------
        self.attack_target_num=0
        
        self.attack_img=0
        self.attack_predict_label=0
        self.attack_true_label=0
        self.attack_directors=0
     
    def same_label(self,predict_label,label):  #notice input numpy array do not use tensor  
        return np.where(predict_label==predict_label.max())==np.where(label==label.max())
    def attack_target(self): 
        
        img=self.img
        predict_label=self.prediction
        label=self.label
        #print(predict_label.shape)
        #print(label.shape)
                                   #notice input numpy array do not use tensor
        b=0                                     #img in shape 1*32*32*3
        for a in range(predict_label.shape[0]):
            #print("test box",predict_label[a],label[a],self.same_label(predict_label[a],label[a]))
            
            if b==0 and  self.same_label(predict_label[a],label[a]):

                target=img[b].reshape(1,32,32,3)
                pre_label=predict_label[b].reshape(1,10)
                target_label=label[b].reshape(1,10)
                b=b+1
            elif(b!=0 and self.same_label(predict_label[a],label[a])):
                target=np.concatenate((target,img[b].reshape(1,32,32,3)),axis=0)
            
                pre_label=np.concatenate((pre_label,predict_label[b].reshape(1,10)),axis=0)
                target_label=np.concatenate((target_label,label[b].reshape(1,10)),axis=0)
                #print(np.where(predict_label[a]==predict_label[a].max()))                  #test box
                #print(np.where(label[a]==label[a].max()))
                b=b+1 #find the num of attack target
                print(b)
                
                #print(target_label.shape)
            else:
                print("data had already confuse the model")
                print(np.where(predict_label[a]==predict_label[a].max()))
                print(np.where(label[a]==label[a].max()))
          
        self.attack_target_num=b
        print("can attack target have ",b)
        self.attack_img=target
        self.attack_predict_label=pre_label
        self.attack_true_label=target_label
        
        #return target,pre_label,target_label

            
        
    def attack_director(self,input_image,input_label):
        """Return the sign of the loss gradient with respect to the input image.

        The loss is the categorical cross-entropy between ``input_label``
        (reshaped to (1, 10)) and the output of ``self.attack_model`` on the
        image.  The result is also stored in ``self.attack_directors``.

        Args:
            input_image: image batch convertible to a float32 tensor.
            input_label: numpy array with 10 entries (one-hot label).

        Returns:
            Tensor with the element-wise sign of the gradient.
        """
        cross_entropy = tf.keras.losses.CategoricalCrossentropy()
        target = input_label.reshape(1,10)

        with tf.GradientTape() as grad_tape:
            image_tensor = tf.convert_to_tensor(input_image, dtype=tf.float32)
            # The image is not a variable, so it has to be watched explicitly.
            grad_tape.watch(image_tensor)
            loss_value = cross_entropy(target, self.attack_model(image_tensor))

        direction = tf.sign(grad_tape.gradient(loss_value, image_tensor))
        self.attack_directors = direction
        return direction
    def attack_noise(self,input_image,director,eps=0.8):   #output noise picture

        noise=eps*director.numpy()
        #noise=noise.reshape(32,32,3)
        #print(noise)
        #print("noise is")
        #plt.imshow(noise)
        noise_picture=input_image+noise
        return noise_picture,noise
    
    def fgsm(self,eps,num,show=True):
        """Run a single-step FGSM attack on the first ``num`` stored images.

        Only images that the model classifies as ``y_train[a]`` are attacked.
        For each of them the image is moved by ``eps`` along the sign of the
        loss gradient, clipped to [0, 1] and classified again.  A changed
        prediction counts as a success and adds ``norm(noise) / norm(img)``
        to the robustness sum (the noise is measured before clipping).

        Args:
            eps: step size applied to the gradient sign.
            num: number of leading images to consider; clamped to the number
                of stored images.
            show: when True (default) print the predictions and plot the clean
                and perturbed image for every attacked sample.  Pass False for
                long runs to avoid thousands of figures.

        Prints the mean robustness over successful attacks (nan when none
        succeeded), the number of attacked samples and the number of successes.
        """
        y_train=self.y_train
        model=self.attack_model
        num=min(num,len(self.img))  # never index past the stored images
        robustness_all=0
        attack_num=0
        attack_success=0
        print("attacking")
        for a in tqdm(range(num)):
            img=self.img[a].reshape(1,32,32,3).copy()
            label=tf.one_hot(y_train[a],10).numpy().reshape(1,10)

            # Samples the model already gets wrong are not attacked.
            if not self.same_label(model(img).numpy(),label):
                continue
            attack_num=attack_num+1

            director=self.attack_director(img,label)
            noise_picture,noise=self.attack_noise(input_image=img,director=director,eps=eps)
            noise_picture=tf.clip_by_value(noise_picture,0,1)  # keep a valid image
            test=model.predict(noise_picture)

            if show:
                print("noise picture")
                print("noise prediction",test)
                plt.imshow(noise_picture.numpy().reshape(32,32,3))
                plt.show()
                print("clean picture")
                print(model.predict(img))
                plt.imshow(img.reshape(32,32,3))
                plt.show()

            if self.same_label(test.reshape(1,10),label):
                continue  # prediction unchanged: the attack failed

            robustness_all=robustness_all+tf.norm(noise)/tf.norm(img)
            attack_success=attack_success+1
            if show:
                print("noise picture")
                print(test)
                plt.imshow(noise_picture.numpy().reshape(32,32,3))
                plt.show()
                print("clean picture")
                print(y_train[a])
                plt.imshow(img.reshape(32,32,3))
                plt.show()

        # Guard the mean: with a tiny eps it is normal that no attack succeeds.
        robustness=robustness_all/attack_success if attack_success else float("nan")
        print("robustness=",robustness)
        print("attack number is ",attack_num)
        print("success number is",attack_success)

    def PGD(self,eps,num,esporch=10,a_step=0.01,show=True):
        """Run an iterative (PGD-style) attack on the first ``num`` stored images.

        Only images that the model classifies as ``y_train[a]`` are attacked.
        Each of the ``esporch`` iterations adds ``a_step`` times the gradient
        sign to the accumulated noise, clips the noise to [-eps, eps] and
        clips the perturbed image to [0, 1].  A changed final prediction
        counts as a success and adds ``norm(noise) / norm(img)`` to the
        robustness sum.

        Args:
            eps: bound on the absolute value of every noise element.
            num: number of leading images to consider; clamped to the number
                of stored images.
            esporch: number of gradient steps per image.
            a_step: step size of a single gradient step.
            show: when True (default) print predictions and plot the images of
                every successful attack.

        Prints the success rate, the number of attacked samples, the number of
        successes and the mean robustness; ratios without data print as nan.
        """
        y_train=self.y_train
        model=self.attack_model
        num=min(num,len(self.img))  # never index past the stored images
        robustness_all=0
        attack_num=0
        attack_success=0
        print("attacking")
        for a in tqdm(range(num)):
            img=self.img[a].copy().reshape(1,32,32,3)
            label=tf.one_hot(y_train[a],10).numpy().reshape(1,10)

            # Samples the model already gets wrong are not attacked.
            if not self.same_label(model(img).numpy(),label):
                continue
            attack_num=attack_num+1

            # Start from the clean image with zero noise; both are tensors so
            # the display code below can rely on .numpy().
            img_now=tf.convert_to_tensor(img)
            noise_all=tf.zeros_like(img_now)
            for _ in range(esporch):
                director=self.attack_director(img_now,label)
                # Keep the accumulated noise inside the eps ball ...
                noise_all=tf.clip_by_value(noise_all+a_step*director,-eps,eps)
                # ... and the perturbed image inside the valid pixel range.
                img_now=tf.clip_by_value(img+noise_all,0,1)

            test=model.predict(img_now)
            if self.same_label(test.reshape(1,10),label):
                continue  # prediction unchanged: the attack failed

            if show:
                print("test label",test.reshape(10))
                print("noise picture")
                plt.imshow(img_now.numpy().reshape(32,32,3))
                plt.show()
                print("original predict",model.predict(img))
                plt.imshow(noise_all.numpy().reshape(32,32,3))
                plt.show()
                plt.imshow(img.reshape(32,32,3))
                plt.show()
                print(test.reshape(1,10))

            robustness_all=robustness_all+tf.norm(noise_all)/tf.norm(img)
            attack_success=attack_success+1

        # Guard both ratios: either counter can legitimately be zero.
        success_rate=attack_success/attack_num if attack_num else float("nan")
        robustness=robustness_all/attack_success if attack_success else float("nan")
        print("success rate",success_rate)
        print("attack number is ",attack_num)
        print("success number is",attack_success)
        print("robustness=",robustness)
        
               
            
            
            
    
        
        
                
    def deepfool1(self,model,img,predict_label,label,max_iter=10,class_num=10):
        """Run a DeepFool-style attack on a single image.

        In every iteration the model outputs are linearised around the current
        image, the class whose decision boundary is closest is selected, and
        the image is moved just across that boundary.  The loop ends as soon
        as the prediction no longer matches ``predict_label`` or after
        ``max_iter`` iterations.

        Args:
            model: callable mapping an image batch to class scores of shape
                (1, class_num).
            img: single image batch; the callers in this class pass shape
                (1, 32, 32, 3).
            predict_label: reference score vector; the attack succeeds once the
                model's arg-max differs from the arg-max of this vector.
            label: integer index of the class to move away from.
            max_iter: maximum number of iterations.
            class_num: number of classes in the model output.

        Returns:
            Tuple ``(r_total, success)``.  ``r_total`` is the sum of all steps
            taken (steps are summed before the image is clipped to [0, 1]) and
            ``success`` tells whether the prediction changed.  When the model
            does not agree with ``predict_label`` to begin with, ``(0, 0)`` is
            returned.
        """
        def class_gradient(x,k):
            # Gradient of the k-th model output with respect to the input x.
            with tf.GradientTape() as tape:
                tape.watch(x)
                score=model(x)[0,k]
            return tape.gradient(score,x)

        tiny=1e-8       # protects the divisions against a zero gradient norm
        overshoot=1e-4  # small extra distance so the boundary is really crossed

        label=int(np.asarray(label).reshape(-1)[0])
        img_now=tf.cast(tf.convert_to_tensor(img),tf.float32)

        # Nothing to attack when the model already disagrees with the reference.
        if not self.same_label(predict_label,model(img_now).numpy()):
            return 0,0

        # The total perturbation is accumulated over ALL iterations.
        r_total=tf.zeros_like(img_now)
        success=False
        loop=0
        while loop < max_iter and not success:
            # One forward pass and one reference gradient per iteration; both
            # are identical for every candidate class k.
            output=model(img_now).numpy()
            grad_orig=class_gradient(img_now,label)

            pert=np.inf
            w=None
            w_norm=None
            for k in range(class_num):
                if k == label:
                    continue
                w_k=class_gradient(img_now,k)-grad_orig
                w_k_norm=tf.norm(w_k)
                fk=output[0,k]-output[0,label]
                # Distance to the linearised boundary between class k and label.
                pert_k=np.abs(fk)/(w_k_norm+tiny)
                if pert_k < pert:
                    pert=pert_k
                    w=w_k
                    w_norm=w_k_norm

            if w is None:
                break  # no competing class (class_num <= 1)

            # Step of length pert (+ overshoot) along the unit direction w/||w||.
            r_i=(pert+overshoot)*w/(w_norm+tiny)
            r_total=r_total+r_i
            img_now=tf.clip_by_value(img_now+r_i,0,1)
            loop=loop+1

            # Check for success first so that a success reached in the last
            # allowed iteration is not reported as a failure.
            success=not self.same_label(predict_label,model(img_now).numpy())

        if not success:
            print("out of loop")

        return r_total,success
    def attack_all_deepfool(self,num):
        """Run ``deepfool1`` on the first ``num`` stored images and summarise.

        Only images whose model prediction matches the stored one-hot label
        are attacked.  Every successful attack adds ``norm(noise) / norm(x)``
        to the robustness sum.

        Args:
            num: number of leading images to consider; clamped to the number
                of stored images.

        Returns:
            Tuple ``(success_num, robustness)`` where ``robustness`` is the
            mean ratio over successful attacks, or nan when none succeeded.
        """
        y_train=self.y_train
        resnet=self.attack_model
        num=min(num,len(self.img))  # never index past the stored images
        robustness_all=0
        success_num=0
        attack_number=0
        for a in tqdm(range(num)):
            x=self.img[a].reshape(1,32,32,3)
            # np.asarray accepts numpy arrays as well as eager tensors.
            y=np.asarray(self.label[a]).reshape(1,10)

            # Samples the model already gets wrong are not attacked.
            if not self.same_label(y,resnet(x).numpy()):
                continue
            attack_number+=1

            noise,success=self.deepfool1(resnet,x,y,y_train[a])
            if not success:
                continue

            print("success")
            print(robustness_all)
            robustness_all=robustness_all+tf.norm(noise)/tf.norm(x)
            success_num+=1

        print(attack_number)
        print("success_num",success_num)
        # Guard the mean: it is possible that no attack succeeded.
        robustness=robustness_all/success_num if success_num else float("nan")
        print("robustness=",robustness)
        return success_num,robustness
    

In [17]:
attack=attack_tool_cifar10(resnet,x_train_normal,y_train_onehot,y_train) # build the attack tool (the constructor runs model.predict on all given images)

basic data num: 50000


In [26]:
#attack.PGD(0.1,100,esporch=10,a_step=0.01) #PGD attack

In [9]:
#attack.PGD(0.1,10000,esporch=10,a_step=0.01)

In [10]:
#attack.PGD(0.1,10000,esporch=20,a_step=0.001)

In [7]:
#attack.PGD(0.1,10,esporch=10,a_step=0.01)

In [8]:
#attack.fgsm(0.3,10000) 

In [25]:
#attack.fgsm(0.2,10000) #fgsm attack

In [24]:
#attack.fgsm(0.1,10000)

In [23]:
#attack.fgsm(0.05,10000)

In [22]:
#attack.fgsm(0.01,10000)

In [5]:
#attack.fgsm(0.001,10000)

In [6]:
#attack.fgsm(0.0005,10000)