In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.framework import graph_util
from tensorflow.python.platform import gfile
import os
# Path of the frozen model file (a serialized GraphDef).
pb_file_path="tensorflow_mnist_graph.pb"
# MNIST data, labels one-hot encoded.
mnist=input_data.read_data_sets('MNIST_data/',one_hot=True)
# Session used for prediction.
persisted_sess = tf.Session()
# Read and parse the serialized graph definition.
with gfile.FastGFile(pb_file_path,'rb') as model_file:
    graph_def=tf.GraphDef()
    graph_def.ParseFromString(model_file.read())
# `Graph.as_default()` returns a context manager and has no effect unless it is
# entered, so wrap the import in `with` to make sure the nodes are added to the
# graph owned by `persisted_sess`.
with persisted_sess.graph.as_default():
    tf.import_graph_def(graph_def,name='')
# Look up the input and output tensors of the imported graph by name.
persisted_input=persisted_sess.graph.get_tensor_by_name("input:0")
persisted_keep_prob = persisted_sess.graph.get_tensor_by_name("keep_prob:0")
persisted_output = persisted_sess.graph.get_tensor_by_name("output:0")

In [None]:
print('>> Computing feedforward function...')
def f(image_inp,keep_prob=1.0):
    """Evaluate the network output for `image_inp`.

    Args:
        image_inp: value fed to the `persisted_input` tensor.
        keep_prob: value fed to the `persisted_keep_prob` tensor
            (defaults to 1.0).

    Returns:
        Whatever `persisted_sess.run` yields for `persisted_output`.
    """
    feeds = {
        persisted_input: image_inp,
        persisted_keep_prob: keep_prob,
    }
    return persisted_sess.run(persisted_output, feed_dict=feeds)
# The data is already normalized.
test_x, test_y = mnist.test.images, mnist.test.labels

In [None]:
# Evaluate the accuracy on the whole test set in one batch.
from sklearn.metrics import accuracy_score
y_pred = np.argmax(f(test_x), axis=1)
y_true = np.argmax(test_y, axis=1)
print(accuracy_score(y_true, y_pred, normalize=True))
num_images = test_x.shape[0]
# Fraction of test images whose predicted class differs from the true class.
fooling_rate = float(np.sum(y_pred != y_true)) / float(num_images)
print('FOOLING RATE = ', fooling_rate)

In [None]:
def jacobian(y_flat,x,inds,n=10):
    """Build a graph op that stacks the gradients of selected outputs w.r.t. `x`.

    For each j in range(n) the gradient of `y_flat[inds[j]]` with respect to
    `x` is computed with `tf.gradients` and written to slot j of a
    `tf.TensorArray`; the array is then stacked along a new leading axis.

    Args:
        y_flat: tensor indexed with a single integer to select one output.
        x: tensor the gradients are taken with respect to.
        inds: integer tensor; `inds[j]` selects the j-th output to differentiate.
        n: number of gradients to compute (loop length and TensorArray size).
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        The stacked TensorArray. Note that `tf.gradients` returns a list, which
        is written as-is; presumably this is why the caller in this notebook
        squeezes axis 1 of the evaluated result -- TODO confirm.
    """
    loop_vars=[
        tf.constant(0,tf.int32),
        tf.TensorArray(tf.float32,size=n)
    ]
    # The result must not be bound to the name `jacobian`'s misspelling: the
    # stacked value has to come from the loop output, not from this function.
    _,grad_rows=tf.while_loop(
        lambda j,_:j<n,
        lambda j,result:(j+1,result.write(j,tf.gradients(y_flat[inds[j]],x))),
        loop_vars
    )
    return grad_rows.stack()  # adds a leading axis of length n

In [None]:
def deepfool(image,f,grads,num_class=10,overshoot=0.02,max_iter=50,num_classes=None):
    """Compute a DeepFool-style minimal perturbation that changes the label of `image`.

    Args:
        image: input array passed to `f` and `grads`.
        f: callable returning the classifier outputs for an input; the result
            is flattened, so it is expected to describe a single image.
        grads: callable `grads(x, I)` returning, for each class index in `I`,
            the gradient of that output w.r.t. `x`. Entry k (indexed along the
            first axis) is expected to be broadcastable to `image.shape`.
        num_class: number of top-scoring classes considered as targets.
        overshoot: extra scaling applied to the accumulated perturbation so the
            decision boundary is actually crossed.
        max_iter: maximum number of iterations.
        num_classes: optional alias of `num_class`; when given it takes
            precedence (some callers use this spelling).

    Returns:
        Tuple `(r_tot, loop_i, k_i, pert_image)`: the final perturbation
        (already scaled by `1 + overshoot`), the number of iterations run, the
        label predicted for the perturbed image, and the perturbed image.
    """
    if num_classes is not None:
        num_class=num_classes
    f_image=np.array(f(image)).flatten()
    # Class indices sorted by decreasing score, truncated to the candidates.
    I=f_image.argsort()[::-1]
    I=I[0:num_class]
    label=I[0]
    input_shape=image.shape
    pert_image=image
    f_i=np.array(f(pert_image)).flatten()
    k_i=int(np.argmax(f_i))
    w=np.zeros(input_shape)
    r_tot=np.zeros(input_shape)
    loop_i=0
    while k_i==label and loop_i<max_iter:
        pert=np.inf
        gradients=np.asarray(grads(pert_image,I))
        # Use len(I) rather than num_class so fewer available outputs than
        # requested candidates cannot cause an out-of-range index.
        for k in range(1,len(I)):
            # set new w_k and new f_k
            w_k=gradients[k]-gradients[0]
            f_k=f_i[I[k]]-f_i[I[0]]
            w_k_norm=np.linalg.norm(w_k.flatten())
            if w_k_norm==0:
                # No usable direction towards class k; dividing would give inf/NaN.
                continue
            pert_k=abs(f_k)/w_k_norm
            # determine which w_k to use
            if pert_k<pert:
                pert=pert_k
                w=w_k
        if pert==np.inf:
            # Every candidate direction was degenerate: stop instead of
            # contaminating r_tot with NaN.
            break
        # compute r_i and r_tot
        r_i=pert*w/np.linalg.norm(w)
        r_tot=r_tot+r_i
        # compute new perturbed image
        pert_image=image+(1+overshoot)*r_tot
        loop_i += 1
        # compute new label
        f_i=np.array(f(pert_image)).flatten()
        k_i=int(np.argmax(f_i))
    r_tot=(1+overshoot)*r_tot
    return r_tot,loop_i,k_i,pert_image

In [4]:
def proj_lp(v,xi,p):
    """Project `v` onto the l_p ball of radius `xi` centered at 0.

    Only p=2 and p=np.inf are supported.

    Args:
        v: array to project.
        xi: radius of the ball.
        p: norm order, either 2 or np.inf.

    Returns:
        The projected array: for p=2, `v` rescaled so its l2 norm does not
        exceed `xi`; for p=np.inf, `v` with every entry clipped to [-xi, xi].

    Raises:
        ValueError: if `p` is neither 2 nor np.inf.
    """
    if p==2:
        # flatten() takes an order string, not an integer; the default order is
        # fine because the l2 norm does not depend on element order.
        norm=np.linalg.norm(v.flatten())
        # A zero vector is already inside the ball; skip the division by zero.
        if norm>0:
            v=v*min(1,xi/norm)
    elif p==np.inf:
        v=np.sign(v)*np.minimum(abs(v),xi)
    else:
        raise ValueError('Values of p different from 2 and Inf are currently not supported...')
    return v
def universal_perturbation(dataset,f,grads,delta=0.2,max_iter_uni = np.inf, xi=10, p=np.inf, num_classes=10, overshoot=0.02, max_iter_df=10):
    """Compute a single perturbation `v` that changes the prediction on most of `dataset`.

    The data set is swept repeatedly. For every image whose prediction is not
    yet changed by the current `v`, `deepfool` computes an extra perturbation
    for `image + v`; if it converged, it is added to `v` and `v` is projected
    back onto the l_p ball of radius `xi`. Sweeping stops once the fooling rate
    reaches `1 - delta` or after `max_iter_uni` passes.

    Args:
        dataset: array of images stacked along the first axis; sliced as
            `dataset[k:k+1, :]`. It is not modified.
        f: callable returning the classifier outputs for a batch of inputs.
        grads: gradient callable forwarded to `deepfool`.
        delta: target fooling rate is `1 - delta`.
        max_iter_uni: maximum number of passes over the data set.
        xi: radius of the l_p ball `v` is projected onto.
        p: norm used for the projection (2 or np.inf).
        num_classes: number of candidate classes passed to `deepfool`.
        overshoot: overshoot parameter passed to `deepfool`.
        max_iter_df: maximum number of `deepfool` iterations per image.

    Returns:
        The perturbation `v` (the initial scalar 0 if no update was ever made).
    """
    v=0
    fooling_rate=0.0
    num_images = np.shape(dataset)[0] # images are stacked along the first axis
    print('X size:{}'.format(num_images))
    itr=0
    while fooling_rate <1-delta and itr<max_iter_uni:
        print('Starting pass number ',itr)
        # Go through the data set in a fresh random order and compute the
        # perturbation increments sequentially. Iterating over a permutation
        # of indices avoids shuffling the caller's array in place.
        for k in np.random.permutation(num_images):
            cur_img=dataset[k:(k+1),:]
            label_clean=int(np.argmax(np.array(f(cur_img)).flatten()))
            label_pert=int(np.argmax(np.array(f(cur_img+v)).flatten()))
            if label_clean==label_pert:
                # Compute adversarial perturbation for the not-yet-fooled image.
                # `deepfool` names its parameter `num_class`.
                dr,n_iter,_,_=deepfool(cur_img + v, f, grads, num_class=num_classes, overshoot=overshoot, max_iter=max_iter_df)
                # make sure it converged
                if n_iter<max_iter_df-1:
                    v=v+dr
                    # project on l_p ball
                    v=proj_lp(v,xi,p)
        itr=itr+1
        # perturb the dataset with the computed perturbation
        dataset_perturbed=dataset+v
        est_labels_pert=np.argmax(f(dataset_perturbed),axis=1)
        est_labels_orig=np.argmax(f(dataset),axis=1)
        # compute the fooling rate: fraction of images whose prediction changed
        fooling_rate = float(np.sum(est_labels_pert != est_labels_orig) / float(num_images))
        print('FOOLING RATE = ', fooling_rate)
    return v

In [None]:
# Separate copy of the test images that is handed to the attack.
X = test_x.copy()

# Flatten the network output so individual entries can be selected by index.
y_flat = tf.reshape(persisted_output, [-1])
# Placeholder for the 10 output indices whose gradients are requested.
inds = tf.placeholder(tf.int32, shape=[10])
dydx = jacobian(y_flat, persisted_input, inds)

print('>> Computing gradient function...')
def grad_fs(image_inp, indices, keep_prob=1.0):
    """Evaluate `dydx` for `image_inp` and the given output `indices`.

    Args:
        image_inp: value fed to `persisted_input`.
        indices: value fed to the `inds` placeholder (10 integers).
        keep_prob: value fed to `persisted_keep_prob` (defaults to 1.0).

    Returns:
        The evaluated `dydx` with its length-1 axis 1 removed.
    """
    feeds = {
        persisted_input: image_inp,
        inds: indices,
        persisted_keep_prob: keep_prob,
    }
    jac = persisted_sess.run(dydx, feed_dict=feeds)
    return jac.squeeze(axis=1)

# Running universal perturbation
v = universal_perturbation(X, f, grad_fs, delta=0.2, num_classes=10)

In [None]:
# Evaluate the classifier on the test images with the perturbation `v` added.
y_pred = np.argmax(f(test_x + v), axis=1)
y_true = np.argmax(test_y, axis=1)

print(accuracy_score(y_true, y_pred, normalize=True))

num_images = test_x.shape[0]

# NOTE(review): this compares against the ground-truth labels, i.e. it is the
# error rate under perturbation, not the change relative to clean predictions.
fooling_rate = float(np.sum(y_pred != y_true)) / float(num_images)
print('FOOLING RATE = ', fooling_rate)