In [1]:
import tensorflow as tf
from tensorflow.keras.layers import AveragePooling2D, Input, Flatten
from tensorflow.keras.models import Model, load_model

physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

import os
import numpy as np
from model import resnet_v1

In [2]:
# parameters
num_classes = 10
lamda = 2.0
log_det_lamda = 0.5
augmentation = False
filepath = 'cifar10_ResNet20v1_model.159.h5'

n = 3
depth = n * 6 + 2
version = 1

input_shape = (32, 32, 3)

In [3]:
model_input = Input(shape=input_shape)
model_dic = {}
model_out = []
for i in range(3):
    model_dic[str(i)] = resnet_v1(input=model_input, depth=depth, num_classes=num_classes, dataset='cifar10')
    model_out.append(model_dic[str(i)][2])
model_output = tf.keras.layers.concatenate(model_out)
model = Model(inputs=model_input, outputs=model_output)
model_ensemble = tf.keras.layers.Average()(model_out)
model_ensemble = Model(inputs=model_input, outputs=model_ensemble)

# load model
model.load_weights(filepath)

# compile model
model_ensemble.compile('sgd')



In [4]:
# Subtracting pixel mean improves accuracy
subtract_pixel_mean = True

import keras
from keras.datasets import cifar10

# Load the data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Input image dimensions.
input_shape = x_train.shape[1:]

# Normalize data.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# If subtract pixel mean is enabled
clip_min = 0.0
clip_max = 1.0
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean
    clip_min -= x_train_mean
    clip_max -= x_train_mean
    
y_test, y_train = y_test.flatten(), y_train.flatten()

Using TensorFlow backend.


In [5]:
pred = model_ensemble.predict(x_test[:500]).argmax(1)
np.mean(pred == y_test[:500])

0.938

In [6]:
# convert to Foolbox native model
import foolbox.ext.native as fbn

In [7]:
fmodel = fbn.models.TensorFlowModel(model_ensemble, bounds=(-2, 2))

In [8]:
x_batch = tf.convert_to_tensor(x_test[:250], dtype=tf.float32)
y_batch = tf.convert_to_tensor(y_test[:250])

### PGD attack

In [95]:
attack = fbn.attacks.ProjectedGradientDescentAttack(fmodel)

In [94]:
adversarials = attack(x_batch, y_batch, epsilon=0.1, step_size=0.005, num_steps=20)  # L-inf norm
print(fbn.utils.accuracy(fmodel, adversarials, y_batch))

0.0


#### no repeats

In [105]:
epsilon = 0.01
steps = [10, 20, 50, 100, 250, 500, 1000]
pgd_accs = []
for num_steps in steps:
    step_size = max(epsilon / num_steps * 3, 0.0005)
    adversarials = attack(x_batch, y_batch, epsilon=0.01, step_size=step_size, num_steps=num_steps)
    pgd_accs.append(fbn.utils.accuracy(fmodel, adversarials, y_batch))
    print(f'finished {num_steps} with {pgd_accs[-1]} accuracy')

finished 10 with 0.2639999985694885 accuracy
finished 20 with 0.23600000143051147 accuracy
finished 50 with 0.19599999487400055 accuracy
finished 100 with 0.15600000321865082 accuracy
finished 250 with 0.09200000017881393 accuracy
finished 500 with 0.08399999886751175 accuracy
finished 1000 with 0.09200000017881393 accuracy


#### repeats

In [106]:
import eagerpy as ep

def combine(fmodel, label, *args):
    masks = []
    for adversarials in args:
        pred_pgd_label = ep.astensor(fmodel.forward(adversarials)).argmax(1).numpy()
        masks.append(pred_pgd_label == label)
        
    return np.stack(masks).all(0)

In [107]:
pgd_accs_repeats = []
for num_steps in steps:
    step_size = max(epsilon / num_steps * 3, 0.0005)
    adversarials01 = attack(x_batch, y_batch, epsilon=0.01, step_size=step_size, num_steps=num_steps)
    adversarials02 = attack(x_batch, y_batch, epsilon=0.01, step_size=step_size, num_steps=num_steps)
    adversarials03 = attack(x_batch, y_batch, epsilon=0.01, step_size=step_size, num_steps=num_steps)
    mask = combine(fmodel, y_batch, adversarials01, adversarials02, adversarials03)
    
    pgd_accs_repeats.append(mask.mean())
    print(f'finished {num_steps} with {pgd_accs_repeats[-1]} accuracy')

finished 10 with 0.236 accuracy
finished 20 with 0.168 accuracy
finished 50 with 0.156 accuracy
finished 100 with 0.112 accuracy
finished 250 with 0.068 accuracy
finished 500 with 0.064 accuracy
finished 1000 with 0.052 accuracy


In [129]:
pgd = fbn.attacks.ProjectedGradientDescentAttack(fmodel)
init_adversarials = pgd(x_batch, y_batch, epsilon=0.15, step_size=0.005, num_steps=20)  # L-inf norm
print(fbn.utils.accuracy(fmodel, init_adversarials, y_batch))

0.0


In [134]:
# find good starting values
while True:
    pgd = fbn.attacks.ProjectedGradientDescentAttack(fmodel)
    init_adversarials = pgd(x_batch, y_batch, epsilon=0.15, step_size=0.005, num_steps=20)  # L-inf norm
    if fbn.utils.accuracy(fmodel, init_adversarials, y_batch) == 0.0:
        break

attack = fbn.attacks.LinfinityBrendelBethgeAttack(fmodel)
adversarials = attack(x_batch, y_batch, steps=20, starting_points=init_adversarials)  # L-inf norm

mask = np.abs((x_batch - adversarials).numpy()).reshape((100, -1)).max(1) > 0.01
print(mask.mean())

before starting points
got starting points
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
0.11


### Final attack

In [55]:
def comb_attack(x_batch, y_batch, steps=20):
    while True:
        pgd = fbn.attacks.ProjectedGradientDescentAttack(fmodel)
        init_adversarials = pgd(x_batch, y_batch, epsilon=0.15, step_size=0.005, num_steps=20)  # L-inf norm
        if fbn.utils.accuracy(fmodel, init_adversarials, y_batch) == 0.0:
            break

    attack = fbn.attacks.LinfinityBrendelBethgeAttack(fmodel)
    adversarials = attack(x_batch, y_batch, steps=steps, starting_points=init_adversarials)  # L-inf norm
    return np.abs((x_batch - adversarials).numpy()).reshape((x_batch.shape[0], -1)).max(1) > 0.01, adversarials

In [56]:
from tqdm import tqdm
mask = np.array([True] * x_batch.shape[0])
keep_masks = []
total_adversarials = np.empty(x_batch.shape, dtype=np.float32)

for k in tqdm(range(20)):
    new_mask, adversarials = comb_attack(x_batch, y_batch)
    total_adversarials[mask] = ep.astensor(adversarials).numpy()[mask]
    keep_masks.append(new_mask)
    mask[mask] = new_mask[mask]
    print(k, mask.sum(), mask.mean(), np.stack(keep_masks).all(0).mean())





  0%|          | 0/50 [00:00<?, ?it/s][A[A[A[A



  2%|▏         | 1/50 [00:26<21:27, 26.28s/it][A[A[A[A

0 14 0.056 0.056






  4%|▍         | 2/50 [00:52<20:53, 26.12s/it][A[A[A[A

1 6 0.024 0.024






  6%|▌         | 3/50 [01:17<20:19, 25.94s/it][A[A[A[A

2 4 0.016 0.016






  8%|▊         | 4/50 [01:43<19:51, 25.90s/it][A[A[A[A

3 3 0.012 0.012






 10%|█         | 5/50 [02:08<19:08, 25.53s/it][A[A[A[A

4 2 0.008 0.008






 12%|█▏        | 6/50 [02:33<18:38, 25.42s/it][A[A[A[A

5 2 0.008 0.008






 14%|█▍        | 7/50 [02:58<18:12, 25.42s/it][A[A[A[A

6 1 0.004 0.004






 16%|█▌        | 8/50 [03:24<17:56, 25.64s/it][A[A[A[A

7 1 0.004 0.004






 18%|█▊        | 9/50 [03:50<17:28, 25.58s/it][A[A[A[A

8 1 0.004 0.004






 20%|██        | 10/50 [04:19<17:45, 26.63s/it][A[A[A[A

9 1 0.004 0.004






 22%|██▏       | 11/50 [04:44<16:56, 26.06s/it][A[A[A[A

10 0 0.0 0.0






 24%|██▍       | 12/50 [05:09<16:29, 26.04s/it][A[A[A[A

11 0 0.0 0.0






 26%|██▌       | 13/50 [05:35<15:59, 25.92s/it][A[A[A[A

12 0 0.0 0.0






 28%|██▊       | 14/50 [06:00<15:23, 25.66s/it][A[A[A[A

13 0 0.0 0.0


KeyboardInterrupt: 