<a href="https://colab.research.google.com/github/wielandbrendel/robustness_workshop/blob/master/03_diversity/diversity_attack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# this cell contains all the commands necessary to run this notebook in colab
# if you cloned the repository and run this notebook locally you do not need to run this command
!wget https://raw.githubusercontent.com/wielandbrendel/robustness_workshop/master/03_diversity/model.py

In [0]:
# load pretrained model weights
!wget https://github.com/wielandbrendel/robustness_workshop/releases/download/v0.0.1/diversity_cifar10_ResNet20v1_model.159.h5

In [0]:
# install the latest master version of Foolbox 3.0
!pip3 install git+https://github.com/bethgelab/foolbox.git

In [0]:
!pip install --upgrade tensorflow==2.0.0

In [0]:
import tensorflow as tf

from tensorflow.keras.layers import AveragePooling2D, Input, Flatten
from tensorflow.keras.models import Model, load_model

import os
import numpy as np
import foolbox as fb
from model import resnet_v1

In [0]:
# --- configuration ---------------------------------------------------------

# Classification task and checkpoint to restore.
num_classes = 10
filepath = 'diversity_cifar10_ResNet20v1_model.159.h5'

# Settings carried along with the checkpoint configuration
# (not read by the model-building code in this part of the notebook).
lamda, log_det_lamda = 2.0, 0.5
augmentation = False

# ResNet v1 architecture: depth is derived from n as 6n + 2 (n = 3 -> 20 layers).
n, version = 3, 1
depth = 6 * n + 2

# Shape of a single input image: (height, width, channels).
input_shape = (32, 32, 3)

In [0]:
# Every ensemble member reads from the same input tensor.
model_input = Input(shape=input_shape)

# Build three ResNet members. model_dic keeps each member's full return value
# (keyed by its index as a string); model_out collects element [2] of each,
# which is what the combined models below are built from.
model_dic, model_out = {}, []
for i in range(3):
    member = resnet_v1(input=model_input, depth=depth, num_classes=num_classes, dataset='cifar10')
    model_dic[str(i)] = member
    model_out.append(member[2])

# `model` exposes the concatenated member outputs; the checkpoint is loaded into it.
model_output = tf.keras.layers.concatenate(model_out)
model = Model(inputs=model_input, outputs=model_output)

# `model_ensemble` averages the member outputs element-wise. It is built on the
# same member tensors as `model`, so it shares the weights loaded below.
model_ensemble = Model(inputs=model_input, outputs=tf.keras.layers.Average()(model_out))

# load model
model.load_weights(filepath)

# compile model
model_ensemble.compile('sgd')

In [0]:
# Subtracting pixel mean improves accuracy
subtract_pixel_mean = True

# Use the Keras that ships with TensorFlow, i.e. the same Keras the model above
# is built with (tensorflow.keras), instead of the standalone `keras` package,
# which is a separate install that may be absent or mismatched with the pinned
# TensorFlow version. The names `keras` and `cifar10` stay bound as before.
from tensorflow import keras
from tensorflow.keras.datasets import cifar10

# Only the first `num_test_images` test images (and their labels) are kept.
num_test_images = 200

# Load the data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Input image dimensions.
input_shape = x_train.shape[1:]

# Normalize data: scale pixel values from [0, 255] to [0, 1].
x_train = x_train.astype('float32') / 255
x_test = x_test[:num_test_images].astype('float32') / 255

# If subtract pixel mean is enabled, centre the data with the mean training image.
# clip_min / clip_max start as the scalar limits of the [0, 1] pixel range and,
# after the subtraction, become per-pixel arrays with the shape of one image.
clip_min = 0.0
clip_max = 1.0
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean
    clip_min -= x_train_mean
    clip_max -= x_train_mean

# Labels as flat 1-D arrays, with the test labels cut to match x_test.
y_test, y_train = y_test[:num_test_images].flatten(), y_train.flatten()

In [0]:
# Predicted class per test image = index of the largest averaged ensemble output.
ensemble_outputs = model_ensemble.predict(x_test)
pred = np.argmax(ensemble_outputs, axis=1)

# Fraction of test images whose prediction matches the true label.
clean_accuracy = (pred == y_test).mean()
print(f'Clean accuracy: {clean_accuracy:.3f}')

In [0]:
# Wrap the averaged ensemble as a Foolbox model so Foolbox attacks can be run on it.
# `bounds` is the (min, max) input value range handed to Foolbox.
# NOTE(review): (-2, 2) looks like a loose envelope around the mean-subtracted
# [0, 1] pixels rather than the exact data range - confirm.
model_bounds = (-2, 2)
fmodel = fb.models.TensorFlowModel(model_ensemble, bounds=model_bounds)

In [0]:
# The full evaluation subset as TensorFlow tensors: images cast to float32,
# labels converted with whatever dtype y_test already has.
images, labels = (
    tf.convert_to_tensor(x_test, dtype=tf.float32),
    tf.convert_to_tensor(y_test),
)

### Baseline attack

In [0]:
# Baseline: L-infinity PGD against the averaged ensemble, evaluated in batches.
epsilon = 0.01

# `acc` accumulates the number of images that were NOT successfully attacked;
# dividing by `total_images` at the end gives the accuracy under attack.
acc = 0
total_images = 0

# The attack configuration does not depend on the batch, so build it once:
# 10 steps of size epsilon/10 each.
attack = fb.attacks.LinfPGD(steps=10, abs_stepsize=epsilon/10)

# Process the test set in 10 equally sized batches (np.split requires the number
# of test images to be divisible by 10). The loop variables are deliberately not
# named `images` / `labels`: those names hold the full test-set tensors created
# earlier in the notebook and would otherwise be silently replaced by the last batch.
for image_batch, label_batch in zip(np.split(x_test, 10), np.split(y_test, 10)):
    image_batch = tf.convert_to_tensor(image_batch, dtype=tf.float32)
    label_batch = tf.convert_to_tensor(label_batch)

    # PGD returns three values: (1) the raw adversarial images as returned by the
    # attack, (2) the raw adversarials clipped to the valid epsilon region and
    # (3) a boolean tensor indicating which perturbations are actually adversarial
    adv, adv_clipped, adv_mask = attack(fmodel, image_batch, criterion=fb.criteria.Misclassification(label_batch), epsilons=epsilon)

    # (1 - success rate) * batch size = number of images that are still classified correctly
    acc += (1 - adv_mask.numpy().mean()) * len(adv)
    total_images += len(adv)

print(f'Baseline attack accuracy: {acc / total_images:.3f}')