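### Prerequisites
- Python >= 3.6 (the notebook uses f-strings)
- cuda/9.0.176
- cudnn/v7.2.1
- tensorflow-gpu==1.12
- tensorflow-probability==0.5.0
- keras==2.2.4
- cleverhans (a release that provides the `cleverhans.dataset`, `cleverhans.loss` and `cleverhans.train` modules imported below)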

In [8]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

In [9]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os

import tensorflow as tf
from tensorflow import keras
import numpy as np

from cleverhans.attacks import FastGradientMethod
from cleverhans.dataset import MNIST
from cleverhans.loss import CrossEntropy
from cleverhans.train import train
from cleverhans.utils import AccuracyReport
from cleverhans.utils_keras import cnn_model
from cleverhans.utils_keras import KerasModelWrapper
from cleverhans.utils_tf import model_eval

In [10]:
# Check GPU availability: report whether TensorFlow sees a GPU and which
# device name it resolves to. Both lines are emitted by a single print call.
print(
    f"GPU Available: {tf.test.is_gpu_available()}",
    f"GPU Name: {tf.test.gpu_device_name()}",
    sep="\n",
)

GPU Available: True
GPU Name: /device:GPU:0


In [11]:
# Default settings for mnist_tutorial(); each constant is bound as the default
# value of the parameter with the matching lower-case name.
NB_EPOCHS = 6              # number of training epochs
BATCH_SIZE = 128           # batch size used for training and evaluation
LEARNING_RATE = 1e-3       # learning rate forwarded to train()
TRAIN_DIR = 'train_dir'    # directory inspected for an existing checkpoint
FILENAME = 'mnist.ckpt'    # checkpoint file name forwarded to train()
LOAD_MODEL = False         # restore a checkpoint instead of training from scratch

In [12]:
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=False, label_smoothing=0.1):
    """
    MNIST CleverHans tutorial.

    Builds a Keras CNN, trains it on clean MNIST data (or restores it from a
    checkpoint), measures its accuracy on clean and FGSM-perturbed test
    inputs, then repeats the process with a second CNN trained adversarially.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training and evaluation batches
    :param learning_rate: learning rate for training
    :param train_dir: directory inspected for an existing checkpoint; it is
                      created (including parents) when it does not exist
    :param filename: checkpoint filename, forwarded to train() via its args
    :param load_model: if True and a checkpoint is found in train_dir, the
                       first model is restored instead of being trained
    :param testing: if true, accuracy on the *training* set is additionally
                    computed and stored in the report's ``train_*`` fields
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    :raises NotImplementedError: if Keras is not configured for the
                                 'channels_last' image data format
    """
    keras.backend.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if keras.backend.image_data_format() != 'channels_last':
        raise NotImplementedError("this tutorial requires keras to be configured to channels_last format")

    # Create TF session and set as Keras backend session.
    # NOTE(review): the session is never closed by this function and all ops
    # are added to the default graph, so repeated calls in one kernel
    # accumulate state -- confirm whether that matters for your use.
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST training and test data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    # Every evaluation below uses the same batch size, so build the argument
    # dict once instead of re-creating it before each model_eval call.
    eval_params = {'batch_size': batch_size}

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    # makedirs (rather than mkdir) also creates missing parent directories,
    # and exist_ok avoids a failure if the directory appears between the
    # existence check and the creation.
    if not os.path.exists(train_dir):
        os.makedirs(train_dir, exist_ok=True)

    # NOTE(review): this function never writes a checkpoint itself; loading
    # only succeeds if train() or an earlier run saved one under train_dir --
    # confirm that the installed cleverhans version does so.
    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng)

    # Calculate accuracy on the training set
    if testing:
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_params)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculate adversarial accuracy on the training set
    if testing:
        acc = model_eval(sess, x, y, preds_adv, x_train,
                         y_train, args=eval_params)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")

    # Redefine TF model graph
    model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols,
                        channels=nchannels, nb_filters=64,
                        nb_classes=nb_classes)
    wrap_2 = KerasModelWrapper(model_2)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)

    def attack(x):
        # Unlike adv_x above, no stop_gradient is applied here.
        return fgsm2.generate(x, **fgsm_params)

    preds_2_adv = model_2(attack(x))
    loss_2 = CrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        accuracy = model_eval(sess, x, y, preds_2, x_test, y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, x_test,
                              y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(sess, loss_2, x_train, y_train, evaluate=evaluate_2,
          args=train_params, rng=rng)

    # Calculate accuracies on the training set
    if testing:
        accuracy = model_eval(sess, x, y, preds_2, x_train, y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, x_train,
                              y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report

In [13]:
# Run the whole experiment with the default arguments of mnist_tutorial():
# the call trains and evaluates both models and returns an AccuracyReport.
accuracy_report = mnist_tutorial()

Defined TensorFlow model graph.
train_dir None
Model was not loaded, training from scratch.
num_devices:  1


[INFO 2020-11-22 22:37:04,827 cleverhans] Epoch 0 took 2.4613442420959473 seconds


Test accuracy on legitimate examples: 0.9885


[INFO 2020-11-22 22:37:07,538 cleverhans] Epoch 1 took 2.265336036682129 seconds


Test accuracy on legitimate examples: 0.9918


[INFO 2020-11-22 22:37:10,197 cleverhans] Epoch 2 took 2.339454412460327 seconds


Test accuracy on legitimate examples: 0.9932


[INFO 2020-11-22 22:37:12,850 cleverhans] Epoch 3 took 2.3257811069488525 seconds


Test accuracy on legitimate examples: 0.9927


[INFO 2020-11-22 22:37:15,408 cleverhans] Epoch 4 took 2.235194444656372 seconds


Test accuracy on legitimate examples: 0.9926


[INFO 2020-11-22 22:37:18,055 cleverhans] Epoch 5 took 2.34348201751709 seconds


Test accuracy on legitimate examples: 0.9938
Test accuracy on adversarial examples: 0.0710

Repeating the process, using adversarial training




num_devices:  1


[INFO 2020-11-22 22:37:26,488 cleverhans] Epoch 0 took 5.18956995010376 seconds


Test accuracy on legitimate examples: 0.9745
Test accuracy on adversarial examples: 0.8318


[INFO 2020-11-22 22:37:32,712 cleverhans] Epoch 1 took 4.947321891784668 seconds


Test accuracy on legitimate examples: 0.9853
Test accuracy on adversarial examples: 0.8509


[INFO 2020-11-22 22:37:38,568 cleverhans] Epoch 2 took 5.012603521347046 seconds


Test accuracy on legitimate examples: 0.9887
Test accuracy on adversarial examples: 0.8740


[INFO 2020-11-22 22:37:44,239 cleverhans] Epoch 3 took 4.898103713989258 seconds


Test accuracy on legitimate examples: 0.9903
Test accuracy on adversarial examples: 0.9019


[INFO 2020-11-22 22:37:50,131 cleverhans] Epoch 4 took 5.044625520706177 seconds


Test accuracy on legitimate examples: 0.9910
Test accuracy on adversarial examples: 0.9169


[INFO 2020-11-22 22:37:55,912 cleverhans] Epoch 5 took 5.038610935211182 seconds


Test accuracy on legitimate examples: 0.9912
Test accuracy on adversarial examples: 0.9280


In [14]:
# Summarise the four test-set accuracies recorded in the report, one per
# line, using a single print call.
print(
    f"Clean Train Clean Eval: {accuracy_report.clean_train_clean_eval}",
    f"Clean Train Adv Eval: {accuracy_report.clean_train_adv_eval}",
    f"Adv Train Clean Eval: {accuracy_report.adv_train_clean_eval}",
    f"Adv Train Adv Eval: {accuracy_report.adv_train_adv_eval}",
    sep="\n",
)

Clean Train Clean Eval: 0.9938
Clean Train Adv Eval: 0.071
Adv Train Clean Eval: 0.9912
Adv Train Adv Eval: 0.928
