In [1]:
import gc
import random

import keras
import numpy as np
import seaborn as sns
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

from art import config
from art.utils import load_dataset
from art.estimators.classification import KerasClassifier
from art.attacks.evasion import ProjectedGradientDescent
from art.defences.trainer import AdversarialTrainerMadryPGD
from art.data_generators import KerasDataGenerator
from art.defences.trainer import AdversarialTrainer

from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.layers import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator


from matplotlib import pyplot as plt

from tqdm.auto import tqdm

## Params

In [2]:
# fix randomness
seed = 42

random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# training parameters
n_epochs = 10
batch_size = 32
dataset_subsample = 0.5

## Data

In [3]:
# load data
(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('mnist')

# Subsample the data set for speed
_, x_train_s = train_test_split(x_train, test_size=dataset_subsample, random_state=seed, stratify=y_train)
_, x_test_s = train_test_split(x_test, test_size=dataset_subsample, random_state=seed, stratify=y_test)
_, y_train_s = train_test_split(y_train, test_size=dataset_subsample, random_state=seed, stratify=y_train)
_, y_test_s = train_test_split(y_test, test_size=dataset_subsample, random_state=seed, stratify=y_test)

# labels as integers
y_test_s_labels = np.argmax(y_test_s, axis=-1)

print(x_train_s.shape, x_test_s.shape, y_train_s.shape, y_test_s.shape)

(30000, 28, 28, 1) (5000, 28, 28, 1) (30000, 10) (5000, 10)


## Model

In [4]:
def build_model(input_shape, num_classes):
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',kernel_initializer='he_normal',input_shape=input_shape))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu',padding='same',kernel_initializer='he_normal'))
    model.add(MaxPool2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        metrics=['accuracy']
    )
    
    return model

In [14]:
# model = build_model(input_shape=(28, 28, 1), num_classes=10)
# model.fit(x_train_s, y_train_s, epochs=10)
# safety_pred = np.argmax(model.predict(x_test_s), axis=-1)
# print('Accuracy on clean testing data')
# print(classification_report(y_test_s_labels, safety_pred, digits=5))

Train on 30000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy on clean testing data
              precision    recall  f1-score   support

           0    0.98580   0.99184   0.98881       490
           1    0.98261   0.99647   0.98949       567
           2    0.98649   0.99031   0.98839       516
           3    0.99008   0.98812   0.98910       505
           4    0.98776   0.98574   0.98675       491
           5    0.97333   0.98206   0.97768       446
           6    0.98337   0.98747   0.98542       479
           7    0.98630   0.98054   0.98341       514
           8    0.99374   0.97741   0.98551       487
           9    0.98597   0.97426   0.98008       505

    accuracy                        0.98560      5000
   macro avg    0.98554   0.98542   0.98546      5000
weighted avg    0.98563   0.98560   0.98559      5000



## Adversarial training

In [5]:
model = build_model(input_shape=(28, 28, 1), num_classes=10)

print(model.summary())

# Wrappers for ART
classifier = KerasClassifier(
    model=model,
    clip_values=(0, 1),
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 2304)              0         
_________________________________________________________________
dense (Dense)                (None, 1024)              2360320   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1

In [None]:
%%time

# Create adversarial trainer and perform adversarial training
trainer = AdversarialTrainerMadryPGD(
    classifier,
#     nb_epochs=n_epochs
)
trainer.fit(x_train_s, y_train_s)

HBox(children=(FloatProgress(value=0.0, description='Precompute adv samples', max=1.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Adversarial training epochs', max=391.0, style=ProgressSt…

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
Please use Model.fit, which supports generators.


In [11]:
# Evaluate the adversarially trained model on clean test set
test_pred = np.argmax(classifier.predict(x_test_s), axis=-1)
print('Accuracy on clean testing data')
print(classification_report(y_test_s_labels, test_pred, digits=5))
print(classification_report(y_test_s_labels, np.argmax(trainer.trainer.predict(x_test_s), axis=-1), digits=5))

Accuracy on clean testing data
              precision    recall  f1-score   support

           0    0.00000   0.00000   0.00000       490
           1    0.07227   0.07584   0.07401       567
           2    0.00000   0.00000   0.00000       516
           3    0.00000   0.00000   0.00000       505
           4    0.00000   0.00000   0.00000       491
           5    0.00000   0.00000   0.00000       446
           6    0.00000   0.00000   0.00000       479
           7    0.10352   0.88716   0.18540       514
           8    0.00000   0.00000   0.00000       487
           9    0.00000   0.00000   0.00000       505

    accuracy                        0.09980      5000
   macro avg    0.01758   0.09630   0.02594      5000
weighted avg    0.01884   0.09980   0.02745      5000

              precision    recall  f1-score   support

           0    0.00000   0.00000   0.00000       490
           1    0.07227   0.07584   0.07401       567
           2    0.00000   0.00000   0.00000    

In [None]:
# Attacker object
pgd = ProjectedGradientDescent(
    classifier,
    eps=0.3,
    eps_step=0.01,
    max_iter=40,
    targeted=False,
    num_random_init=True
)

In [None]:
%%time

# Evaluate the adversarially trained model on fresh adversarial samples produced on the adversarially trained model
x_test_pgd_new = pgd.generate(x_test_s[:100])

In [12]:
labels_pgd_new = np.argmax(classifier.predict(x_test_pgd_new), axis=-1)
print(classification_report(y_test_s_labels[:100], labels_pgd_new, digits=5))

              precision    recall  f1-score   support

           0    0.00000   0.00000   0.00000        14
           1    0.12371   0.92308   0.21818        13
           2    0.00000   0.00000   0.00000        10
           3    0.00000   0.00000   0.00000         8
           4    0.00000   0.00000   0.00000        10
           5    0.00000   0.00000   0.00000         7
           6    0.00000   0.00000   0.00000        12
           7    0.00000   0.00000   0.00000        10
           8    0.00000   0.00000   0.00000         9
           9    0.00000   0.00000   0.00000         7

    accuracy                        0.12000       100
   macro avg    0.01237   0.09231   0.02182       100
weighted avg    0.01608   0.12000   0.02836       100

