In [1]:
#Simple CNN Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense

class SimpleCNN:
    """Factory for a small convolutional image classifier."""

    @staticmethod
    def build(width, height, depth, classes):
        """Assemble the (uncompiled) network.

        Args:
            width: input image width.
            height: input image height.
            depth: number of input channels.
            classes: number of units in the final softmax layer.

        Returns:
            A ``Sequential`` model; the caller is responsible for compiling it.
        """
        model = Sequential()

        # channels-last layout, so batch norm works over the final axis
        input_shape = (height, width, depth)
        chan_dim = -1

        layer_stack = (
            # stage 1: strided CONV => RELU => BN (no pooling layers are used)
            Conv2D(32, (3, 3), strides=(2, 2), padding='same',
                   input_shape=input_shape),
            Activation('relu'),
            BatchNormalization(axis=chan_dim),

            # stage 2: strided CONV => RELU => BN
            Conv2D(64, (3, 3), strides=(2, 2), padding='same'),
            Activation('relu'),
            BatchNormalization(axis=chan_dim),

            # single fully-connected head: FC => RELU => BN => DROPOUT
            Flatten(),
            Dense(128),
            Activation('relu'),
            BatchNormalization(),
            Dropout(0.5),

            # softmax classifier over `classes` outputs
            Dense(classes),
            Activation('softmax'),
        )

        # attach the layers in the order they were declared
        for layer in layer_stack:
            model.add(layer)

        return model

In [2]:
# Generate Adversarial image
from tensorflow.keras.losses import MSE
import tensorflow as tf

def generate_image_adversary(model, image, label, eps=2/255.0,
                             clip_min=None, clip_max=None):
    """Build an adversarial image with one signed-gradient step.

    The input is moved by ``eps`` in the direction of the sign of the
    gradient of the MSE loss (between ``label`` and the model prediction)
    with respect to the input pixels.

    Args:
        model: Keras model called as ``model(image, training=False)``.
        image: input batch accepted by ``model``; it is cast to float32.
        label: target passed to ``MSE`` together with the prediction.
        eps: step size applied to the sign of the gradient.
        clip_min: optional lower bound applied to the perturbed image
            (e.g. ``0.0`` for pixels scaled to [0, 1]). ``None`` (default)
            leaves the lower side unbounded.
        clip_max: optional upper bound applied to the perturbed image
            (e.g. ``1.0``). ``None`` (default) leaves the upper side
            unbounded.

    Returns:
        The perturbed image as a NumPy array with the same shape as ``image``.
    """
    # cast the image so the tape can differentiate w.r.t. it
    image = tf.cast(image, tf.float32)

    # record gradients
    with tf.GradientTape() as tape:
        # the image is not a trainable variable, so it must be watched explicitly
        tape.watch(image)

        # run the model explicitly in inference mode: Dropout must be disabled
        # and BatchNormalization must use its moving statistics, otherwise the
        # gradient of a single-sample batch is not meaningful
        pred = model(image, training=False)
        loss = MSE(label, pred)

    # gradient of the loss w.r.t. the image; only its sign is used
    gradient = tape.gradient(loss, image)
    signed_grad = tf.sign(gradient)

    # construct the image adversary
    adversary = image + (signed_grad * eps)

    # optionally keep the result inside the valid pixel range; without bounds
    # the step can push pixels below the minimum or above the maximum value
    if clip_min is not None:
        adversary = tf.maximum(adversary, clip_min)
    if clip_max is not None:
        adversary = tf.minimum(adversary, clip_max)

    return adversary.numpy()

In [3]:
# Generate batches of adversarial images
import numpy as np

def generate_adversarial_batch(model, total, images, labels, dims, eps=0.01):
    """Endlessly yield batches of adversarially perturbed images.

    Args:
        model: model handed to ``generate_image_adversary``.
        total: number of samples per yielded batch.
        images: indexable collection of images; each item must support
            ``reshape``.
        labels: indexable collection of labels aligned with ``images``.
        dims: ``(height, width, channels)`` of a single image.
        eps: perturbation step size forwarded to ``generate_image_adversary``.

    Yields:
        ``(perturbed_images, perturbed_labels)`` as a pair of NumPy arrays.
    """
    # unpack the image dimensions into convenience variables
    height, width, channels = dims

    # this is a data generator, so it loops indefinitely
    while True:
        # randomly sample `total` distinct indexes from the input data
        chosen = np.random.choice(range(len(images)), size=total, replace=False)

        # perturbed images and their (unchanged) labels for this batch
        batch_images = []
        batch_labels = []

        for pos in chosen:
            sample, target = images[pos], labels[pos]

            # the adversary is generated on a batch holding just this sample
            perturbed = generate_image_adversary(
                model, sample.reshape(1, height, width, channels), target, eps)

            # drop the batch axis again before storing the result
            batch_images.append(perturbed.reshape(height, width, channels))
            batch_labels.append(target)

        # hand the assembled batch to the consumer
        yield (np.array(batch_images), np.array(batch_labels))

In [4]:
# training
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist
import numpy as np
import cv2 as cv

# load the MNIST dataset and scale the pixel values to the range [0, 1]
print('[INFO] loading MNIST dataset...')
(trainX, trainY), (testX, testY) = mnist.load_data()
trainX = trainX / 255.0
testX = testX / 255.0

# add a channel dimension to the images
trainX = np.expand_dims(trainX, axis=-1)
testX = np.expand_dims(testX, axis=-1)

# one-hot encode the labels
trainY = to_categorical(trainY, 10)
testY = to_categorical(testY, 10)

# initialize optimizer and model
# (`learning_rate` replaces the deprecated `lr` keyword, which emits a
# warning on older Keras versions and is rejected by newer ones)
print('[INFO] Compiling model...')
opt = Adam(learning_rate=1e-3)
model = SimpleCNN.build(width=28, height=28, depth=1, classes=10)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# train the simple CNN on MNIST
print('[INFO] Training Model...')
model.fit(trainX, trainY, validation_data=(testX, testY), batch_size=64, 
          epochs=20, verbose=1)

# evaluate the model trained on non-adversarial images on the testing set
(loss, acc) = model.evaluate(x=testX, y=testY, verbose=0)
print(f'[INFO] loss: {loss:.4f}, accuracy: {acc:.4f}')

[INFO] loading MNIST dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[INFO] Compiling model...


  super(Adam, self).__init__(name, **kwargs)


[INFO] Training Model...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[INFO] loss: 0.0401, accuracy: 0.9890


In [5]:
# Testing on adversarial images

# generate a set of adversarial images from our test set
print('[INFO] generating adversarial from our test set')
(advX, advY) = next(generate_adversarial_batch(model, len(testX), testX, testY, 
                                               (28, 28, 1), eps=0.1))

# re-evaluate the model on the adversarial images
(loss, acc) = model.evaluate(x=advX, y=advY, verbose=0)
print('[INFO] adversarial testing images:')
print(f'[INFO] loss: {loss:.4f}, accuracy: {acc:.4f}')

# since accuracy has dropped on the adversarial images we fine-tune the model
# on them: lower the learning rate and re-compile the model first
# (`learning_rate` replaces the deprecated `lr` keyword, which emits a
# warning on older Keras versions and is rejected by newer ones)
print('[INFO] re-compiling model...')
opt = Adam(learning_rate=1e-4)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# fine-tune the model on the adversarial images
# NOTE(review): advX/advY are derived from the test set and are also the data
# evaluated below, so the post-fine-tuning numbers are measured on samples the
# model has been trained on and are therefore optimistic
print('[INFO] fine-tuning network on adversarial images...')
model.fit(advX, advY, batch_size=64, epochs=15, verbose=1)


# now that the model is fine-tuned, evaluate it on the (non-adversarial) test
# set to check whether performance has degraded
(loss, acc) = model.evaluate(x=testX, y=testY, verbose=0)
print('')
print('[INFO] testing on normal images after fine-tuning:')
print(f'[INFO] loss: {loss:.4f}, accuracy: {acc:.4f}')

# final evaluation of the model on the adversarial images
(loss, acc) = model.evaluate(x=advX, y=advY, verbose=0)
print('[INFO] testing on adversarial images after fine-tuning:')
print(f'[INFO] loss: {loss:.4f}, accuracy: {acc:.4f}')

[INFO] generating adversarial from our test set
[INFO] adversarial testing images:
[INFO] loss: 13.0508, accuracy: 0.0166
[INFO] re-compiling model...
[INFO] fine-tuning network on adversarial images...
Epoch 1/15


  super(Adam, self).__init__(name, **kwargs)


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15

[INFO] testing on normal images after fine-tuning:
[INFO] loss: 0.0288, accuracy: 0.9909
[INFO] testing on adversarial images after fine-tuning:
[INFO] loss: 0.0270, accuracy: 0.9935
