REQUIREMENTS

In [0]:
import tensorflow as tf

from tensorflow.keras.datasets import mnist, cifar10
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Dense, Flatten, Activation

import numpy as np
import random


import matplotlib.pyplot as plt

DATA PROCESSING


In [0]:

# Load CIFAR-10: colour images plus integer class ids for the train and test splits.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Human-readable class names, indexed by class id.
labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
img_rows, img_cols, channels = 32, 32, 3
num_classes = 10

# Scale pixels to [0, 1]. Cast to float32 first: a bare `/ 255` promotes the uint8
# images to float64, doubling memory, while the FGSM helper casts to float32 anyway.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Make the (N, rows, cols, channels) layout explicit; this fails loudly if the
# element count does not match the expected image size.
x_train = x_train.reshape((-1, img_rows, img_cols, channels))
x_test = x_test.reshape((-1, img_rows, img_cols, channels))

# One-hot encode the class ids so they match the model's softmax output.
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

print("Data shapes", x_test.shape, y_test.shape, x_train.shape, y_train.shape)

Data shapes (10000, 32, 32, 3) (10000, 10) (50000, 32, 32, 3) (50000, 10)


SIMPLE MODEL CREATION

In [0]:
def create_model():
    """Build and compile the small CNN classifier used in this notebook.

    The network is three strided ReLU convolutions, one max-pooling layer, and
    three Dense layers separated by dropout, ending in a softmax over
    ``num_classes``. It is compiled with the Adam optimizer, MSE loss and an
    accuracy metric.

    NOTE(review): the two hidden Dense layers have no activation and the loss is
    MSE rather than cross-entropy; presumably intentional, confirm.

    Returns:
        The compiled ``Sequential`` model.
    """
    input_shape = (img_rows, img_cols, channels)

    layer_stack = [
        # Convolutional feature extractor (stride 3 shrinks the feature maps quickly).
        Conv2D(32, kernel_size=(4, 4), strides=(3, 3), padding='same', activation='relu', input_shape=input_shape),
        Conv2D(128, kernel_size=(3, 3), strides=(3, 3), padding='same', activation='relu'),
        Conv2D(256, kernel_size=(3, 3), strides=(3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.4),
        # Classifier head.
        Flatten(),
        Dense(30),
        Dropout(0.3),
        Dense(20),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ]

    network = Sequential(layer_stack)
    network.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    return network
  

In [0]:
# Build the network; `model` is the global that adversarial_pattern() and the
# later training/evaluation cells operate on. Then print its layer summary.
model = create_model()
model.summary()


NORMAL TRAINING

In [0]:
# Baseline training on the clean images: 30 epochs with mini-batches of 256.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation numbers are not an independent hold-out.
model.fit(x_train, y_train,
          batch_size=256,
          epochs=30,
          validation_data=(x_test, y_test))

EVALUATION

In [0]:
# model.evaluate returns the loss followed by the compiled metrics, so this
# prints [mse_loss, accuracy] rather than a single accuracy figure.
print("Base accuracy on regular images:", model.evaluate(x=x_test, y=y_test, verbose=0))

FGSM

In [0]:
def adversarial_pattern(image, label):
    """Return the FGSM perturbation direction for ``image``.

    Takes the gradient of the MSE loss between ``label`` and the prediction of
    the global ``model`` with respect to the input pixels, and returns its
    element-wise sign.

    Args:
        image: Input batch accepted by ``model``; it is cast to float32 first.
        label: Target that the prediction is compared against.

    Returns:
        A tensor shaped like ``image`` holding the sign of the loss gradient.
    """
    inputs = tf.cast(image, tf.float32)

    with tf.GradientTape() as grad_tape:
        # `inputs` is a plain tensor, not a variable, so it must be watched explicitly.
        grad_tape.watch(inputs)
        loss_value = tf.keras.losses.MSE(label, model(inputs))

    return tf.sign(grad_tape.gradient(loss_value, inputs))

GENERATOR OF ADVERSARIAL EXAMPLES

In [0]:
def generate_adversarials(batch_size, epsilon=0.1):
    """Yield endless batches of FGSM adversarial examples built from the training set.

    Each batch holds ``batch_size`` training images drawn uniformly at random
    (with replacement), each shifted by ``epsilon`` along the sign of the loss
    gradient returned by ``adversarial_pattern``.

    Args:
        batch_size: Number of adversarial examples per yielded batch.
        epsilon: Per-pixel perturbation magnitude. Defaults to 0.1, the value
            that used to be hard-coded in the loop.

    Yields:
        ``(x, y)`` where ``x`` has shape
        ``(batch_size, img_rows, img_cols, channels)`` and ``y`` holds the
        matching rows of ``y_train``.
    """
    image_shape = (img_rows, img_cols, channels)
    num_samples = len(x_train)

    while True:
        x = []
        y = []
        for _ in range(batch_size):
            # Sample from the whole training set; the former randint(0, 100)
            # only ever picked from the first 101 images.
            N = random.randrange(num_samples)

            label = y_train[N]
            image = x_train[N]

            # The model expects a batch dimension, so feed a batch of one.
            perturbations = adversarial_pattern(image.reshape((1,) + image_shape), label).numpy()

            # Pixels were scaled to [0, 1]; clip so the perturbed result is still a valid image.
            adversarial = np.clip(image + perturbations * epsilon, 0.0, 1.0)

            x.append(adversarial)
            y.append(label)

        # Each entry carries a leading batch-of-one axis; collapse them into one batch.
        x = np.asarray(x).reshape((batch_size,) + image_shape)
        y = np.asarray(y)

        yield x, y

HOW TO CHOOSE EPSILON (ε)?

In [0]:
def generate_adversarials_find_EPSYLON(batch_size, epsilon=0.05):
    """Yield FGSM adversarial batches together with the unperturbed originals.

    Same procedure as ``generate_adversarials`` but each batch also carries the
    source images, so the two can be compared side by side when tuning ε.

    Args:
        batch_size: Number of examples per yielded batch.
        epsilon: Per-pixel perturbation magnitude. Defaults to 0.05, the value
            that used to be hard-coded in the loop.

    Yields:
        ``(x, y, z)`` where ``x`` are the adversarial images, ``y`` the matching
        rows of ``y_train`` and ``z`` the original images; ``x`` and ``z`` have
        shape ``(batch_size, img_rows, img_cols, channels)``.
    """
    image_shape = (img_rows, img_cols, channels)
    num_samples = len(x_train)

    while True:
        x = []
        y = []
        z = []
        for _ in range(batch_size):
            # Sample from the whole training set; the former randint(0, 100)
            # only ever picked from the first 101 images.
            N = random.randrange(num_samples)

            label = y_train[N]
            image = x_train[N]

            # The model expects a batch dimension, so feed a batch of one.
            perturbations = adversarial_pattern(image.reshape((1,) + image_shape), label).numpy()

            # Pixels were scaled to [0, 1]; clip so the perturbed result is still a valid image.
            adversarial = np.clip(image + perturbations * epsilon, 0.0, 1.0)

            x.append(adversarial)
            y.append(label)
            z.append(image)

        x = np.asarray(x).reshape((batch_size,) + image_shape)
        y = np.asarray(y)
        z = np.asarray(z).reshape((batch_size,) + image_shape)

        yield x, y, z


# Preview a few adversarial/original pairs. This call used to be indented into
# the generator body and referred to a non-existent `generate_adversarials_`.
adversarials, correct_labels, images = next(generate_adversarials_find_EPSYLON(4))

for adversarial, correct_label, image in zip(adversarials, correct_labels, images):
    print('Prediction:', labels[model.predict(adversarial.reshape((1, img_rows, img_cols, channels))).argmax()], 'Truth:', labels[correct_label.argmax()])
    # Show the perturbed image (grayscale data must be 2-D for imshow).
    if channels == 1:
        plt.imshow(adversarial.reshape(img_rows, img_cols))
    else:
        plt.imshow(adversarial)
    plt.show()
    # Show the original image for comparison.
    if channels == 1:
        plt.imshow(image.reshape(img_rows, img_cols))
    else:
        plt.imshow(image)
    plt.show()

# Inspect the results and choose an ε that keeps the perturbed images visually
# similar to the originals for a human observer.

GENERATION OF ADVERSARIAL EXAMPLES AND THEIR EVALUATION ON THE NORMALLY TRAINED MODEL

In [0]:
# Take one batch from each generator: 20,000 adversarial examples for training
# and 10,000 for evaluation, crafted against the current (baseline) `model`.
# NOTE(review): generate_adversarials draws from x_train/y_train, so the
# "test" adversarials are built from training images, not from x_test.
x_adversarial_train, y_adversarial_train = next(generate_adversarials(20000))
x_adversarial_test, y_adversarial_test = next(generate_adversarials(10000))

# Prints [loss, accuracy] of the undefended model on the adversarial set.
print("Base accuracy on adversarial images:", model.evaluate(x=x_adversarial_test, y=y_adversarial_test, verbose=0))

ADVERSARIAL TRAINING

In [0]:
# Adversarial training: continue fitting the already-trained `model` on the
# adversarial examples only, while validating on the clean test images.
model.fit(x_adversarial_train, y_adversarial_train,
          batch_size=256,
          epochs=30,
          validation_data=(x_test, y_test))

EVALUATION OF THE RE-TRAINED MODEL

In [0]:
# Each evaluate() call prints [loss, accuracy].
# First: the adversarial set that was crafted against the baseline model.
print("Defended accuracy on adversarial images:", model.evaluate(x=x_adversarial_test, y=y_adversarial_test, verbose=0))


# Second: the clean test images, to see what the defence cost on regular data.
print("Defended accuracy on regular images:", model.evaluate(x=x_test, y=y_test, verbose=0))


# Third: a fresh adversarial batch. adversarial_pattern reads the current global
# `model`, so these examples are crafted against the re-trained network.
new_x_adversarial_test, new_y_adversarial_test = next(generate_adversarials(10000))
print("Defended accuracy on adversarial new images:", model.evaluate(x=new_x_adversarial_test, y=new_y_adversarial_test, verbose=0))

TRAIN VGG16