# Implementing adversarial images and attacks with Keras and TensorFlow

In [1]:
# import necessary packages
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.applications.resnet50 import decode_predictions
from tensorflow.keras.applications.resnet50 import preprocess_input
import tensorflow as tf
import numpy as np
import argparse
import cv2

* The "SparseCategoricalCrossentropy" computes the categorical cross-entropy loss between the labels and predictions. 
* By using the sparse implementation of categorical cross-entropy, we can pass integer class labels directly and do not have to one-hot encode them explicitly (as we otherwise would with scikit-learn’s LabelBinarizer or Keras/TensorFlow’s to_categorical utility).

### preprocess_image

In [2]:
def preprocess_image(image):
	"""Turn a BGR image into a batched 224x224 RGB array.

	The channels are reordered from BGR to RGB, the result is resized
	to 224x224, and a leading batch axis of length 1 is added.
	``preprocess_input`` is intentionally not applied in this helper.

	Args:
		image: the image array to convert (passed to ``cv2.cvtColor``
			with ``cv2.COLOR_BGR2RGB``).

	Returns:
		The resized RGB array with one extra leading axis.
	"""
	# reorder channels, then bring the image to the 224x224 input size
	rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
	resized = cv2.resize(rgb, (224, 224))
	# prepend the batch axis and hand the result back
	return resized[np.newaxis, ...]

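 * Note that we deliberately leave out the preprocess_input function call here; it is applied later, inside generate_adversaries, after the perturbation has been added to the image.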

### simple helper utility, clip_eps
* The goal of this function is to accept an input tensor and then clip any values inside the input to the range [-eps, eps]

In [3]:
def clip_eps(tensor, eps):
	"""Clamp every element of ``tensor`` to the closed range [-eps, eps].

	Args:
		tensor: the values to clamp.
		eps: half-width of the allowed range; the bounds are -eps and eps.

	Returns:
		The result of ``tf.clip_by_value`` with those two bounds.
	"""
	# element-wise: values below -eps become -eps, values above eps become eps
	lower, upper = -eps, eps
	return tf.clip_by_value(tensor, lower, upper)

### generate_adversaries function, which is the meat of our adversarial attack:
#### This function accepts four required parameters and an optional fifth one:

* model :  
Our ResNet50 model (you could swap in a different pre-trained model such as VGG16, MobileNet, etc. if you prefer).
* baseImage (tf.constant):  
The original non-perturbed input image that we wish to construct an adversarial attack for, causing our model to misclassify it.
* delta (tf.Variable):  
Our noise vector, which will be added to the baseImage, ultimately causing the misclassification. We’ll update this delta vector by means of gradient descent.
* classIdx (int):  
The integer class label index we obtained by running the predict_normal.py
script.
* steps (int):  
Number of gradient descent steps to perform (defaults to 50
steps).


In [1]:
def generate_adversaries(model, baseImage, delta, classIdx, steps=50):
	"""Iteratively update ``delta`` so that ``baseImage + delta`` moves away
	from the class ``classIdx``.

	Relies on the notebook-level globals ``optimizer``, ``sccLoss`` and
	``EPS``; they must be defined before this function is called.

	Args:
		model: callable invoked as ``model(x, training=False)``.
		baseImage: the unperturbed input image tensor.
		delta: the perturbation variable; it is modified in place.
		classIdx: integer label of the original class.
		steps: number of update steps to run (defaults to 50).

	Returns:
		The same ``delta`` object after the last update.
	"""
	for stepIdx in range(steps):
		# label tensor for the *original* class
		trueLabel = tf.convert_to_tensor([classIdx])
		with tf.GradientTape() as tape:
			# make sure the perturbation is tracked by the tape
			tape.watch(delta)
			# perturb the image first, then apply the ResNet50 preprocessing
			adversary = preprocess_input(baseImage + delta)
			predictions = model(adversary, training=False)
			# negated loss: minimising it drives the loss on the original
			# class upwards
			loss = -sccLoss(trueLabel, predictions)
		# report progress on every fifth step
		if stepIdx % 5 == 0:
			print("step: {}, loss: {} ...".format(stepIdx, loss.numpy()))
		# gradient of the loss with respect to the perturbation, followed
		# by one optimizer step on the perturbation itself
		grads = tape.gradient(loss, delta)
		optimizer.apply_gradients([(grads, delta)])
		# NOTE(review): assign_add adds the clipped values onto delta
		# (delta += clip(delta)) instead of replacing delta with them, so
		# delta itself is not limited to [-EPS, EPS] -- confirm whether
		# `assign` was intended here.
		delta.assign_add(clip_eps(delta, eps=EPS))
	return delta

* The noise vector is far from random. Instead, the pixels in the noise vector are “equal to the sign of the elements of the gradient of the cost function with respect to the input image” (Goodfellow et al.).

In [33]:
# input/output image locations and the class label used by the attack
# (hard-coded here rather than parsed from command-line arguments)
image_in_path = 'pyimagesearch/pig.jpg'
image_out_path = 'pyimagesearch/adversarial.png'
# ImageNet class ID of the predicted label for the input image;
# passed to generate_adversaries as the original class index
class_idx = 341

In [34]:
# define the epsilon and learning rate constants
# NOTE(review): EPS bounds the values produced by clip_eps, and the
# perturbation is added to an image that is later clipped to [0, 255];
# 2 / 255 is usually quoted for images scaled to [0, 1] -- confirm the
# intended pixel scale.
EPS = 2 / 255.0
LR = 0.1
# load the input image from disk and preprocess it
print("[INFO] loading image...")
image = cv2.imread(image_in_path)
# cv2.imread signals failure by returning None instead of raising, so
# stop here with a clear message rather than failing inside
# preprocess_image
if image is None:
	raise FileNotFoundError(
		"could not read image from '{}'".format(image_in_path))
image = preprocess_image(image)
print("[INFO] loading finished size is {}".format(image.shape))

[INFO] loading image...
[INFO] loading finished size is (1, 224, 224, 3)


* EPS is the epsilon value used for clipping tensors when constructing the adversarial image. An EPS value of 2 / 255.0 is a standard value used in adversarial publications and tutorials.
* A value of LR = 0.1 was obtained by empirical tuning — you may need to update this value when constructing your own adversarial images.

### load our ResNet model

In [35]:
# load the pre-trained ResNet50 model (ImageNet weights) for running
# inference
print("[INFO] loading pre-trained ResNet50 model...")
model = ResNet50(weights="imagenet")
print("[INFO] ResNet50 model Loaded")
# initialize optimizer and loss function; generate_adversaries reads
# both `optimizer` and `sccLoss` as globals, so this cell has to run
# before the perturbation is generated
optimizer = Adam(learning_rate=LR)
sccLoss = SparseCategoricalCrossentropy()


[INFO] loading pre-trained ResNet50 model...
[INFO] ResNet50 model Loaded


### Let’s now construct our adversarial image:

In [36]:
# wrap the preprocessed input image in a constant float32 tensor; this
# is the unperturbed starting point of the attack
# the perturbation vector starts as all zeros with the same shape as the
# base image and is updated during the attack; because it is a trainable
# variable, GradientTape watches it automatically when it is accessed
# inside the tape context
baseImage = tf.constant(image, dtype=tf.float32)
delta = tf.Variable(tf.zeros_like(baseImage), trainable=True)
# generate the perturbation vector to create an adversarial example
# (generate_adversaries updates delta in place and returns it, so
# deltaUpdated refers to the same variable as delta)
print("[INFO] generating perturbation...")
deltaUpdated = generate_adversaries(model, baseImage, delta, class_idx)


[INFO] generating perturbation...
step: 0, loss: -0.0005541696446016431 ...
step: 5, loss: -0.0057172346860170364 ...
step: 10, loss: -1.9002642631530762 ...
step: 15, loss: -8.364564895629883 ...
step: 20, loss: -15.761852264404297 ...
step: 25, loss: -16.118194580078125 ...
step: 30, loss: -16.118194580078125 ...
step: 35, loss: -16.118196487426758 ...
step: 40, loss: -16.118194580078125 ...
step: 45, loss: -16.118194580078125 ...


* The generate_adversaries function runs, updating the delta perturbation vector along the way, resulting in deltaUpdated, the final noise vector.

In [37]:
# create the adversarial example, swap color channels, and save the
# output image to disk
print("[INFO] creating adversarial example...")
# add the perturbation to the base image and drop the batch axis
adverImage = (baseImage + deltaUpdated).numpy().squeeze()
# keep pixel values inside [0, 255] before casting to 8-bit
adverImage = np.clip(adverImage, 0, 255).astype("uint8")
# convert RGB back to BGR, the channel order used by OpenCV
adverImage = cv2.cvtColor(adverImage, cv2.COLOR_RGB2BGR)
# write the adversarial image to the configured output path; cv2.imwrite
# reports a failed write through its boolean return value, so check it
# instead of failing silently
if not cv2.imwrite(image_out_path, adverImage):
	print("[WARN] could not write adversarial image to {}".format(
		image_out_path))
# the window contents are only drawn once cv2.waitKey runs, which
# happens in the final inference cell
cv2.imshow('adversarial', adverImage)

[INFO] creating adversarial example...


### The real question is, can our newly constructed adversarial image fool our ResNet model?

In [None]:
# run inference with this adversarial example, parse the results,
# and display the top-1 predicted result
print("[INFO] running inference on the adversarial example...")
# apply the same preprocessing that was used while generating the
# perturbation (base image + delta, then preprocess_input)
preprocessedImage = preprocess_input(baseImage + deltaUpdated)
predictions = model.predict(preprocessedImage)
# keep the decoded top-3 entries for the first (only) image in the batch
predictions = decode_predictions(predictions, top=3)[0]
# the first entry is the top-ranked prediction; element [1] is used as
# the label and element [2] as the score, shown as a percentage
label = predictions[0][1]
confidence = predictions[0][2] * 100
print("[INFO] label: {} confidence: {:.2f}%".format(label,confidence))
# draw the top-most predicted label on the adversarial image along
# with the confidence score (this draws onto adverImage in place)
text = "{}: {:.2f}%".format(label, confidence)
cv2.putText(adverImage, text, (3, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5,(0, 255, 0), 2)
# show the output image
cv2.imshow("Output", adverImage)
# block until a key is pressed, then close every OpenCV window so no
# stale windows stay attached to the notebook kernel
cv2.waitKey(0)
cv2.destroyAllWindows()

[INFO] running inference on the adversarial example...
[INFO] label: Ibizan_hound confidence: 100.00%
