<a href="https://colab.research.google.com/github/MarinaChau/AttaCoeurs/blob/main/BlackBox_Attack_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
# Mount Google Drive under the relative path 'myDrive'. The weights path used
# further down ("/content/myDrive/MyDrive/...") expects this cell to be run
# from /content.
from google.colab import drive
drive.mount('myDrive')

Mounted at myDrive


In [5]:
# Fetch the project sources (attack and custom-model helpers) into ./AttaCoeurs.
!git clone https://github.com/MarinaChau/AttaCoeurs.git

Cloning into 'AttaCoeurs'...
remote: Enumerating objects: 155, done.[K
remote: Counting objects: 100% (151/151), done.[K
remote: Compressing objects: 100% (95/95), done.[K
remote: Total 155 (delta 66), reused 118 (delta 48), pack-reused 4[K
Receiving objects: 100% (155/155), 8.97 MiB | 27.93 MiB/s, done.
Resolving deltas: 100% (66/66), done.


# Project Adversarial Attack

In [6]:
# Work from inside the cloned repository — presumably so that the
# `AttaCoeurs.src` imports in the next cell resolve; TODO confirm the package layout.
%cd AttaCoeurs

/content/AttaCoeurs


In [7]:
import tensorflow as tf

import numpy as np
import matplotlib.pyplot as plt

from AttaCoeurs.src import adversarial_attacks as attacks
from AttaCoeurs.src import custom_model as models



## 1.1 Code $\ell_\infty$-PGD attack & observe robustness of neural networks


### STEP 1: Load Cifar10 Dataset

In [8]:
# Fetch the CIFAR-10 train/test splits that ship with Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()


def _as_unit_images(raw_images, count):
    """Return `raw_images` as a float32 tensor of shape (count, 32, 32, 3) scaled by 1/255."""
    return tf.constant(raw_images.reshape(count, 32, 32, 3).astype("float32") / 255)


# Images: float32 tensors with pixel values divided by 255.
x_train, x_test = _as_unit_images(x_train, 50000), _as_unit_images(x_test, 10000)

# Labels: same values, stored as float32 tensors.
y_train, y_test = (tf.constant(labels.astype("float32")) for labels in (y_train, y_test))

# Report the resulting tensor shapes.
print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"x_test shape: {x_test.shape}")
print(f"y_test shape: {y_test.shape}")

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
x_train shape: (50000, 32, 32, 3)
y_train shape: (50000, 1)
x_test shape: (10000, 32, 32, 3)
y_test shape: (10000, 1)


In [9]:
# CIFAR-10 class index -> human-readable label.
# NOTE(review): this name rebinds the built-in `dict` for the rest of the
# notebook, so later cells cannot call `dict(...)`; it is kept because other
# cells may refer to it by this name.
dict = {
    index: name
    for index, name in enumerate((
        "airplane", "automobile", "bird", "cat", "deer",
        "dog", "frog", "horse", "ship", "truck",
    ))
}

### STEP 2: Build a simple classifier

In [10]:
# Architecture hyperparameters of the convolutional classifier.
class CnnParams(object):
    """Plain container for the CNN's layer sizes; every instance carries the same values."""

    def __init__(self):
        # Input image shape (height, width, channels) and number of output classes.
        self.input_shape = [32, 32, 3]
        self.num_classes = 10

        # VGG-style stack: two convolutions per width, widths 32 -> 64 -> 128.
        self.num_conv_filters = [width for width in (32, 64, 128) for _ in range(2)]
        self.kernel_size = (3,) * 2
        self.pool_size = (2,) * 2

        # Sizes of the fully connected layers placed before the output layer.
        self.num_fc_units = [128]


# Module-level instance read by get_cnn_model().
cnn_params = CnnParams()


def get_cnn_model(adv_training_with=None, gaussian_noise=0.2):
    """Assemble the CNN classifier described by the module-level ``cnn_params``.

    The network is a stack of 3x3 ReLU convolutions ('same' padding,
    he_uniform initialisation) with max pooling after every convolution except
    the last one, followed by the fully connected layers and a softmax output.

    :param adv_training_with: forwarded unchanged to ``models.CustomModel``.
    :param gaussian_noise: currently unused; the noise layer it configured is
        not part of the model.
    :return: an (uncompiled) ``models.CustomModel`` instance.
    """
    image_in = tf.keras.Input(shape=cnn_params.input_shape,
                              dtype=tf.float32, name="image")

    features = image_in
    last_conv = len(cnn_params.num_conv_filters) - 1

    # Convolutional stack; pooling is skipped after the final convolution.
    for index, filters in enumerate(cnn_params.num_conv_filters):
        conv = tf.keras.layers.Conv2D(
            filters, cnn_params.kernel_size, padding='same',
            activation='relu', kernel_initializer='he_uniform')
        features = conv(features)
        if index != last_conv:
            features = tf.keras.layers.MaxPooling2D(cnn_params.pool_size)(features)

    features = tf.keras.layers.Flatten()(features)

    # Fully connected head.
    for width in cnn_params.num_fc_units:
        features = tf.keras.layers.Dense(width, activation='relu')(features)

    probabilities = tf.keras.layers.Dense(
        cnn_params.num_classes, activation='softmax')(features)

    return models.CustomModel(inputs=image_in, outputs=probabilities,
                              adv_training_with=adv_training_with)



In [11]:
# Shared training configuration.
# The sparse variants are used because the labels are class indices of shape
# (N, 1) rather than one-hot vectors.
LOSS = tf.keras.losses.SparseCategoricalCrossentropy()
# Keras expects metric *instances* (or names); storing the bare class would
# hand compile() an object it cannot call as metric(y_true, y_pred).
METRICS = [tf.keras.metrics.SparseCategoricalAccuracy()]
OPTIMIZER = tf.keras.optimizers.RMSprop()

In [12]:
# Build the classifier with its default arguments (adv_training_with=None)
# and print the layer-by-layer summary.
model = get_cnn_model()
model.summary()

Model: "custom_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image (InputLayer)          [(None, 32, 32, 3)]       0         
                                                                 
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 16, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 8, 8, 32)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 8, 8, 64)         

In [12]:
# Compile the model with the shared optimizer and loss, tracking accuracy.
model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=["accuracy"])

# Training options. A literal is used on purpose: the name `dict` was rebound
# to the label table earlier in the notebook, so `dict(...)` is not callable here.
_fit_options = {"batch_size": 32, "epochs": 80, "validation_split": 0.2}

# Train; 20% of the training data is held out for validation.
history = model.fit(x_train, y_train, **_fit_options)

# Evaluate on the test split.
print("\n")
evaluation = model.evaluate(x_test, y_test, verbose=2)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


313/313 - 1s - loss: 1.4547 - accuracy: 0.6714 

In [15]:
# Re-run the test-set evaluation (same call as at the end of the training cell).
evaluation = model.evaluate(x_test,y_test, verbose=2)


313/313 - 1s - loss: 1.4547 - accuracy: 0.6714 - 653ms/epoch - 2ms/step


In [17]:
!pwd

/content/AttaCoeurs


In [13]:
# Destination of the trained weights on the mounted Google Drive.
# NOTE(review): absolute Colab path; it only exists after the Drive-mount cell has run.
filepath = "/content/myDrive/MyDrive/Projet DL/model_weights/cifar10_weights"

# Persist the weights, replacing any previous checkpoint at that path
# (save format and options are left at their defaults).
model.save_weights(filepath, overwrite=True)
print('Model Saved!')

Model Saved!


In [14]:
# Restore the weights saved above into the existing `model`, in place.
# NOTE(review): `savedModel` holds whatever load_weights() returns, which is
# presumably not a model object — keep using `model` for predictions; confirm
# against the Keras documentation.
savedModel = model.load_weights(filepath)
print('Model Loaded!')

Model Loaded!


In [17]:
x_test[0].shape

TensorShape([32, 32, 3])

In [20]:
# Predict class probabilities for the first test image; expand_dims adds the
# leading batch axis, turning the (32, 32, 3) image into a batch of one.
pred = model.predict(tf.expand_dims(x_test[0], axis=0))


In [21]:
pred

array([[0.10946739, 0.1678851 , 0.11675544, 0.06360476, 0.11771584,
        0.06758458, 0.11948753, 0.1097965 , 0.06881279, 0.05888999]],
      dtype=float32)

In [23]:
# Translate the most probable class index into its name using the CIFAR-10
# label table defined earlier (the module-level `dict`). No module-level
# `get_label` helper exists at this point of the notebook, so the lookup is
# done directly.
lab = dict[int(np.argmax(pred))]


In [24]:
lab

'automobile'

# PART 2 BLACKBOX ATTACK

In [26]:
import numpy as np
import time
import os
from PIL import Image




In [25]:
class BoundaryAttack:
    """
    Object Boundary Attack, use method __call__ to create adversarial examples
    """

    def __init__(self, image_size, delta, epsilon, model_weights):
        print("Initializing the instance")
        self.image_size = image_size
        self.delta = delta
        self.epsilon = epsilon
        self.model_weights = model_weights

    def orthogonal_perturbation(self, prev_sample, target_sample):
        """
        Method to generate the orthogonal projection.

        :param prev_sample:
        :param target_sample:
        """
        # Sample from a Gaussian Distribution eta(k) perturbation
        perturb = np.random.randn(1, 32, 32, 3)

        # Rescale and clip the perturbation such that condition (1) and (2) hold
        # Basically getting the unit vector of the perturbation (Frobenius norm)
        perturb = perturb / np.linalg.norm(perturb, axis=(1,2))

        # Multiply the unit vector by the magnitude of the normalized perturbation
        perturb = perturb * (self.delta * np.mean(self.get_diff(target_sample, prev_sample))
        )

        # Project the perturbation onto the sphere  around the target
        # Orthogonal vector to sphere surface 
        diff = (target_sample - prev_sample).astype(np.float32)

        # Getting the orthogonal unit vector
        diff = diff / self.get_diff(target_sample - prev_sample)

        # We project onto the orthogonal then substract from perturb
        perturb = perturb - ((np.vdot(perturb, diff) / np.linalg.norm(diff) ** 2) * diff)

        # Check the overflow
        overflow = (prev_sample + perturb) - 255
        perturb -= overflow * (overflow > 0)
        return perturb 


    def get_diff(self, sample_1, sample_2):
	    """
        Channel-wise norm of difference between samples.
        """
	    return np.linalg.norm(sample_1 - sample_2, axis=(1, 2))

    def forward_perturbation(self, prev_sample, target_sample):
        """
        Generate forward perturbation - the perturbation reduces the distance
        of the perturbed image towards the original input by a relative amount
        epsilon.

        :param prev_sample: 
        :param target_sample:
        """
        perturb = (target_sample - prev_sample).astype(np.float32)
        perturb = perturb * epsilon
        return perturb

    def get_converted_pred(self, sample, model):
        """
        """
        sample = (sample).astype(np.uint8).astype(np.float32)
        label = model.predict(sample)
        return label

    def preprocess(sample_path):
	    """Load and preprocess image file."""
	    img = image.load_img(sample_path, target_size=(224, 224))
	    x = image.img_to_array(img)
	    x = np.expand_dims(x, axis=0)
	    x = preprocess_input(x)
	    return x

    def get_label(self, pred):
        """
            Method to get the label
        """
        dict = {0 : "airplane",
            1 : "automobile",
            2 :"bird",
            3 : "cat",
            4:  "deer",
            5: "dog",
            6 : "frog",
            7 : "horse",
            8 : "ship",
            9 : "truck"}
        high_class = np.argmax(pred)
        label = dict[high_class]
        return label


    
    def __call__(self, model):
        
	    # Load model, images and other parameters
	    classifier = model.load_weights(self.model_weights)

        # Pick initial example and target class randomly
	    initial_sample = preprocess('images/original/awkward_moment_seal.png')
	    target_sample = preprocess('images/original/bad_joke_eel.png')
     

	    folder = time.strftime('%Y%m%d_%H%M%S', time.localtime())
	    os.mkdir(os.path.join("images", folder))
	    save_image(np.copy(initial_sample), classifier, folder)
	    attack_class = np.argmax(classifier.predict(initial_sample))
	    target_class = np.argmax(classifier.predict(target_sample))

	    adversarial_sample = initial_sample
	    n_steps = 0
	    n_calls = 0
	    epsilon = 1.
	    delta = 0.1

	    # Move first step to the boundary
	    while True:
	    	trial_sample = adversarial_sample + self.forward_perturbation(adversarial_sample, target_sample)
	    	prediction = classifier.predict(trial_sample)
	    	n_calls += 1
	    	if np.argmax(prediction) == attack_class:
	    		adversarial_sample = trial_sample
	    		break
	    	else:
	    		epsilon *= 0.9

	    # Iteratively run attack
	    while True:
	    	print("Step #{}...".format(n_steps))
	    	# Orthogonal step
	    	print("\tDelta step...")
	    	d_step = 0
	    	while True:
	    		d_step += 1
	    		print("\t#{}".format(d_step))
	    		trial_samples = []
	    		for i in np.arange(10):
	    			trial_sample = adversarial_sample + orthogonal_perturbation(delta, adversarial_sample, target_sample)
	    			trial_samples.append(trial_sample)
	    		predictions = classifier.predict(trial_samples)
	    		n_calls += 10
	    		predictions = np.argmax(predictions, axis=1)
	    		d_score = np.mean(predictions == attack_class)
	    		if d_score > 0.0:
	    			if d_score < 0.3:
	    				delta *= 0.9
	    			elif d_score > 0.7:
	    				delta /= 0.9
	    			adversarial_sample = np.array(trial_samples)[np.where(predictions == attack_class)[0][0]]
	    			break
	    		else:
	    			delta *= 0.9
	    	# Forward step
	    	print("\tEpsilon step...")
	    	e_step = 0
	    	while True:
	    		e_step += 1
	    		print("\t#{}".format(e_step))
	    		trial_sample = adversarial_sample + forward_perturbation(epsilon, adversarial_sample, target_sample)
	    		prediction = classifier.predict(trial_sample)
	    		n_calls += 1
	    		if np.argmax(prediction) == attack_class:
	    			adversarial_sample = trial_sample
	    			epsilon /= 0.5
	    			break
	    		elif e_step > 500:
	    			break
			    else:
                    epsilon *= 0.5

		    n_steps += 1
		    chkpts = [1, 5, 10, 50, 100, 500]
		    if (n_steps in chkpts) or (n_steps % 500 == 0):
		    	print("{} steps".format(n_steps))
		    	save_image(np.copy(adversarial_sample), classifier, folder)
		    diff = np.mean(get_diff(adversarial_sample, target_sample))
		    if diff <= 1e-3 or e_step > 500:
		    	print("{} steps".format(n_steps))
		    	print("Mean Squared Error: {}".format(diff))
		    	save_image(np.copy(adversarial_sample), classifier, folder)
		    	break

		    print("Mean Squared Error: {}".format(diff))
		    print("Calls: {}".format(n_calls))
		    print("Attack Class: {}".format(attack_class))
		    print("Target Class: {}".format(target_class))
		    print("Adversarial Class: {}".format(np.argmax(prediction)))
  


IndentationError: ignored

In [None]:
# Scratch check of the normalisation used in the attack: draw a standard-normal
# array with a leading batch axis of 1, then divide by its norm over axes 1 and 2.
perturb = np.random.randn(1, *(224, 224, 3))
perturb = perturb / np.linalg.norm(perturb, axis=(1, 2))

In [45]:
# Draw a fresh standard-normal perturbation with a leading batch axis of 1.
perturb = np.random.randn(1, 224, 224, 3)
perturb.shape  # not echoed: only the last expression of a cell is displayed
perturb

TypeError: ignored

In [24]:
# Normalise in place by the norm over axes 1 and 2.
# Note: this mutates `perturb` from the previous cell, so the cell reads its own output when re-run.
perturb /= np.linalg.norm(perturb, axis=(1, 2))
perturb

array([[[[ 3.49066296e-03,  1.93777196e-03, -4.80984435e-03],
         [ 2.69266037e-03,  2.10417116e-03, -4.63658692e-03],
         [ 6.42859290e-03, -9.34718440e-03, -7.50181022e-04],
         ...,
         [-5.59760187e-03, -2.13336672e-03,  3.93314792e-03],
         [-2.81394665e-03,  4.85428465e-03,  1.12653709e-03],
         [-5.29175854e-05,  9.49804506e-03,  1.07695761e-02]],

        [[ 7.34786705e-03,  4.95181657e-03, -4.66485700e-03],
         [-2.49297371e-04, -3.01983550e-03,  4.60130612e-03],
         [ 3.01278315e-04,  4.37941744e-04,  4.54172519e-03],
         ...,
         [-5.81005179e-03,  1.42218169e-03,  9.19224747e-03],
         [-5.28037841e-03, -4.14497481e-03,  1.29692976e-03],
         [ 3.17139334e-03,  5.82867051e-03, -4.78086369e-04]],

        [[-1.35603706e-03,  6.27038170e-03,  3.87728727e-03],
         [ 3.88820178e-04,  2.52056214e-03,  8.23371908e-04],
         [ 5.02677385e-03,  2.20932522e-03,  1.66151587e-02],
         ...,
         [ 1.16041122e-0