In [None]:
#@title Choose seed value
# Colab form parameters: `Seed` is passed to the TensorFlow/NumPy seeding calls below,
# `randomDim` is the length of the latent noise vector fed to the GAN generator.
Seed = 1995 #@param {type:"integer"}
randomDim = 10 #@param {type:"integer"}
# CleverHans supplies the Carlini & Wagner attack imported further down.
!pip install cleverhans
# Delete the ./logs directory before loading the TensorBoard extension.
!rm -rf ./logs/
%load_ext tensorboard
%matplotlib inline

import datetime
import numpy as np
from numpy.random import Generator, PCG64
from matplotlib import pyplot as plt
import seaborn as sns
from cleverhans.tf2.attacks.carlini_wagner_l2 import carlini_wagner_l2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from IPython.display import clear_output
from random import sample

import random  # stdlib RNG that backs `sample` (imported above via `from random import sample`)

# Seed every random source the notebook draws from, so that a fresh
# "Restart & Run All" reproduces the same noise and the same sampled test images.
tf.random.set_seed(Seed)  # Seeding Tensorflow.
np.random.seed(Seed)  # Seeding Numpy.
random.seed(Seed)  # Seeding Python's `random`, which `sample` uses to pick test images.
# log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)



# MNIST Dataset - Loading & Preprocessing

In [None]:
num_classes = 10
img_rows, img_cols, img_colors = 28, 28, 1  # Greyscale images of 28 X 28.


def _scale_and_add_channel(raw_images):
    """Map pixel value 0 -> -1 and 255 -> +1 (float32) and append a trailing channel axis."""
    scaled = (raw_images.astype(np.float32) - 127.5)/127.5
    return np.expand_dims(scaled, axis=-1)


# The [-1, 1] range matches the generator's tanh output layer.
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
X_train = _scale_and_add_channel(X_train)
X_test = _scale_and_add_channel(X_test)

# One-hot encode the digit labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


# GAN Components

## Generator Model

In [None]:
# Optimizer
adam = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)

# Generator: latent vector -> 7x7x128 feature map -> two 2x upsampling stages -> 28x28x1 image.
# The final tanh keeps the output in [-1, 1].
generator = keras.Sequential(
    [
        layers.Dense(7*7*128, input_dim=randomDim),
        layers.LeakyReLU(0.2),
        layers.Reshape((7, 7, 128)),
        layers.UpSampling2D(size=(2, 2)),
        layers.Conv2D(64, kernel_size=(5, 5), padding='same'),
        layers.LeakyReLU(0.2),
        layers.UpSampling2D(size=(2, 2)),
        layers.Conv2D(1, kernel_size=(5, 5), padding='same', activation='tanh'),
    ],
    name="MNIST_GAN_GENERATOR",
)
generator.compile(loss='binary_crossentropy', optimizer=adam)

generator.summary()

## Discriminator Model

In [None]:
# Discriminator
# Two strided 5x5 convolutions (each followed by LeakyReLU and dropout) feed a single
# sigmoid unit that scores an image as real vs. generated.
discriminator = keras.Sequential(
    [
        layers.Conv2D(64, kernel_size=(5, 5), strides=(2, 2), padding='same', input_shape=(28, 28, 1)),
        layers.LeakyReLU(0.2),
        layers.Dropout(0.3),
        layers.Conv2D(128, kernel_size=(5, 5), strides=(2, 2), padding='same'),
        layers.LeakyReLU(0.2),
        layers.Dropout(0.3),
        layers.Flatten(),
        layers.Dense(1, activation='sigmoid'),
    ],
    name="MNIST_GAN_DISCRIMINATOR",
)
discriminator.compile(loss='binary_crossentropy', optimizer=adam)

discriminator.summary()

## Combined Model

In [None]:
# Combined network
# The discriminator is flagged non-trainable before `gan` is compiled; the training
# loop flips the flag back on around the discriminator's own train_on_batch call.
discriminator.trainable = False
ganInput = layers.Input(shape=(randomDim,))
x = generator(ganInput)  # latent vector -> generator output
ganOutput = discriminator(x)  # generator output -> discriminator's sigmoid score
gan = keras.Model(inputs=ganInput, outputs=ganOutput)
gan.compile(loss='binary_crossentropy', optimizer=adam)

## Graph Plot Functions for GAN Training

In [None]:
# Loss histories; GAN_train appends one discriminator and one generator value per epoch.
dLosses = []
gLosses = []

def plotLoss():
    """Plot the recorded discriminator and generator losses on a single figure."""
    plt.figure(figsize=(10, 8))
    for history, caption in ((dLosses, 'Discriminitive loss'),
                             (gLosses, 'Generative loss')):
        plt.plot(history, label=caption)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Create a wall of generated MNIST images
def plotGeneratedImages(z=None,examples=100, dim=(10, 10), figsize=(10, 10)):
    """Show a grid of images produced by the global `generator`.

    Args:
        z: latent vectors to decode; when None, `examples` vectors are drawn
            from a standard normal of width `randomDim`.
        examples: number of latent vectors to draw when `z` is None.
        dim: (rows, cols) of the subplot grid.
        figsize: matplotlib figure size.
    """
    if z is None:
        z = np.random.normal(0, 1, size=[examples, randomDim])
    images = generator.predict(z)
    grid_rows, grid_cols = dim[0], dim[1]

    plt.figure(figsize=figsize)
    for position, image in enumerate(images, start=1):
        plt.subplot(grid_rows, grid_cols, position)
        plt.imshow(image.reshape((28,28)), interpolation='nearest', cmap='gray_r')
        plt.axis('off')
    plt.tight_layout()
    plt.show()

## Training The GAN

In [None]:
# Mount Google Drive (Colab only); the GAN save/load paths used below live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#@title GAN Training Config

# Batch size and epoch count handed to GAN_train when training is enabled.
gan_batch_size: int = 128  #@param {type:"integer"}
gan_epochs: int = 100  #@param {type:"integer"}
# When False the GAN is not trained in this cell; the following cell loads a saved model instead.
Train_gan: bool = False #@param {type:"boolean"}

def GAN_train(epochs=1, batchSize=128):
    """Alternate discriminator and generator updates on the global GAN models.

    Reads the module-level `X_train`, `generator`, `discriminator`, `gan` and
    `randomDim`, and appends to the module-level `dLosses` / `gLosses` lists.

    Args:
        epochs: number of passes; each pass runs `len(X_train) // batchSize` batches.
        batchSize: number of real images (and of generated images) per discriminator step.
    """
    batchCount = int(X_train.shape[0] / batchSize)
    print('Epochs:', epochs)
    print('Batch size:', batchSize)
    print('Batches per epoch:', batchCount)

    for epoch in range(1, epochs+1):
        print('\n','-'*15, 'Epoch ', epoch, '-'*15)
        for batch in range(batchCount):
            # '\r' rewrites the same console line; `batch` is zero-based here.
            print("\rBatch number: %d/%d" % (batch, batchCount), end="")

            # Get a random set of input noise and images
            # (real images are drawn with replacement via random indices).
            noise = np.random.normal(0, 1, size=[batchSize, randomDim])
            imageBatch = X_train[np.random.randint(0, X_train.shape[0], size=batchSize)]

            # Generate fake MNIST images
            generatedImages = generator.predict(noise)
            # Real images occupy the first `batchSize` rows, generated ones the rest.
            X = np.concatenate([imageBatch, generatedImages])

            # Labels for generated and real data
            yDis = np.zeros(2*batchSize)
            # One-sided label smoothing - this is done because of the 'sigmoid' output of the discriminator
            yDis[:batchSize] = 0.9

            # Train discriminator
            discriminator.trainable = True
            dloss = discriminator.train_on_batch(X, yDis)

            # Train generator
            # Fresh noise, labelled 1 so the generator is rewarded when the discriminator says "real".
            noise = np.random.normal(0, 1, size=[batchSize, randomDim])
            yGen = np.ones(batchSize)
            discriminator.trainable = False
            gloss = gan.train_on_batch(noise, yGen)

        # Store loss of most recent batch from this epoch
        # NOTE(review): `dloss` / `gloss` are unbound here if batchCount is 0 (batchSize > dataset size).
        dLosses.append(dloss)
        gLosses.append(gloss)

        # Show a sample grid after the first epoch and then every fifth epoch.
        if epoch == 1 or epoch % 5 == 0:
            plotGeneratedImages()

    # Plot losses from every epoch
    plotLoss()

if Train_gan:
  GAN_train(gan_epochs, gan_batch_size)
  gan_path='/content/drive/MyDrive/AdversarialML/Ex5/Saved_model'
  # Save the combined model: the loading cell restores `gan` and pulls the generator
  # and discriminator out of it by layer index. (`model` was never defined here.)
  gan.save(gan_path)
  



In [None]:


if not Train_gan:
  # Training was skipped: restore the combined model from Drive and recover its two
  # sub-models by their position in the layer list.
  gan_path='/content/drive/MyDrive/AdversarialML/Ex5/ex5/ex5' # Zvika
  # gan_path = '/content/drive/MyDrive/Projects/AdversarialLearningCourse/HWs/AdversarialML/Ex5/ex5'  # Elad
  gan = tf.keras.models.load_model(gan_path)
  generator, discriminator = gan.get_layer(index=1), gan.get_layer(index=2)








# Attack Functions

> Below are all available attacks.

## FGSM

In [None]:
@tf.function
def FastGradientSignMethod(model, images, labels, epsilon=0.3):
    """Single-step untargeted FGSM: x' = x + epsilon * sign(dLoss(x, y)/dx).

    Args:
        model: callable mapping an image batch to class probabilities.
        images: batch to perturb (tf.Tensor or tf.Variable).
        labels: one-hot true labels for the categorical cross-entropy loss.
        epsilon: step size applied to the sign of the gradient.

    Returns:
        The perturbed batch. No clipping to a pixel range is applied here.
    """
    with tf.GradientTape() as grad:
        true_label_tensor = labels
        input_tensor = images
        # The tape auto-tracks only trainable variables; watch explicitly so the
        # gradient is also defined when `images` is a plain tensor.
        grad.watch(input_tensor)
        predicted = model(input_tensor)
        adv_loss = keras.losses.categorical_crossentropy(true_label_tensor, predicted)
    adv_grads = grad.gradient(adv_loss, input_tensor)

    # Move each pixel by epsilon in the direction that increases the loss.
    delta = tf.cast(tf.sign(adv_grads), tf.float32)

    delta = tf.multiply(epsilon, delta)
    adv_out = input_tensor + delta
    return adv_out

## TGSM

In [None]:
@tf.function
def TargetedGradientSignMethod(model, images, target, epsilon=0.3):
    """Single-step targeted attack: x' = x - epsilon * sign(dLoss(x, target)/dx).

    Args:
        model: callable mapping an image batch to class probabilities.
        images: batch to perturb (tf.Tensor or tf.Variable).
        target: one-hot labels of the class the attack should steer towards.
        epsilon: step size applied to the sign of the gradient.

    Returns:
        The perturbed batch. No clipping to a pixel range is applied here.
    """
    with tf.GradientTape() as grad:
        target_label_tensor = target
        input_tensor = images
        # The tape auto-tracks only trainable variables; watch explicitly so the
        # gradient is also defined when `images` is a plain tensor.
        grad.watch(input_tensor)
        predicted = model(input_tensor)
        adv_loss = keras.losses.categorical_crossentropy(target_label_tensor, predicted)
    adv_grads = grad.gradient(adv_loss, input_tensor)

    # Move each pixel by epsilon in the direction that decreases the loss w.r.t. the target.
    delta = tf.cast(tf.sign(adv_grads), tf.float32)

    delta = tf.multiply(epsilon, delta)
    adv_out = input_tensor - delta
    return adv_out

## PGD

In [None]:
@tf.function 
def PGD_Step(model, images, prev_step, labels, epsilon, iter_eps, min_x, max_x, targeted):
    """One signed-gradient step followed by projection onto an L2 ball around `images`.

    Args:
        model: callable mapping an image batch to class probabilities.
        images: reference batch the perturbation is measured against.
        prev_step: current adversarial batch; the gradient is taken with respect to it.
            No explicit tape.watch is issued, so this presumably must be a tf.Variable
            (the PGD driver passes one) -- TODO confirm for other callers.
        labels: one-hot true labels.
        epsilon: L2 radius of the allowed perturbation.
        iter_eps: step size of the signed-gradient move.
        min_x, max_x: clipping range applied to the result.
        targeted: when truthy, the target class is derived as |true_class - 5|
            and the step descends the loss instead of ascending it.

    Returns:
        The projected and clipped adversarial batch.
    """
    with tf.GradientTape() as grad:
        input_tensor = prev_step
        if targeted:
          target_label = tf.math.abs(tf.math.subtract(tf.math.argmax(labels, axis=1), 5)) # target label =abs(label-5)
          target_label_tensor = tf.one_hot(target_label, num_classes)

        else:
          target_label_tensor = labels

        predicted = model(input_tensor)
        adv_loss = keras.losses.categorical_crossentropy(target_label_tensor, predicted)
    adv_grads = grad.gradient(adv_loss, input_tensor)



    # Perturb the input
    if targeted:
        adv_out = input_tensor - tf.multiply(iter_eps, tf.cast(tf.sign(adv_grads), tf.float32))
    else:
        adv_out = input_tensor + tf.multiply(iter_eps, tf.cast(tf.sign(adv_grads), tf.float32))

    # Project the perturbation to the epsilon ball (L2 projection)
    perturbation = adv_out - images
    # Per-image squared L2 norm; keepdims lets `factor` broadcast over H, W, C.
    norm = tf.reduce_sum(tf.square(perturbation), axis=(1,2,3), keepdims=True)
    # Floor avoids division by zero; note that the literal 10e-12 equals 1e-11.
    norm = tf.sqrt(tf.maximum(10e-12, norm))
    # Shrink only perturbations whose norm exceeds epsilon (factor < 1); others are kept as-is.
    factor = tf.minimum(1.0, tf.divide(epsilon, norm))
    adv_out = tf.clip_by_value(images + tf.multiply(perturbation, factor), min_x, max_x)

    return adv_out
  

@tf.function 
def PGD(model, images, labels, epsilon=0.1, iter_eps = 0.05, iterations=10, min_x=0.0, max_x=1.0, targeted=False):
    """Run `iterations` PGD_Step updates, writing each result back into `images`.

    `images` must support `.assign` (i.e. be a tf.Variable). It is updated in place,
    so the caller's variable holds the adversarial batch after the call.
    The default clipping range is [0, 1]; pass min_x / max_x for data scaled differently.
    """
    # Snapshot of the starting batch; used as the centre of the L2 ball in every step.
    source = tf.identity(images)
    # Alias, not a copy: assigning to adv_out below overwrites `images`.
    adv_out = images

    iteration = tf.constant(0, dtype=tf.int32)
    while tf.less(iteration, iterations):
        tf.print('Attack iteration:', iteration)
        adv_out.assign(PGD_Step(model, source, adv_out, labels, epsilon, iter_eps, min_x, max_x, targeted))
        iteration += 1

    return adv_out

## BIM

In [None]:
@tf.function
def BIM_Step(model, images, labels, epsilon, iter_eps, min_x, max_x, targeted, source=None):
    """One signed-gradient step of the Basic Iterative Method with an L-infinity clip.

    Args:
        model: callable mapping an image batch to class probabilities.
        images: current adversarial batch; the gradient is taken with respect to it.
        labels: one-hot labels used in the categorical cross-entropy loss.
        epsilon: L-infinity radius of the allowed perturbation.
        iter_eps: step size of the signed-gradient move.
        min_x, max_x: clipping range applied to the result.
        targeted: when truthy the step descends the loss, otherwise it ascends it.
        source: optional clean batch the epsilon ball is centred on. When omitted the
            ball is centred on `images` (the previous iterate), which bounds only the
            per-step change; this is kept for backward compatibility.

    Returns:
        The clipped adversarial batch.
    """
    center = images if source is None else source

    with tf.GradientTape() as grad:
        input_tensor = images
        # Explicit watch so the gradient is defined for tensors as well as variables.
        grad.watch(input_tensor)
        target_label_tensor = labels
        predicted = model(input_tensor)
        adv_loss = keras.losses.categorical_crossentropy(target_label_tensor, predicted)
    adv_grads = grad.gradient(adv_loss, input_tensor)

    # Perturb the input
    step = tf.multiply(iter_eps, tf.cast(tf.sign(adv_grads), tf.float32))
    if targeted:
        adv_out = input_tensor - step
    else:
        adv_out = input_tensor + step

    # Keep the accumulated perturbation inside the epsilon ball around `center`,
    # then clip the intermediate adversarial example to the valid data range.
    delta = adv_out - center
    delta = tf.clip_by_value(delta, -epsilon, epsilon)
    adv_out = center + delta
    adv_out = tf.clip_by_value(adv_out, min_x, max_x)

    return adv_out


@tf.function
def BIM(model, images, labels, epsilon=0.1, iter_eps = 0.05, iterations=10, min_x=0.0, max_x=1.0, targeted=False):
    """Run `iterations` BIM_Step updates, keeping the result within epsilon of the input.

    `images` must support `.assign` (i.e. be a tf.Variable). It is updated in place,
    so the caller's variable holds the adversarial batch after the call.
    The default clipping range is [0, 1]; pass min_x / max_x for data scaled differently.
    """
    # Snapshot of the clean batch taken before any in-place update. The epsilon ball
    # is anchored on it so the total perturbation never exceeds epsilon.
    source = tf.identity(images)
    adv_out = images
    iteration = tf.constant(0, dtype=tf.int32)

    while tf.less(iteration, iterations):
        tf.print('Iteration:', iteration)
        # `assign` writes the new iterate into the same tf.Variable rather than
        # rebinding the Python name, so every pass of this graph-mode loop reads
        # and updates one variable.
        adv_out.assign(BIM_Step(model, adv_out, labels, epsilon, iter_eps, min_x, max_x, targeted, source))
        iteration += 1

    return adv_out

## Attack Choices & Evaluation

In [None]:
def commence_attack(model, attack_type, orig_images, true_labels, 
                    target_labels=None, attack_iters: int=1, 
                    epsilon: float=0.3, iter_eps: float=0.3,
                    clip_min: float=-1.0, clip_max: float=1.0):
    """Run one named attack against `model` and return the adversarial images as a NumPy array.

    Args:
        model: classifier under attack.
        attack_type: one of "FGSM", "BIM(L_inf)", "PGD(L_2)", "PGD_target(L_2)",
            "Carlini & Wagner(L_2)".
        orig_images: clean image batch; it is never modified by this function.
        true_labels: one-hot true labels.
        target_labels: accepted for interface compatibility; the targeted PGD variant
            derives its own targets and does not read this argument.
        attack_iters: iteration count for the iterative attacks (max_iterations for C&W).
        epsilon: perturbation budget (used as the learning rate for C&W).
        iter_eps: per-iteration step size for BIM / PGD.
        clip_min, clip_max: valid pixel range. Defaults to [-1, 1], the range the
            notebook's preprocessing produces.

    Raises:
        ValueError: if `attack_type` is not one of the supported names.
    """
    epsilon = tf.Variable(epsilon, dtype=tf.float32)
    iter_eps = tf.Variable(iter_eps)
    attack_iters = tf.Variable(attack_iters, dtype=tf.int32)

    # BIM and PGD overwrite their input variable in place. Attack a private copy so
    # the caller's batch stays clean and can be reused for the next attack.
    attack_input = tf.Variable(tf.cast(tf.convert_to_tensor(orig_images), tf.float32))

    if attack_type == "FGSM":
      adv_images = FastGradientSignMethod(model, attack_input, true_labels, 
                                          epsilon=epsilon).numpy()

    elif attack_type == "BIM(L_inf)":
      adv_images = BIM(model, attack_input, true_labels, epsilon=epsilon,
                       iter_eps=iter_eps, targeted=False, 
                       iterations=attack_iters,
                       min_x=clip_min, max_x=clip_max).numpy()

    elif attack_type == "PGD(L_2)":
      adv_images = PGD(model, attack_input, true_labels, epsilon=epsilon,
                       iter_eps=iter_eps, targeted=False, 
                       iterations=attack_iters,
                       min_x=clip_min, max_x=clip_max).numpy()

    elif attack_type == "PGD_target(L_2)":
      adv_images = PGD(model, attack_input, true_labels, epsilon=epsilon,
                       iter_eps=iter_eps, targeted=True, # if  targeted=True , then target labels:=abs(true_labels-5)
                       iterations=attack_iters,
                       min_x=clip_min, max_x=clip_max).numpy()

    elif attack_type == "Carlini & Wagner(L_2)":
      adv_images = carlini_wagner_l2(model, attack_input.numpy(), targeted=False, 
                                     learning_rate=epsilon,
                                     max_iterations=attack_iters.numpy(),
                                     clip_min=clip_min, clip_max=clip_max)

    else:
      raise ValueError("Unknown attack_type: {!r}".format(attack_type))

    return adv_images

def TestAttack(model, adv_images, orig_images, true_labels, target_labels=None, targeted=False):
    '''
    A simple utility function for evaluating the success of an attack.

    Prints the model's loss on the adversarial images, the fraction that left the
    source class, (for targeted attacks) the fraction that reached the target class,
    and the mean per-image RMS distance between adversarial and original images.

    `model.evaluate` is expected to return [loss, accuracy].
    '''
    score = model.evaluate(adv_images, true_labels, verbose=0)
    print('Test loss: {:.2f}'.format(score[0]))
    print('Successfully moved out of source class: {:.2f}'.format( 1 - score[1]))

    if targeted:  # if targeted is True
        if target_labels is None:
            # Evaluating against missing labels would print a meaningless 0.00 score.
            print('Target labels were not provided; skipping targeted evaluation.')
        else:
            score = model.evaluate(adv_images, target_labels, verbose=0)
            print('Test loss: {:.2f}'.format(score[0]))
            print('Successfully perturbed to target class: {:.2f}'.format(score[1]))

    # Root-mean-square pixel difference per image, averaged over the batch.
    dist = np.mean(np.sqrt(np.mean(np.square(adv_images - orig_images), axis=(1,2,3))))
    print('Mean perturbation distance: {:.2f}'.format(dist))

def perform_all_attacks(model, target_labels,samples=100,save=False):
  """Run every supported attack on one random sample of test images and report each result.

  Reads the module-level `X_test`, `y_test`, `attack_iters`, `epsilon` and `iter_eps`.

  Args:
      model: classifier under attack.
      target_labels: one-hot labels used to score the targeted attack. When None they
          are derived as |true_class - 5|, the rule the targeted PGD step applies.
      samples: number of test images to draw (without replacement).
      save: when True, return the results; otherwise the function returns None.

  Returns:
      When `save` is True: (adv_images_all, X_test_sample, y_test_sample), where
      adv_images_all stacks `samples` adversarial images per attack, in the order of
      `attack_type_array`.
  """
  attack_type_array = ["FGSM", "BIM(L_inf)","Carlini & Wagner(L_2)", "PGD(L_2)","PGD_target(L_2)"]

  sample_index=sample(range(len(X_test)),samples)
  y_test_sample=y_test[sample_index].copy()
  X_test_sample=X_test[sample_index].copy()
  # One slot of `samples` images per attack, shaped like the sampled images.
  adv_images_all=np.zeros((samples*len(attack_type_array),) + X_test_sample.shape[1:])
  test_labels_tensor = tf.Variable(y_test_sample, dtype=tf.float32)

  # Labels against which the targeted attack is scored.
  if target_labels is None:
    source_classes = np.argmax(y_test_sample, axis=1)
    eval_targets = np.eye(y_test_sample.shape[1], dtype=np.float32)[np.abs(source_classes - 5)]
  else:
    eval_targets = target_labels

  for i,attack_type in enumerate(attack_type_array):
    is_attack_targeted: bool = attack_type == "PGD_target(L_2)"
    print('\n', attack_type + " method",'\n')

    # BIM and PGD overwrite their input variable in place, so give every attack a
    # fresh variable built from the clean sample.
    test_images_tensor = tf.Variable(X_test_sample, dtype=tf.float32)
    adv_images = commence_attack(model, attack_type, test_images_tensor, 
                                 test_labels_tensor,
                                 target_labels=target_labels,
                                 attack_iters=attack_iters, epsilon=epsilon, 
                                 iter_eps=iter_eps)

    print(adv_images.shape)

    adv_images_all[i*samples:i*samples+samples]=adv_images
    print(adv_images_all.shape)

    # Evaluate the performance of the attack.
    TestAttack(model, adv_images, X_test_sample, y_test_sample,
               target_labels=eval_targets,
               targeted=is_attack_targeted)
  if save:
    return adv_images_all,X_test_sample,y_test_sample


# Attack Config

> Shared parameters between all attacks.

In [None]:
#@title Attack Configuration
# Read as module-level globals by perform_all_attacks.
# `epsilon`: perturbation budget handed to every attack.
epsilon = 0.1 #@param {type:"number"}
# `iter_eps`: per-iteration step size for the iterative attacks.
iter_eps = 0.05 #@param {type:"number"}
#attack_type = "FGSM" #@param ["FGSM", "BIM(L_inf)", "PGD(L_2)",'PGD_target(L_2)']
# `attack_iters`: number of iterations for the iterative attacks.
attack_iters = 4 #@param {type:"integer"}

# Classifiers For Investigation

In [None]:
def CNN_mnist_model(img_rows, img_cols, img_colors, num_classes, learning_rate,
                    name = "CNN"):
    """Build and compile a small convolutional softmax classifier.

    Args:
        img_rows, img_cols, img_colors: input image height, width and channel count.
        num_classes: number of output classes.
        learning_rate: learning rate for the Adadelta optimizer.
        name: Keras model name.

    Returns:
        The compiled model (categorical cross-entropy loss, categorical accuracy metric).
    """
    relu = 'relu'
    stack = [
        layers.Conv2D(8, kernel_size=(3, 3), input_shape=(img_rows, img_cols, img_colors), activation=relu),
        layers.Conv2D(8, (3, 3), activation=relu),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation=relu),
        layers.Dropout(0.5),
        # Logits and softmax are kept as separate layers; the softmax layer is named 'softmax'.
        layers.Dense(num_classes),
        layers.Activation('softmax', name='softmax'),
    ]
    network = keras.Sequential(stack, name=name)
    network.compile(loss='categorical_crossentropy',
                    optimizer=keras.optimizers.Adadelta(learning_rate),
                    metrics=[keras.metrics.CategoricalAccuracy()])
    return network


def train_model(model, max_epochs: int, train_images, train_labels, test_images, 
                test_labels, batch_size, callbacks_list, verbose=1):
    """Fit `model` on the training data, validating on the test data each epoch.

    Returns:
        Whatever `model.fit` returns (the training history for a Keras model).
    """
    fit_options = dict(
        batch_size=batch_size,
        epochs=max_epochs,
        verbose=verbose,
        validation_data=(test_images, test_labels),
        callbacks=callbacks_list,
    )
    return model.fit(train_images, train_labels, **fit_options)

def lr_scheduler(epoch):
  """Step decay: halve the global `learning_rate` once per `lr_drop` completed epochs."""
  halvings = epoch // lr_drop
  return learning_rate * (0.5 ** halvings)

# Keras callback that applies lr_scheduler during training.
reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)

# Training The Classifier

In [None]:
#@title Select The Classifier to Attack and Hyperparameters
# NOTE(review): `selected_model` and `lr_decay` are not read by any code visible in this
# notebook; the CNN below is always the model that gets built.
selected_model = "CNN from Lecture 2" #@param ["CNN from Lecture 2", "NN from Ex 1"]
batch_size = 128 #@param {type:"integer"}
max_epochs = 12 #@param {type:"integer"}
# `learning_rate` and `lr_drop` are also read as globals by lr_scheduler.
learning_rate = 0.1 #@param {type:"number"}
lr_decay = 1e-6 #@param {type:"number"}
lr_drop = 6 #@param {type:"number"}


# Build the classifier that the attacks and the GAN-based detector are evaluated against.
classifier = CNN_mnist_model(img_rows, img_cols, img_colors, 
                             num_classes, learning_rate)
print('Architecture of Classifier:')
classifier.summary()

# The step-decay learning-rate scheduler is the only callback.
callbacks_list = [reduce_lr]
print("Training the teacher model for all 10 classes of MNIST")
# The MNIST test split doubles as the validation set here.
classifier_history = train_model(classifier, max_epochs, X_train, 
                                 y_train, X_test, y_test, 
                                 batch_size, callbacks_list)

Architecture of Classifier:
Model: "CNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 26, 26, 8)         80        
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 8)         584       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 12, 12, 8)         0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 12, 12, 8)         0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 1152)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 128)               147584    
_________________________________________________________________
dropout_5 (Dropout)          (None,

# Gradient-Descent for GAN-Detector

In [None]:
@tf.function 
def find_optimal_z_step(generator_model, x: tf.Tensor, z: tf.Variable, 
                        learning_rate: tf.constant):
    """Take one gradient-descent step on z to reduce the per-image MSE between G(z) and x.

    Args:
        generator_model: callable mapping latent vectors to images.
        x: target images; flattened per sample before the comparison.
        z: current latent vectors; the gradient is taken with respect to them.
        learning_rate: step size.

    Returns:
        The updated latent vectors (z itself is not modified here).
    """
    target_flat = tf.reshape(x, [x.shape[0], -1])

    with tf.GradientTape() as tape:
        generated = generator_model(z)
        generated_flat = tf.reshape(generated, [generated.shape[0], -1])
        reconstruction_error = keras.losses.MSE(target_flat, generated_flat)
    gradient = tape.gradient(reconstruction_error, z)

    update = tf.multiply(learning_rate, tf.cast(gradient, tf.float32))
    return z - update


# Placeholder latent variable; the cells that run the search rebuild `z` with one row
# per image before calling find_optimal_z.
z = tf.Variable(np.ones((2, randomDim)), name = 'z', dtype = tf.float32)

@tf.function
def find_optimal_z(generator_model, x: tf.Tensor, z: tf.Variable, 
                   epochs_num: tf.constant, learning_rate: tf.constant):
    """Search for latent vectors whose generated images best reconstruct `x`.

    Runs `epochs_num` gradient-descent steps, halving the step size after every
    25 iterations.

    Args:
        generator_model: callable mapping latent vectors to images.
        x: target images.
        z: starting latent vectors; must be a tf.Variable because it is updated in place.
        epochs_num: number of gradient-descent iterations.
        learning_rate: initial step size.

    Returns:
        The optimised latent vectors (the same variable that was passed in as `z`).
    """
    z_minimal = z
    iteration = tf.constant(0, dtype=tf.int32)
    # Carried through the loop as a tensor so that the decay below actually takes effect.
    current_lr = tf.convert_to_tensor(learning_rate, dtype=tf.float32)

    while tf.less(iteration, epochs_num):
        #tf.print('GD iteration:', iteration)
        z_minimal.assign(find_optimal_z_step(generator_model, x, z_minimal, 
                                             current_lr))

        iteration += 1
        # Halve the step size each time a block of 25 iterations completes.
        current_lr = tf.where(tf.equal(tf.math.floormod(iteration, 25), 0),
                              current_lr / 2.0, current_lr)

    return z_minimal

## Commencing All Attacks

In [None]:
# Attack 100 randomly sampled test images with every method. save=True makes the call
# return the stacked adversarial images together with the clean samples and their labels.
adv_images_all, X_test_sample, y_test_sample = perform_all_attacks(classifier, None, samples=100, save=True)


 FGSM method 

(100, 28, 28, 1)
(500, 28, 28, 1)
Test loss: 0.19
Successfully moved out of source class: 0.06
Mean perturbation distance: 0.10

 BIM(L_inf) method 

Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
(100, 28, 28, 1)
(500, 28, 28, 1)
Test loss: 3.39
Successfully moved out of source class: 0.94
Mean perturbation distance: 0.97

 Carlini & Wagner(L_2) method 

(100, 28, 28, 1)
(500, 28, 28, 1)
Test loss: 3.04
Successfully moved out of source class: 0.77
Mean perturbation distance: 0.98

 PGD(L_2) method 

Attack iteration: 0
Attack iteration: 1
Attack iteration: 2
Attack iteration: 3
(100, 28, 28, 1)
(500, 28, 28, 1)
Test loss: 3.46
Successfully moved out of source class: 0.95
Mean perturbation distance: 0.97

 PGD_target(L_2) method 

Attack iteration: 0
Attack iteration: 1
Attack iteration: 2
Attack iteration: 3
(100, 28, 28, 1)
(500, 28, 28, 1)
Test loss: 3.40
Successfully moved out of source class: 0.95
Test loss: 0.00
Successfully perturbed to target class: 0.00
Me

In [None]:
# learning_rate_z = tf.constant(0.2); the schedule is meant to halve it every 25 iterations
# (see find_optimal_z).

# One random starting latent vector per sampled clean test image.
z = np.random.normal(0, 1, size=[len(X_test_sample), randomDim])
z = tf.Variable(z, name = 'z', dtype = tf.float32)


# Drop the channel axis; find_optimal_z_step flattens both sides before comparing.
x_gan = tf.Variable(X_test_sample[:,:,:,0], dtype=tf.float32)
z = find_optimal_z(generator, x_gan, z, epochs_num=tf.constant(200), learning_rate=tf.constant(0.2))
# Per-image Frobenius norm of the reconstruction residual G(z*) - x on the clean images.
loss_z=np.linalg.norm(generator(z)[:,:,:,0] - x_gan, axis=(1, 2))


In [None]:


# Detector threshold: an image is flagged when its reconstruction residual ‖x - G(z*)‖ exceeds it.
epsilon_z=20
print("% Correctly identified by the detector – that is ‖x -G(z*)‖≥ϵ", 100*np.mean(loss_z>epsilon_z))

# Must match the order in which perform_all_attacks stacked the attacks into
# adv_images_all; otherwise each result is reported under the wrong name.
attack_type_array = ["FGSM", "BIM(L_inf)", "Carlini & Wagner(L_2)", "PGD(L_2)", "PGD_target(L_2)"]
samples_per_attack = len(X_test_sample)
true_classes = np.argmax(y_test_sample, axis=1)
# G(z*) does not depend on the attack, so compute it once.
reconstructions = np.asarray(generator(z))[:,:,:,0]

for i,attack_type in enumerate(attack_type_array):
  print('\n', attack_type + " method",'\n')
  adv_batch = adv_images_all[i*samples_per_attack:(i+1)*samples_per_attack]

  # The classifier still outputs the true class despite the attack.
  predict_true=np.argmax(classifier.predict(adv_batch), axis=1)==true_classes

  # The detector flags the image because it lies far from its GAN reconstruction.
  predict_z_true=np.linalg.norm(reconstructions - adv_batch[:,:,:,0], axis=(1, 2))>epsilon_z

  # An attack is defeated when the image is either detected or still classified correctly.
  defended_pct = 100*np.mean(predict_z_true | predict_true)
  print("Correctly classified to the original class label %",defended_pct)
  print("Correctly identified by the detector – that is ‖x -G(z*)‖≥ϵ  %", 100*np.mean(predict_z_true))
  print("% Wrongly classified", 100-defended_pct)



  

% Correctly identified by the detector – that is ‖x -G(z*)‖≥ϵ 9.0

 FGSM method 





Correctly classified to the original class label % 95
Correctly identified by the detector – that is ‖x -G(z*)‖≥ϵ  % 9.0
% Wrongly classified 5

 BIM(L_inf) method 

Correctly classified to the original class label % 100
Correctly identified by the detector – that is ‖x -G(z*)‖≥ϵ  % 100.0
% Wrongly classified 0

 PGD(L_2) method 

Correctly classified to the original class label % 100
Correctly identified by the detector – that is ‖x -G(z*)‖≥ϵ  % 100.0
% Wrongly classified 0

 PGD_target(L_2) method 

Correctly classified to the original class label % 100
Correctly identified by the detector – that is ‖x -G(z*)‖≥ϵ  % 100.0
% Wrongly classified 0

 Carlini & Wagner(L_2) method 

Correctly classified to the original class label % 100
Correctly identified by the detector – that is ‖x -G(z*)‖≥ϵ  % 100.0
% Wrongly classified 0


In [None]:
# Draw 400 clean test images (without replacement) to estimate the detector's false-positive rate.
index=sample(range(len(X_test)),400)
X_test_400=X_test[index]
y_test_400=y_test[index]
# learning_rate_z = tf.constant(0.2); the schedule is meant to halve it every 25 iterations
# (see find_optimal_z).

# One random starting latent vector per image.
z = np.random.normal(0, 1, size=[len(X_test_400), randomDim])
z = tf.Variable(z, name = 'z', dtype = tf.float32)


# Drop the channel axis; find_optimal_z_step flattens both sides before comparing.
x_gan = tf.Variable(X_test_400[:,:,:,0], dtype=tf.float32)
z = find_optimal_z(generator, x_gan, z, epochs_num=tf.constant(200), learning_rate=tf.constant(0.2))
# Per-image Frobenius norm of the reconstruction residual G(z*) - x.
loss_z=np.linalg.norm(generator(z)[:,:,:,0] - x_gan, axis=(1, 2))

In [None]:
# These are clean images, so every one whose residual exceeds the threshold is a false alarm.
predict_z_true=np.linalg.norm(generator(z)[:,:,:,0] - X_test_400[:,:,:,0], axis=(1, 2))>epsilon_z
print("The False Positive detection rate: ",np.round(np.mean(predict_z_true)*100,2))



The False Positive detection rate:  6.25
