<a href="https://colab.research.google.com/github/Sugandh-Mishra/attacks/blob/main/jsma.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from tensorflow.keras import layers, models, datasets
import numpy as np

In [None]:
def train_model():
    (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
    train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255.0
    test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255.0
    
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu',padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(train_images, train_labels, epochs=2, validation_data=(test_images, test_labels))
  
    return model, test_images, test_labels, train_images

In [None]:
# Train the model
model, x_test, y_test, x_train = train_model()


In [None]:
def jsma_attack(model, x, y, epsilon=0.1, max_iters=100):
    perturbed_x = tf.identity(x)
    for _ in range(max_iters):
        with tf.GradientTape() as tape:
            tape.watch(perturbed_x)
            predictions = model(perturbed_x)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y, predictions)
        
        gradients = tape.gradient(loss, perturbed_x)
        gradients = tf.sign(gradients)
        perturbed_x = perturbed_x + epsilon * gradients
        perturbed_x = tf.clip_by_value(perturbed_x, clip_value_min=x-epsilon, clip_value_max=x+epsilon)
        
    return perturbed_x

In [None]:
def test_perturbed_images(model, x, y, epsilon=0.1, max_iters=100):
    perturbed_images = jsma_attack(model, x, y, epsilon, max_iters)
    predictions = model(perturbed_images)
    success_rate = np.mean(tf.argmax(predictions, axis=-1) == tf.argmax(y_test, axis=-1))

    # success_rate = np.mean(tf.argmax(predictions, axis=1) == tf.argmax(y, axis=1))
    return success_rate,perturbed_images

In [19]:
#  Perform JSMA attack and test perturbed images
epsilon = 0.1
max_iters = 100
success_rate,perturbed_images = test_perturbed_images(model, x_test, y_test, epsilon, max_iters)


In [20]:

print("Original Image - True Label: {}".format(tf.argmax(y_test, axis=-1)))
print("Perturbed Image - Predicted Label: {}".format(tf.argmax(model(perturbed_images), axis=-1)))
print("Success Rate of JSMA Attack: {:.2f}%".format(success_rate * 100))


Original Image - True Label: 7
Perturbed Image - Predicted Label: [3 1 7 ... 9 8 5]
Success Rate of JSMA Attack: 7.20%
