## FAST GRADIENT SIGN METHOD ON TABULAR DATASET

In [1]:
# !pip install adversarial-robustness-toolbox

In [2]:
import numpy as np
import tensorflow as tf
from art.estimators.classification import TensorFlowV2Classifier
from art.attacks.evasion import FastGradientMethod
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


import warnings 
warnings.filterwarnings('ignore')

In [3]:
data = load_breast_cancer()
X = data.data
y = data.target 

encoder = OneHotEncoder(sparse_output=False)
y_onehot = encoder.fit_transform(y.reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

In [4]:
# Load the dataset
data = load_breast_cancer()
X = data.data  # Features
y = data.target  # Labels

# One-hot encode the labels
encoder = OneHotEncoder(sparse_output=False)
y_onehot = encoder.fit_transform(y.reshape(-1, 1))

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

In [6]:
# Define a simple neural network
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(y_onehot.shape[1], activation='softmax')  # Output layer for classification
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.5981 - loss: 152.3091 - val_accuracy: 0.3772 - val_loss: 8.6957
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3951 - loss: 13.4405 - val_accuracy: 0.6842 - val_loss: 3.9477
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6871 - loss: 5.1863 - val_accuracy: 0.8596 - val_loss: 0.4018
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8218 - loss: 1.0525 - val_accuracy: 0.6579 - val_loss: 1.2512
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7453 - loss: 1.5115 - val_accuracy: 0.9123 - val_loss: 0.2648
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8094 - loss: 0.6744 - val_accuracy: 0.9386 - val_loss: 0.2961
Epoch 7/10
[1m15/15[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x155eabbc0e0>

In [7]:
model.summary()

In [8]:
_, accuracy = model.evaluate(X_test, y_test)
print("Accuracy: {:.2f}%".format(accuracy * 100))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9265 - loss: 0.3126 
Accuracy: 93.86%


In [9]:
classifier = TensorFlowV2Classifier(model=model, nb_classes=y_onehot.shape[1], input_shape=(X_train.shape[1],), loss_object=tf.keras.losses.CategoricalCrossentropy())

attack = FastGradientMethod(estimator=classifier, eps=.7)  # eps controls the strength of the attack

X_test_adv = attack.generate(x=X_test)

_, accuracy = model.evaluate(X_test_adv, y_test)
print("Accuracy on adversarial examples: {:.2f}%".format(accuracy * 100))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9334 - loss: 0.2543
Accuracy on adversarial examples: 92.98%


## FAST GRADIENT SIGN METHOD ON IMAGE DATASET

In [10]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from art.attacks.evasion import FastGradientMethod
from art.estimators.classification import TensorFlowV2Classifier

In [11]:
# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Normalize the data

In [12]:
# One-hot encode the labels
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

In [13]:
# Build a simple neural network model
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])

In [14]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8772 - loss: 0.4310 - val_accuracy: 0.9579 - val_loss: 0.1410
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9646 - loss: 0.1243 - val_accuracy: 0.9698 - val_loss: 0.0985
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9774 - loss: 0.0777 - val_accuracy: 0.9749 - val_loss: 0.0820
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9833 - loss: 0.0553 - val_accuracy: 0.9768 - val_loss: 0.0770
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9868 - loss: 0.0429 - val_accuracy: 0.9735 - val_loss: 0.0834


<keras.src.callbacks.history.History at 0x155ec31b740>

In [15]:
model.summary()

In [16]:
# Wrap the model using ART's TensorFlowV2Classifier wrapper
classifier = TensorFlowV2Classifier(
    model=model,
    loss_object=tf.keras.losses.CategoricalCrossentropy(),
    nb_classes=10,
    input_shape=(28, 28),
    clip_values=(0, 1)
)

In [17]:
# Create FGSM attack
attack = FastGradientMethod(estimator=classifier, eps=0.01)

# Generate adversarial examples for the test set
x_test_adv = attack.generate(x=x_test)

In [18]:
# Evaluate the model on clean and adversarial data
accuracy_clean = model.evaluate(x_test, y_test, verbose=0)[1]
accuracy_adv = model.evaluate(x_test_adv, y_test, verbose=0)[1]

print(f"Accuracy on clean test data: {accuracy_clean * 100:.2f}%")
print(f"Accuracy on adversarial test data: {accuracy_adv * 100:.2f}%")

Accuracy on clean test data: 97.35%
Accuracy on adversarial test data: 95.89%
