# In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Convert the images to float32, give them an explicit single-channel axis
# (N, 28, 28, 1) to match the model's input shape, and divide by 255.
x_train = x_train.astype(np.float32).reshape(-1, 28, 28, 1) / 255
x_test = x_test.astype(np.float32).reshape(-1, 28, 28, 1) / 255

# One-hot encode the labels over 10 classes, as required by the
# categorical cross-entropy loss used when compiling the model.
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Define the model
def create_model():
    """Build and compile a small convolutional classifier.

    The network takes 28x28x1 inputs and consists of two 3x3 ReLU
    convolutions (32 then 64 filters), each followed by 2x2 max pooling,
    then a flatten, a 64-unit ReLU dense layer and a 10-way softmax output.
    It is compiled with the Adam optimizer, categorical cross-entropy loss
    and an accuracy metric.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    net = tf.keras.Sequential()
    # Feature extractor: conv -> pool, twice.
    net.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
    net.add(layers.MaxPooling2D((2, 2)))
    net.add(layers.Conv2D(64, (3, 3), activation='relu'))
    net.add(layers.MaxPooling2D((2, 2)))
    # Classifier head.
    net.add(layers.Flatten())
    net.add(layers.Dense(64, activation='relu'))
    net.add(layers.Dense(10, activation='softmax'))

    net.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# FGSM attack
def fgsm_attack(model, x, y, epsilon):
    """Craft a Fast Gradient Sign Method adversarial example for one sample.

    A leading batch axis is added to ``x`` and ``y``, the categorical
    cross-entropy of the model's prediction is differentiated with respect
    to the input, and the input is shifted by ``epsilon`` along the sign of
    that gradient. The result is clipped to the range [0, 1].

    Args:
        model: Callable applied to the batched input; its output is fed to
            categorical cross-entropy together with ``y``.
        x: A single sample without a batch axis.
        y: The label for ``x`` in the form categorical cross-entropy expects
            (one-hot in this file).
        epsilon: Scale of the signed-gradient perturbation.

    Returns:
        The perturbed sample as a tensor, with the batch axis removed again.
    """
    batch_x = tf.convert_to_tensor(x[np.newaxis, ...])
    batch_y = tf.convert_to_tensor(y[np.newaxis, ...])

    with tf.GradientTape() as tape:
        # The input is a plain tensor, not a variable, so the tape has to be
        # told explicitly to track it.
        tape.watch(batch_x)
        loss = tf.keras.losses.categorical_crossentropy(batch_y, model(batch_x))

    # Step in the direction that increases the loss.
    step = epsilon * tf.sign(tape.gradient(loss, batch_x))
    adversarial = tf.clip_by_value(
        batch_x + step, clip_value_min=0, clip_value_max=1
    )
    return adversarial[0]

# Adversarial training
def adversarial_train(model, x, y, epsilon):
    """Run one training step on a sample and its FGSM adversarial version.

    Args:
        model: Compiled model to update in place via ``train_on_batch``.
        x: A single sample without a batch axis.
        y: The label for ``x``; it is reused for the adversarial sample.
        epsilon: Perturbation scale passed to ``fgsm_attack``.
    """
    adversarial = fgsm_attack(model, x, y, epsilon)

    # Two-element batch: the clean sample first, then its adversarial
    # counterpart, both paired with the same label.
    inputs = tf.stack([x, adversarial], axis=0)
    labels = tf.stack([y, y], axis=0)

    model.train_on_batch(inputs, labels)

# Evaluate model
def evaluate_model(model, x_test, y_test, epsilon):
    """Measure accuracy on FGSM-perturbed versions of the given samples.

    Each sample is attacked individually with ``fgsm_attack`` and the
    model's prediction on the perturbed input is compared with the label.

    Args:
        model: Model under evaluation; called directly on a one-element
            batch in inference mode.
        x_test: Sized sequence/array of unbatched samples.
        y_test: Matching labels; the arg-max of each is the class index.
        epsilon: Perturbation scale passed to ``fgsm_attack``.

    Returns:
        The fraction of samples whose adversarial version is still
        classified correctly, or ``0.0`` when ``x_test`` is empty.
    """
    total = len(x_test)
    if total == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    correct = 0
    for x, y in zip(x_test, y_test):
        x_adv = fgsm_attack(model, x, y, epsilon)
        # Call the model directly instead of model.predict(): predict() is
        # built for large batched inputs, so invoking it once per sample is
        # very slow and prints a progress bar on every call.
        pred = model(x_adv[np.newaxis, ...], training=False)
        if np.argmax(np.asarray(pred)) == np.argmax(y):
            correct += 1
    return correct / total

# Visualize adversarial examples
def visualize_adversarial_examples(model, x_test, y_test, epsilon):
    """Plot the first (up to) nine samples overlaid with their FGSM versions.

    Each panel of a 3x3 grid shows the original image with the adversarial
    image drawn semi-transparently on top. The title carries the true label
    and the x-label carries the model's prediction on the adversarial input.

    Args:
        model: Model used both to craft the attack and to predict.
        x_test: Indexable collection of samples reshapeable to 28x28.
        y_test: Matching labels; the arg-max of each is the class index.
        epsilon: Perturbation scale passed to ``fgsm_attack``.
    """
    _, axs = plt.subplots(3, 3, figsize=(15, 15))
    available = len(x_test)

    # axs.flat walks the grid row by row, so idx equals row * 3 + col.
    for idx, ax in enumerate(axs.flat):
        if idx >= available:
            # Fewer than nine samples: blank the unused panel instead of
            # failing with an IndexError.
            ax.axis('off')
            continue

        x = x_test[idx]
        y = y_test[idx]
        x_adv = fgsm_attack(model, x, y, epsilon)

        ax.imshow(x.reshape(28, 28), cmap='gray')
        ax.imshow(x_adv.numpy().reshape(28, 28), cmap='gray', alpha=0.5)

        # Hide only the ticks. Turning the whole axis off would also hide
        # the x-label, and with it the adversarial prediction.
        ax.set_xticks([])
        ax.set_yticks([])

        ax.set_title(f"Original: {np.argmax(y)}")
        # verbose=0 keeps predict() from printing a progress bar per panel.
        pred = model.predict(x_adv[np.newaxis, ...], verbose=0)
        ax.set_xlabel(f"Adversarial: {np.argmax(pred)}")

    plt.tight_layout()
    plt.show()

# Main execution
# Train a baseline classifier on the clean training data.
model = create_model()
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2, verbose=1)

print("Evaluating standard accuracy...")
# evaluate() returns [loss, accuracy] for the metrics set at compile time.
standard_accuracy = model.evaluate(x_test, y_test)[1]
print(f"Standard accuracy: {standard_accuracy:.4f}")

# Accuracy of the baseline model under an FGSM attack of strength epsilon.
epsilon = 0.1
print(f"\nEvaluating adversarial accuracy (epsilon={epsilon})...")
adv_accuracy = evaluate_model(model, x_test, y_test, epsilon)
print(f"Adversarial accuracy: {adv_accuracy:.4f}")

# Adversarial fine-tuning: every training sample is paired with a freshly
# crafted adversarial version and used for one training step.
# NOTE(review): this performs one train_on_batch call per sample, so a full
# pass over the training set is slow.
ADV_EPOCHS = 5  # single source for the loop bound and the progress message
print("\nPerforming adversarial training...")
for epoch in range(ADV_EPOCHS):
    for x, y in zip(x_train, y_train):
        adversarial_train(model, x, y, epsilon)
    print(f"Epoch {epoch + 1}/{ADV_EPOCHS}")

print("\nEvaluating adversarial accuracy after training...")
adv_accuracy_after = evaluate_model(model, x_test, y_test, epsilon)
print(f"Adversarial accuracy after training: {adv_accuracy_after:.4f}")

print("\nVisualizing adversarial examples...")
visualize_adversarial_examples(model, x_test, y_test, epsilon)