<a href="https://colab.research.google.com/github/Divyanshi-16/Adversarial-Robustness/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Dataset:** MNIST dataset

**Model:** Convolutional Neural Network

In [None]:
from zipfile import ZipFile
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import regularizers
from sklearn.metrics import precision_score, recall_score
from tensorflow.keras.layers import Conv2D, MaxPooling2D

# Unpack both MNIST CSV archives, each into its own directory.
for archive_path, target_dir in (('mnist_train.csv.zip', 'mnist_train'),
                                 ('mnist_test.csv.zip', 'mnist_test')):
    with ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(target_dir)

train_data = pd.read_csv('mnist_train/mnist_train.csv')
test_data = pd.read_csv('mnist_test/mnist_test.csv')

# Column 0 is used as the label; every remaining column is used as a pixel feature.
y_train = train_data.iloc[:, 0].values
y_test = test_data.iloc[:, 0].values

# Divide pixel values by 255 and reshape each row into a 28x28 single-channel image.
x_train = (train_data.iloc[:, 1:].values / 255.0).reshape(-1, 28, 28, 1)
x_test = (test_data.iloc[:, 1:].values / 255.0).reshape(-1, 28, 28, 1)

# Data augmentation: small random rotations, zooms and shifts.
augmentation_options = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
datagen = ImageDataGenerator(**augmentation_options)
datagen.fit(x_train)

# Define the model
model = Sequential([
    # Two convolution / max-pool / batch-norm stages extract image features.
    Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Flatten(),
    # L2-regularised dense classifier head.
    Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.0001)),
    BatchNormalization(),
    Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.0001)),
    BatchNormalization(),
    # 10-way softmax output, matching the sparse categorical cross-entropy loss below.
    Dense(10, activation='softmax')
])

# Compile the model
initial_learning_rate = 0.001
# Use the `learning_rate` keyword: the legacy `lr` alias is deprecated and is
# rejected by newer Keras optimizers.
opt = Adam(learning_rate=initial_learning_rate)
model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Learning rate scheduling
def lr_schedule(epoch):
    """Return the learning rate for ``epoch``.

    The rate starts at the module-level ``initial_learning_rate`` and is
    multiplied by 0.9 for every epoch that has elapsed.
    """
    decay_factor = 0.9 ** epoch
    return initial_learning_rate * decay_factor

lr_scheduler = LearningRateScheduler(lr_schedule)

# Train on augmented mini-batches; the test split is also passed as validation data.
train_batches = datagen.flow(x_train, y_train, batch_size=32)
history = model.fit(
    train_batches,
    epochs=20,
    validation_data=(x_test, y_test),
    callbacks=[lr_scheduler],
)

# Loss and accuracy on the clean test split.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_accuracy}')

# Macro-averaged precision / recall computed from the arg-max class predictions.
class_probabilities = model.predict(x_test)
y_pred = np.argmax(class_probabilities, axis=1)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print(f'Precision: {precision}')
print(f'Recall: {recall}')



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy: 0.995199978351593
Precision: 0.9952510408343596
Recall: 0.9951096929467462


**Initial Robustness:** FGSM White Box Attack

In [None]:
x_test_tf = tf.convert_to_tensor(x_test, dtype=tf.float32)

# Generate adversarial examples using FGSM attack in TensorFlow:
# one step of size eps along the sign of the input gradient of the loss.
eps = 0.3
with tf.GradientTape() as tape:
    # x_test_tf is a plain tensor (not a Variable), so it has to be watched explicitly.
    tape.watch(x_test_tf)
    # training=False makes inference mode explicit for the BatchNormalization layers.
    predictions = model(x_test_tf, training=False)
    loss = tf.keras.losses.sparse_categorical_crossentropy(y_test, predictions)
gradients = tape.gradient(loss, x_test_tf)
# Clip so the perturbed images stay inside the [0, 1] pixel range.
x_test_adv = tf.clip_by_value(x_test_tf + eps * tf.sign(gradients), 0, 1)

# Evaluate the model on adversarial examples
adv_test_loss, adv_test_accuracy = model.evaluate(x_test_adv, y_test)
print(f'Test accuracy on adversarial examples: {adv_test_accuracy}')

# Get the predicted labels for adversarial examples
y_pred_prob = model.predict(x_test_adv)
y_pred = np.argmax(y_pred_prob, axis=1)

# Calculate precision and recall.
# zero_division=0 scores an undefined per-class value (e.g. a class with no
# predicted samples) as 0 -- the same value the default uses -- without
# emitting a warning.
precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
recall = recall_score(y_test, y_pred, average='macro', zero_division=0)

print(f'Precision: {precision}')
print(f'Recall: {recall}')

Test accuracy on adversarial examples: 0.09769999980926514
Precision: 0.1136590611896692
Recall: 0.10015714797275717


  _warn_prf(average, modifier, msg_start, len(result))


**Initial Robustness:** PGD White Box Attack

In [None]:
x_test_tf = tf.convert_to_tensor(x_test, dtype=tf.float32)

# Generate adversarial examples using PGD attack in TensorFlow
eps = 0.3        # bound on the per-pixel perturbation
eps_iter = 0.01  # step size of each iteration
nb_iter = 40     # number of iterations
eps_proj = 0.3   # second per-pixel bound applied after every step
# Both eps and eps_proj bound the perturbation, so the smaller one is the
# effective projection radius.
radius = min(eps, eps_proj)
adv_x = x_test_tf

for i in range(nb_iter):
    with tf.GradientTape() as tape:
        # adv_x is a plain tensor, so it has to be watched explicitly.
        tape.watch(adv_x)
        # training=False makes inference mode explicit for the BatchNormalization layers.
        predictions = model(adv_x, training=False)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_test, predictions)
    gradients = tape.gradient(loss, adv_x)
    perturbation = eps_iter * tf.sign(gradients)
    # Project back so every pixel stays within `radius` of the clean image ...
    adv_x = tf.clip_by_value(adv_x + perturbation, x_test_tf - radius, x_test_tf + radius)
    # ... and inside the valid [0, 1] pixel range, as the FGSM attack does.
    adv_x = tf.clip_by_value(adv_x, 0.0, 1.0)

# Evaluate TensorFlow Keras model on adversarial test data
adv_test_loss, adv_test_accuracy = model.evaluate(adv_x, y_test)
print(f'Test accuracy on adversarial examples: {adv_test_accuracy}')

Test accuracy on adversarial examples: 0.0


**Adversarial Training:** Accuracy on the Clean Test Dataset after Retraining

In [None]:
from zipfile import ZipFile
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import regularizers
from sklearn.metrics import precision_score, recall_score
from tensorflow.keras.layers import Conv2D, MaxPooling2D

# Extract the training data
with ZipFile('mnist_train.csv.zip', 'r') as zip_ref:
    zip_ref.extractall('mnist_train')

# Extract the test data
with ZipFile('mnist_test.csv.zip', 'r') as zip_ref:
    zip_ref.extractall('mnist_test')

# Load the training and test data
train_data = pd.read_csv('mnist_train/mnist_train.csv')
test_data = pd.read_csv('mnist_test/mnist_test.csv')

# Define x_train and y_train (column 0 is the label, the rest are pixel values)
x_train = train_data.iloc[:, 1:].values
y_train = train_data.iloc[:, 0].values

# Normalize pixel values to be between 0 and 1
x_train = x_train / 255.0

# Reshape input data
x_train = x_train.reshape(-1, 28, 28, 1)

# Randomly select 20,000 samples from the training dataset.
# A dedicated seeded generator makes the subset identical on every run without
# touching NumPy's global random state.
num_samples = 20000
subset_seed = 42
subset_rng = np.random.default_rng(subset_seed)
indices = subset_rng.choice(x_train.shape[0], num_samples, replace=False)
x_train_subset = x_train[indices]
y_train_subset = y_train[indices]

# Load the test data
x_test = test_data.iloc[:, 1:].values
y_test = test_data.iloc[:, 0].values

# Normalize pixel values to be between 0 and 1
x_test = x_test / 255.0

# Reshape input data
x_test = x_test.reshape(-1, 28, 28, 1)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1
)
datagen.fit(x_train)

# Define the model
model = Sequential([
    # Two convolution / max-pool / batch-norm stages extract image features.
    Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Flatten(),
    # L2-regularised dense classifier head.
    Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.0001)),
    BatchNormalization(),
    Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.0001)),
    BatchNormalization(),
    # 10-way softmax output, matching the sparse categorical cross-entropy loss below.
    Dense(10, activation='softmax')
])

# Compile the model
initial_learning_rate = 0.001
# Use the `learning_rate` keyword: the legacy `lr` alias is deprecated and is
# rejected by newer Keras optimizers.
opt = Adam(learning_rate=initial_learning_rate)
model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Learning rate scheduling
def lr_schedule(epoch):
    """Return the learning rate for ``epoch``.

    The rate starts at the module-level ``initial_learning_rate`` and is
    multiplied by 0.9 for every epoch that has elapsed.
    """
    decay_factor = 0.9 ** epoch
    return initial_learning_rate * decay_factor

lr_scheduler = LearningRateScheduler(lr_schedule)

# Fit for a single epoch on the augmented training subset before any
# adversarial examples are generated; the test split is passed as validation data.
subset_batches = datagen.flow(x_train_subset, y_train_subset, batch_size=32)
history = model.fit(
    subset_batches,
    epochs=1,
    validation_data=(x_test, y_test),
    callbacks=[lr_scheduler],
)

# FGSM adversarial attack function
def fgsm_attack(model, x, y, eps=0.3, batch_size=None):
    """Craft FGSM adversarial examples for ``x`` against ``model``.

    Every input is moved by ``eps`` along the sign of the gradient of the
    sparse categorical cross-entropy loss with respect to that input, and the
    result is clipped to the [0, 1] range.

    Args:
        model: Callable Keras model mapping a batch of inputs to class
            probabilities.
        x: Inputs to perturb; converted to a float32 tensor.
        y: Integer class labels for ``x``; converted to an int32 tensor.
        eps: Size of the signed-gradient step.
        batch_size: Optional chunk size. ``None`` (the default) attacks all of
            ``x`` in one forward/backward pass; a positive integer processes
            ``x`` in chunks of that many samples to bound peak memory.

    Returns:
        NumPy array of adversarial examples with the same shape as ``x``.

    Raises:
        ValueError: If ``batch_size`` is given but is not positive.
    """
    x = tf.convert_to_tensor(x, dtype=tf.float32)
    y = tf.convert_to_tensor(y, dtype=tf.int32)

    def _attack_batch(x_batch, y_batch):
        # One FGSM step for a single batch.
        with tf.GradientTape() as tape:
            # Inputs are plain tensors, so they have to be watched explicitly.
            tape.watch(x_batch)
            # training=False makes inference mode explicit, so each sample's
            # loss depends only on that sample.
            predictions = model(x_batch, training=False)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y_batch, predictions)
        gradients = tape.gradient(loss, x_batch)
        x_adv = tf.clip_by_value(x_batch + eps * tf.sign(gradients), 0, 1)
        return x_adv.numpy()

    if batch_size is None:
        return _attack_batch(x, y)

    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer or None")

    total = int(x.shape[0])
    chunks = [
        _attack_batch(x[start:start + batch_size], y[start:start + batch_size])
        for start in range(0, total, batch_size)
    ]
    if not chunks:
        # Nothing to attack: return the (empty) input unchanged.
        return x.numpy()
    return np.concatenate(chunks, axis=0)

# Generate adversarial examples for training data
x_train_adv = fgsm_attack(model, x_train_subset, y_train_subset)

# Concatenate original and adversarial training data; each adversarial image
# keeps the label of the clean image it was derived from.
x_combined = np.concatenate((x_train_subset, x_train_adv), axis=0)
y_combined = np.concatenate((y_train_subset, y_train_subset), axis=0)

# Shuffle the combined dataset.
# A single index permutation applied to both arrays keeps images and labels
# aligned and avoids building a Python list of (image, label) tuples.
shuffled_order = np.random.permutation(x_combined.shape[0])
x_combined = x_combined[shuffled_order]
y_combined = y_combined[shuffled_order]

# Retrain the model on the combined dataset
combined_batches = datagen.flow(x_combined, y_combined, batch_size=32)
history = model.fit(
    combined_batches,
    epochs=20,
    validation_data=(x_test, y_test),
    callbacks=[lr_scheduler],
)

# Loss and accuracy on the clean test split.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f'Test accuracy after adversarial training: {test_accuracy}')

# Macro-averaged precision / recall computed from the arg-max class predictions.
class_probabilities = model.predict(x_test)
y_pred = np.argmax(class_probabilities, axis=1)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print(f'Precision after adversarial training: {precision}')
print(f'Recall after adversarial training: {recall}')



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy after adversarial training: 0.9923999905586243
Precision after adversarial training: 0.9925874631924231
Recall after adversarial training: 0.9923019785455216


**Adversarial Training:** Accuracy on the Adversarial (FGSM) Test Dataset after Retraining

In [None]:
from zipfile import ZipFile
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import regularizers
from sklearn.metrics import precision_score, recall_score
from tensorflow.keras.layers import Conv2D, MaxPooling2D

with ZipFile('mnist_train.csv.zip', 'r') as zip_ref:
    zip_ref.extractall('mnist_train')

with ZipFile('mnist_test.csv.zip', 'r') as zip_ref:
    zip_ref.extractall('mnist_test')

train_data = pd.read_csv('mnist_train/mnist_train.csv')
test_data = pd.read_csv('mnist_test/mnist_test.csv')

# Column 0 is the label, the remaining columns are pixel values.
x_train = train_data.iloc[:, 1:].values
y_train = train_data.iloc[:, 0].values

# Normalize pixel values to be between 0 and 1
x_train = x_train / 255.0

# Reshape input data
x_train = x_train.reshape(-1, 28, 28, 1)

# Prepare the test split the same way, so the rest of this cell works on data
# loaded here instead of arrays left in the kernel by an earlier cell.
x_test = test_data.iloc[:, 1:].values / 255.0
x_test = x_test.reshape(-1, 28, 28, 1)
y_test = test_data.iloc[:, 0].values

# Randomly select 20,000 samples from the training dataset.
# A dedicated seeded generator makes the subset identical on every run without
# touching NumPy's global random state.
num_samples = 20000
subset_seed = 42
subset_rng = np.random.default_rng(subset_seed)
indices = subset_rng.choice(x_train.shape[0], num_samples, replace=False)
x_train_subset = x_train[indices]
y_train_subset = y_train[indices]

# Generate adversarial examples for training data
def fgsm_attack(model, x, y, eps=0.3, batch_size=None):
    """Craft FGSM adversarial examples for ``x`` against ``model``.

    Every input is moved by ``eps`` along the sign of the gradient of the
    sparse categorical cross-entropy loss with respect to that input, and the
    result is clipped to the [0, 1] range.

    Args:
        model: Callable Keras model mapping a batch of inputs to class
            probabilities.
        x: Inputs to perturb; converted to a float32 tensor.
        y: Integer class labels for ``x``; converted to an int32 tensor.
        eps: Size of the signed-gradient step.
        batch_size: Optional chunk size. ``None`` (the default) attacks all of
            ``x`` in one forward/backward pass; a positive integer processes
            ``x`` in chunks of that many samples to bound peak memory.

    Returns:
        NumPy array of adversarial examples with the same shape as ``x``.

    Raises:
        ValueError: If ``batch_size`` is given but is not positive.
    """
    # NOTE(review): a function with this name is also defined in the preceding
    # cell; consider keeping a single definition.
    x = tf.convert_to_tensor(x, dtype=tf.float32)
    y = tf.convert_to_tensor(y, dtype=tf.int32)

    def _attack_batch(x_batch, y_batch):
        # One FGSM step for a single batch.
        with tf.GradientTape() as tape:
            # Inputs are plain tensors, so they have to be watched explicitly.
            tape.watch(x_batch)
            # training=False makes inference mode explicit, so each sample's
            # loss depends only on that sample.
            predictions = model(x_batch, training=False)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y_batch, predictions)
        gradients = tape.gradient(loss, x_batch)
        x_adv = tf.clip_by_value(x_batch + eps * tf.sign(gradients), 0, 1)
        return x_adv.numpy()

    if batch_size is None:
        return _attack_batch(x, y)

    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer or None")

    total = int(x.shape[0])
    chunks = [
        _attack_batch(x[start:start + batch_size], y[start:start + batch_size])
        for start in range(0, total, batch_size)
    ]
    if not chunks:
        # Nothing to attack: return the (empty) input unchanged.
        return x.numpy()
    return np.concatenate(chunks, axis=0)

# NOTE(review): `model` has not been defined yet in this cell, so this call
# attacks whatever model an earlier cell left in the kernel -- confirm that
# this is the intended source of the adversarial training examples.
x_train_adv = fgsm_attack(model, x_train_subset, y_train_subset)

# Concatenate original and adversarial training data; each adversarial image
# keeps the label of the clean image it was derived from.
x_combined = np.concatenate((x_train_subset, x_train_adv), axis=0)
y_combined = np.concatenate((y_train_subset, y_train_subset), axis=0)

# Shuffle the combined dataset.
# A single index permutation applied to both arrays keeps images and labels
# aligned and avoids building a Python list of (image, label) tuples.
shuffled_order = np.random.permutation(x_combined.shape[0])
x_combined = x_combined[shuffled_order]
y_combined = y_combined[shuffled_order]

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1
)
datagen.fit(x_combined)

# Define the model
model = Sequential()

# Convolutional feature extractor: two conv / max-pool / batch-norm stages.
model.add(Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

# Dense classifier head with L2 weight regularisation and a 10-way softmax output.
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.0001)))
model.add(BatchNormalization())
model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.0001)))
model.add(BatchNormalization())
model.add(Dense(10, activation='softmax'))

# Compile with Adam and sparse categorical cross-entropy, tracking accuracy.
initial_learning_rate = 0.001
opt = Adam(learning_rate=initial_learning_rate)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

# Learning rate scheduling
def lr_schedule(epoch):
    """Return the learning rate for ``epoch``.

    The rate starts at the module-level ``initial_learning_rate`` and is
    multiplied by 0.9 for every epoch that has elapsed.
    """
    decay_factor = 0.9 ** epoch
    return initial_learning_rate * decay_factor

lr_scheduler = LearningRateScheduler(lr_schedule)

# Train the model on the combined dataset.
# Validation uses the clean test split, like the other training runs in this
# notebook. Adversarial test examples for this model are only generated after
# training; referencing `x_test_adv` here would pick up a stale tensor from an
# earlier cell, or fail with NameError on a fresh kernel.
history = model.fit(datagen.flow(x_combined, y_combined, batch_size=32),
                    epochs=20,
                    validation_data=(x_test, y_test),
                    callbacks=[lr_scheduler])

# Generate adversarial examples using FGSM attack for test data
x_test_tf = tf.convert_to_tensor(x_test, dtype=tf.float32)
eps = 0.3

# Reuse the fgsm_attack helper defined earlier in this cell instead of repeating
# the gradient code inline. The helper returns a NumPy array, so it is converted
# back to a tensor to keep x_test_adv a tf.Tensor.
x_test_adv = tf.convert_to_tensor(fgsm_attack(model, x_test_tf, y_test, eps=eps))

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_accuracy}')

# Evaluate the model on adversarial test data
adv_test_loss, adv_test_accuracy = model.evaluate(x_test_adv, y_test)
print(f'Test accuracy on adversarial examples: {adv_test_accuracy}')

# Get the predicted labels for adversarial examples
y_pred_prob = model.predict(x_test_adv)
y_pred = np.argmax(y_pred_prob, axis=1)

# Calculate macro-averaged precision and recall for adversarial examples
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print(f'Precision: {precision}')
print(f'Recall: {recall}')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy: 0.992900013923645
Test accuracy on adversarial examples: 0.15230000019073486
Precision: 0.20463383831109366
Recall: 0.15626776281692786
