<a href="https://colab.research.google.com/github/Sugandh-Mishra/attacks/blob/main/L_BFGS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from tensorflow.keras import layers, models, datasets
import numpy as np

In [2]:
def train_model():
    (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
    train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255.0
    test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255.0
    
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu',padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(train_images, train_labels, epochs=2, validation_data=(test_images, test_labels))
  
    return model, test_images, test_labels, train_images

In [3]:
model, test_images, test_labels, x_train = train_model()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/2
Epoch 2/2


In [4]:
import numpy as np
import scipy.optimize as opt

# Define the functions for distance, cross-entropy loss, and total loss

def Dis(x, x1):
    d = x - x1
    D = np.sqrt(np.sum(d**2))
    return D

def c_e(x1, y_prime):
    y1 = np.zeros(10)  # Assuming 10 classes for one-hot encoding
    y1[y_prime] = 1
    yh = svc.predict_proba([x1, x1])
    ce = -(y1 * np.log(yh)).sum()
    return ce

def l_fun(x1, *args):
    x = args[0]
    y_prime = args[1]
    c = args[2]
    ly = svc.predict_proba([x1, x1])[0]
    l_f = c * Dis(x, x1) + c_e(x1, y_prime)
    return l_f

def L_BFGS_Attack(x, y_prime, c, eps):
    # x: Original input
    # y_prime: Target class for the attack
    # c: Weight for the total loss
    # eps: Epsilon for perturbation

    initial = np.ones(len(x))
    bounds = [(x[i] - eps, x[i] + eps) for i in range(len(x))]  # Bounds for the perturbed input
    x1 = opt.fmin_l_bfgs_b(l_fun, x0=initial, args=(x, y_prime, c), bounds=bounds, approx_grad=True)
    x2 = x1[0]
    D = Dis(x, x2)
    yh = svc.predict([x2, x2])
    return x2, yh[0], D





In [5]:
# Example usage:
x = np.array([0.5, 0.6, 0.7])  # Original input
y_prime = 3  # Target class for the attack
c = 0.1  # Weight for the total loss
eps = 0.01  # Epsilon for perturbation



In [6]:

# Call the L-BFGS attack function
x2, yh, D = L_BFGS_Attack(x, y_prime, c, eps)

In [7]:
print("Perturbed Input:", x2)
print("Predicted Class Label:", yh)
print("Euclidean Distance:", D)

In [None]:
# Call the L-BFGS adversarial attack function
adv_images, adv_probs, adv_labels = lbfgs_attack(model, test_images, test_labels, epsilon=0.01, max_iterations=100)

# Print the results
print("Adversarial Images Shape:", adv_images.shape)
print("Adversarial Probabilities Shape:", adv_probs.shape)
print("Adversarial Labels Shape:", adv_labels.shape)


In [None]:
# Evaluate the accuracy of the model on the original test images
_, test_accuracy = model.evaluate(test_images, test_labels, verbose=0)
print(f"Accuracy on original test images: {test_accuracy}")

# Evaluate the accuracy of the model on the adversarial images
_, adv_accuracy = model.evaluate(adv_images, test_labels, verbose=0)
print(f"Accuracy on adversarial images: {adv_accuracy}")

In [None]:
def visualize_lbfgs_attack(original_images, attacked_images, original_labels, attacked_labels, probs):
    num_images = len(original_images)
    num_rows = 10
    num_cols = 10
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(12, 12))

    for i in range(num_rows):
        for j in range(num_cols):
            index = i * num_cols + j
            if index < num_images:
                axes[i][j].imshow(original_images[index].reshape(28, 28), cmap='gray')
                axes[i][j].axis('off')
                if attacked_labels[index] == original_labels[index]:
                    # If original label is same as attacked label, display it in green color
                    axes[i][j].set_title(f'Original: {original_labels[index]}\nAttacked: {attacked_labels[index]}\nProb: {probs[index][attacked_labels[index]]:.2f}', color='green')
                else:
                    # If original label is different from attacked label, display it in red color
                    axes[i][j].set_title(f'Original: {original_labels[index]}\nAttacked: {attacked_labels[index]}\nProb: {probs[index][attacked_labels[index]]:.2f}', color='red')
            else:
                axes[i][j].axis('off')
    plt.tight_layout()
    plt.show()


visualize_lbfgs_attack(test_images,adv_images,test_labels,adv_labels,adv_probs)

In [57]:
import numpy as np
import scipy.optimize as opt
import tensorflow as tf

# Define the CNN model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Define the functions for distance, cross-entropy loss, and total loss
def distance(x, x_adv):
    d = x - x_adv
    D = np.sqrt(np.sum(d**2))
    return D

def cross_entropy(x_adv, y_target):
    y_onehot = np.zeros(10)  # Assuming 10 classes for one-hot encoding
    y_onehot[y_target] = 1
    y_pred = model.predict(np.array([x_adv, x_adv]))
    ce = -(y_onehot * np.log(y_pred)).sum()
    return ce

def loss_function(x_adv, *args):
    x = args[0]
    y_target = args[1]
    weight = args[2]
    y_pred = model.predict(np.array([x_adv, x_adv]))[0]
    loss = weight * distance(x, x_adv) + cross_entropy(x_adv, y_target)
    return loss

def l_bfgs_attack(x, y_target, weight, epsilon):
    # x: Original input
    # y_target: Target class for the attack
    # weight: Weight for the total loss
    # epsilon: Epsilon for perturbation

    initial = np.ones(len(x))
    bounds = [(x[i] - epsilon, x[i] + epsilon) for i in range(len(x))]  # Bounds for the perturbed input
    x_adv = opt.fmin_l_bfgs_b(loss_function, x0=initial, args=(x, y_target, weight), bounds=bounds, approx_grad=True)
    x_adv = x_adv[0]
    distance_adv = distance(x, x_adv)
    y_pred = model.predict(np.array([x_adv, x_adv]))
    return x_adv, np.argmax(y_pred[0]), distance_adv




def train_model():
    (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
    train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255.0
    test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255.0
    
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu',padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(train_images, train_labels, epochs=2, validation_data=(test_images, test_labels))
  
    return model, test_images, test_labels, train_images

model, x, y, x_train = train_model()

# Define the original input x, target class y_target, weight c, and epsilon eps
x = np.random.rand(28, 28, 1)  # Example original input with shape (28, 28, 1)
y_target = 5  # Example target class for the attack
c = 0.01  # Example weight for the total loss
eps = 0.1  # Example epsilon for perturbation

# Perform the L-BFGS-based targeted attack
x_adv, y_pred, distance_adv = l_bfgs_attack(x, y_target, c, eps)

# Print the results
print('Original Input: x')
print('Target Class for Attack: y_target')
print('Weight for Total Loss: c')
print('Epsilon for Perturbation: eps')
print('Adversarial Input: x_adv')
print('Predicted Class for Adversarial Input: y_pred')
print('Distance between Original Input and Adversarial Input: distance_adv')


Epoch 1/2
Epoch 2/2


ValueError: ignored

In [None]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import load_model
from scipy.optimize import fmin_l_bfgs_b
from keras.losses import categorical_crossentropy

# Load the trained CNN model
# model = load_model('path/to/your/trained/model.h5')

# Define the functions for distance, cross-entropy loss, and total loss
def distance(x, x1):
    d = x - x1
    D = np.sqrt(np.sum(d**2))
    return D


def cross_entropy(x1, y_prime):
    y1 = tf.zeros((1, 10))  # Assuming 10 classes for one-hot encoding, add batch dimension
    y1 = tf.tensor_scatter_nd_update(y1, [[0, y_prime]], [1.])  # Update y1 with 1 at y_prime index
    yh1 = model.predict(tf.expand_dims(x1, axis=0))  # Get prediction for x1
    ce = tf.reduce_sum(tf.keras.losses.categorical_crossentropy(y1, yh1))  # Use yh1 for x1 prediction
    return ce




def total_loss(x1, *args):
    x = args[0]
    y_prime = args[1]
    c = args[2]
    x_shape = x.shape
    x1 = x1.reshape(x_shape)  # Reshape x1 to match the original input shape
    ly = model.predict(np.array([x1, x1]))[0]
    l_f = c * distance(x, x1) + cross_entropy(x1, y_prime)
    return l_f

def l_bfgs_attack(x, y_target, c, eps):
    # x: Original input
    # y_target: Target class for the attack
    # c: Weight for the total loss
    # eps: Epsilon for perturbation

    x_shape = x.shape
    initial = np.ones(x_shape).flatten()  # Flatten initial to match the original input shape
    bounds = [(x.flatten()[i] - eps, x.flatten()[i] + eps) for i in range(x_shape[0] * x_shape[1])]  # Bounds for the perturbed input

    x_adv, _, _ = fmin_l_bfgs_b(total_loss, x0=initial, args=(x, y_target, c), bounds=bounds, approx_grad=True)
    x_adv = x_adv.reshape(x_shape)  # Reshape x_adv to match the original input shape
    y_probs = model.predict(np.array([x_adv, x_adv]))  # Get predicted probabilities for x_adv
    y_pred = np.argmax(y_probs, axis=-1)[0]  # Get predicted class label
    distance_adv = distance(x, x_adv)
    return x_adv, y_pred, distance_adv



# Load MNIST data
# ... code to load MNIST data ...

# Select a sample from the MNIST dataset
x_sample = x_train[0]  # Replace with your own sample

# Set the target class for the attack
y_target = 5  # Replace with the desired target class for the attack

# Set the weight for the total loss and epsilon for perturbation
c = 1.0  # Replace with your desired weight for the total loss
eps = 0.1  # Replace with your desired epsilon for perturbation

# Run the L-BFGS attack
x_adv, y_pred, distance_adv = l_bfgs_attack(x_sample, y_target, c, eps)



In [71]:

# Print the results
print('--- Results ---')
print('Original Input: x_sample')
print(x_sample)
# print(model.predict(x_sample))
# y=model.predict(x_sample)
# print(np.argmax(y[0]))
print('Target Class for Attack: y_target')
print(y_target)
print('Weight for Total Loss: c')
print(c)
print('Epsilon for Perturbation: eps')
print(eps)
# print('Adversarial Input: x_adv')
# print(x_adv)
print('Predicted Class for Adversarial Input: y_pred')
print(y_pred)
print('Distance between Original Input and Adversarial Input: distance_adv')
print(distance_adv)

--- Results ---
Original Input: x_sample
[[[0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]]

 [[0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]]

 [[0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.        ]
  [0.     