

Using the python tools of your preference (TensorFlow, scikit-learn, numpy, pandas, etc).

1.- Obtain the MNIST dataset and normalize it for use with a classifier.

2.- Select two different machine learning classification models (like logistic regression, random forests, SVMs, Gaussian Mixtures, Naive Bayes, KNN, etc). Let's call them Model A and B.

3.- Train both models on the MNIST dataset and achieve a "decent" testing accuracy (over 90%).

4.- Using the gradient sign equation in Slide 27 of the lecture, generate 100 adversarial examples for each model. Analyze your results, like what confidences or scores each example obtains, and to what degree they fool the model. Remember to tune the parameter epsilon to a reasonable value (the original paper uses epsilon = 0.007)

5.- Use the adversarial examples of A with model B, and the adversarial examples of B with model A. Do they fool each other?

6.- With one of the models, generate 60000 new adversarial examples (same size as the training set) and create a new training set containing both the original training data and your adversarial examples. Train both models again with this new training set, and evaluate it with the original MNIST test set. Then answer the following questions:

   - Does classification performance improve?

   - Is the new model less or more susceptible to adversarial examples?

   - Do you think you can use a regularization method in order to make the model less susceptible to adversarial examples?


In [1]:
import numpy as np
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
#%matplotlib inline
import tensorflow.contrib.eager as tfe
%matplotlib qt

In [2]:
# Set Eager API
tfe.enable_eager_execution()

In [4]:
#Load MNIST data from tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [6]:
#Plot images
def plot_img(image, index):
    f, ax = plt.subplots(1, len(index))
    for i in range(len(index)):
        ax[i].imshow(np.reshape(image[index[i]], (28,28)))
    plt.show()
def show_img(image, index):
    plt.imshow(np.reshape(image[index], (28,28)))

In [7]:
#plot_img(mnist.train.images, [1,167,200,400,1099])
#show_img(mnist.train.images, 1)

# tensorflow - logstic regression

In [8]:
# Parameters
learning_rate = 0.5
batch_size = 128
num_steps = 1000
display_step = 100
# Iterator for the dataset
dataset = tf.data.Dataset.from_tensor_slices(
    (mnist.train.images, mnist.train.labels)).batch(batch_size)
dataset_iter = tfe.Iterator(dataset)

In [9]:
# Variables
W = tfe.Variable(tf.zeros([784, 10]), name='weights')
b = tfe.Variable(tf.zeros([10]), name='bias')

#linear combiner (v)
def linear_combiner(inputs):
    return tf.matmul(inputs, W) + b

# Cross-Entropy loss function with logistic resgression
def loss_fn(linear_model, inputs, labels):
    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=linear_model(inputs), labels=labels))

# Calculate accuracy
def accuracy_fn(linear_model, inputs, labels, debug=False):
    prediction = tf.nn.softmax(linear_model(inputs))
    correct_pred = tf.equal(tf.argmax(prediction, 1), labels)
    if debug:
        print (prediction)
        print (correct_pred)
    return tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [10]:
# Gradient descent optimizer and provide function to compute gradient
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
grad = tfe.implicit_gradients(loss_fn)

In [None]:
# Training
average_loss = 0.
average_acc = 0.
average_loss_list = []
average_acc_list = []

for step in range(num_steps):
    # Iterate through the dataset
    try:
        d = dataset_iter.next()
    except StopIteration:
        # Refill queue
        dataset_iter = tfe.Iterator(dataset)
        d = dataset_iter.next()

    # Images
    x_batch = d[0]
    # Labels
    y_batch = tf.cast(d[1], dtype=tf.int64)

    batch_loss = loss_fn(linear_combiner, x_batch, y_batch)
    average_loss += batch_loss

    batch_accuracy = accuracy_fn(linear_combiner, x_batch, y_batch)
    average_acc += batch_accuracy

    if step == 0:
        average_loss_list.append(average_loss)
        average_acc_list.append(average_acc)

    # Update the variables (weights) based on gradients
    optimizer.apply_gradients(grad(linear_combiner, x_batch, y_batch))

    if (step + 1) % display_step == 0 or step == 0:
        if step > 0:
            average_loss /= display_step
            average_acc /= display_step
            average_loss_list.append(average_loss)
            average_acc_list.append(average_acc)
            print(average_acc)
        average_loss = 0.
        average_acc = 0.

In [None]:
average_loss_list = np.asarray(average_loss_list)
average_acc_list = np.asarray(average_acc_list)
plt.plot(average_acc_list,label="Average accuracy")
#plt.plot(average_loss_list,label="Average loss")
plt.legend()
plt.grid(True)

In [None]:
#Validation accuracy
validation_images = mnist.validation.images[0:100]
validation_labels = mnist.validation.labels[0:100]
#plot_img(validation_images, [1,41,71])
#Accuracy of validation
test_acc = accuracy_fn(linear_combiner, validation_images, validation_labels)
print("Validation accuracy: {:.4f}".format(test_acc))
# Test set accuracy
testX = mnist.test.images
testY = mnist.test.labels
test_acc = accuracy_fn(linear_combiner, testX, testY)
print("Test set accuracy: {:.4f}".format(test_acc))

In [None]:
#Adversial examples
validation_images += 0.007
random_image = np.random.uniform(0,0.001,784)
adversial_validation = np.copy(validation_images) * random_image
adversial_validation = tf.cast(adversial_validation, tf.float32)
plot_img(adversial_validation, [1,41,71])
#Accuracy
test_acc = accuracy_fn(linear_combiner, adversial_validation, validation_labels)
print("Adversial validation Accuracy: {:.4f}".format(test_acc))

In [31]:
class mnistTwoClassifiers(object):
    def __init__(self):
        self.step_display = 100
    
    def update_param(self, nr_inputs, nr_classes, learning_rate, step_size):
        self.inputs = nr_inputs
        self.nr_classes = nr_classes
        self.W = tfe.Variable(tf.zeros([nr_inputs, nr_classes]), name='weights')
        self.b = tfe.Variable(tf.zeros([nr_classes]), name='biases')
        self.learning_rate = learning_rate
        self.step_size = step_size
        
        # Gradient descent optimizer and provide function to compute gradient
        self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        self.grad = tfe.implicit_gradients(self.logistic_loss_fn)
        
    def update_dataset(self, images, labels, batch_size):
        self.dataset = tf.data.Dataset.from_tensor_slices((images, labels)).batch(batch_size)
        #iterator dataset for each batch
        self.dataset_iter = tfe.Iterator(self.dataset)
    
    #linear combiner (v)
    def linear_combiner(self, inputs):
        return tf.matmul(inputs, self.W) + self.b

    # logistic regression
    def logistic_loss_fn(self, linear_model, inputs, labels):
        return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=linear_model(inputs), labels=labels))
    
    def logistic_regression_scikit(self, y, labels):
        model = LogisticRegression()
        model.fit(y, labels)
        return model
    
    # Calculate accuracy
    def logistic_accuracy_fn(self, linear_model, inputs, labels, debug=False):
        prediction = tf.nn.softmax(linear_model(inputs))
        correct_pred = tf.equal(tf.argmax(prediction, 1), labels)
        if debug:
            print (prediction)
            print (correct_pred)
        return tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
    def save_model(self, name):        
        sess = tf.Session()
        model_saver = tf.train.Saver({"weights": self.W, "biases": self.b})
        model_saver.save(sess, "/tmp/"+str(name)+".ckpt")
        print("Model saved")
    
    def load_model(self, name):
        W = tfe.Variable(tf.zeros([self.nr_inputs, self.nr_classes]), name='weights')
        b = tfe.Variable(tf.zeros([self.nr_classes]), name='biases')
        sess = tf.Session()
        saver = tf.train.Saver({"weights": W, "biases": b})
        saver.restore(sess, str(name))
        
        return W, b
    
    def train_logistic_reg(self):
        # Training
        average_loss = 0.
        average_acc = 0.
        average_loss_list = []
        average_acc_list = []

        for step_ in range(self.step_size):
            # Iterate through the dataset
            try:
                d = self.dataset_iter.next()
            except StopIteration:
                self.dataset_iter = tfe.Iterator(self.dataset)
                d = self.dataset_iter.next()
            # Images and labels
            x_batch = d[0]
            y_batch = tf.cast(d[1], dtype=tf.int64)

            batch_loss = self.logistic_loss_fn(self.linear_combiner, x_batch, y_batch)
            average_loss += batch_loss

            batch_accuracy = self.logistic_accuracy_fn(self.linear_combiner, x_batch, y_batch)
            average_acc += batch_accuracy

            if step_ == 0:
                average_loss_list.append(average_loss)
                average_acc_list.append(average_acc)

            # Update the variables (weights) based on gradients
            self.optimizer.apply_gradients(self.grad(self.linear_combiner, x_batch, y_batch))

            if (step_ + 1) % self.step_display == 0 or step_ == 0:
                if step_ > 0:
                    average_loss /= self.step_display
                    average_acc /= self.step_display
                    average_loss_list.append(average_loss)
                    average_acc_list.append(average_acc)
                    print(average_acc)
                average_loss = 0.
                average_acc = 0.

In [32]:
nr_inputs = 784
nr_classes = 10
learning_rate = 0.5
step_size = 1000
train_images = mnist.train.images
train_labels = mnist.train.labels
batch_size = 100

mnist_classifiers = mnistTwoClassifiers()
mnist_classifiers.update_param(nr_inputs, nr_classes, learning_rate, step_size)
mnist_classifiers.update_dataset(train_images, train_labels, batch_size)
mnist_classifiers.train_logistic_reg()
mnist_classifiers.save_model("logistic_reg")

tf.Tensor(0.8150998, shape=(), dtype=float32)
tf.Tensor(0.89120024, shape=(), dtype=float32)
tf.Tensor(0.8918998, shape=(), dtype=float32)
tf.Tensor(0.9011999, shape=(), dtype=float32)
tf.Tensor(0.9001001, shape=(), dtype=float32)
tf.Tensor(0.9184, shape=(), dtype=float32)
tf.Tensor(0.9080999, shape=(), dtype=float32)
tf.Tensor(0.9146998, shape=(), dtype=float32)
tf.Tensor(0.90470016, shape=(), dtype=float32)
tf.Tensor(0.9118001, shape=(), dtype=float32)
Model saved


In [35]:
logistic_reg = LogisticRegression()
X_test = train_images
y_test = train_labels

In [42]:
sample_size = 500
X_train = train_images[0:sample_size]
y_train = train_labels[:sample_size]
%time logistic_reg.fit(X_train, y_train)
#regr.score(X_test, y_test)
logistic_reg.score(train_images[50:100], train_labels[50:100])

CPU times: user 120 ms, sys: 0 ns, total: 120 ms
Wall time: 121 ms


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)