In [1]:
import tensorflow as tf
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
import numpy as np
import pickle
from sklearn.externals import joblib
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
#Load MNIST data from tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)
mnist_one_hot = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
def plot_img(image, index):
    f, ax = plt.subplots(1, len(index))
    for i in range(len(index)):
        ax[i].imshow(np.reshape(image[index[i]], (28,28)), cmap='Greys')
        ax[i].set_yticklabels([])
        ax[i].set_xticklabels([])
    plt.show()

In [89]:
class mnistTwoClassifiers(object):
    def __init__(self):
        self.step = 100
    
    def update_datasets(self, train_images, train_labels, test_images, test_labels):
        self.train_images = train_images
        self.train_labels = train_labels
        self.test_images = test_images
        self.test_labels = test_labels
    
    def logistic_regression(self, train_size):
        self.logistic_model = LogisticRegression()
        %time self.logistic_model.fit(self.train_images[:train_size], self.train_labels[:train_size])
        return self.logistic_model
    
    def random_forest(self, train_size):
        self.random_forest_model = RandomForestClassifier()
        %time self.random_forest_model.fit(self.train_images[:train_size], self.train_labels[:train_size])
        return self.random_forest_model
    
    def generate_adversarial_example(self, num_of_samples, clf, epsilon, test_images, test_labels):
        adv_samples = []
        for i in range(num_of_samples):
            y_predict = None
            if clf == "logistic_regression":
                y_predict = self.logistic_model.predict_proba(test_images[i:i+1])
            elif clf == "random_forest":
                y_predict = self.random_forest_model.predict_proba(test_images[0:1])
        
            y_true = test_labels[i:i+1]
            y_true_index = np.where(y_true == 1)[1][0]
            predictions = y_predict
            error = (predictions - y_true)**(2)
            gradient = np.dot(train_images[i:i+1].T, error)
            gradient /= len(train_images[i:i+1])
            gradient = gradient.T
            tf_gradient = tf.cast(gradient, tf.float32)
            signs = tf.sign(tf_gradient)
            eps = tf.constant(epsilon)
            img_adversarial = tf.add(tf.multiply(eps, signs), test_images[i:i+1])
            img_adversarial = tf.Session().run(img_adversarial[y_true_index])
            #plot_img(img_adversarial, [y_true_index, y_true_index])
            adv_samples.append(img_adversarial)

        adv_samples = np.asarray(adv_samples)
        return adv_samples
        
    def test(self, test_size, classifier):
        if classifier == "logistic_regression":
            print(self.logistic_model.score(self.test_images[:test_size], self.test_labels[:test_size]))
        elif classifier == "random_forest":
            print(self.random_forest_model.score(self.test_images[:test_size], self.test_labels[:test_size]))
            
    def predict(self, model, test_images, test_size):
        return model.predict(test_images[:test_size])
    
    def test_with_adversarial(self, model, adv_example):
        return model.predict(adv_example)
        

In [90]:
train_images = mnist.train.images
train_labels = mnist.train.labels
test_images = mnist.test.images
test_labels = mnist.test.labels
test_labels_one_hot = mnist_one_hot.test.labels
test_images_one_hot = mnist_one_hot.test.images
two_clfs = mnistTwoClassifiers()
two_clfs.update_datasets(train_images, train_labels, test_images, test_labels)

In [91]:
#train the models
logistic_model = two_clfs.logistic_regression(1000)
random_forest_model = two_clfs.random_forest(1000)
clf_logistic = two_clfs.logistic_model
clf_random_forest = two_clfs.random_forest_model

CPU times: user 232 ms, sys: 4 ms, total: 236 ms
Wall time: 234 ms
CPU times: user 40 ms, sys: 0 ns, total: 40 ms
Wall time: 37.7 ms


In [92]:
#test the model
two_clfs.test(100, "logistic_regression")
two_clfs.test(100, "random_forest")

0.87
0.83


In [96]:
#Use test dataset
num_adv_example = 100
epsilon = 0.25
adv_imgs_logistic = two_clfs.generate_adversarial_example(num_adv_example, "logistic_regression", \
                                                 epsilon, test_images_one_hot, test_labels_one_hot)
adv_imgs_rnd_forest = two_clfs.generate_adversarial_example(num_adv_example, "random_forest", \
                                                 epsilon, test_images_one_hot, test_labels_one_hot)

In [97]:
#Feed adversarial example from logistic to random forest
#clf_random_forest.predict(adv_imgs_logistic)
clf_random_forest.score(adv_imgs_logistic, test_labels[:num_adv_example])

0.39

In [98]:
#Feed adversarial example from random forest to logistic model
clf_logistic.score(adv_imgs_rnd_forest, test_labels[:num_adv_example])

0.77

# Generate 55000 adversarial examples

In [None]:
#Use test dataset
num_adv_example = 55000
epsilon = 0.25
adv_images_logistic = two_clfs.generate_adversarial_example(num_adv_example, "logistic_regression", \
                                                 epsilon, test_images_one_hot, test_labels_one_hot)
adv_images_rnd_forest = two_clfs.generate_adversarial_example(num_adv_example, "random_forest", \
                                                 epsilon, test_images_one_hot, test_labels_one_hot)

In [None]:
#Save classifier to pickle file
joblib.dump(logistic_model, '/tmp/logistic_regression.pkl') 
joblib.dump(random_forest_model, '/tmp/random_forest_model.pkl')

In [None]:
#Load classifier
logistic_clf = joblib.load('/tmp/logistic_regression.pkl')
random_forest_clf = joblib.load('/tmp/random_forest_model.pkl')