In [2]:
import tensorflow as tf
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
import numpy as np
import pickle
from sklearn.externals import joblib
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
#Load MNIST data from tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)
mnist_one_hot = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [6]:
def plot_img(image, index):
    f, ax = plt.subplots(1, len(index))
    for i in range(len(index)):
        ax[i].imshow(np.reshape(image[index[i]], (28,28)), cmap='Greys')
        ax[i].set_yticklabels([])
        ax[i].set_xticklabels([])
    plt.show()

In [7]:
class mnistTwoClassifiers(object):
    def __init__(self):
        self.step = 100
    
    def update_datasets(self, train_images, train_labels, test_images, test_labels):
        self.train_images = train_images
        self.train_labels = train_labels
        self.test_images = test_images
        self.test_labels = test_labels
    
    def logistic_regression(self, train_size):
        self.logistic_model = LogisticRegression()
        %time self.logistic_model.fit(self.train_images[:train_size], self.train_labels[:train_size])
        return self.logistic_model
    
    def random_forest(self, train_size):
        self.random_forest_model = RandomForestClassifier()
        %time self.random_forest_model.fit(self.train_images[:train_size], self.train_labels[:train_size])
        return self.random_forest_model
    
    def generate_adversarial_example(self, num_of_samples, clf, epsilon, test_images, test_labels):
        adv_samples = np.zeros((num_of_samples, 784))
        adv_true_samples = np.zeros((num_of_samples, 1))
        for i in range(num_of_samples):
            y_predict = None
            if clf == "logistic_regression":
                y_predict = self.logistic_model.predict_proba(test_images[i:i+1])
            elif clf == "random_forest":
                y_predict = self.random_forest_model.predict_proba(test_images[i:i+1])
        
            y_true = test_labels[i:i+1]
            y_true_index = np.where(y_true == 1)[1][0]
            predictions = y_predict
            error = (predictions - y_true)**(2)
            error = error[0][y_true_index]
            gradient = error * train_images[i:i+1]
            gradient /= len(train_images[i:i+1])
            signs = np.sign(gradient)
            img_adversarial = epsilon * signs * test_images[i:i+1]
            adv_samples[i] = img_adversarial
            adv_true_samples[i] = y_true_index
        adv_samples = np.asarray(adv_samples)
        return adv_samples, adv_true_samples
        
    def test(self, test_size, classifier):
        if classifier == "logistic_regression":
            print(self.logistic_model.score(self.test_images[:test_size], self.test_labels[:test_size]))
        elif classifier == "random_forest":
            print(self.random_forest_model.score(self.test_images[:test_size], self.test_labels[:test_size]))
            
    def predict(self, model, test_images, test_size):
        return model.predict(test_images[:test_size])
    
    def test_with_adversarial(self, model, adv_example):
        return model.predict(adv_example)
        

In [8]:
train_images = mnist.train.images
train_labels = mnist.train.labels
test_images = mnist.test.images
test_labels = mnist.test.labels
test_labels_one_hot = mnist_one_hot.test.labels
test_images_one_hot = mnist_one_hot.test.images
train_labels_one_hot = mnist_one_hot.train.labels
train_images_one_hot = mnist_one_hot.train.images
two_clfs = mnistTwoClassifiers()
two_clfs.update_datasets(train_images, train_labels, test_images, test_labels)

# Task 1 -5

In [9]:
#train the models
logistic_model = two_clfs.logistic_regression(55000)
random_forest_model = two_clfs.random_forest(55000)
clf_logistic = two_clfs.logistic_model
clf_random_forest = two_clfs.random_forest_model
#Save classifier to pickle file
joblib.dump(clf_logistic, 'logistic_regression.pkl') 
joblib.dump(clf_random_forest, 'random_forest_model.pkl')

CPU times: user 1min 11s, sys: 344 ms, total: 1min 11s
Wall time: 1min 12s
CPU times: user 3.43 s, sys: 4 ms, total: 3.43 s
Wall time: 3.44 s


['random_forest_model.pkl']

In [10]:
#Load classifier
logistic_clf = joblib.load('/tmp/logistic_regression.pkl')
random_forest_clf = joblib.load('/tmp/random_forest_model.pkl')

In [11]:
#test the model
two_clfs.test(100, "logistic_regression")
two_clfs.test(100, "random_forest")

0.97
0.97


In [16]:
#Use test dataset
num_adv_example = 100
epsilon = 0.007
adv_imgs_logistic, labels = two_clfs.generate_adversarial_example(num_adv_example, "logistic_regression", \
                                                 epsilon, test_images_one_hot, test_labels_one_hot)
adv_imgs_rnd_forest, labels = two_clfs.generate_adversarial_example(num_adv_example, "random_forest", \
                                                 epsilon, test_images_one_hot, test_labels_one_hot)

In [17]:
#Feed adversarial example from logistic to random forest
#clf_random_forest.predict(adv_imgs_logistic)
clf_random_forest.score(adv_imgs_logistic, test_labels[:num_adv_example])

0.19

In [18]:
#Feed adversarial example from random forest to logistic model
clf_logistic.score(adv_imgs_rnd_forest, train_labels[:num_adv_example])

0.08

# Task 6
# Generate 55000 adversarial examples

In [19]:
#Use test dataset
num_adv_example = 55000
epsilon = 0.007
adv_imgs_logistic, logic_labels = two_clfs.generate_adversarial_example(num_adv_example, "logistic_regression", \
                                                 epsilon, train_images_one_hot, train_labels_one_hot)
adv_imgs_rnd_forest, forest_labels = two_clfs.generate_adversarial_example(num_adv_example, "random_forest", \
                                                 epsilon, train_images_one_hot, train_labels_one_hot)

In [20]:
np.save("adv_imgs_logistic", adv_imgs_logistic)
np.save("logic_labels", logic_labels)
np.save("adv_imgs_rnd_forest", adv_imgs_rnd_forest)
np.save("forest_labels", forest_labels)