In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

import searchnets

import mnist
import mnist_contrib
import fc


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [2]:
# Locations of the gzipped MNIST training archives (idx format).
TRAIN_IMAGES = './mnist/train-images-idx3-ubyte.gz'
TRAIN_LABELS = './mnist/train-labels-idx1-ubyte.gz'

# Decode the image archive into `x_train`.
# NOTE(review): presumably shape (n, 28, 28, 1) given the later padding to 32x32 -- confirm.
with gfile.Open(TRAIN_IMAGES, 'rb') as f:
    x_train = mnist_contrib.extract_images(f)

# Decode the label archive into `y_train` (one digit label per image).
with gfile.Open(TRAIN_LABELS, 'rb') as f:
    y_train = mnist_contrib.extract_labels(f)

Extracting ./mnist/train-images-idx3-ubyte.gz
Extracting ./mnist/train-labels-idx1-ubyte.gz


In [3]:
# Work on an independent copy so that relabelling never modifies the original digit labels.
y_train_01 = np.array(y_train, copy=True)

In [4]:
# Collapse the ten digit classes into two: digits 0-4 become class 0, digits 5-9 become class 1.
# The indices are always computed from the untouched `y_train`, so this cell can be re-run safely.
inds_0 = np.nonzero(np.isin(y_train, np.arange(0, 5)))[0]
inds_1 = np.nonzero(np.isin(y_train, np.arange(5, 10)))[0]
y_train_01[inds_0] = 0
y_train_01[inds_1] = 1

# Zero-pad two pixels on every side of both spatial axes so size is ( n x 32 x 32 x 1 ),
# the input size the LeNet layers below are written for.
x_train_01 = np.pad(
    x_train,
    pad_width=((0, 0), (2, 2), (2, 2), (0, 0)),
    mode='constant',
    constant_values=0,
)

In [5]:
# Adapted from https://github.com/sujaybabruwad/LeNet-in-Tensorflow/blob/master/LeNet-Lab.ipynb

class LeNet:
    """LeNet-style convolutional classifier assembled from TensorFlow 1.x graph ops.

    Layer widths are read from ``layer_depth``, so they can be changed on an
    instance before ``build`` is called. The defaults are 6, 16, 120 and 84.

    Attributes
    ----------
    mu, sigma : float
        mean and standard deviation of the truncated normal used to initialise all weights.
    layer_depth : dict
        output width of each hidden layer. 'layer_1' and 'layer_2' are the channel counts
        of the two convolutions; 'layer_3' and 'layer_f1' are the unit counts of the two
        hidden fully connected layers.
    fc2 : tensorflow.Tensor or None
        activations of the last hidden layer; None until ``build`` has been called.
    logits : tensorflow.Tensor or None
        unnormalised class scores; None until ``build`` has been called.
    """

    def __init__(self):
        self.mu = 0
        self.sigma = 0.1
        self.layer_depth = {
            'layer_1' : 6,
            'layer_2' : 16,
            'layer_3' : 120,
            'layer_f1' : 84
        }
        self.fc2 = None
        self.logits = None

    def _weights(self, shape):
        """Create a weight variable drawn from a truncated normal with this model's mu and sigma."""
        return tf.Variable(tf.truncated_normal(shape=shape, mean=self.mu, stddev=self.sigma))

    def build(self, x, n_classes=10):
        """Add the network's ops to the current default graph.

        Parameters
        ----------
        x : tensorflow.Tensor
            input batch. The layer arithmetic below is written for inputs of
            size 32x32x1 (the flattened size is computed as 5 * 5 * layer_2 depth).
        n_classes : int
            number of output classes, i.e. the width of the logits. Default is 10.

        Returns
        -------
        logits : tensorflow.Tensor
            unnormalised class scores. Also stored as ``self.logits``; the last
            hidden layer's activations are stored as ``self.fc2``.
        """
        depth_1 = self.layer_depth['layer_1']
        depth_2 = self.layer_depth['layer_2']
        depth_3 = self.layer_depth['layer_3']
        depth_f1 = self.layer_depth['layer_f1']

        # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28xdepth_1.
        conv1_w = self._weights([5, 5, 1, depth_1])
        conv1_b = tf.Variable(tf.zeros(depth_1))
        conv1 = tf.nn.conv2d(x, conv1_w, strides=[1, 1, 1, 1], padding='VALID') + conv1_b
        conv1 = tf.nn.relu(conv1)

        # Pooling. Input = 28x28xdepth_1. Output = 14x14xdepth_1.
        pool_1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

        # Layer 2: Convolutional. Output = 10x10xdepth_2.
        conv2_w = self._weights([5, 5, depth_1, depth_2])
        conv2_b = tf.Variable(tf.zeros(depth_2))
        conv2 = tf.nn.conv2d(pool_1, conv2_w, strides=[1, 1, 1, 1], padding='VALID') + conv2_b
        conv2 = tf.nn.relu(conv2)

        # Pooling. Input = 10x10xdepth_2. Output = 5x5xdepth_2.
        pool_2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

        # Flatten. Input = 5x5xdepth_2. Output = 5 * 5 * depth_2 (400 with the defaults).
        flat = tf.layers.flatten(pool_2)
        flat_dim = 5 * 5 * depth_2

        # Layer 3: Fully Connected. Input = flat_dim. Output = depth_3.
        fc1_w = self._weights((flat_dim, depth_3))
        fc1_b = tf.Variable(tf.zeros(depth_3))
        fc1 = tf.nn.relu(tf.matmul(flat, fc1_w) + fc1_b)

        # Layer 4: Fully Connected. Input = depth_3. Output = depth_f1.
        fc2_w = self._weights((depth_3, depth_f1))
        fc2_b = tf.Variable(tf.zeros(depth_f1))
        fc2 = tf.nn.relu(tf.matmul(fc1, fc2_w) + fc2_b)
        self.fc2 = fc2

        # Layer 5: Fully Connected. Input = depth_f1. Output = n_classes.
        fc3_w = self._weights((depth_f1, n_classes))
        fc3_b = tf.Variable(tf.zeros(n_classes))
        logits = tf.matmul(fc2, fc3_w) + fc3_b
        self.logits = logits
        return logits

In [6]:
# Model configuration.
# The placeholder shape keeps the batch dimension open and copies the remaining
# dimensions from the padded training images.
input_shape = [None] + list(x_train_01.shape[1:])

n_classes = 2

# Training hyper-parameters. They are defined here, ahead of their first use in the
# graph-building and training cells, so that the notebook runs top to bottom on a fresh kernel.
epochs = 10             # Total number of training epochs
batch_size = 100        # Training batch size
display_freq = 100      # Frequency of displaying the training results
learning_rate = 0.001   # The optimization initial learning rate

In [9]:
# Build the baseline graph: LeNet trained with cross-entropy only.
# `learning_rate` is read here, when the optimizer op is created, so it must be defined
# before this cell runs.
lenet = LeNet()

graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder(tf.float32, shape=input_shape, name='X')
    y = tf.placeholder(tf.int32, shape=[None], name='Y')
    y_onehot = tf.one_hot(y, depth=n_classes)

    # Use the shared n_classes setting so the logits width always matches the one-hot depth.
    lenet.build(x=x, n_classes=n_classes)

    # The _v2 op replaces the deprecated softmax_cross_entropy_with_logits; the labels here
    # are constants derived from a placeholder, so the computed loss is the same.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_onehot, logits=lenet.logits),
        name='loss')
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate, name='SGD-op').minimize(loss)
    correct_prediction = tf.equal(tf.argmax(lenet.logits, 1), tf.argmax(y_onehot, 1), name='correct_pred')
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

    init = tf.global_variables_initializer()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [8]:
def randomize(x, y):
    """Shuffle samples and their labels with one shared random permutation.

    Parameters
    ----------
    x : numpy.ndarray
        samples, indexed along the first axis. Any rank >= 1 is accepted.
    y : numpy.ndarray
        labels, one per sample, indexed along the first axis.

    Returns
    -------
    shuffled_x, shuffled_y : numpy.ndarray
        copies of x and y reordered by the same permutation, so that row i of
        shuffled_x still corresponds to entry i of shuffled_y.

    Raises
    ------
    ValueError
        if x and y do not hold the same number of samples.
    """
    n_samples = y.shape[0]
    if x.shape[0] != n_samples:
        raise ValueError(
            'x and y must have the same number of samples, got {} and {}'.format(
                x.shape[0], n_samples))
    permutation = np.random.permutation(n_samples)
    # Indexing only the first axis keeps this valid for 1-D x as well as for image batches.
    return x[permutation], y[permutation]

def get_next_batch(x, y, start, end):
    """Return the samples and labels at positions ``start`` (inclusive) to ``end`` (exclusive)."""
    window = slice(start, end)
    return x[window], y[window]

In [9]:
def get_val(x_train, y_train, val_size):
    """Split off the last ``val_size`` samples as a validation set.

    Parameters
    ----------
    x_train, y_train : sequence or numpy.ndarray
        samples and labels, indexed along the first axis.
    val_size : int
        number of trailing samples to hold out. 0 holds out nothing.

    Returns
    -------
    x_train, y_train, x_val, y_val
        the leading part of each input followed by the held-out trailing part.

    Raises
    ------
    ValueError
        if val_size is negative or larger than the number of samples.
    """
    n_x, n_y = len(x_train), len(y_train)
    if not 0 <= val_size <= min(n_x, n_y):
        raise ValueError(
            'val_size must be between 0 and {}, got {}'.format(min(n_x, n_y), val_size))
    # Use explicit split points rather than negative slices: x[-0:] is the whole array,
    # so val_size=0 would otherwise hold out everything and leave no training data.
    split_x = n_x - val_size
    split_y = n_y - val_size
    return x_train[:split_x], y_train[:split_y], x_train[split_x:], y_train[split_y:]

In [10]:
# Hold out the last 10000 samples for validation.
# This assignment overwrites its own inputs, so running the cell a second time would silently
# carve another 10000 samples out of the already-reduced training set. Fail loudly instead.
assert len(y_train_01) == len(y_train), (
    'validation split already applied; re-run the relabelling/padding cells before splitting again')
x_train_01, y_train_01, x_val_01, y_val_01 = get_val(x_train_01, y_train_01, val_size=10000)

In [14]:
# Train the cross-entropy-only model and report batch / validation metrics.
with tf.Session(graph=graph) as sess:
    sess.run(init)
    global_step = 0
    # Number of whole batches per epoch; samples left over after the last whole batch are not used
    num_tr_iter = int(len(y_train_01) / batch_size)
    for epoch in range(epochs):
        print('Training epoch: {}'.format(epoch + 1))
        # Draw a fresh sample order for every epoch
        x_tr_epoch, y_tr_epoch = randomize(x_train_01, y_train_01)
        for iteration in range(num_tr_iter):
            global_step += 1
            start = iteration * batch_size
            end = start + batch_size
            x_batch, y_batch = get_next_batch(x_tr_epoch, y_tr_epoch, start, end)

            # One gradient-descent step on this batch
            feed_dict_batch = {x: x_batch, y: y_batch}
            sess.run(optimizer, feed_dict=feed_dict_batch)

            # Only report every `display_freq` iterations
            if iteration % display_freq != 0:
                continue

            # Loss and accuracy on the batch that was just trained on
            loss_batch, acc_batch = sess.run([loss, accuracy],
                                             feed_dict=feed_dict_batch)
            print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
                  format(iteration, loss_batch, acc_batch))

        # After every epoch, evaluate on the first 1000 validation samples
        feed_dict_valid = {x: x_val_01[:1000], y: y_val_01[:1000]}
        loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
        print('---------------------------------------------------------')
        print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
              format(epoch + 1, loss_valid, acc_valid))
        print('---------------------------------------------------------')


Training epoch: 1
iter   0:	 Loss=9.39,	Training Accuracy=37.0%
iter 100:	 Loss=2.45,	Training Accuracy=64.0%
iter 200:	 Loss=1.14,	Training Accuracy=76.0%
iter 300:	 Loss=0.73,	Training Accuracy=81.0%
iter 400:	 Loss=0.47,	Training Accuracy=82.0%
---------------------------------------------------------
Epoch: 1, validation loss: 0.68, validation accuracy: 81.2%
---------------------------------------------------------
Training epoch: 2
iter   0:	 Loss=0.48,	Training Accuracy=86.0%
iter 100:	 Loss=0.70,	Training Accuracy=77.0%
iter 200:	 Loss=0.39,	Training Accuracy=91.0%
iter 300:	 Loss=0.43,	Training Accuracy=80.0%
iter 400:	 Loss=0.33,	Training Accuracy=88.0%
---------------------------------------------------------
Epoch: 2, validation loss: 0.51, validation accuracy: 83.6%
---------------------------------------------------------
Training epoch: 3
iter   0:	 Loss=0.41,	Training Accuracy=82.0%
iter 100:	 Loss=0.42,	Training Accuracy=82.0%
iter 200:	 Loss=0.44,	Training Accuracy=84

In [11]:
def pairwise_squared_distance(A):
    """Squared Euclidean distance between every pair of rows of A.

    Uses the expansion ||a[i] - a[j]||^2 = ||a[i]||^2 - 2 a[i].a[j] + ||a[j]||^2,
    so the batch never has to be replicated.

    Parameters
    ----------
    A : tensorflow.Tensor
        of rank 2, where each row a[i] is a feature vector output from a batch of input samples

    Returns
    -------
    D : tensorflow.Tensor
        distance matrix, where D[i, j] is the pairwise squared distance between a[i] and a[j]

    adapted from https://stackoverflow.com/questions/37009647/compute-pairwise-distance-in-a-batch-without-replicating-tensor-in-tensorflow
    """
    # squared norm of each row, arranged as a column vector
    sq_norms = tf.reshape(tf.reduce_sum(tf.square(A), 1), [-1, 1])
    # Gram matrix: entry (i, j) is the dot product of rows i and j
    gram = tf.matmul(A, A, transpose_b=True)
    # broadcasting the column against its transpose gives every pair of norms
    return sq_norms - 2 * gram + tf.transpose(sq_norms)

In [11]:
def squared_dist(A, B=None):
    """Squared Euclidean distance between every row of A and every row of B.

    Parameters
    ----------
    A : tensorflow.Tensor
        of rank 2, one feature vector per row.
    B : tensorflow.Tensor, optional
        of rank 2, one feature vector per row. Defaults to A, which gives the
        distances among the rows of A itself.

    Returns
    -------
    D : tensorflow.Tensor
        matrix where D[i, j] is the squared distance between A[i] and B[j].
    """
    B = A if B is None else B

    # Squared row norms: a column for A, a row for B, so that they broadcast to a full matrix.
    sq_norms_A = tf.reshape(tf.reduce_sum(tf.square(A), axis=1), [-1, 1])
    sq_norms_B = tf.reshape(tf.reduce_sum(tf.square(B), axis=1), [1, -1])

    # Dot product of every row of A with every row of B.
    cross = tf.matmul(A, B, transpose_b=True)
    return sq_norms_A - 2 * cross + sq_norms_B

In [28]:
def euclid_dist(A, B=None, eps=1e-6):
    """Euclidean distance between every row of A and every row of B.

    Parameters
    ----------
    A : tensorflow.Tensor
        of rank 2, one feature vector per row.
    B : tensorflow.Tensor, optional
        of rank 2, one feature vector per row. Defaults to A.
    eps : float
        small constant added under the square root so the result and its
        gradient stay finite where the squared distance is zero. Default is 1e-6.

    Returns
    -------
    D : tensorflow.Tensor
        matrix where D[i, j] is approximately the distance between A[i] and B[j].
    """
    # squared_dist already substitutes A when B is None.
    sq = squared_dist(A, B)
    # Rounding in the norm expansion can leave slightly negative entries. Clamp them to zero
    # rather than taking abs(): abs() would turn that noise into a spurious positive distance,
    # and abs(sq + eps) is exactly zero when sq == -eps, where sqrt has an infinite gradient.
    sq = tf.maximum(sq, 0.0)
    return tf.sqrt(sq + eps)

In [35]:
# Build the graph for LeNet trained with cross-entropy plus a feature-distance penalty.
# `learning_rate` is read here, when the optimizer op is created, so it must be defined
# before this cell runs.
lenet = LeNet()

graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder(tf.float32, shape=input_shape, name='X')
    y = tf.placeholder(tf.int32, shape=[None], name='Y')
    y_onehot = tf.one_hot(y, depth=n_classes)

    # Use the shared n_classes setting so the logits width always matches the one-hot depth.
    lenet.build(x=x, n_classes=n_classes)

    # The _v2 op replaces the deprecated softmax_cross_entropy_with_logits; the labels here
    # are constants derived from a placeholder, so the computed loss is the same.
    ce_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_onehot, logits=lenet.logits),
        name='loss')

    # t = target (label 1), d = distractor (label 0)
    # tf.where returns one index row per match, so gather yields a (k, 1, features) tensor.
    # Squeeze axis 1 only: a bare squeeze would also drop the row axis when k == 1,
    # leaving a rank-1 tensor that the distance functions cannot handle.
    t_inds = tf.where(tf.math.equal(y, 1))
    t_vecs = tf.gather(lenet.fc2, t_inds)
    t_vecs = tf.squeeze(t_vecs, axis=1)
    t_distances = euclid_dist(t_vecs)
    mu_t_dist = tf.reduce_mean(t_distances)

    d_inds = tf.where(tf.math.equal(y, 0))
    d_vecs = tf.gather(lenet.fc2, d_inds)
    d_vecs = tf.squeeze(d_vecs, axis=1)
    d_distances = euclid_dist(d_vecs)
    mu_d_dist = tf.reduce_mean(d_distances)

    t_d_distances = euclid_dist(t_vecs, d_vecs)
    mu_t_d_distances = tf.reduce_mean(t_d_distances)

    # Small within-class distances and large between-class distances lower this term.
    distance_loss = mu_t_dist + mu_d_dist + (1 / mu_t_d_distances)

    loss = ce_loss + distance_loss

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate, name='SGD-op').minimize(loss)
    preds = tf.argmax(lenet.logits, 1)
    correct_prediction = tf.equal(preds, tf.argmax(y_onehot, 1), name='correct_pred')
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

    init = tf.global_variables_initializer()

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [45]:
# Hyper-parameters for the training loop.
# NOTE: learning_rate is consumed when the optimizer op is created in the graph-building
# cell, so a new value here only takes effect after that cell has been re-run.
learning_rate = 0.001   # step size passed to the gradient-descent optimizer
batch_size = 100        # number of samples per training step
epochs = 10             # number of passes over the training set
display_freq = 100      # print batch metrics every this many iterations

In [46]:
# Train the model with the combined cross-entropy + distance loss and report metrics.
with tf.Session(graph=graph) as sess:
    sess.run(init)
    global_step = 0
    # Number of whole batches per epoch; samples left over after the last whole batch are not used
    num_tr_iter = int(len(y_train_01) / batch_size)
    for epoch in range(epochs):
        print('Training epoch: {}'.format(epoch + 1))
        # Draw a fresh sample order for every epoch
        x_tr_epoch, y_tr_epoch = randomize(x_train_01, y_train_01)
        for iteration in range(num_tr_iter):
            global_step += 1
            start = iteration * batch_size
            end = start + batch_size
            x_batch, y_batch = get_next_batch(x_tr_epoch, y_tr_epoch, start, end)

            # One gradient-descent step on this batch
            feed_dict_batch = {x: x_batch, y: y_batch}
            sess.run(optimizer, feed_dict=feed_dict_batch)

            # Only report every `display_freq` iterations
            if iteration % display_freq != 0:
                continue

            # Predictions, distance term, total loss and accuracy on the batch just trained on
            fetches = [preds, correct_prediction, distance_loss, loss, accuracy]
            preds_b, correct_b, dist_loss_b, loss_batch, acc_batch = sess.run(
                fetches, feed_dict=feed_dict_batch)
            print("iter {0:3d}:\t Dist. Loss={1:.2f},\tLoss={2:.2f},\tTraining Accuracy={3:.01%}".
                  format(iteration, dist_loss_b, loss_batch, acc_batch))

        # After every epoch, evaluate on the first 1000 validation samples
        feed_dict_valid = {x: x_val_01[:1000], y: y_val_01[:1000]}
        loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
        print('---------------------------------------------------------')
        print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
              format(epoch + 1, loss_valid, acc_valid))
        print('---------------------------------------------------------')


Training epoch: 1
iter   0:	 Dist. Loss=316.84,	Loss=330.39,	Training Accuracy=29.0%
iter 100:	 Dist. Loss=2.83,	Loss=3.52,	Training Accuracy=48.0%
iter 200:	 Dist. Loss=2.83,	Loss=3.52,	Training Accuracy=50.0%
iter 300:	 Dist. Loss=2.83,	Loss=3.52,	Training Accuracy=46.0%
iter 400:	 Dist. Loss=2.80,	Loss=3.48,	Training Accuracy=56.0%
---------------------------------------------------------
Epoch: 1, validation loss: 3.52, validation accuracy: 47.1%
---------------------------------------------------------
Training epoch: 2
iter   0:	 Dist. Loss=2.80,	Loss=3.49,	Training Accuracy=54.0%
iter 100:	 Dist. Loss=2.80,	Loss=3.49,	Training Accuracy=49.0%
iter 200:	 Dist. Loss=2.76,	Loss=3.45,	Training Accuracy=50.0%
iter 300:	 Dist. Loss=2.83,	Loss=3.52,	Training Accuracy=49.0%
iter 400:	 Dist. Loss=2.73,	Loss=3.42,	Training Accuracy=48.0%
---------------------------------------------------------
Epoch: 2, validation loss: 3.50, validation accuracy: 45.5%
------------------------------------

In [None]:
# Display the accuracy of the most recent training batch that was evaluated in the loop above.
acc_batch

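This is not working, most likely because the total loss is dominated by the distance term: in the output above, the cross-entropy part (Loss − Dist. Loss ≈ 0.69 ≈ ln 2) stays at chance level and accuracy hovers around 50%.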

Perhaps better to have the network learn two functions simultaneously, e.g. a score function and the classification.

That is, literally do Siamese-style training: add a score head and back-propagate through it.

In fact, a triplet loss is probably the better choice. See:

- https://omoindrot.github.io/triplet-loss
- https://github.com/omoindrot/tensorflow-triplet-loss

But optimize it at the same time as the cross-entropy loss, i.e., be able to train on both objectives jointly.