In [1]:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.objectives import categorical_crossentropy
from keras.datasets import mnist

Using Theano backend.
Using gpu device 0: GeForce GT 750M (CNMeM is enabled with initial size: 75.0% of memory, CuDNN 4007)


In [2]:
%time (X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

CPU times: user 1.15 s, sys: 565 ms, total: 1.71 s
Wall time: 1.71 s
(60000, 'train samples')
(10000, 'test samples')


In [3]:
# Read the array stored in 'P.npy'; %time reports how long the load takes.
%time P = np.load('P.npy') # load pre-computed joint probabilities
# The second dimension of P is reused as the batch size: tsne() reads this global
# as n, and model.fit() receives it as batch_size.
batch_size = P.shape[1]

CPU times: user 899 µs, sys: 1.41 s, total: 1.41 s
Wall time: 1.41 s


In [4]:
from keras import backend as K

def tsne(P, activations):
    """Loss computing sum(P * log((P + eps) / (Q + eps))) for one batch.

    P is the joint probabilities for this batch (Keras loss functions call
    this y_true); activations is the low-dimensional output (Keras loss
    functions call this y_pred).

    Q is built from the activations: pairwise squared distances are divided
    by dof, passed through (1 + x) ** (-(dof + 1) / 2), the diagonal is zeroed,
    the result is divided by its total sum and finally clamped to at least eps.

    Relies on the module-level ``batch_size`` for the size of the diagonal
    mask, so every batch is expected to hold exactly that many rows.
    """
    # embed_dim = K.shape(activations)[1]
    embed_dim = 2 # TODO: should set this automatically, but the above is very slow for some reason
    n_points = batch_size # TODO: should set this automatically
    dof = embed_dim - 1.
    # Original author's note: needs to be at least 10e-8 to get anything after
    # Q is divided by its sum.
    eps = K.variable(10e-15)

    # Row-wise squared norms and the Gram matrix of the activations.
    sq_norms = K.sum(K.square(activations), axis=1)
    gram = K.dot(activations, K.transpose(activations))

    # |a_i|^2 + |a_j|^2 - 2 * a_i . a_j, scaled by dof.
    sq_dist = (sq_norms + (K.reshape(sq_norms, [-1, 1]) + -2 * gram)) / dof
    kernel = K.pow(1 + sq_dist, -(dof + 1) / 2)

    # Zero the diagonal so a point is never paired with itself.
    off_diagonal = K.variable(1 - np.eye(n_points))
    kernel = kernel * off_diagonal

    # Normalise by the total sum, then clamp away from zero before the log.
    Q = K.maximum(kernel / K.sum(kernel), eps)
    return K.sum(P * K.log((P + eps) / (Q + eps)))

In [5]:
model = Sequential()
model.add(Dense(500, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dense(500))
model.add(Activation('relu'))
model.add(Dense(2000))
model.add(Activation('relu'))
model.add(Dense(2))
# model.build()
# print model.summary()

sgd = SGD(lr=0.01)
%time model.compile(loss=tsne, optimizer=sgd)

CPU times: user 3.51 s, sys: 431 ms, total: 3.94 s
Wall time: 3.97 s


In [6]:
%matplotlib inline
from matplotlib import pyplot as plt

import gc
!mkdir -p plotter
class Plotter(keras.callbacks.Callback):
    """Keras callback that saves a scatter plot of the test embedding each epoch.

    After every epoch the current model is run on the global ``X_test`` and the
    two output columns are plotted, coloured by the global ``y_test``. The image
    is written to ``plotter/NNNN.png`` where NNNN is the 1-based epoch number,
    so the ``plotter`` directory must already exist (the notebook creates it
    with ``!mkdir -p plotter`` before defining this class).
    """

    def on_epoch_end(self, epoch, logs=None):
        """Render and save the embedding for the epoch that just finished.

        epoch -- 0-based epoch index supplied by Keras.
        logs  -- metrics dict supplied by Keras; unused here. Defaults to None
                 instead of a shared mutable ``{}``.
        """
        prediction = self.model.predict(X_test)
        fig = plt.figure(figsize=(8,8))
        try:
            # 'none' is the documented way to suppress marker edges; the empty
            # string is not a valid colour in current matplotlib.
            plt.scatter(prediction[:,0], prediction[:,1], alpha=1, marker='o', s=3, edgecolor='none', c=y_test)
            ax = fig.gca()
            ax.set_autoscale_on(False)
            fig.tight_layout()
            plt.savefig('plotter/%04d.png' % (epoch+1), pad_inches=0)
        finally:
            # Always release this figure, even if plotting or saving fails,
            # so figures do not accumulate over hundreds of epochs.
            plt.close(fig)
            gc.collect()

In [7]:
# Lay P out with one row per training sample so it can be passed to Keras as
# the target array; the column count is inferred from P's size.
Y_train = np.reshape(P, (len(X_train), -1))

# Sanity check: both arrays must have the same number of rows.
print(X_train.shape)
print(Y_train.shape)

(60000, 784)
(60000, 5000)


In [None]:
# Train with the custom loss. Batches use the size derived from P and are taken
# in their stored order (shuffle=False) -- presumably so every batch of inputs
# stays aligned with its slice of Y_train; confirm against how P.npy was built.
model.fit(
    X_train,
    Y_train,
    nb_epoch=500,
    batch_size=batch_size,
    shuffle=False,
    verbose=1,
    callbacks=[Plotter()],  # writes one scatter plot per epoch
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500