In [1]:
"""
This compares DIVA to MLP with extremely small numbers of hidden units: 1 to 9; most MNIST MLP scripts use ~500.
DIVA does surprisingly well at very low hidden units, considering that MLP falls apart.
I've tested MLP with 1 hidden unit and ~100 epochs once and it didn't improve much.
As DIVA is based on autoencoders, it is plausible that it performs well when 
dimensionality reduction would otherwise be required, and that appears to be the case here.

As an aside, if there is more 'standard' jargon for this sort of test, please notify the author. 
"""


import DIVA

Using Theano backend.
Using gpu device 0: GeForce GTX 960 (CNMeM is enabled with initial size: 70.0% of memory, cuDNN 4007)


In [2]:
#this cell is taken almost verbatim from the keras examples.
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam, RMSprop 
from keras.utils import np_utils

# Fetch MNIST as (train, test) pairs of images and integer labels.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a 784-long row, cast to float32 and divide by 255
# (presumably rescales 8-bit pixel intensities into [0, 1] -- TODO confirm).
X_train = X_train.reshape(60000, 784).astype('float32') / 255
X_test = X_test.reshape(10000, 784).astype('float32') / 255

nb_classes = 10

# Categorical encoding of the integer labels, used as targets by the softmax MLPs.
# The raw integer labels (y_train / y_test) are kept as-is for DIVA.
Y_train_cat, Y_test_cat = (
    np_utils.to_categorical(labels, nb_classes) for labels in (y_train, y_test)
)

In [None]:
#hyperparameters
nb_epoch = 50
input_shape = 784
MLP_batch_size=1

# Results of the sweep: one entry per hidden-layer size, in loop order.
MLP_accuracy=[]
DIVA_accuracy=[]

# Sweep hidden-layer sizes 1..9. For each size a fresh MLP and a fresh DIVA
# model are built, trained on the training set and scored on the test set.
for num_hidden in range(1, 10):
    print('Testing %d hidden units'%num_hidden)

    # Single-hidden-layer MLP: input -> relu(num_hidden) -> softmax(nb_classes).
    mlp = Sequential()
    mlp.add(Dense(num_hidden, input_shape=(input_shape,)))
    mlp.add(Activation('relu'))
    mlp.add(Dense(nb_classes))
    mlp.add(Activation('softmax'))

    mlp.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])

    print('Training MLP')

    # The test set is passed as validation data only so per-epoch metrics are
    # recorded in `history`; the reported score comes from evaluate() below.
    history = mlp.fit(X_train, Y_train_cat,
                      batch_size=MLP_batch_size, nb_epoch=nb_epoch,
                      verbose=0, validation_data=(X_test, Y_test_cat))
    score = mlp.evaluate(X_test, Y_test_cat, verbose=0)

    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    MLP_accuracy.append(score[1])

    # NOTE: an unconditional `continue` used to sit here, which made the whole
    # DIVA section unreachable and left DIVA_accuracy empty on a fresh run.
    print('Training DIVA')
    #compile model
    diva_model = DIVA.diva(nb_classes, input_shape, num_hidden, hidden_act='relu',
                           loss='mean_squared_error', optimizer=SGD(), compare=DIVA.compareMAE)

    #train model
    # DIVA is given the integer labels (y_*), not the one-hot encoded ones.
    # The positional `1` is presumably the batch size -- TODO confirm against DIVA.train.
    train_metrics=diva_model.train(X_train, y_train, nb_epoch, 1, X_test, y_test)

    #test model
    # Presumably returns the test accuracy -- TODO confirm against DIVA.test.
    DIVA_accuracy.append(diva_model.test(X_test, y_test, 1))

print('Done')

Testing 1 hidden units
Training MLP


In [4]:
#TODO: graph loss and accuracy in matplotlib

In [5]:
# MLP test accuracies collected by the sweep cell, one entry per hidden-layer size (in loop order).
print(MLP_accuracy)

[0.2094,
 0.30509999999999998,
 0.38109999999999999,
 0.74839999999999995,
 0.67169999999999996,
 0.84389999999999998,
 0.90069999999999995,
 0.91369999999999996,
 0.90380000000000005]

In [6]:
# DIVA results gathered in DIVA_accuracy by the sweep cell, one entry per hidden-layer size
# (presumably test accuracies -- the return value of diva_model.test is not shown in this notebook).
print(DIVA_accuracy)

[0.675, 0.6754, 0.7608, 0.8355, 0.7178, 0.8511, 0.7217, 0.8692, 0.7866]