In [1]:
"""
Compares slightly deep (2 hidden layer) MLP to DIVA.
Based on the Keras MLP example.
"""
# Future statements must precede every other import or statement (only the
# docstring, comments and blank lines may come first); placed after
# `import DIVA` this is a SyntaxError when the cell is run as a plain module.
from __future__ import print_function

import DIVA

import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils

nb_classes = 10  # number of target classes; also the width of the one-hot labels
nb_epoch = 50    # number of training epochs used by the models below

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every sample into a single row vector. The sample count is read from
# the array itself (and -1 lets numpy infer the feature width) instead of
# hard-coding 60000 / 10000 / 784, so a differently sized split still works.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32')

# Rescale by 1/255 (pixel values are presumably 0-255 -- TODO confirm).
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)



Using Theano backend.
Using gpu device 0: GeForce GTX 960 (CNMeM is enabled with initial size: 70.0% of memory, cuDNN 4007)


60000 train samples
10000 test samples


In [None]:
# Testing Deep MLP: baseline network with two 512-unit ReLU hidden layers
# (dropout 0.2 after each) and a softmax output.
batch_size = 20

# The input and output widths are taken from the prepared data / nb_classes
# rather than repeating the literals 784 and 10, so the network cannot drift
# out of sync with X_train's feature count or the one-hot width of Y_train.
model = Sequential()
model.add(Dense(512, input_shape=(X_train.shape[1],)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(nb_classes))  # one output unit per class
model.add(Activation('softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation numbers in `history` are not an independent hold-out.
history = model.fit(X_train, Y_train,
                    batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=0, validation_data=(X_test, Y_test))

# With metrics=['accuracy'], evaluate() yields the loss followed by accuracy.
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
dense_1 (Dense)                    (None, 512)         401920      dense_input_1[0][0]              
____________________________________________________________________________________________________
activation_1 (Activation)          (None, 512)         0           dense_1[0][0]                    
____________________________________________________________________________________________________
dropout_1 (Dropout)                (None, 512)         0           activation_1[0][0]               
____________________________________________________________________________________________________
dense_2 (Dense)                    (None, 512)         262656      dropout_1[0][0]                  
___________________________________________________________________________________________

In [None]:
"""
Py-DIVA can set up DIVA at the end of an existing model. 
I don't want to diverge too much from base Keras, 
so this is intentionally the only way to run deep DIVAs. 
"""

# Sizes handed to DIVA.diva below.
num_hidden = 512
input_shape = 784

# Layer stack passed to DIVA as `prev_model`: two 512-unit ReLU layers with
# dropout 0.2 after the first one.
premodel = Sequential([
    Dense(512, input_shape=(784,)),
    Activation('relu'),
    Dropout(0.2),
    Dense(512),
    Activation('relu'),
])

# Build (compile) the DIVA model on top of the pre-model.
diva_model = DIVA.diva(
    nb_classes, input_shape, num_hidden,
    hidden_act='relu',
    loss='mean_squared_error',
    optimizer=SGD(),
    prev_model=premodel,
    compare=DIVA.compareMSE)

# Show the layer summary of the first channel only.
diva_model.channels[0].summary()

# Train, then test.
# NOTE(review): the positional literal 1 is presumably a batch size --
# confirm against the signatures of DIVA's train() and test().
train_metrics = diva_model.train(X_train, y_train, nb_epoch, 1, X_test, y_test)
accuracy = diva_model.test(X_test, y_test, 1)

print('Done')


____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
dense_4 (Dense)                    (None, 512)         401920      dense_input_2[0][0]              
____________________________________________________________________________________________________
activation_4 (Activation)          (None, 512)         0           dense_4[0][0]                    
____________________________________________________________________________________________________
dropout_3 (Dropout)                (None, 512)         0           activation_4[1][0]               
____________________________________________________________________________________________________
dense_5 (Dense)                    (None, 512)         262656      dropout_3[1][0]                  
___________________________________________________________________________________________