In [1]:
"""
Compares convolutional MLPs to convolutional DIVA.
Based on the code from the Keras CNN example.

With the major caveat that I've only run this twice, due to time constraints it appears
that convolutional DIVA takes forever to converge, and runtime per epoch is also quite long.
Increasing the epochs(12 to 50) and the hidden units(128 to 512) doesn't appear to 
help much, though it still gets decent (~82%) accuracy even when loss is pretty high.
It is not known if DIVA doesn't work well with convolution or if SGD is insufficient.
"""

import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils

nb_classes = 10

# input image dimensions
img_rows, img_cols = 28, 28
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
nb_pool = 2
# convolution kernel size
nb_conv = 3

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

#for DIVA output
alt_X_train = X_train.reshape(60000, 784)
alt_X_test = X_test.reshape(10000, 784)

#for convolutional layer
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)


Using Theano backend.
Using gpu device 0: GeForce GTX 960 (CNMeM is enabled with initial size: 70.0% of memory, cuDNN 4007)


('X_train shape:', (60000, 1, 28, 28))
(60000, 'train samples')
(10000, 'test samples')


In [2]:
#MLP
nb_epoch = 12
batch_size = 128

model = Sequential()

model.add(Convolution2D(nb_filters, nb_conv, nb_conv,
                        border_mode='valid',
                        input_shape=(1, img_rows, img_cols)))
model.add(Activation('relu'))
model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Train on 60000 samples, validate on 10000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
('Test score:', 0.029353955537044386)
('Test accuracy:', 0.98999999999999999)


In [3]:
import DIVA
from keras.optimizers import SGD

"""
Py-DIVA can set up DIVA at the end of an existing model. 
I don't want to diverge too much from base Keras, 
so this is intentionally the only way to use anything 
beyond a standard DIVA, including deep DIVA or anything 
with convolution, max pooling, or dropout. 
"""

#Set up convolutional layer
premodel = Sequential()

premodel.add(Convolution2D(nb_filters, nb_conv, nb_conv,
                        border_mode='valid',
                        input_shape=(1, img_rows, img_cols)))
premodel.add(Activation('relu'))
premodel.add(Convolution2D(nb_filters, nb_conv, nb_conv))
premodel.add(Activation('relu'))
premodel.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
premodel.add(Dropout(0.25))
premodel.add(Flatten())

input_shape=784
num_hidden=512
nb_epoch=50


#compile model
diva_model = DIVA.diva(nb_classes, input_shape, num_hidden, 
                       hidden_act='relu', loss='mean_squared_error', 
                       optimizer=SGD(), prev_model=premodel,
                       compare=DIVA.compareMSE)

print('Training Model')

#train model
train_metrics=diva_model.train(X_train, y_train, nb_epoch, 1, X_test, y_test, 
                               alt_X_train=alt_X_train, alt_X_test=alt_X_test)            
            
#test model
accuracy=diva_model.test(X_test, y_test, 1, alt_X_test=alt_X_test) 

print('Done')


Training Model


  if(alt_X_train!=None):
  if(alt_X_train==None):


Epoch 1
Loss 404192037.927734


  if(alt_X_test==None):
  if(alt_X_test==None):


Test Accuracy: 0.701700
Epoch 2
Loss 353784281.215454
Test Accuracy: 0.722000
Epoch 3
Loss 315463434.567505
Test Accuracy: 0.739600
Epoch 4
Loss 287233212.600525
Test Accuracy: 0.755300
Epoch 5
Loss 266432154.286255
Test Accuracy: 0.772500
Epoch 6
Loss 251101931.762024
Test Accuracy: 0.784300
Epoch 7
Loss 239801295.748047
Test Accuracy: 0.792700
Epoch 8
Loss 231469294.718750
Test Accuracy: 0.799800
Epoch 9
Loss 225324791.851929
Test Accuracy: 0.805000
Epoch 10
Loss 220792517.384766
Test Accuracy: 0.808500
Epoch 11
Loss 217448746.554382
Test Accuracy: 0.812800
Epoch 12
Loss 214981286.732910
Test Accuracy: 0.814300
Epoch 13
Loss 213160097.942871
Test Accuracy: 0.816400
Epoch 14
Loss 211815614.165161
Test Accuracy: 0.816400
Epoch 15
Loss 210822846.067383
Test Accuracy: 0.817700
Epoch 16
Loss 210089594.004272
Test Accuracy: 0.818600
Epoch 17
Loss 209547911.228821
Test Accuracy: 0.819400
Epoch 18
Loss 209147664.533508
Test Accuracy: 0.819000
Epoch 19
Loss 208851844.926880
Test Accuracy: 0.8