In [1]:
'''Trains a simple deep NN on the MNIST dataset.
The original Keras example gets to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning),
at 2 seconds per epoch on a K520 GPU.
This notebook deviates from it: 12 epochs and the adadelta optimiser, to match the CNN example.
Adapted from https://github.com/keras-team/keras/blob/master/examples/mnist_mlp.py
'''
# The standalone Keras installation is broken on the ap laptop, so Keras is used through the tensorflow module.
# For some reason `import tf.keras` / `import tf.keras.<submodule>` fails, so the needed names are bound by attribute access below.
import tensorflow as tf
mnist = tf.keras.datasets.mnist
Model = tf.keras.models.Model
Input = tf.keras.layers.Input
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout

  from ._conv import register_converters as _register_converters


In [2]:
# Training hyper-parameters.
# epochs was lowered from 20 to 12 to match the CNN example.
epochs = 12
batch_size = 128

# Problem dimensions: 10 digit classes, and 784 (= 28 * 28) input
# features per flattened MNIST image.
num_classes = 10
inp_shape = 28 * 28

In [3]:
# Load MNIST already split into a training set (60k images) and a
# test set (10k images); each split is an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [4]:
# Guard against re-running this cell on arrays it has already processed:
# a second pass would divide the pixels by 255 again and one-hot encode
# labels that are already one-hot, silently corrupting the data.
if y_train.ndim != 1 or y_test.ndim != 1:
    raise RuntimeError('labels are already one-hot encoded; '
                       're-run the data loading cell before this one')

# Flatten images for the fully connected input layer.
# -1 lets numpy infer the number of samples instead of hard-coding
# 60000 / 10000, so the cell also works on a subset of the data.
x_train = x_train.reshape(-1, inp_shape)
x_test = x_test.reshape(-1, inp_shape)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Normalise input value ranges to [0, 1].
x_train /= 255
x_test /= 255

# Convert class scalars to binary (one-hot) class vectors, as required by
# the categorical_crossentropy loss used when compiling the models.
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

(10000,)


In [18]:
def _build_mlp(hidden_units, dropout_rate, n_hidden_layers=2):
    """
    Shared builder (Keras Model API) for the MNIST MLP classifiers below.

    Stacks `n_hidden_layers` blocks of Dense(hidden_units, relu) followed by
    Dropout(dropout_rate) on top of an Input of shape (inp_shape,), and ends
    with a Dense(num_classes, softmax) prediction layer.

    Reads the notebook-level constants `inp_shape` and `num_classes`.

    Args:
        hidden_units: width of every hidden Dense layer.
        dropout_rate: rate passed to every Dropout layer.
        n_hidden_layers: number of Dense + Dropout blocks (default 2).

    Returns:
        An uncompiled Model mapping the input tensor to the softmax output.
    """
    a0 = Input(shape=(inp_shape,))
    activations = a0
    # Layers are created in the same order as the hand-written versions
    # (Dense, Dropout, Dense, Dropout, Dense), so auto-generated layer
    # names and the summary() table are unchanged.
    for _ in range(n_hidden_layers):
        activations = Dense(hidden_units, activation='relu')(activations)
        activations = Dropout(dropout_rate)(activations)
    prediction = Dense(num_classes, activation='softmax')(activations)
    return Model(inputs=a0, outputs=prediction)

def mlp_model1():
    """
    keras model builder (using model api) for mlp for mnist classification.
    architecture of the original Keras mnist_mlp example: two 512-unit
    hidden layers with dropout 0.2.
    roughly 670k parameters.
    """
    return _build_mlp(hidden_units=512, dropout_rate=0.2)

def mlp_model2():
    """
    slightly modified version of the original example (dropout changed to
    0.25 to match cnn examples).
    roughly 670k parameters.
    about 98.2% accuracy after 12 epochs.
    """
    return _build_mlp(hidden_units=512, dropout_rate=0.25)

def mlp_model3():
    """
    modified version of mlp_model2 with much smaller layers (28 units each).
    roughly 23k parameters.
    about 94% accuracy after 12 epochs.
    """
    return _build_mlp(hidden_units=28, dropout_rate=0.25)

def mlp_model4():
    """
    even smaller version of mlp_model2 (16 units per hidden layer).
    roughly 13k parameters.
    about 91.4% accuracy after 12 epochs.
    """
    return _build_mlp(hidden_units=16, dropout_rate=0.25)

In [None]:
#optimiser changed from rmsprop to adadelta to match cnn examples

In [9]:
# Instantiate the baseline MLP and print its layer / parameter table.
model1 = mlp_model1()
model1.summary()

# Adadelta optimiser with categorical cross-entropy (the labels are
# one-hot encoded), reporting accuracy during training and evaluation.
model1.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_3 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_4 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Instantiate the dropout-0.25 variant and print its layer / parameter table.
model2 = mlp_model2()
model2.summary()

# Adadelta optimiser with categorical cross-entropy (the labels are
# one-hot encoded), reporting accuracy during training and evaluation.
model2.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_5 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_6 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Instantiate the 28-unit variant and print its layer / parameter table.
model3 = mlp_model3()
model3.summary()

# Adadelta optimiser with categorical cross-entropy (the labels are
# one-hot encoded), reporting accuracy during training and evaluation.
model3.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 28)                21980     
_________________________________________________________________
dropout_7 (Dropout)          (None, 28)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 28)                812       
_________________________________________________________________
dropout_8 (Dropout)          (None, 28)                0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)                290       
Total params: 23,082
Trainable params: 23,082
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Instantiate the 16-unit variant and print its layer / parameter table.
model4 = mlp_model4()
model4.summary()

# Adadelta optimiser with categorical cross-entropy (the labels are
# one-hot encoded), reporting accuracy during training and evaluation.
model4.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 16)                12560     
_________________________________________________________________
dropout_9 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_14 (Dense)             (None, 16)                272       
_________________________________________________________________
dropout_10 (Dropout)         (None, 16)                0         
_________________________________________________________________
dense_15 (Dense)             (None, 10)                170       
Total params: 13,002
Trainable params: 13,002
Non-trainable params: 0
_________________________________________________________________


In [15]:
# For illustrative purposes, get the input tensor of model1's first Dense layer.
# The layer is looked up by position (index 0 is the InputLayer, index 1 the
# first Dense layer) rather than by name: auto-generated names such as
# 'dense_1' depend on how many layers were created earlier in the kernel
# (model1's recorded summary lists dense_4..dense_6), so a name lookup
# breaks as soon as cells are re-run or models are rebuilt.
dense1 = model1.layers[1]
# print(...) with a single argument behaves the same on Python 2 and 3.
print(dense1.input)
# Note: the input tensor cannot be fetched from the layer before Model() has
# been run (unless you look at the output of the tensor that is to be used
# as the layer's input).

<tf.Tensor 'input_7:0' shape=(?, 784) dtype=float32>

In [None]:
# Train model1 and keep the returned History object.
# NOTE: the test split doubles as validation data here, so the validation
# figures are computed on the same samples later used for the test score.
history1 = model1.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

In [11]:
# Train model2 and keep the returned History object.
# NOTE: the test split doubles as validation data here, so the validation
# figures are computed on the same samples later used for the test score.
history2 = model2.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12

Epoch 2/12

Epoch 3/12

Epoch 4/12

Epoch 5/12

Epoch 6/12

Epoch 7/12

Epoch 8/12

Epoch 9/12

Epoch 10/12

Epoch 11/12

Epoch 12/12



In [16]:
# Train model3 and keep the returned History object.
# NOTE: the test split doubles as validation data here, so the validation
# figures are computed on the same samples later used for the test score.
history3 = model3.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12

Epoch 2/12

Epoch 3/12

Epoch 4/12

Epoch 5/12

Epoch 6/12

Epoch 7/12

Epoch 8/12

Epoch 9/12

Epoch 10/12

Epoch 11/12

Epoch 12/12



In [20]:
# Train model4 and keep the returned History object.
# NOTE: the test split doubles as validation data here, so the validation
# figures are computed on the same samples later used for the test score.
history4 = model4.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12

Epoch 2/12

Epoch 3/12

Epoch 4/12

Epoch 5/12

Epoch 6/12

Epoch 7/12

Epoch 8/12

Epoch 9/12

Epoch 10/12

Epoch 11/12

Epoch 12/12



In [None]:
# Evaluate model1 on the test split. The model was compiled with
# metrics=['accuracy'], so the result is indexed as [loss, accuracy].
score1 = model1.evaluate(x_test, y_test, verbose=0)
# Format explicitly: under Python 2, print('label', value) prints a tuple
# such as ('Test loss:', 0.05...) instead of a readable line.
print('Test loss: {}'.format(score1[0]))
print('Test accuracy: {}'.format(score1[1]))

In [13]:
# Evaluate model2 on the test split. The model was compiled with
# metrics=['accuracy'], so the result is indexed as [loss, accuracy].
score2 = model2.evaluate(x_test, y_test, verbose=0)
# Format explicitly: under Python 2, print('label', value) prints a tuple
# such as ('Test loss:', 0.05...) instead of a readable line.
print('Test loss: {}'.format(score2[0]))
print('Test accuracy: {}'.format(score2[1]))

('Test loss:', 0.05933389719272382)
('Test accuracy:', 0.9824)


In [17]:
# Evaluate model3 on the test split. The model was compiled with
# metrics=['accuracy'], so the result is indexed as [loss, accuracy].
score3 = model3.evaluate(x_test, y_test, verbose=0)
# Format explicitly: under Python 2, print('label', value) prints a tuple
# such as ('Test loss:', 0.05...) instead of a readable line.
print('Test loss: {}'.format(score3[0]))
print('Test accuracy: {}'.format(score3[1]))

('Test loss:', 0.2038772569000721)
('Test accuracy:', 0.9404)


In [21]:
# Evaluate model4 on the test split. The model was compiled with
# metrics=['accuracy'], so the result is indexed as [loss, accuracy].
score4 = model4.evaluate(x_test, y_test, verbose=0)
# Format explicitly: under Python 2, print('label', value) prints a tuple
# such as ('Test loss:', 0.05...) instead of a readable line.
print('Test loss: {}'.format(score4[0]))
print('Test accuracy: {}'.format(score4[1]))

('Test loss:', 0.30465324751734735)
('Test accuracy:', 0.9144)
