In [17]:
'''Trains a simple convnet on the MNIST dataset.
Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
adapted from https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py
'''
import tensorflow as tf
mnist = tf.keras.datasets.mnist
Model = tf.keras.models.Model
Input = tf.keras.layers.Input
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
Flatten = tf.keras.layers.Flatten
Conv2D = tf.keras.layers.Conv2D 
MaxPooling2D = tf.keras.layers.MaxPooling2D
K = tf.keras.backend

In [18]:
# training hyperparameters shared by every model in this notebook
epochs = 12
batch_size = 128

# width of the softmax output layer / one-hot label vectors
num_classes = 10

# input image dimensions
img_rows = 28
img_cols = 28

In [19]:
# Load MNIST, already split into a training pair and a test pair; each pair
# is (images, labels). The recorded training logs in this notebook report
# 60000 training and 10000 test samples.
# NOTE(review): load_data() presumably downloads and caches the dataset on
# first use -- confirm before running offline.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [20]:
# Add an explicit channel axis to the single-channel images, placed where the
# backend's image data format expects it, and record the matching per-sample
# input shape that the model builders read.
if K.image_data_format() == 'channels_first':
    inp_shape = (1, img_rows, img_cols)
else:
    inp_shape = (img_rows, img_cols, 1)

# inp_shape is set in exactly one branch, so it always agrees with the layout
# the arrays are reshaped to. (Previously it was reset to the channels-last
# shape after the if/else, which mismatched the data under 'channels_first'.)
# Reshaping from inp_shape also makes this cell safe to re-run.
x_train = x_train.reshape((x_train.shape[0],) + inp_shape)
x_test = x_test.reshape((x_test.shape[0],) + inp_shape)

In [21]:
# Convert the images to float32 and divide by 255 so that 8-bit pixel
# intensities (assumed to lie in 0..255) end up in [0, 1].
# NOTE: this cell rebinds its own inputs, so running it twice rescales the
# images and re-encodes the labels a second time.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# turn the class labels into categorical (one-hot) vectors of width num_classes
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [42]:
def cnn_model1():
    """
    Build the baseline MNIST convnet with the Keras functional (Model) API.

    Layers, in order: 3x3 relu convolution with 32 filters, 3x3 relu
    convolution with 64 filters, 2x2 max pooling, dropout 0.25, flatten,
    128-unit relu dense layer, dropout 0.5, softmax output of width
    `num_classes`. Same architecture as the Keras mnist_cnn example this
    notebook is adapted from.

    Reads the module-level `inp_shape` and `num_classes`.

    Roughly 1.2 million trainable parameters, nearly all of them in the
    128-unit dense layer (1,179,776 in the recorded summary).
    Reported to reach 99.25% test accuracy after 12 epochs.

    Returns the uncompiled Model.
    """
    image = Input(shape=inp_shape)
    # layers are created up front, then applied in sequence to the input tensor
    stack = (
        Conv2D(32, kernel_size=(3, 3), activation='relu'),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    )
    tensor = image
    for layer in stack:
        tensor = layer(tensor)
    return Model(inputs=image, outputs=tensor)

def cnn_model2():
    """
    Variant of cnn_model1 with the 128-unit dense layer and its dropout
    removed, and with the softmax output layer created with trainable=False.

    The output layer therefore keeps whatever weights it is initialised with;
    only the two convolutions are trained.

    About 19k trainable parameters (the convolutions) and about 92k frozen
    ones (the output layer). Reported to reach 91% test accuracy after 12 epochs.

    Reads the module-level `inp_shape` and `num_classes`.
    Returns the uncompiled Model.
    """
    image = Input(shape=inp_shape)
    stack = (
        Conv2D(32, kernel_size=(3, 3), activation='relu'),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        # frozen classifier head: excluded from training
        Dense(num_classes, activation='softmax', trainable=False),
    )
    tensor = image
    for layer in stack:
        tensor = layer(tensor)
    return Model(inputs=image, outputs=tensor)

def cnn_model3():
    """
    Variant of cnn_model2 with fewer convolutional filters (8 and 16 instead
    of 32 and 64), which shrinks the flattened layer, and -- crucially --
    with a trainable softmax output layer.

    About 24k trainable parameters.
    98.5% test accuracy after 12 epochs (0.9856 in the recorded run).

    Reads the module-level `inp_shape` and `num_classes`.
    Returns the uncompiled Model.
    """
    image = Input(shape=inp_shape)
    stack = (
        Conv2D(8, kernel_size=(3, 3), activation='relu'),
        Conv2D(16, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(num_classes, activation='softmax'),
    )
    tensor = image
    for layer in stack:
        tensor = layer(tensor)
    return Model(inputs=image, outputs=tensor)

def cnn_model4():
    """
    Variant of cnn_model3 with the convolutional filters halved again
    (4 and 8), making the flattened layer smaller still (1152 values in the
    recorded summary).

    About 12k trainable parameters.
    97.8% test accuracy after 12 epochs (0.9781 in the recorded run).

    Reads the module-level `inp_shape` and `num_classes`.
    Returns the uncompiled Model.
    """
    image = Input(shape=inp_shape)
    stack = (
        Conv2D(4, kernel_size=(3, 3), activation='relu'),
        Conv2D(8, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(num_classes, activation='softmax'),
    )
    tensor = image
    for layer in stack:
        tensor = layer(tensor)
    return Model(inputs=image, outputs=tensor)

def cnn_model5():
    """
    Variant of cnn_model4 that pools over 4x4 windows instead of 2x2, so the
    flattened layer drops to 288 values (recorded summary).

    About 3k trainable parameters.
    97.4% test accuracy after 12 epochs (0.9739 in the recorded run).

    Reads the module-level `inp_shape` and `num_classes`.
    Returns the uncompiled Model.
    """
    image = Input(shape=inp_shape)
    stack = (
        Conv2D(4, kernel_size=(3, 3), activation='relu'),
        Conv2D(8, kernel_size=(3, 3), activation='relu'),
        # the only difference from cnn_model4: a coarser pooling window
        MaxPooling2D(pool_size=(4, 4)),
        Dropout(0.25),
        Flatten(),
        Dense(num_classes, activation='softmax'),
    )
    tensor = image
    for layer in stack:
        tensor = layer(tensor)
    return Model(inputs=image, outputs=tensor)

In [33]:
# build the baseline network and print its layer table
model1 = cnn_model1()
model1.summary()

# The optimizer is selected by name, so it runs with its default settings.
# Categorical cross-entropy matches the one-hot labels built earlier.
model1.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_11 (Dropout)         (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_11 (Dense)             (None, 128)               1179776   
__________

In [None]:
# build the frozen-output variant and print its layer table
model2 = cnn_model2()
model2.summary()

# The optimizer is selected by name, so it runs with its default settings.
# Categorical cross-entropy matches the one-hot labels built earlier.
model2.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [None]:
# build the 8/16-filter variant and print its layer table
model3 = cnn_model3()
model3.summary()

# The optimizer is selected by name, so it runs with its default settings.
# Categorical cross-entropy matches the one-hot labels built earlier.
model3.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [35]:
# build the 4/8-filter variant and print its layer table
model4 = cnn_model4()
model4.summary()

# The optimizer is selected by name, so it runs with its default settings.
# Categorical cross-entropy matches the one-hot labels built earlier.
model4.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 26, 26, 4)         40        
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 24, 24, 8)         296       
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 12, 12, 8)         0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 12, 12, 8)         0         
_________________________________________________________________
flatten_10 (Flatten)         (None, 1152)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 10)                11530     
Total para

In [41]:
# build the 4x4-pooling variant and print its layer table
model5 = cnn_model5()
model5.summary()

# The optimizer is selected by name, so it runs with its default settings.
# Categorical cross-entropy matches the one-hot labels built earlier.
model5.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 26, 26, 4)         40        
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 24, 24, 8)         296       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 6, 6, 8)           0         
_________________________________________________________________
dropout_15 (Dropout)         (None, 6, 6, 8)           0         
_________________________________________________________________
flatten_12 (Flatten)         (None, 288)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 10)                2890      
Total para

In [None]:
# Train model1. The test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same samples as the final test score.
history1 = model1.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

In [None]:
# Train model2. The test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same samples as the final test score.
history2 = model2.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

In [31]:
# Train model3. The test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same samples as the final test score.
history3 = model3.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12

Epoch 2/12

Epoch 3/12

Epoch 4/12

Epoch 5/12

Epoch 6/12

Epoch 7/12

Epoch 8/12

Epoch 9/12

Epoch 10/12

Epoch 11/12

Epoch 12/12



In [36]:
# Train model4. The test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same samples as the final test score.
history4 = model4.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12

Epoch 2/12

Epoch 3/12

Epoch 4/12

Epoch 5/12

Epoch 6/12

Epoch 7/12

Epoch 8/12

Epoch 9/12

Epoch 10/12

Epoch 11/12

Epoch 12/12



In [43]:
# Train model5. The test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same samples as the final test score.
history5 = model5.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12

Epoch 2/12

Epoch 3/12

Epoch 4/12

Epoch 5/12

Epoch 6/12

Epoch 7/12

Epoch 8/12

Epoch 9/12

Epoch 10/12

Epoch 11/12

Epoch 12/12



In [None]:
# Evaluate model1 on the test split. score1[0] is the loss and score1[1] the
# accuracy metric requested at compile time.
score1 = model1.evaluate(x_test, y_test, verbose=0)

# Build each message as a single string. print('label', value) is rendered
# as a tuple by a Python 2 kernel (as the recorded outputs in this notebook
# show), whereas one string prints the same text on Python 2 and Python 3.
print('Test loss for model1: ' + str(score1[0]))
print('Test accuracy for model1: ' + str(score1[1]))

In [None]:
# Evaluate model2 on the test split. score2[0] is the loss and score2[1] the
# accuracy metric requested at compile time.
score2 = model2.evaluate(x_test, y_test, verbose=0)

# Build each message as a single string. print('label', value) is rendered
# as a tuple by a Python 2 kernel (as the recorded outputs in this notebook
# show), whereas one string prints the same text on Python 2 and Python 3.
print('Test loss for model2: ' + str(score2[0]))
print('Test accuracy for model2: ' + str(score2[1]))

In [32]:
# Evaluate model3 on the test split. score3[0] is the loss and score3[1] the
# accuracy metric requested at compile time.
score3 = model3.evaluate(x_test, y_test, verbose=0)

# Build each message as a single string. print('label', value) is rendered
# as a tuple by a Python 2 kernel (as the recorded outputs in this notebook
# show), whereas one string prints the same text on Python 2 and Python 3.
print('Test loss for model3: ' + str(score3[0]))
print('Test accuracy for model3: ' + str(score3[1]))

('Test loss:', 0.04675232584038749)
('Test accuracy:', 0.9856)


In [37]:
# Evaluate model4 on the test split. score4[0] is the loss and score4[1] the
# accuracy metric requested at compile time.
score4 = model4.evaluate(x_test, y_test, verbose=0)

# Build each message as a single string. print('label', value) is rendered
# as a tuple by a Python 2 kernel (as the recorded outputs in this notebook
# show), whereas one string prints the same text on Python 2 and Python 3.
print('Test loss for model4: ' + str(score4[0]))
print('Test accuracy for model4: ' + str(score4[1]))

('Test loss for model4:', 0.07353736386187375)
('Test accuracy for model4:', 0.9781)


In [44]:
# Evaluate model5 on the test split. score5[0] is the loss and score5[1] the
# accuracy metric requested at compile time.
score5 = model5.evaluate(x_test, y_test, verbose=0)

# Build each message as a single string. print('label', value) is rendered
# as a tuple by a Python 2 kernel (as the recorded outputs in this notebook
# show), whereas one string prints the same text on Python 2 and Python 3.
print('Test loss for model5: ' + str(score5[0]))
print('Test accuracy for model5: ' + str(score5[1]))

('Test loss for model5:', 0.08627387657612562)
('Test accuracy for model5:', 0.9739)
