In [1]:
"""Trains a simple deep NN on the MNIST dataset.
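Gets to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning).
2 seconds per epoch on a K520 GPU.
NOTE: the figures above describe a 20-epoch run; this notebook sets
``epochs = 1``, so the results printed below will differ.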
"""

from __future__ import print_function

import keras
import numpy as np
from keras import Model
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

# Mini-batch size passed to the `fit` calls below.
batch_size = 128
# Number of training epochs used by each `fit` call below.
epochs = 1

Using TensorFlow backend.


In [2]:
# ---- 10 CLASSES ----
# Load MNIST, which arrives already divided into train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every sample to a 784-element float32 vector, then divide by 255
# (presumably mapping 8-bit pixel intensities into [0, 1] -- confirm).
x_train = x_train.reshape(60000, 784).astype('float32')
x_test = x_test.reshape(10000, 784).astype('float32')
x_train /= 255
x_test /= 255
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# ---- 9 CLASSES ----
# Keep only the samples whose label is not 9. The index tuples are kept
# under their own names so the same selection can be reused.
train_9_indices = np.nonzero(y_train != 9)
x_train_9 = x_train[train_9_indices]
y_train_9 = y_train[train_9_indices]

test_9_indices = np.nonzero(y_test != 9)
x_test_9 = x_test[test_9_indices]
y_test_9 = y_test[test_9_indices]

# One-hot encode the full label set into 10 columns.
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)

# One-hot encode the reduced label set into 9 columns.
# NOTE(review): this assumes the remaining labels are exactly 0..8, which
# holds only because the dropped class is the largest label -- confirm
# before holding out a different class.
y_train_9 = keras.utils.to_categorical(y_train_9, num_classes=9)
y_test_9 = keras.utils.to_categorical(y_test_9, num_classes=9)

60000 train samples
10000 test samples


In [3]:
# Two 512-unit ReLU hidden layers, each followed by dropout at rate 0.2,
# feeding a 9-way softmax (one output per class of the reduced label set).
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(9, activation='softmax'),
])

model.summary()

model.compile(optimizer=RMSprop(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train on the 9-class subset; the 9-class test split is also passed as
# validation data.
history = model.fit(x=x_train_9,
                    y=y_train_9,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test_9, y_test_9))

# `evaluate` returns the loss first, then the compiled metric (accuracy).
score = model.evaluate(x=x_test_9, y=y_test_9, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 9)                 4617      
Total params: 669,193
Trainable params: 669,193
Non-trainable params: 0
_________________________________________________________________
Train on 54051 samples, validate on 8991 samples
Epoch 1/1
Test loss: 0.12062001218160584
Test accuracy: 0.9610722

In [4]:
def add_outputs(model, n, init_scale=0.0001):
    """Return a new model whose final layer has ``n`` extra output units.

    The last layer of ``model`` is read (never removed). Its kernel and
    bias are widened by ``n`` columns/entries, and a fresh softmax ``Dense``
    layer carrying the widened weights is attached to the same tensor that
    fed the old output layer. The new model is built on ``model.input`` and
    the existing intermediate tensor, so the earlier layers are reused
    rather than copied.

    Args:
        model: Keras model whose last layer's ``get_weights()`` returns
            exactly ``[kernel, bias]`` (e.g. a ``Dense`` layer with a bias).
        n: Number of output units to append.
        init_scale: Upper bound of the initial values for the new kernel
            columns, which are drawn as ``init_scale * (1 - u)`` with
            ``u`` uniform in ``[0, 1)``. The default matches the previously
            hard-coded value.

    Returns:
        A new ``Model`` (not compiled by this function) with the same input
        as ``model`` and ``old_units + n`` softmax outputs. The pre-existing
        output columns keep their trained weights; the new biases are zero.

    Raises:
        ValueError: If the last layer does not hold exactly two weight
            arrays (kernel and bias).
    """
    # Index rather than `model.layers.pop()`: the layer only needs to be
    # read, and popping may strip the output layer from the caller's model
    # when `layers` exposes the live list.
    output_layer = model.layers[-1]
    weights = output_layer.get_weights()
    if len(weights) != 2:
        raise ValueError(
            'add_outputs expects the last layer to hold [kernel, bias], '
            'got %d weight arrays' % len(weights))
    kernel, bias = weights

    n_inputs = kernel.shape[0]
    # Small positive values for the new columns so the added units start
    # with almost no influence on the existing outputs.
    new_columns = -init_scale * np.random.random_sample((n_inputs, n)) + init_scale
    # Create the additions in the dtype of the existing weights so that
    # concatenation does not silently upcast them.
    kernel = np.concatenate((kernel, new_columns.astype(kernel.dtype)), axis=1)
    bias = np.concatenate((bias, np.zeros(n, dtype=bias.dtype)), axis=0)

    # Attach the widened output layer to the tensor that fed the old one.
    out = Dense(bias.shape[0], activation='softmax')(output_layer.input)
    new_model = Model(inputs=model.input, outputs=out)
    new_model.layers[-1].set_weights([kernel, bias])
    return new_model

# Widen the trained 9-class network by one output unit, giving 10 outputs.
new_model = add_outputs(model, 1)

new_model.summary()

new_model.compile(optimizer=RMSprop(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# Continue training on the full 10-class data; the 10-class test split is
# also passed as validation data.
new_history = new_model.fit(x=x_train,
                            y=y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            verbose=1,
                            validation_data=(x_test, y_test))

# `evaluate` returns the loss first, then the compiled metric (accuracy).
new_score = new_model.evaluate(x=x_test, y=y_test, verbose=0)
new_test_loss, new_test_accuracy = new_score[0], new_score[1]
print('Test loss:', new_test_loss)
print('Test accuracy:', new_test_accuracy)


Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1_input (InputLayer)   (None, 784)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_____________________________________________________