In [1]:
# Baseline MLP for MNIST dataset
import numpy as np

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import np_utils

# Fix the NumPy RNG so repeated runs start from the same state
seed = 7
np.random.seed(seed)

# Fetch the MNIST train/test split
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 2-D image into a single row vector (28*28 -> 784 values),
# cast to float32 and rescale the 0-255 intensities into the 0-1 range
num_pixels = X_train[0].size
X_train = X_train.reshape(len(X_train), num_pixels).astype('float32') / 255
X_test = X_test.reshape(len(X_test), num_pixels).astype('float32') / 255

# One-hot encode the integer labels; the class count is the encoded width
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[-1]

# Define baseline model
def baseline_model():
    """Build and compile the baseline multilayer perceptron.

    The network has one ReLU hidden layer with ``num_pixels`` units followed
    by a softmax output layer with ``num_classes`` units. Both sizes are read
    from module-level variables, so the data-preparation code must have run
    before this function is called.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    net = Sequential()

    # Hidden layer: as wide as the flattened input vector
    hidden = Dense(num_pixels, input_dim=num_pixels, init='normal', activation='relu')
    net.add(hidden)

    # Output layer: one softmax unit per class
    output = Dense(num_classes, init='normal', activation='softmax')
    net.add(output)

    net.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return net

# Instantiate the baseline network
model = baseline_model()

# Train for 10 epochs in mini-batches of 200; verbose=2 prints one line per epoch
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    nb_epoch=10,
    batch_size=200,
    verbose=2
)

# Final evaluation of the model.
# NOTE: (X_test, y_test) is used both as the validation dataset during fit and
# as the testing dataset here, which is not good practice.
scores = model.evaluate(X_test, y_test, verbose=0)

# scores[1] is the accuracy metric requested at compile time
print("Baseline Error: {0:.2f}%".format(100 - scores[1]*100))

Using Theano backend.


Train on 60000 samples, validate on 10000 samples
Epoch 1/10
5s - loss: 0.2794 - acc: 0.9206 - val_loss: 0.1396 - val_acc: 0.9585
Epoch 2/10
4s - loss: 0.1102 - acc: 0.9679 - val_loss: 0.0896 - val_acc: 0.9727
Epoch 3/10
4s - loss: 0.0702 - acc: 0.9802 - val_loss: 0.0794 - val_acc: 0.9763
Epoch 4/10
4s - loss: 0.0491 - acc: 0.9859 - val_loss: 0.0733 - val_acc: 0.9786
Epoch 5/10
4s - loss: 0.0358 - acc: 0.9901 - val_loss: 0.0670 - val_acc: 0.9797
Epoch 6/10
4s - loss: 0.0261 - acc: 0.9929 - val_loss: 0.0645 - val_acc: 0.9812
Epoch 7/10
4s - loss: 0.0196 - acc: 0.9956 - val_loss: 0.0585 - val_acc: 0.9821
Epoch 8/10
4s - loss: 0.0127 - acc: 0.9975 - val_loss: 0.0608 - val_acc: 0.9817
Epoch 9/10
4s - loss: 0.0100 - acc: 0.9979 - val_loss: 0.0585 - val_acc: 0.9815
Epoch 10/10
4s - loss: 0.0077 - acc: 0.9987 - val_loss: 0.0593 - val_acc: 0.9817
Baseline Error: 1.83%


In [9]:
# a simple CNN
import numpy as np

from keras import backend as K
from keras.datasets import mnist
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution2D
from keras.layers.convolutional import MaxPooling2D
from keras.models import Sequential
from keras.utils import np_utils

# Random seed
seed = 7
np.random.seed(seed)

# Pin the channels-first ("th") image layout. The tensors built below and the
# input_shape=(1, 28, 28) used by the model put the channel axis before the
# spatial axes; without this call the cell only works when the local Keras
# configuration already defaults to 'th'.
K.set_image_dim_ordering('th')

# Load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Reshape to [samples][channels][rows][cols] with a single grayscale channel
num_pixels = X_train.shape[1] * X_train.shape[2]
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')

# Normalize input from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255

# One hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]

def simple_cnn_model():
    """Build and compile a small convolutional network.

    Layer stack, in order: a 32-filter 5x5 convolution (ReLU, 'valid' border
    mode) over (1, 28, 28) inputs, 2x2 max pooling, dropout at rate 0.2,
    flatten, a 128-unit ReLU dense layer and a softmax layer with
    ``num_classes`` units. ``num_classes`` is read from module scope.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    net = Sequential()

    # Feature extraction: convolution followed by down-sampling
    conv = Convolution2D(
        32, 5, 5,
        border_mode='valid',
        input_shape=(1, 28, 28),
        activation='relu'
    )
    net.add(conv)
    net.add(MaxPooling2D(pool_size=(2, 2)))

    # Regularize, then flatten the feature maps for the dense classifier
    net.add(Dropout(0.2))
    net.add(Flatten())

    # Classifier head
    net.add(Dense(128, activation='relu'))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return net

# Instantiate the small CNN
small_cnn_model = simple_cnn_model()

# Train for 10 epochs in mini-batches of 200; verbose=2 prints one line per epoch.
# The test split is passed as validation data and is evaluated again below.
small_cnn_model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    nb_epoch=10,
    batch_size=200,
    verbose=2
)

# Final evaluation; scores[1] is the accuracy metric requested at compile time
scores = small_cnn_model.evaluate(X_test, y_test, verbose=0)
print("Small CNN error: {0:.2f}%".format(100 - scores[1]*100))



Train on 60000 samples, validate on 10000 samples
Epoch 1/10
40s - loss: 0.2351 - acc: 0.9336 - val_loss: 0.0796 - val_acc: 0.9761
Epoch 2/10
40s - loss: 0.0705 - acc: 0.9790 - val_loss: 0.0438 - val_acc: 0.9856
Epoch 3/10
42s - loss: 0.0494 - acc: 0.9848 - val_loss: 0.0389 - val_acc: 0.9877
Epoch 4/10
43s - loss: 0.0403 - acc: 0.9872 - val_loss: 0.0392 - val_acc: 0.9871
Epoch 5/10
41s - loss: 0.0328 - acc: 0.9899 - val_loss: 0.0346 - val_acc: 0.9880
Epoch 6/10
41s - loss: 0.0265 - acc: 0.9915 - val_loss: 0.0327 - val_acc: 0.9899
Epoch 7/10
41s - loss: 0.0221 - acc: 0.9928 - val_loss: 0.0341 - val_acc: 0.9876
Epoch 8/10
41s - loss: 0.0186 - acc: 0.9940 - val_loss: 0.0286 - val_acc: 0.9899
Epoch 9/10
42s - loss: 0.0162 - acc: 0.9950 - val_loss: 0.0338 - val_acc: 0.9889
Epoch 10/10
41s - loss: 0.0144 - acc: 0.9954 - val_loss: 0.0306 - val_acc: 0.9901
Small CNN error: 0.99%


In [10]:
# Large CNN for MNIST Dataset
import numpy as np

from keras.datasets import mnist

from keras.models import Sequential

from keras.layers import Dense
from keras.layers import Dropout

from keras.utils import np_utils

from keras.layers import Flatten
from keras.layers.convolutional import Convolution2D
from keras.layers.convolutional import MaxPooling2D

from keras import backend as K

# Use the channels-first ("th") image layout for the tensors built below
K.set_image_dim_ordering('th')

# Fix the NumPy RNG so repeated runs start from the same state
seed = 7
np.random.seed(seed)

# Fetch the MNIST train/test split
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Pixel count per image, taken before the channel axis is inserted
num_pixels = X_train[0].size

# Insert a single-channel axis -> [samples][channels][rows][cols], cast to
# float32 and rescale the 0-255 intensities into the 0-1 range.
# The channel axis must come first to agree with the 'th' ordering set above.
X_train = X_train.reshape(len(X_train), 1, 28, 28).astype('float32') / 255
X_test = X_test.reshape(len(X_test), 1, 28, 28).astype('float32') / 255

# One-hot encode the integer labels; the class count is the encoded width
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[-1]

# Define large model
def large_cnn_model():
    """Build and compile the larger convolutional network.

    Layer stack, in order: a 30-filter 5x5 convolution (ReLU) over
    (1, 28, 28) inputs, 2x2 max pooling, a 15-filter 3x3 convolution (ReLU),
    2x2 max pooling, dropout at rate 0.2, flatten, dense layers of 128 and
    50 ReLU units, and a softmax layer with ``num_classes`` units.
    ``num_classes`` is read from module scope.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    net = Sequential()

    # First convolution / pooling stage
    net.add(Convolution2D(30, 5, 5, input_shape=(1, 28, 28), activation='relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    # Second convolution / pooling stage
    net.add(Convolution2D(15, 3, 3, activation='relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    # Regularize, then flatten the feature maps for the dense classifier
    net.add(Dropout(0.2))
    net.add(Flatten())

    # Classifier head: two hidden dense layers and the softmax output
    net.add(Dense(128, activation='relu'))
    net.add(Dense(50, activation='relu'))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return net

# Instantiate the larger CNN.
# NOTE(review): this rebinds `large_cnn_model` from the builder function to the
# model instance, so the builder is no longer callable under that name until
# its `def` is executed again.
large_cnn_model = large_cnn_model()

# Train for 10 epochs in mini-batches of 200; verbose=2 prints one line per epoch.
# The test split is passed as validation data and is evaluated again below.
large_cnn_model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    nb_epoch=10,
    batch_size=200,
    verbose=2
)

# Final evaluation; scores[1] is the accuracy metric requested at compile time
scores = large_cnn_model.evaluate(X_test, y_test, verbose=0)
print("Larger CNN error: {0:.2f}%".format(100 - scores[1]*100))



Train on 60000 samples, validate on 10000 samples
Epoch 1/10
39s - loss: 0.3783 - acc: 0.8799 - val_loss: 0.0797 - val_acc: 0.9754
Epoch 2/10
40s - loss: 0.0925 - acc: 0.9712 - val_loss: 0.0520 - val_acc: 0.9828
Epoch 3/10
40s - loss: 0.0687 - acc: 0.9783 - val_loss: 0.0373 - val_acc: 0.9883
Epoch 4/10
41s - loss: 0.0562 - acc: 0.9825 - val_loss: 0.0330 - val_acc: 0.9885
Epoch 5/10
43s - loss: 0.0473 - acc: 0.9853 - val_loss: 0.0328 - val_acc: 0.9896
Epoch 6/10
40s - loss: 0.0417 - acc: 0.9870 - val_loss: 0.0285 - val_acc: 0.9901
Epoch 7/10
41s - loss: 0.0383 - acc: 0.9877 - val_loss: 0.0295 - val_acc: 0.9899
Epoch 8/10
42s - loss: 0.0341 - acc: 0.9887 - val_loss: 0.0256 - val_acc: 0.9918
Epoch 9/10
42s - loss: 0.0306 - acc: 0.9902 - val_loss: 0.0222 - val_acc: 0.9918
Epoch 10/10
41s - loss: 0.0281 - acc: 0.9911 - val_loss: 0.0255 - val_acc: 0.9920
Larger CNN error: 0.80%
