In [47]:
from __future__ import print_function  
# for Python2 compatibility 

In [48]:
import numpy 
# Fix the seed of NumPy's global random generator so NumPy-driven randomness repeats between runs.
numpy.random.seed(seed=1337)

In [49]:
import keras
from keras.models import Sequential
from keras.layers import Dense 
from keras.optimizers import SGD 
from keras.datasets import mnist

In [50]:
# Settings shared by the cells below.
num_classes = 10     # length of the one-hot label vectors and width of the softmax output layer

batch_size = 128     # passed to model.fit as batch_size
epochs = 30          # passed to model.fit as epochs

In [51]:
# load_data() hands back a training split and a test split,
# each one an (images, labels) pair.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [52]:
# Sanity check: shapes of images and labels for both splits, on one line.
print(*[arr.shape for arr in (x_train, y_train, x_test, y_test)])

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [53]:
# Flatten every image into a single feature vector (28x28 = 784 values).
# The row count is read from the array itself and the feature axis is
# inferred with -1, so this cell no longer depends on the hard-coded
# 60000 / 10000 split sizes and also works on a subset of the data.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Cast to float32 and rescale the pixel values from 0-255 to 0-1.
# NOTE: re-running this cell on its own rescales the already-scaled data
# again; reload the data first if the cell has to be executed twice.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

60000 train samples
10000 test samples


In [54]:
# Shapes of the training images (now flattened) and of the training labels.
for split in (x_train, y_train):
    print(split.shape)

(60000, 784)
(60000,)


In [55]:
# Peek at the first training label while it is still a plain integer class id.
y_train[0]

5

In [56]:
# One-hot encode the integer labels: each label becomes a row of length num_classes.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [57]:
# The same first label after to_categorical: a length-num_classes vector holding a single 1.
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

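This representation is called a one-hot vector: every entry is 0 except for a single 1 at the index of the class (here index 5, i.e. the digit 5).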

In [58]:
# Start from an empty Sequential container; layers are appended to it with model.add().
model = Sequential()

In [59]:
# Three sigmoid hidden layers of 512 units on top of the 784-value input,
# followed by a softmax layer with one output per class.
layer_stack = (
    Dense(512, activation='sigmoid', input_shape=(784,)),
    Dense(512, activation='sigmoid'),
    Dense(512, activation='sigmoid'),
    Dense(num_classes, activation='softmax'),
)
for layer in layer_stack:
    model.add(layer)

In [60]:
# What does the model look like?
# Prints one row per layer (output shape and parameter count) plus the parameter totals.
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 512)               401920    
_________________________________________________________________
dense_11 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_12 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_13 (Dense)             (None, 10)                5130      
Total params: 932,362
Trainable params: 932,362
Non-trainable params: 0
_________________________________________________________________


In [61]:
# Configure training: categorical cross-entropy loss, SGD with its default
# settings, and accuracy reported as the metric.
sgd_optimizer = SGD()
model.compile(
    optimizer=sgd_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [62]:
# Train the network. The test split is passed as validation data, and the
# object returned by fit() is kept in `history`.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)
history = model.fit(x_train, y_train, **fit_options)

Train on 60000 samples, validate on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [64]:
# Score the trained model on the test split; the result is kept in `score`.
score = model.evaluate(x=x_test, y=y_test)



For comparison, see the best published results on MNIST here: http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#4d4e495354 

In [31]:
# Second entry of `score`: the accuracy metric requested in model.compile (the first entry is the loss).
# NOTE(review): this cell's execution count (31) predates the current run (cells 47-65), so the
# value shown below presumably comes from an earlier training run - confirm or clear the output.
score[1]

0.8809

In [46]:
# Second entry of `score`: the accuracy metric requested in model.compile (the first entry is the loss).
# NOTE(review): this cell's execution count (46) predates the current run (cells 47-65), so the
# value shown below presumably comes from an earlier training run - confirm or clear the output.
score[1]

0.8961

In [65]:
# Second entry of `score`: the accuracy metric requested in model.compile (the first entry is the loss).
# This cell ran right after the evaluate cell (In [64]), so it reports the accuracy of the model trained above.
score[1]

0.8121