## Classification of the MNIST (handwritten digits) dataset using a Convolutional Neural Network
### Test accuracy > 99%

In [None]:
from __future__ import print_function
import numpy as np
from keras.datasets import mnist 
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils
# Seed NumPy's global random number generator.
# NOTE(review): only NumPy is seeded here; confirm whether the Keras backend
# needs its own seed for training runs to be repeatable.
np.random.seed(101)

In [58]:
## Hyperparameters for the fully connected network
# Output dimension: one unit per digit class
NB_CLASSES = 10
# Architecture
N_HIDDEN = 250
DROPOUT = 0.3
# Training
OPTIMIZER = Adam()
BATCH_SIZE = 128
# NOTE(review): the training cell for this network passes epochs=30 as a
# literal, so this value is not the one that produced the recorded run.
NB_EPOCH = 200
VALIDATION_SPLIT = 0.2
VERBOSE = 1

In [4]:
## Load MNIST: one (images, labels) pair for training and one for testing
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz

In [13]:
# mnist.load_data() returns image stacks of shape (n_samples, 28, 28).
# Flatten every image into a single row of 28 * 28 = 784 pixel values so it can
# be fed to a Dense layer, then cast to float32 for the division that follows.
# The sample count is taken from the array itself rather than hard-coded, so
# the cell keeps working if only a subset of the data is loaded.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32')

In [14]:
# Pixel values are stored on a 0-255 scale; divide by 255 to rescale them
X_train, X_test = X_train / 255, X_test / 255

In [15]:
# The targets are class indices: one-hot encode them into NB_CLASSES columns
# so they line up with the softmax output layer
Y_train, Y_test = (
    np_utils.to_categorical(labels, NB_CLASSES) for labels in (Y_train, Y_test)
)

In [59]:
## Network Construction
# Length of one flattened 28 x 28 image. Defined here because no earlier cell
# defines RESHAPED, which made this cell fail with NameError on a fresh kernel.
RESHAPED = 784
model = Sequential()
# Hidden layer: N_HIDDEN fully connected units over the flattened pixels
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
# Dropout applied to the hidden activations
model.add(Dropout(DROPOUT))
# Output layer: one unit per class, turned into probabilities by softmax
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 250)               196250    
_________________________________________________________________
activation_13 (Activation)   (None, 250)               0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 250)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 10)                2510      
_________________________________________________________________
activation_14 (Activation)   (None, 10)                0         
Total params: 198,760
Trainable params: 198,760
Non-trainable params: 0
_________________________________________________________________


In [61]:
# Compile with cross-entropy over the one-hot labels and report accuracy
model.compile(
    optimizer=OPTIMIZER,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [64]:
# Training the model
# batch_size now comes from the BATCH_SIZE constant (same value, 128) instead
# of a duplicated literal, matching how the convnet training cell is written.
# epochs is deliberately left at 30, the value behind the recorded output.
history = model.fit(
    X_train,
    Y_train,
    batch_size=BATCH_SIZE,
    epochs=30,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [65]:
## Evaluate on the held-out test set
# The model was compiled with one loss and metrics=['accuracy'], so the result
# is indexed as [loss, accuracy] below.
score = model.evaluate(x=X_test, y=Y_test, verbose=0)
print("Test score:", score[0])
print("Test accuracy:", score[1])

Test score: 0.0734901695074
Test accuracy: 0.984


In [66]:
######## CONVOLUTIONAL DEEP NETS

In [68]:
## Hyperparameters for the convolutional network
# Input geometry: a single channel of IMG_ROWS x IMG_COLS pixels, channel first
IMG_ROWS = 28
IMG_COLS = 28
INPUT_SHAPE = (1, IMG_ROWS, IMG_COLS)
NB_CLASSES = 10
# NOTE(review): LeNet.build hard-codes its own layer sizes and adds no dropout,
# so N_HIDDEN and DROPOUT do not appear to be used by the convnet.
N_HIDDEN = 250
DROPOUT = 0.3
# Training
OPTIMIZER = Adam()
BATCH_SIZE = 128
NB_EPOCH = 20
VALIDATION_SPLIT = 0.2
VERBOSE = 1

In [74]:
from keras import backend as K
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.datasets import mnist
from keras.utils import np_utils
from keras.optimizers import SGD,RMSprop, Adam
import numpy as np
#import matplotlib.pyplot as plt

In [87]:
# define the convNet
class LeNet:
    """LeNet-style convolutional classifier assembled as a Keras ``Sequential`` model."""

    @staticmethod
    def build(input_shape, classes):
        """Assemble the (uncompiled) network.

        Args:
            input_shape: Shape of a single input sample; forwarded to the
                first ``Conv2D`` layer as ``input_shape``.
            classes: Number of units in the final softmax layer.

        Returns:
            The assembled ``Sequential`` model. Compiling it is left to the
            caller.
        """
        model = Sequential()
        # Block 1: CONV -> RELU -> POOL
        model.add(Conv2D(50, kernel_size=5, padding='same', input_shape=input_shape))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        # Block 2: CONV -> POOL -> RELU. ReLU is monotonic, so applying it after
        # max-pooling yields the same values as applying it before.
        # `padding` replaces the Keras 1 keyword `border_mode`, matching the
        # first convolution above.
        model.add(Conv2D(50, kernel_size=5, padding='same'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Activation('relu'))
        # Flatten the feature maps and feed a fully connected RELU layer
        model.add(Flatten())
        model.add(Dense(500))
        model.add(Activation("relu"))
        # Softmax classifier over `classes` outputs
        model.add(Dense(classes))
        model.add(Activation("softmax"))
        return model

In [119]:
# Reload the raw MNIST arrays (images and integer labels) for the convnet
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [120]:
# X_train is 60,000 arrays of size 28x28
print(X_train.shape)
# y_train is 60,000 integer labels
print(y_train.shape)

# Use channels-first tensors so that INPUT_SHAPE = (1, rows, cols) is valid.
# set_image_data_format("channels_first") is the Keras 2 replacement for the
# legacy set_image_dim_ordering("th").
K.set_image_data_format("channels_first")

(60000, 28, 28)
(60000,)


In [121]:
# Cast the pixel arrays to float32 and divide by 255 to rescale them
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255


In [122]:
# The convnet needs input of shape (n_samples, 1, 28, 28): add a channel axis.
# Reshaping to (-1,) + INPUT_SHAPE ties the data layout to the model's input
# shape, and re-running the cell leaves the arrays unchanged (indexing with
# np.newaxis would add one more axis on every run).
X_train = X_train.reshape((-1,) + INPUT_SHAPE)
X_test = X_test.reshape((-1,) + INPUT_SHAPE)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

print(X_train.shape)

60000 train samples
10000 test samples
(60000, 1, 28, 28)


In [123]:
# One-hot encode the integer labels into NB_CLASSES columns
y_train, y_test = (
    np_utils.to_categorical(labels, NB_CLASSES) for labels in (y_train, y_test)
)

# Show one encoded training label
print(y_train[5])

[ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]


In [124]:
# Build the LeNet model, then compile it with cross-entropy loss and accuracy
model = LeNet.build(INPUT_SHAPE, NB_CLASSES)
model.compile(
    optimizer=OPTIMIZER,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

  # Remove the CWD from sys.path while we load stuff.


In [125]:
# Train the convnet; VALIDATION_SPLIT of the training data is held out for validation
history = model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [126]:
# Evaluate the trained convnet on the test set
score = model.evaluate(x=X_test, y=y_test, verbose=VERBOSE)



In [127]:
# Report the two values returned by model.evaluate. The model was compiled
# with one loss and metrics=['accuracy'], so presumably score[0] is the test
# loss and score[1] the test accuracy.
print("Test Score: ", score[0])
print("Test Accuracy: ", score[1])

Test Score:  0.0446791687175
Test Accuracy:  0.9907
