In [2]:
#We use the keras library for training the model in this tutorial
#Keras is a high-level library in Python that is a wrapper over TensorFlow, CNTK and Theano
#convolutional layers: these run input through certain filters, which identify features in the image
#pooling layers: these combine convolutional features, helping in feature reduction
#flatten layers: these convert an N-dimensional layer to a 1D layer
#classification layer: the final layer, which tells us the final result.

# we need labelled data to train any model

# train the model on the training set, then validate the results on the test data

In [1]:
# Import the MNIST dataset bundled with Keras. mnist.load_data() returns two
# (images, labels) tuples: the training split first, then the testing split.
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Using TensorFlow backend.


# Let’s try to visualize the digits in the dataset.

In [2]:
import matplotlib.pyplot as plt
# Index of the training sample to inspect.
image_index = 35
# Print the ground-truth label so it can be compared with the rendered image.
print(y_train[image_index])
# Render the selected image using matplotlib's 'Greys' colormap.
plt.imshow(x_train[image_index], cmap='Greys')
plt.show()

5


<Figure size 640x480 with 1 Axes>

# Let’s verify the sizes of the training and testing datasets:

In [3]:
# Show the array shapes of the training and testing images, one per line.
print(x_train.shape, x_test.shape, sep='\n')

(60000, 28, 28)
(10000, 28, 28)


# print all labels until the digit that we visualized above:

In [4]:
# The +1 makes the slice inclusive, so the last label printed belongs to image_index itself.
print(y_train[:image_index+1])

[5 0 4 1 9 2 1 3 1 4 3 5 3 6 1 7 2 8 6 9 4 0 9 1 1 2 4 3 2 7 3 8 6 9 0 5]


# Cleaning Data before creating the model.

# To work with the Keras API, we need to reshape each image to the format of (M x N x 1).

# We’ll use the .reshape() method to perform this action.

# Finally, normalize the image data by dividing each pixel value by 255 (MNIST images are grayscale, so each pixel intensity ranges from 0 to 255):

In [5]:
# save input image dimensions
img_rows, img_cols = 28, 28

# Add a trailing channel axis of size 1 so each image has the
# (rows, cols, channels) layout that the Conv2D input_shape below expects.
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)

# Scale pixel values into [0, 1]. Cast to float32 first: dividing the integer
# arrays directly would promote them to float64 and double the memory
# footprint without adding any precision the model can use.
# NOTE: this cell rebinds x_train / x_test, so run it only once per kernel
# session; re-running it would divide the already-scaled data by 255 again.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# convert the dependent variable in the form of integers to a binary class matrix

In [6]:
from keras.utils import to_categorical

# Number of target classes the labels are expanded into.
num_class = 10

# One-hot encode both label vectors in a single pass: each integer label
# becomes a row of length num_class in a binary class matrix.
y_train, y_test = (
    to_categorical(labels, num_class) for labels in (y_train, y_test)
)

# We’re now ready to create the model and train it!

# The model design process is the most complex factor, having a direct impact on the performance of the model

# To create the model, we first initialize a sequential model. It creates an empty model object. The first step is to add a convolutional layer which takes the input image:

In [7]:
# Sequential gives us an empty model that layers are appended to in order.
from keras.models import Sequential
# Layer types used throughout the rest of the model definition.
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

model = Sequential()

# First convolutional layer: 32 filters of size 3x3 applied to the input image.
# 'relu' stands for "Rectified Linear Units", which takes the max of a value or zero.
model.add(
    Conv2D(
        filters=32,
        kernel_size=(3, 3),
        activation='relu',
        input_shape=(img_rows, img_cols, 1),
    )
)

# Next, we add another convolutional layer, followed by a pooling layer:

In [8]:
# Second convolution: 64 filters of size 3x3 with ReLU activation.
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
# Max-pool over 2x2 windows to reduce the size of the feature maps.
model.add(MaxPooling2D(pool_size=(2, 2)))




# Next, we add a “dropout” layer. Even when neural networks are trained on huge datasets, overfitting may occur. To reduce this risk, we randomly drop units and their connections during the training process. In this case, we’ll drop 25% of the units:

In [9]:
# Randomly drop 25% of the units during training to reduce overfitting.
model.add(Dropout(rate=0.25))

# Next, we add a flattening layer to convert the previous hidden layer into a 1D array:



In [10]:
# Collapse each sample's feature maps into a single 1D vector for the dense layers that follow.
model.add(Flatten())

# Once we’ve flattened the data into a 1D array, we can add a dense (fully connected) hidden layer, as found in a traditional neural network. Next, we add another dropout layer before adding a final dense layer, which classifies the data

In [11]:
# Fully connected hidden layer with 128 units.
model.add(Dense(units=128, activation='relu'))
# Drop half of the hidden units during training.
model.add(Dropout(rate=0.5))
# Output layer with one unit per class. "softmax" activation is used
# when we'd like to classify the data into a number of pre-decided classes.
model.add(Dense(units=num_class, activation='softmax'))

# Compile and Train Model

In [12]:
# We need to compile the model 
#and specify a loss function, an optimizer function 
#and a metric to assess model performance.
#Because the labels were one-hot encoded above, we use the categorical_crossentropy loss function; sparse_categorical_crossentropy is the one to use when the dependent variable is left as integers
#In this example, we’ll use the adam optimizer.

In [13]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# and accuracy as the metric reported during training and evaluation.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [43]:
#We’re now ready to train the model using the .fit() method
#We need to specify the number of epochs and the batch size when training the model. 

In [14]:
# Print a per-layer table of output shapes and parameter counts to sanity-check the architecture.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)              

In [15]:
# Training hyperparameters: samples per gradient update and passes over the data.
batch_size = 128
epochs = 10

# Fit on the training set; the test set is passed as validation data so
# progress on held-out samples is reported after every epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

# With the single 'accuracy' metric configured at compile time, evaluate()
# returns the loss followed by the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Persist the trained model to disk in HDF5 format.
model.save("test_model.h5")


Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.029200580918066953
Test accuracy: 0.9914000034332275


# When we run the code above, the following output is shown as the model runs. It takes about ten minutes