In [2]:
import keras
from keras.datasets import mnist

In [3]:
# Load MNIST; the loader hands back a training pair and a test pair.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [4]:
import matplotlib.pyplot as plt

# Preview the first image in the training set.
# The digits are greyscale, so draw them with a grey colormap rather than
# matplotlib's default false-colour map, and title the figure so it can be
# understood on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.set_title("First image in the MNIST training set")
# imshow stays the final expression so the cell still echoes the AxesImage.
ax.imshow(x_train[0], cmap="gray")

<matplotlib.image.AxesImage at 0x15cc9d87470>

In [5]:
# Inspect the per-image dimensions (everything after the leading sample axis).
# Every image in this dataset is 28x28 pixels. Real-world datasets are rarely
# this uniform, so their images usually have to be brought to a common shape first.
x_train.shape[1:]

(28, 28)

In [6]:
# Reshape the data to fit the model.
# The model's first layer takes inputs of shape (28, 28, 1): 28x28 are the image
# dimensions and the trailing 1 is the single greyscale channel.
# Passing -1 lets NumPy infer the number of images (60,000 train / 10,000 test
# for the full dataset) instead of hard-coding it, so this cell keeps working
# on a subset or a differently sized split.
# NOTE(review): pixel values are left unscaled here -- consider whether the
# inputs should be scaled before training.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

In [7]:
# One-hot encode the target column.
# Each digit label becomes a vector with one column per output category and a
# 1 in the column of its class.
# num_classes is passed explicitly so both splits always get 10 columns
# (matching the 10-node output layer) instead of a width inferred from the
# largest label that happens to be present in each split.
# NOTE: run this cell once per kernel session -- re-running it would try to
# encode the already one-hot labels a second time.
from keras.utils import to_categorical
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [8]:
# Build the model
# A Sequential model is a linear stack of layers; here the whole stack is handed
# to the constructor in order.
# Layers 1-2 are 2D convolutional layers that process the input images. The
#   first has 64 filters and the second has 32; kernel_size=3 means each filter
#   is a 3x3 matrix.
# ReLU (Rectified Linear Unit) is the activation function for both conv layers.
# The first layer also declares the input shape: 28x28 pixels, with the trailing
#   1 signifying that the images are greyscale.
# The Flatten layer serves as the connection between the conv and dense layers.
# The output (dense) layer has 10 nodes, one for each digit 0-9.
# Softmax makes the outputs sum to 1 so they can be interpreted as probabilities.

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten

model = Sequential([
    Conv2D(64, kernel_size=3, activation='relu', input_shape=(28,28,1)),
    Conv2D(32, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(10, activation='softmax'),
])

Instructions for updating:
Colocations handled automatically by placer.


In [9]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# and accuracy as the metric used to measure performance.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [10]:
# Train for 3 epochs. The test split is supplied as the validation data, so the
# reported validation figures are computed on x_test / y_test.
model.fit(
    x=x_train,
    y=y_train,
    epochs=3,
    validation_data=(x_test, y_test),
)

Instructions for updating:
Use tf.cast instead.
Train on 60000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x15cd2214da0>

In [11]:
# Use the trained model to predict the first four test images.
# The predict function returns, for each input, an array of 10 numbers: the
# probabilities of that input being each digit.
# The array index holding the highest number is the digit the model predicts.
model.predict(x=x_test[0:4])

array([[1.4922288e-10, 3.9663776e-11, 1.8808815e-07, 7.3652167e-08,
        5.4956816e-14, 2.7808905e-12, 4.4245648e-13, 9.9999976e-01,
        4.0899412e-08, 2.0049298e-09],
       [1.8175333e-05, 4.3814957e-08, 9.9997723e-01, 3.1223424e-09,
        5.5305292e-11, 1.6007129e-12, 3.8078861e-06, 8.2390373e-14,
        6.9855668e-07, 4.4817454e-13],
       [3.6128654e-07, 9.9969673e-01, 9.6329622e-06, 1.5997142e-07,
        1.6595370e-05, 2.5065704e-06, 2.3997625e-06, 9.0139599e-07,
        2.7077430e-04, 1.5358997e-09],
       [9.9999738e-01, 2.9939221e-10, 7.6879581e-07, 1.6948108e-09,
        1.7585746e-08, 2.0209071e-10, 1.4523436e-06, 2.8694802e-09,
        2.0537676e-08, 4.8945520e-07]], dtype=float32)