In [1]:
# The MNIST dataset comes preloaded in Keras, in the form of a set of four NumPy arrays.
from tensorflow.keras.datasets import mnist


# load_data() returns two (images, labels) pairs: the training split first, then the test split.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [2]:
# Let’s look at the training data:


In [3]:
# Images and labels are both NumPy arrays. The labels are digits ranging from
# 0 to 9, and there is exactly one label per image.
# Print the number of axes of each array, one per line.

print(train_images.ndim, train_labels.ndim, sep="\n")

3
1


In [4]:
# Element types of the image and label arrays, one per line.
print(train_images.dtype, train_labels.dtype, sep="\n")

uint8
uint8


In [5]:
# Shapes of the image and label arrays, one per line.
print(train_images.shape, train_labels.shape, sep="\n")

(60000, 28, 28)
(60000,)


In [6]:
# Size of each split and of the whole dataset.
total_train = len(train_labels)
total_test = len(test_labels)
total = total_test + total_train

# Share of each split, expressed as a percentage of all records.
train_perc = (total_train * 100) / total
test_perc = (total_test * 100) / total

# Report the counts first, then the percentages.
print("Total Train records: ", total_train)
print("Total Test records: ", total_test)
print("Total records: ", total)
print("Train records percentage: ", train_perc)
print("Test records percentage: ", test_perc)

# Sanity check: the two percentages together should account for every record.
print(train_perc + test_perc)

Total Train records:  60000
Total Test records:  10000
Total records:  70000
Train records percentage:  85.71428571428571
Test records percentage:  14.285714285714286
100.0


In [7]:
# Display the training labels; NumPy abbreviates the repr of a long array with "...".
train_labels

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [8]:
# Display the raw training images. NumPy abbreviates the repr of such a large
# array, so only the corner entries of a few images appear in the output.
# NOTE(review): consider showing a small slice instead of the whole tensor.
train_images

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [9]:
import matplotlib.pyplot as plt

# Pick the training image at index 4 and render it with the "binary" colormap.
digit = train_images[4]
plt.imshow(digit, cmap="binary")
plt.show()

<Figure size 640x480 with 1 Axes>

In [10]:
# The network architecture
#
# The network is a stack of two Dense (densely connected) layers. The first has
# 512 ReLU units and takes flattened inputs of 28 * 28 values. The second (and
# last) is a 10-way softmax layer: it returns an array of 10 probability scores
# summing to 1, where each score is the probability that the current digit image
# belongs to one of the 10 digit classes.

from tensorflow.keras import layers
from tensorflow.keras import models

network = models.Sequential([
    layers.Dense(512, activation='relu', input_shape=(28 * 28,)),
    layers.Dense(10, activation='softmax'),
])

In [11]:
# The compilation step
#
# To make the network ready for training, three more things are chosen at
# compile time:
#
#   * A loss function: how the network measures its performance on the training
#     data, and thus how it steers itself in the right direction.
#   * An optimizer: the mechanism through which the network updates itself,
#     based on the data it sees and its loss function.
#   * Metrics to monitor during training and testing: here only accuracy (the
#     fraction of the images that were correctly classified).

network.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fortunately, for common problems such as classification, regression, and
# sequence prediction, there are simple guidelines you can follow to choose the
# correct loss. For instance, you’ll use:
# o  binary_crossentropy for a two-class classification problem,
# o  categorical_crossentropy for a many-class classification problem,
# o  mean squared error (MSE) for a regression problem,
# o  connectionist temporal classification (CTC) for a sequence-learning problem.

In [12]:
# Preparing the image data
#
# Before training, flatten each image into a vector of 28 * 28 values (the input
# shape the network was built with) and rescale the pixels so that all values lie
# in the [0, 1] interval. The training images, for instance, start as a uint8
# array of shape (60000, 28, 28) with values in [0, 255] and become a float32
# array of shape (60000, 28 * 28) with values between 0 and 1.
#
# The number of samples is inferred with -1 rather than hard-coded, so the cell
# keeps working if only a subset of the data is loaded.
#
# NOTE: both names are reassigned from their own previous value, so run this
# cell once per kernel session -- re-running it would divide by 255 a second time.

train_images = train_images.reshape((-1, 28 * 28)).astype('float32') / 255
test_images = test_images.reshape((-1, 28 * 28)).astype('float32') / 255

In [13]:
# Preparing the labels
#
# Categorically (one-hot) encode the integer labels. num_classes is passed
# explicitly so that the encoding always has 10 columns, one per digit class,
# instead of being inferred from the largest label that happens to be present.
#
# NOTE: both names are reassigned from their own previous value, so run this
# cell once per kernel session -- re-running it would encode the already
# encoded labels again.

from tensorflow.keras.utils import to_categorical

train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

In [14]:
# We’re now ready to train the network: fit the model to its training data.
# The returned History object is kept so the per-epoch loss and accuracy remain
# available through history.history, instead of the cell echoing the object's repr.
history = network.fit(train_images, train_labels, epochs=5, batch_size=128)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1e0e0e7a080>

In [15]:
# Two quantities are displayed during training: the loss of the network over the training
# data, and the accuracy of the network over the training data.

In [16]:
# Training accuracy quickly reaches 0.989 (98.9%). Now check that the model
# performs well on the test set, too:
test_loss, test_acc = network.evaluate(x=test_images, y=test_labels)
print('test_acc:', test_acc)

test_acc: 0.98


In [17]:
# The test-set accuracy turns out to be about 98%, somewhat lower than the 98.9%
# training-set accuracy. This gap of roughly 0.9 percentage points between training
# accuracy and test accuracy is an example of overfitting.