In [18]:
from google.colab import drive

# Location inside the Colab VM where Google Drive is attached.
DRIVE_MOUNT_POINT = '/content/drive'

# Attach Google Drive at the mount point above.
# NOTE(review): none of the visible cells read from or write to Drive - confirm this mount is still needed.
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [23]:
# Load MNIST through TensorFlow Datasets.
# as_supervised=True and with_info=True are passed so that the elements can be consumed
# as (image, label) pairs (see scale) and so that the split metadata read later is returned.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    as_supervised=True,
    with_info=True,
)

[1mDownloading and preparing dataset mnist/3.0.1 (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to /root/tensorflow_datasets/mnist/3.0.1...[0m


local data directory. If you'd instead prefer to read directly from our public
GCS bucket (recommended if you're running on GCP), you can instead pass
`try_gcs=True` to `tfds.load` or set `data_dir=gs://tfds-data/datasets`.



HBox(children=(FloatProgress(value=0.0, description='Dl Completed...', max=4.0, style=ProgressStyle(descriptio…



[1mDataset mnist downloaded and prepared to /root/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.[0m


In [24]:
# Pull the train and test splits out of the loaded dataset by split name
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Reserve 10% of the training split for validation.
# The counts are cast to int64 tensors because they are passed to dataset ops later.
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples, tf.int64)
num_test_sample = tf.cast(
    mnist_info.splits['test'].num_examples, tf.int64)

Scale the images

In [26]:
# Scaling helper: each individual color value is 0–255 (as 2⁸ = 256 possibilities),
# so dividing by 255 brings every pixel into the range [0, 1].
def scale(image, label):
  """Cast the image to float32 and divide it by 255; the label is returned unchanged."""
  scaled_image = tf.cast(image, tf.float32) / 255.
  return scaled_image, label

# Apply the scaling element-wise to both splits
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

Shuffle the data

In [27]:
#Buffer size is a scalar that affects the randomness of the transformation
#maintains a buffer of buffer_size elements and randomly selects the next element from that buffer (replacing it with the next input element, if one is available)
BUFFER_SIZE = 10000

#reshuffle_each_iteration=False freezes the shuffled order, so take() and skip() below
#select the same elements on every pass and the two splits never overlap.
#With the default (True) the order changes every epoch and validation examples leak into training.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False)

#The first num_validation_samples elements form the validation set (take),
#everything after them forms the training set (skip).
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

BATCH_SIZE = 100

#Only the training split is reshuffled on every epoch, then cut into mini-batches
train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
#Validation and test data are each served as one single batch
validation_data = validation_data.batch(num_validation_samples)
#NOTE: test_data is rebound to its batched version here, so re-running this cell
#without re-running the scaling cell first would batch it a second time.
test_data = test_data.batch(num_test_sample)

Outline the model

In [29]:
#define the output size (one unit per MNIST digit class)
output_size = 10
#define the hidden layer size
#size of the hidden layer should be between the size of the input layer and the size of the output layer. In this case, the hidden layer size will be 50
hidden_layer_size = 50

#define the model
#Sequential model is a linear stack of layers
#choose activation function for the hidden and output layer
#https://towardsdatascience.com/deep-learning-with-tensorflow-5d3a7a8c55cd
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    #the output layer must have output_size units so that softmax yields exactly one probability per class
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

Compile the model

In [30]:
#Optimizer: optimization algorithms are responsible for reducing the loss and providing the most
#accurate results possible. This example uses Adam, which can be used instead of the classical
#stochastic gradient descent procedure to update the network weights iteratively based on the training data.
#Loss: a way of evaluating how well the model fits the given data. If predictions deviate too much
#from the actual results, the loss function returns a very large number. Categorical cross-entropy and
#sparse categorical cross-entropy compute the same loss; the only difference is that categorical
#cross-entropy expects one-hot encoded labels, while sparse categorical cross-entropy expects integer labels.
#Metrics: a metric is a function that is used to judge the performance of the model.

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

Train the model

In [31]:
#Batch size — the number of training examples in one forward/backward pass.
#It is not set in this cell: train_data was already batched when the input pipeline was built,
#and fit() takes the batch size from the dataset itself.
#The number of epochs — Number of epochs to train the model. An epoch is an iteration over the entire x and y data provided.
#Early stop — Stop training when a monitored quantity has stopped improving. Patience is a number that defines a number of epochs that produced the monitored quantity with no improvement after which the training will be stopped.

#define the maximum number of epochs and the early stop
#(training may finish before NUM_EPOCHS if the validation loss stops improving)
NUM_EPOCHS = 10
EARLY_STOP = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)

#keep the returned History object so the per-epoch metrics stay available after training
history = model.fit(train_data,
                    epochs=NUM_EPOCHS,
                    callbacks=[EARLY_STOP],
                    validation_data=validation_data,
                    verbose=2)

Epoch 1/10
540/540 - 9s - loss: 0.5375 - accuracy: 0.8583 - val_loss: 0.2229 - val_accuracy: 0.9374
Epoch 2/10
540/540 - 6s - loss: 0.1986 - accuracy: 0.9424 - val_loss: 0.1635 - val_accuracy: 0.9518
Epoch 3/10
540/540 - 6s - loss: 0.1522 - accuracy: 0.9554 - val_loss: 0.1247 - val_accuracy: 0.9640
Epoch 4/10
540/540 - 6s - loss: 0.1263 - accuracy: 0.9633 - val_loss: 0.1020 - val_accuracy: 0.9702
Epoch 5/10
540/540 - 6s - loss: 0.1050 - accuracy: 0.9681 - val_loss: 0.0861 - val_accuracy: 0.9758
Epoch 6/10
540/540 - 6s - loss: 0.0945 - accuracy: 0.9713 - val_loss: 0.0808 - val_accuracy: 0.9763
Epoch 7/10
540/540 - 6s - loss: 0.0820 - accuracy: 0.9754 - val_loss: 0.0733 - val_accuracy: 0.9776
Epoch 8/10
540/540 - 6s - loss: 0.0724 - accuracy: 0.9781 - val_loss: 0.0620 - val_accuracy: 0.9821
Epoch 9/10
540/540 - 6s - loss: 0.0656 - accuracy: 0.9800 - val_loss: 0.0503 - val_accuracy: 0.9856
Epoch 10/10
540/540 - 6s - loss: 0.0599 - accuracy: 0.9825 - val_loss: 0.0584 - val_accuracy: 0.9822

<tensorflow.python.keras.callbacks.History at 0x7f0d674bdd50>

Evaluate the model

In [32]:
# Score the trained model on the held-out test set; evaluate() is unpacked into (loss, accuracy)
test_loss, test_accuracy = model.evaluate(test_data)

# Build both report lines first, then print them: loss as-is, accuracy scaled to a percentage
report_lines = [
    'Test loss: {0:.2f}'.format(test_loss),
    'Test accuracy: {0:.2f}'.format(test_accuracy*100.),
]
for report_line in report_lines:
  print(report_line)

Test loss: 0.11
Test accuracy: 96.62


This is a simple example of image classification: a small fully connected network trained on the MNIST dataset.