# Image classification using the MNIST dataset

## Setup

In [None]:
# Common imports
import sys
import os
import sklearn
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Seed NumPy's and TensorFlow's global random generators with a fixed value
# to make this notebook's output stable across runs.
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib.pyplot as plt

# Ignore useless warnings (see SciPy issue #5998): any warning whose message
# starts with "internal gelsd" is suppressed.
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

## Load the data

In [None]:
# Load MNIST through Keras. load_data() hands the data back already split into
# a training set and a test set, each as an (images, labels) pair.
mnist = keras.datasets.mnist
(x_train_full, y_train_full), (x_test, y_test) = mnist.load_data()

In [None]:
# Show the shape of the full training set, i.e. the number of images and the
# dimensions of each image.
x_train_full.shape

In [None]:
# Hold out the first 5,000 instances of the full training set as a validation
# set and keep the remaining ones as the (smaller) training set.
y_valid = y_train_full[:5000]
y_train = y_train_full[5000:]

# Scale the pixel intensities down to the 0-1 range, converting them to
# floats, by dividing by 255.0.
x_valid = x_train_full[:5000] / 255.0
x_train = x_train_full[5000:] / 255.0

# NOTE: x_test is overwritten with its scaled version, so running this cell a
# second time would divide it by 255.0 again.
x_test = x_test / 255.0

x_train: 55000
x_valid: 5000

y_train: 55000
y_valid: 5000

In [None]:
# Plot the first training image using Matplotlib's imshow() function, with a
# binary color map, and hide the axes.
plt.imshow(x_train[0], cmap="binary")
plt.axis('off')
plt.show()

## Create a model using the Sequential API

In [None]:
# Build the network by handing the complete layer stack to Sequential at once;
# the layers are added in the order in which they are listed.
model = keras.models.Sequential([
    # Input layer:
    # A "Flatten" layer converts each 28x28 input image into a 1-dimensional
    # array. Alternatively, the input shape could be declared with a separate
    # "InputLayer" placed in front of the "Flatten" layer.
    keras.layers.Flatten(input_shape=[28, 28]),

    # Hidden layers:
    # A dense layer is fully connected.
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),

    # Output layer:
    # The layer contains one neuron per class (i.e. 10).
    # Since this is multiclass classification, we use the softmax activation
    # function. It ensures that the estimated probabilities are between 0 and
    # 1, and that the estimated probabilities for one prediction sum to 1.
    # (For binary classification we would have a single output neuron using
    # the logistic activation function.)
    keras.layers.Dense(10, activation="softmax"),
])

model.summary()

## Compile the model

In [None]:
# loss:      "sparse_categorical_crossentropy" is the loss function to use for
#            classification when the classes are exclusive and the labels are
#            given as class indices.
# optimizer: "sgd" means Stochastic Gradient Descent.
# metrics:   "accuracy" enables us to measure the accuracy during training and
#            evaluation.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"],
)

## Train the model

In [None]:
# validation_data: Keras will measure the loss and the extra metrics on the
#     validation set at the end of each epoch.
# epochs: the default is 1, which is not enough for a good result.
# batch_size: the default is 32 instances. Since one batch is presented for
#     each training pass (or step), and the training set contains 55,000
#     instances, we get 1719 passes per epoch (55,000 / 32, rounded up).
# If you want to train more later, you can just call the fit() method again,
# since Keras just continues training where it left off.
history = model.fit(
    x_train,
    y_train,
    epochs=75,
    batch_size=32,
    validation_data=(x_valid, y_valid),
)

In [None]:
# Show the learning curves: one line per metric recorded during training.
# (To be completely comparable with the validation curves, the training curves
# should be shifted half an epoch to the left.)
import pandas as pd

curves = pd.DataFrame(history.history)
ax = curves.plot(figsize=(8, 5))
ax.grid(True)
# Restrict the vertical range to [0, 1].
ax.set_ylim(0, 1)
plt.show()

## Evaluate the model

In [None]:
# Measure the loss and the accuracy of the trained model on the test set.
model.evaluate(x_test, y_test)

In [None]:
# Use the first three test images as "new" instances and estimate, for each of
# them, one probability per class.
x_new = x_test[:3]
y_proba = model.predict(x_new)
# Round to two decimals to make the probabilities easier to read.
y_proba.round(2)

**Note:** `Sequential.predict_classes` (from `tensorflow.python.keras.engine.sequential`) is deprecated and will be removed after 2021-01-01.<br>
Use one of the following instead:<br>

`np.argmax(model.predict(x), axis=-1)`, if your model does multi-class classification (e.g. if it uses a `softmax` last-layer activation).<br>

`(model.predict(x) > 0.5).astype("int32")`, if your model does binary classification (e.g. if it uses a `sigmoid` last-layer activation).

In [None]:
# Make predictions without probabilities: for each instance, pick the index of
# the highest value along the last axis of the model output, i.e. the most
# probable class. This is used in place of the deprecated
# model.predict_classes(x_new).
y_pred = np.argmax(model.predict(x_new), axis=-1)
y_pred

In [None]:
# Show the first test image (the first of the instances predicted above), with
# a binary color map and hidden axes.
plt.imshow(x_test[0], cmap="binary")
plt.axis('off')
plt.show()