# MNIST
## Loading

In [13]:
import tensorflow as tf

In [14]:
mnist = tf.keras.datasets.mnist

# load_data() hands back a (train, test) pair; each half is an
# (images, labels) tuple, unpacked here into the four working arrays.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [15]:
# Rescale pixel intensities by 1/255 (MNIST pixels are 8-bit, so this maps
# them into [0, 1]) and convert the arrays to floating point.
# NOTE: this overwrites x_train / x_test, so run it once per fresh load.
x_train = x_train / 255.0
x_test = x_test / 255.0

## Model
Building the Sequential model by stacking layers

In [16]:
# Fix TensorFlow's global seed before any weights are created so that ops
# drawing their randomness from it (weight initialisation, dropout masks) are
# repeatable under Restart & Run All. Results may still differ slightly across
# hardware or TensorFlow versions.
SEED = 42
tf.random.set_seed(SEED)

# Small fully-connected classifier that outputs raw logits (no softmax here;
# the loss is configured with from_logits=True).
model = tf.keras.models.Sequential([
    # Flatten each 28x28 image into a 784-element vector.
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    # Hidden layer: 128 ReLU units.
    tf.keras.layers.Dense(128, activation='relu'),
    # Randomly zero 20% of the hidden activations during training only.
    tf.keras.layers.Dropout(0.2),
    # Output layer: one unnormalised score (logit) per digit class.
    tf.keras.layers.Dense(10),
])

## Prediction

In [17]:
# Forward pass on a batch holding only the first training image; the model
# returns a tensor of logits, converted to a NumPy array for inspection.
logits_tensor = model(x_train[:1])
predictions = logits_tensor.numpy()
predictions

array([[-0.6533852 ,  0.14482024, -0.14389896, -0.75136817, -0.99446326,
         0.10279155,  0.2075993 , -0.9738234 ,  0.28784522, -0.28650537]],
      dtype=float32)

The tf.nn.softmax function converts these logits to probabilities for each class

The softmax could instead be applied as the activation function of the final dense layer, but that is a bad idea: with a softmax output it is impossible to provide an exact and numerically stable loss calculation for all models

In [18]:
# Normalise the logits along the class axis (the last one, which is also the
# default) so each row sums to 1.
tf.nn.softmax(predictions, axis=-1).numpy()

array([[0.06356695, 0.14121719, 0.10580309, 0.05763388, 0.04519631,
        0.135405  , 0.15036687, 0.04613885, 0.16293055, 0.09174137]],
      dtype=float32)

The `losses.SparseCategoricalCrossentropy` loss takes a vector of logits and the index of the true class, and returns a scalar loss for each example

The loss is the negative log probability of the true class

In [19]:
# from_logits=True: the model emits raw logits, so the loss applies the
# softmax internally instead of expecting probabilities.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

In [20]:
# Loss of the untrained model on the first training example:
# integer label as y_true, the logits computed above as y_pred.
loss_fn(y_true=y_train[:1], y_pred=predictions).numpy()

1.999485

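The untrained model has a loss of about 2: its predicted probabilities are close to uniform (roughly 1/10 per class), so the initial loss is expected to be near $-\ln(1/10) \approx 2.3$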

## Training
The Model.fit method adjusts the model parameters to minimize the loss

In [21]:
# Configure training: Adam optimiser, the logits-based sparse cross-entropy
# defined above, and accuracy reported alongside the loss.
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)

In [22]:
# Train on the full training set for five epochs.
model.fit(x=x_train, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f47d8503400>

The `Model.evaluate` method checks the model's performance, usually on a validation or test set

In [31]:
# Score the trained model on the held-out test split; returns the loss
# followed by the compiled metric (accuracy).
model.evaluate(x=x_test, y=y_test, verbose=1)



[0.071898452937603, 0.9779999852180481]

The trained model can now be wrapped with a softmax layer so that it returns probabilities

In [32]:
# Stack a Softmax layer on top of the trained logits model so that calling
# the wrapper yields per-class probabilities instead of raw logits.
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())

In [34]:
# Class probabilities for the first five test images (one row per image).
probability_model(x_test[0:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[2.0542620e-08, 5.4019511e-10, 1.0290047e-05, 2.9362456e-04,
        4.7601001e-10, 1.8215069e-08, 9.1634382e-14, 9.9969447e-01,
        7.7553767e-07, 8.1635085e-07],
       [2.5183641e-10, 1.3750466e-05, 9.9995816e-01, 2.6195927e-05,
        1.4603344e-16, 1.0380346e-07, 3.0328845e-08, 4.5965163e-12,
        1.8301110e-06, 5.5935063e-13],
       [2.7905895e-07, 9.9704212e-01, 9.1661408e-05, 2.1939173e-05,
        8.1209982e-05, 1.3593687e-05, 1.8769220e-05, 1.2987241e-03,
        1.4261382e-03, 5.6514027e-06],
       [9.9994588e-01, 5.1397031e-10, 2.1899818e-05, 2.8642269e-08,
        9.8739918e-07, 3.9010815e-06, 3.9328856e-06, 1.1342747e-05,
        2.2127290e-08, 1.1973714e-05],
       [6.1782788e-07, 5.6527727e-10, 8.7821428e-07, 4.1815071e-08,
        9.9884123e-01, 8.5142958e-08, 4.7145818e-06, 7.3820731e-05,
        1.5198476e-07, 1.0785246e-03]], dtype=float32)>