In [6]:
import tensorflow as tf
import tensorflow.keras.layers as layers

In [7]:
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train /255.0, x_test/255.0

In [8]:
model = tf.keras.models.Sequential([])

In [11]:
model.add(layers.Flatten(input_shape=(28, 28)))
model.add(layers.Dense(128, activation ='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(10))

In [13]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
________________________________________________

In [14]:
predictions = model(x_train[:1]).numpy()
predictions

array([[ 0.52414775,  0.25860754, -0.02770699, -0.1558033 ,  0.23130539,
         0.5631057 , -0.33463833,  0.18776605,  0.05571263, -0.07580306]],
      dtype=float32)

In [16]:
tf.nn.softmax(predictions).numpy()

array([[0.14392574, 0.11036105, 0.08288407, 0.0729188 , 0.10738872,
        0.14964345, 0.06097789, 0.10281342, 0.0900948 , 0.07899201]],
      dtype=float32)

The losses.SparseCategoricalCrossentropy loss takes a vector of logits and a True index and returns a scalar loss for each example.

In [17]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

This loss is equal to the negative log probability of the true class: It is zero if the model is sure of the correct class.

This untrained model gives probabilities close to random (1/10 for each class), so the initial loss should be close to -tf.log(1/10) ~= 2.3.

In [18]:
loss_fn(y_train[:1], predictions).numpy()

1.8994998

In [19]:
model.compile(optimizer = 'adam',
             loss=loss_fn,
             metrics = ['accuracy'])

In [20]:
model.fit(x_train,y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x12aef0fb4e0>

In [21]:
model.evaluate(x_test, y_test, verbose=2)

313/313 - 0s - loss: 0.0753 - accuracy: 0.9775


[0.07526135444641113, 0.9775000214576721]

In [23]:
probability_model = tf.keras.Sequential([model,
                                       layers.Softmax()
                                       ])

In [24]:
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[1.33377671e-06, 8.92054763e-10, 1.87549831e-05, 2.48215074e-04,
        1.13018527e-11, 6.98339235e-08, 1.48618287e-13, 9.99721825e-01,
        1.54745146e-06, 8.26791256e-06],
       [6.61926403e-10, 1.15978495e-04, 9.99863982e-01, 1.85022182e-05,
        1.41477478e-16, 5.32331853e-07, 2.81893424e-08, 5.90845678e-13,
        1.12824341e-06, 1.91172742e-16],
       [7.12258128e-08, 9.98264253e-01, 5.29253120e-05, 2.63577422e-05,
        5.67089774e-05, 2.65816379e-06, 1.98224225e-05, 1.20747928e-03,
        3.66118882e-04, 3.49975971e-06],
       [9.99915242e-01, 2.31392616e-09, 5.70831435e-05, 8.28797795e-08,
        4.71977444e-07, 3.08786366e-06, 1.20382092e-05, 1.99429655e-06,
        1.01448478e-07, 9.79300694e-06],
       [1.04361307e-05, 6.99653724e-10, 1.07371227e-06, 9.89850246e-09,
        9.96092975e-01, 3.25068683e-08, 9.99864483e-07, 4.52714419e-04,
        5.53289908e-07, 3.44123645e-03]], dtype=float32)>