In [1]:
# Import TensorFlow and display its version so the recorded outputs can be
# tied to a specific release.
import tensorflow as tf
tf.__version__

'2.15.0'

In [3]:
# Handle to the MNIST dataset module bundled with Keras.
mnist = tf.keras.datasets.mnist

# load_data() returns a (train, test) pair, each an (images, labels) tuple.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide both image arrays by 255.0 (presumably mapping 8-bit pixel
# intensities into the [0, 1] range -- confirm against the dataset docs).
x_train = x_train / 255.0
x_test = x_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created so that weight initialisation, dropout masks and fit() shuffling are
# repeatable under Restart & Run All. Outputs recorded in this notebook before
# the seed was added will differ from a fresh run. Bit-exact results may still
# vary across hardware/backends.
tf.keras.utils.set_random_seed(42)

# Small fully connected classifier:
#   Flatten : turns each 28x28 input into a flat vector
#   Dense   : 128 hidden units with ReLU activation
#   Dropout : drops 20% of the hidden activations during training
#   Dense   : 10 output units with no activation, i.e. the model emits logits
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10)
])

In [7]:
# Run the (still untrained) model on the first training image. The result is
# one row of logits -- raw, unnormalised scores -- with one entry per class.
predictions = model.predict(x_train[:1])
predictions



array([[ 0.48679668, -0.30727652, -0.58182204, -0.03105865,  0.32014263,
        -0.59404194,  0.06471579,  0.08737686,  0.7465235 ,  0.8450874 ]],
      dtype=float32)

In [9]:
# Convert the logits to per-class probabilities with softmax and show them
# as a NumPy array.
tf.nn.softmax(predictions).numpy()

array([[0.13104564, 0.05923262, 0.0450119 , 0.07807659, 0.11092912,
        0.0444652 , 0.08592413, 0.08789349, 0.1699106 , 0.18751077]],
      dtype=float32)

In [10]:
# The loss takes the ground-truth labels and the model's logits and returns a
# scalar loss value.
# from_logits=True is required because the model's final Dense layer has no
# activation, so its outputs are unnormalised logits rather than probabilities.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [13]:
# Loss of the untrained model on the first training example: its label versus
# the logits computed earlier in `predictions`.
loss_fn(y_train[:1], predictions).numpy()

3.1130483

In [14]:
# Configure training: Adam optimiser, the logits-based cross-entropy loss
# defined earlier, and accuracy as the reported metric.
model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])

In [15]:
# Train on the full training split for five passes over the data.
model.fit(x=x_train, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7f2dec307190>

In [16]:
# Evaluate on the test split; the call returns [loss, accuracy].
# verbose=2 prints a single summary line instead of a progress bar.
model.evaluate(x=x_test, y=y_test, verbose=2)

313/313 - 1s - loss: 0.0711 - accuracy: 0.9778 - 917ms/epoch - 3ms/step


[0.07112301886081696, 0.9778000116348267]

In [17]:
# To get probabilities instead of logits, wrap the trained model and append
# a Softmax layer to it.
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

In [18]:
# Class probabilities for the first five test images, converted to a NumPy
# array for display (one row per image, one column per class).
probability_model(x_test[:5]).numpy()

array([[1.0748377e-07, 2.2539622e-08, 8.8603701e-07, 1.1856243e-05,
        6.2255492e-12, 1.5508010e-07, 4.8842506e-13, 9.9998391e-01,
        1.3508306e-07, 3.0771573e-06],
       [5.8176607e-07, 9.7282376e-04, 9.9902332e-01, 1.6289961e-06,
        8.9713939e-16, 1.2388365e-06, 4.6509360e-08, 3.3698698e-14,
        3.1663967e-07, 1.3456521e-14],
       [1.0389068e-06, 9.9861073e-01, 5.9637695e-04, 3.7265866e-06,
        2.3544892e-05, 7.5821144e-06, 1.1175438e-05, 6.4227672e-04,
        1.0071015e-04, 2.7152455e-06],
       [9.9988854e-01, 1.7294900e-10, 7.8240128e-06, 1.3270745e-08,
        1.9769658e-08, 6.6607242e-07, 9.9922210e-05, 2.9976018e-06,
        1.1260753e-09, 1.8579883e-08],
       [1.1633563e-05, 1.4289322e-08, 1.3646461e-05, 3.3038975e-07,
        9.9405330e-01, 6.1197818e-07, 8.8902045e-05, 1.4761134e-04,
        5.2500973e-06, 5.6787506e-03]], dtype=float32)

In [19]:
# Same call without .numpy(): the result is displayed as a tf.Tensor, which
# also shows its shape and dtype.
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[1.0748377e-07, 2.2539622e-08, 8.8603701e-07, 1.1856243e-05,
        6.2255492e-12, 1.5508010e-07, 4.8842506e-13, 9.9998391e-01,
        1.3508306e-07, 3.0771573e-06],
       [5.8176607e-07, 9.7282376e-04, 9.9902332e-01, 1.6289961e-06,
        8.9713939e-16, 1.2388365e-06, 4.6509360e-08, 3.3698698e-14,
        3.1663967e-07, 1.3456521e-14],
       [1.0389068e-06, 9.9861073e-01, 5.9637695e-04, 3.7265866e-06,
        2.3544892e-05, 7.5821144e-06, 1.1175438e-05, 6.4227672e-04,
        1.0071015e-04, 2.7152455e-06],
       [9.9988854e-01, 1.7294900e-10, 7.8240128e-06, 1.3270745e-08,
        1.9769658e-08, 6.6607242e-07, 9.9922210e-05, 2.9976018e-06,
        1.1260753e-09, 1.8579883e-08],
       [1.1633563e-05, 1.4289322e-08, 1.3646461e-05, 3.3038975e-07,
        9.9405330e-01, 6.1197818e-07, 8.8902045e-05, 1.4761134e-04,
        5.2500973e-06, 5.6787506e-03]], dtype=float32)>