In [2]:
import tensorflow as tf

#fix the random seed so that weight initialisation, dropout and batch shuffling can be repeated from run to run
#set_random_seed seeds python's random module, numpy and tensorflow in a single call
SEED = 42
tf.keras.utils.set_random_seed(SEED)

#imports the tensorflow package and prints its version, so the results can be tied to a specific release
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.13.0


In [3]:
#the mnist handwritten digit dataset, provided by keras
mnist = tf.keras.datasets.mnist

#load_data() returns a (train, test) pair of (images, labels) tuples
#x_train / y_train are used to fit the model, x_test / y_test are held back to test it
(x_train, y_train), (x_test, y_test) = mnist.load_data()

#the raw pixel values are integers in 0-255; dividing by 255.0 rescales them to floats between 0 and 1
x_train = x_train / 255.0
x_test = x_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
#the Sequential keras model stacks layers in order, where each layer has 1 input tensor and 1 output tensor
#layers are reusable functions with a known mathematical structure and trainable variables
model = tf.keras.models.Sequential()

#flatten layer: turns each (28, 28) input into a 1-dimensional array ready for the next layer
#input_shape = dimensions of one input sample
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

#dense layer: 128 neurons, each connected to every output of the previous layer, using the ReLU activation function
model.add(tf.keras.layers.Dense(128, activation='relu'))

#dropout layer: randomly zeroes a fraction (0.2) of its inputs during training, which reduces overfitting of the network
model.add(tf.keras.layers.Dropout(0.2))

#output dense layer: 10 units and no activation, so the model returns raw scores (logits)
model.add(tf.keras.layers.Dense(10))

In [5]:
#run the still-untrained model on the first training image (the [:1] slice keeps it as a batch of one)
#the last Dense layer has no activation, so the result is one raw score (logit) per output unit
predictions = model(x_train[:1]).numpy()
predictions

array([[ 0.10185015, -0.04807346,  0.1216628 , -0.1105941 , -0.05741018,
         0.73355234,  0.0734771 , -0.3177005 , -0.25964767, -0.10632889]],
      dtype=float32)

In [6]:
tf.nn.softmax(predictions).numpy()
#the softmax function turns the logits into probabilities: every value is positive and together they sum to 1

array([[0.1045912 , 0.09002935, 0.10668409, 0.084573  , 0.08919269,
        0.1967161 , 0.10166533, 0.0687522 , 0.07286159, 0.0849345 ]],
      dtype=float32)

In [7]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
#loss function
#the loss function compares the predicted values with the actual labels and returns a single number measuring the error
#a lower loss means better predictions; this is not the same thing as accuracy, which is tracked separately as a metric
#sparse categorical cross entropy takes integer class labels and penalises the model for giving a low probability to the true class
#from_logits=True tells the loss that the predictions are raw logits rather than probabilities, so no softmax layer is needed in the model

In [None]:
#loss of the untrained model on the first training example
#the loss is the negative log of the probability given to the true class,
#so a model that gives roughly 1/10 to every class has a loss of about -log(1/10) ~= 2.3
loss_fn(y_train[:1], predictions).numpy()

In [8]:
#compiling the model means defining the optimizer, the loss function and the metrics to report
#optimizer: the algorithm that adjusts the weights of the network to minimise the loss
#  adam is a stochastic gradient descent method that adapts its step size for each parameter
#  stochastic = having a random element; here each update is estimated from a random batch of the training data
#  gradient descent = repeatedly stepping the parameters of the network against the gradient of the loss to find optimal values
#loss: the loss function object created earlier (loss_fn)
#metrics: 'accuracy' is reported alongside the loss during training and evaluation
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)

In [9]:
model.fit(x_train, y_train, epochs=5)
#model.fit trains the model: it adjusts the parameters to reduce the loss
#epochs=5 means the training data is passed over 5 times
#the call returns a History object, which is what this cell displays as its output

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x78799ab22bc0>

In [10]:
model.evaluate(x_test,  y_test, verbose=2)
#checks the model's performance on the test set, which was not used for fitting
#"Returns the loss value & metrics values for the model in test mode."
#the returned list is [loss, accuracy], matching the loss and metrics given to model.compile

313/313 - 1s - loss: 0.0733 - accuracy: 0.9770 - 659ms/epoch - 2ms/step


[0.07327789068222046, 0.9769999980926514]