In [1]:
#!/usr/local/bin/python3.10
import tensorflow as tf
import numpy as np
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.9.1


## Load the dataset

Load the dataset and split between x, y testing and training data.

In [2]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# make the brightness values for each pixel between 0 and 1
x_train, x_test = x_train / 255.0, x_test / 255.0

## Create the neural network

Preprocess using convolutions before inputting into the main part of the neural network.

In [3]:
model = tf.keras.models.Sequential([
  # perform some convolutions, increasing in size to preprocess the data and give the model correlations between neuron relative position on a 28x28 matrix
  tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(28, 28, 1)),
  tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
  tf.keras.layers.BatchNormalization(),
  tf.keras.layers.MaxPooling2D((2,2)),

  tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
  tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
  tf.keras.layers.BatchNormalization(),
  tf.keras.layers.MaxPooling2D((2,2)),

  # slowly lower the number of neurons in each layer, to increase accuracy.
  tf.keras.layers.Dropout(0.5), 
  tf.keras.layers.Flatten(input_shape=(4, 4, 128)),
  tf.keras.layers.Dense(256, activation='relu'),
  tf.keras.layers.Dropout(0.4),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(64, activation='relu'),
  tf.keras.layers.Dropout(0.1),
  # use softmax output for output layer to return a number between 0 and 1 (sigmoid could also be used).
  tf.keras.layers.Dense(10, activation='softmax')
])

2023-08-10 18:56:42.995577: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Print predictions before training

In [4]:
predictions = model(x_train[:1]).numpy()
predictions

array([[0.10497504, 0.09995271, 0.09904867, 0.09566367, 0.10067783,
        0.10240548, 0.10322195, 0.09732593, 0.10059495, 0.09613378]],
      dtype=float32)

In [5]:
tf.nn.softmax(predictions).numpy()

array([[0.10049833, 0.09999485, 0.0999045 , 0.09956689, 0.10006739,
        0.10024042, 0.1003223 , 0.09973254, 0.1000591 , 0.09961371]],
      dtype=float32)

## Define the loss and optimizer functions

In [6]:
# use sparse categorical cross entropy for classification jobs
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [7]:
loss_fn(y_train[:1], predictions).numpy()

2.3001838

In [8]:
# ues adam optimizer function for dynamically updating gradient descent (increases accuracy).
optimizerFunction = tf.keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.995,
    epsilon=5e-06,
    amsgrad=True,
    name='Adam',
)

In [9]:
model.compile(optimizer=optimizerFunction,
              loss=loss_fn,
              metrics=['accuracy'])

## Start Training

Use a callback with early stopping to decrease overfitting of the model. Train to a max of 50 epochs.

In [12]:
callback = tf.keras.callbacks.EarlyStopping(monitor="accuracy", patience=2)
model.fit(x_train, y_train, epochs=50, batch_size=32, callbacks=[callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50


<keras.callbacks.History at 0x1598f7e20>

## Evaluate

In [13]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 3s - loss: 0.0157 - accuracy: 0.9957 - 3s/epoch - 9ms/step


[0.015689877793192863, 0.9957000017166138]

In [15]:
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])

In [16]:
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[0.08533674, 0.08533674, 0.08533674, 0.08533674, 0.08533674,
        0.08533674, 0.08533674, 0.23196931, 0.08533674, 0.08533674],
       [0.08533674, 0.08533674, 0.23196931, 0.08533674, 0.08533674,
        0.08533674, 0.08533674, 0.08533674, 0.08533674, 0.08533674],
       [0.08533674, 0.23196931, 0.08533674, 0.08533674, 0.08533674,
        0.08533674, 0.08533674, 0.08533674, 0.08533674, 0.08533674],
       [0.23196931, 0.08533674, 0.08533674, 0.08533674, 0.08533674,
        0.08533674, 0.08533674, 0.08533674, 0.08533674, 0.08533674],
       [0.08533674, 0.08533674, 0.08533674, 0.08533674, 0.23196927,
        0.08533674, 0.08533674, 0.08533674, 0.08533674, 0.08533676]],
      dtype=float32)>

## Save the trained model to an external file

In [17]:
model.save('mnist_predictor.h5')