In [551]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds #tensorflow_datasets has a lot of data sets for modeling

## Preprocessing - load, shuffle, assign validation data, batch

In [552]:
#load data
mnist_dataset, mnist_info = tfds.load(name = 'mnist', with_info = True, as_supervised = True)

In [553]:
# splitting into testing & training data sets
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

In [554]:
# adding a validation data set
# figuring out how large the validation series will be based on converting 10% of the test data to validation
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64) #make sure the sample count is an int

# figuring out how large the test data sample actually is (for convenience)
num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

In [555]:
# Scale the data. (I could have also called tf's map function)
def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255. #. continues the float use
    return image, label

In [556]:
scaled_train_and_validation_data = mnist_train.map(scale) #scales the entire train data set
scaled_test_data = mnist_test.map(scale) #scale the test data

In [557]:
# shuffling to make sure batching doesn't have confounding patterns (ex. by date)
BUFFER_SIZE = 10000 #make sure we don't try to shuffle everything at once if the data set is massive
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

In [558]:
# separating the validation data from the combined data set after shuffling
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

In [559]:
BATCH_SIZE = 1000
train_data = train_data.batch(BATCH_SIZE) #adds a column to the tensor indicating batches
# the validation & test data doesn't have to be batched to save computational power since they are only used for
# forward propagation, but the model will expect batching, so we will make one batch w/ all data
validation_data = validation_data.batch(num_validation_samples)
test_data = scaled_test_data.batch(num_test_samples)

In [560]:
# make inputs & targets iterable (necessary since imported as tuple w/ supervision) and loads first batch
validation_inputs, validation_targets = next(iter(validation_data))

## Model

In [561]:
# Outlining the model w/ hyperparamters
input_size = 784
output_size = 10
hidden_layer_size = 500 #guesswork

In [562]:
model = tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape= (28,28,1)), #each image is 28pixels x x28 x1
                            tf.keras.layers.Dense(hidden_layer_size, activation ='relu'), # make 1st hidden layer, adjusted activation function based on training data (relu seems to consistently outperform the other options for this data set)
                            tf.keras.layers.Dense(hidden_layer_size, activation ='relu'), # repeating hidden layers
                            tf.keras.layers.Dense(hidden_layer_size, activation ='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation ='relu'),
                            tf.keras.layers.Dense(output_size, activation = 'softmax') # make output layer, using softmax since this will assign probability for classification data
])

In [563]:
# Choose the optimizer & the loss function
# sparse applies one-hot encoding without an additional step
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

## Training

In [564]:
NUM_EPOCHS = 10 #arbitrary choice to start
model.fit(train_data, epochs = NUM_EPOCHS, validation_data = (validation_inputs, validation_targets), verbose =2)

Epoch 1/10
54/54 - 1s - 27ms/step - accuracy: 0.8609 - loss: 0.4775 - val_accuracy: 0.9473 - val_loss: 0.1843
Epoch 2/10
54/54 - 1s - 14ms/step - accuracy: 0.9624 - loss: 0.1283 - val_accuracy: 0.9653 - val_loss: 0.1172
Epoch 3/10
54/54 - 1s - 14ms/step - accuracy: 0.9746 - loss: 0.0837 - val_accuracy: 0.9758 - val_loss: 0.0825
Epoch 4/10
54/54 - 1s - 13ms/step - accuracy: 0.9832 - loss: 0.0548 - val_accuracy: 0.9828 - val_loss: 0.0588
Epoch 5/10
54/54 - 1s - 14ms/step - accuracy: 0.9889 - loss: 0.0376 - val_accuracy: 0.9838 - val_loss: 0.0520
Epoch 6/10
54/54 - 1s - 13ms/step - accuracy: 0.9904 - loss: 0.0311 - val_accuracy: 0.9860 - val_loss: 0.0455
Epoch 7/10
54/54 - 1s - 14ms/step - accuracy: 0.9927 - loss: 0.0241 - val_accuracy: 0.9893 - val_loss: 0.0352
Epoch 8/10
54/54 - 1s - 13ms/step - accuracy: 0.9943 - loss: 0.0180 - val_accuracy: 0.9900 - val_loss: 0.0337
Epoch 9/10
54/54 - 1s - 14ms/step - accuracy: 0.9942 - loss: 0.0176 - val_accuracy: 0.9920 - val_loss: 0.0253
Epoch 10/1

<keras.src.callbacks.history.History at 0x183ec993e30>

## Testing

In [565]:
test_loss, test_accuracy = model.evaluate(test_data)
print('Test loss: {0:.2f} Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - accuracy: 0.9775 - loss: 0.0876
Test loss: 0.09 Test accuracy: 97.75%
