In [120]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.keras.backend.clear_session()

In [121]:
(train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.mnist.load_data()

In [122]:
X_train = train_data[:-10_000]
y_train = train_labels[:-10_000]
X_test = test_data
y_test = test_labels
X_val = X_train[-10_000:]
y_val = y_train[-10_000:]

In [123]:
X_train = X_train.astype(np.float32) / 255
X_test = X_test.astype(np.float32) / 255
X_val = X_val.astype(np.float32) / 255

In [124]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape, X_val.shape, y_val.shape

((50000, 28, 28),
 (50000,),
 (10000, 28, 28),
 (10000,),
 (10000, 28, 28),
 (10000,))

In [125]:
X_train = X_train.reshape(-1, 28 * 28)
X_train.shape

(50000, 784)

In [126]:
X_test = X_test.reshape(-1, 28 * 28)
X_test.shape

(10000, 784)

In [127]:
X_val = X_val.reshape(-1, 28 * 28)
X_val.shape

(10000, 784)

In [128]:
def get_model():

    model = tf.keras.Sequential([
    tf.keras.layers.Dense(300, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
    ])

    return model

In [129]:
model = get_model()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 300)               235500    
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266610 (1.02 MB)
Trainable params: 266610 (1.02 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [130]:
model.compile(
    loss="categorical_crossentropy",
    metrics="accuracy",
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01)
)

In [131]:
from pathlib import Path
from time import strftime

def get_run_logdir(root_logdir="my_logs"):
    return Path(root_logdir) / strftime("run_%Y_%m_%d_%H_%M_%S")

run_logdir = get_run_logdir()

In [132]:
y_train_encoded = tf.keras.utils.to_categorical(y_train, num_classes=10)

y_train_encoded.shape

(50000, 10)

In [133]:
y_val_encoded = tf.keras.utils.to_categorical(y_val, num_classes=10)

y_val_encoded.shape

(10000, 10)

In [134]:
y_test_encoded = tf.keras.utils.to_categorical(y_test, num_classes=10)

y_test_encoded.shape

(10000, 10)

In [None]:
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("my_checkpoints", save_best_only=True)

tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir, profile_batch=(100, 200))

history = model.fit(X_train, y_train_encoded, batch_size=256, epochs=500, callbacks=[checkpoint_cb, early_stopping_cb, tensorboard_cb], validation_data=(X_val, y_val_encoded))

In [None]:
%load_ext tensorboard
%tensorboard --logdir=./my_logs

In [138]:
train_loss, train_accuracy = model.evaluate(X_train, y_train_encoded)
print("Training Loss:", train_loss)
print("Training Accuracy:", train_accuracy)

val_loss, val_accuracy = model.evaluate(X_val, y_val_encoded)
print("Validation Loss:", val_loss)
print("Validation Accuracy:", val_accuracy)

test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Training Loss: 0.0016841816250234842
Training Accuracy: 1.0
Validation Loss: 0.0018096660496667027
Validation Accuracy: 1.0
Test Loss: 0.08135031163692474
Test Accuracy: 0.9782999753952026
