In [2]:
import tensorflow as tf
import random
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras # type: ignore
from keras import layers, Sequential, callbacks
from keras.utils import to_categorical

In [3]:
# --- Configuration: values a reader may want to tune -------------------------
RANDOM_SEED = 42      # single seed shared by every random number generator below
LEARNING_RATE = 1e-3  # default step size handed to the Adam optimizer
HIDDEN_UNITS = 128    # default width of the first hidden layer

# Seed Python, NumPy and TensorFlow together. Seeding TensorFlow alone leaves
# any code path that draws from `random` or `np.random` unseeded, so results
# could still differ between otherwise identical runs.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [4]:
# Load MNIST through Keras; it comes back as a (train, test) pair of
# (images, labels) tuples.
train_split, test_split = keras.datasets.mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Sanity-check the raw array shapes before any preprocessing.
print(
    "Shapes (train, test):",
    X_train.shape,
    y_train.shape,
    X_test.shape,
    y_test.shape,
)

Shapes (train, test): (60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [5]:
# This cell rebinds X_* / y_* in place, so it must run exactly once per load.
# Running it a second time would divide the pixels by 255 again and one-hot
# encode labels that are already one-hot. Fail loudly instead of corrupting
# the data silently.
if X_train.ndim != 3 or X_test.ndim != 3 or y_train.ndim != 1 or y_test.ndim != 1:
    raise RuntimeError(
        "Data already looks preprocessed; re-run the data loading cell before this one."
    )

# Scale pixel values from [0, 255] down to [0, 1] as float32.
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# Flatten every 28x28 image into a 784-element vector for the dense network.
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# One-hot encode the digit labels. The class count is passed explicitly so both
# splits always get 10 columns instead of a width inferred from the largest
# label that happens to be present in each split.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

print("Shapes (train, test):", X_train.shape, y_train.shape, X_test.shape, y_test.shape)

Shapes (train, test): (60000, 784) (60000, 10) (10000, 784) (10000, 10)


In [6]:
def build_mlp(input_dim=784, learning_rate=LEARNING_RATE, hidden_units=HIDDEN_UNITS, num_classes=10):
    """Build and compile a two-hidden-layer MLP classifier.

    Architecture: input -> Dense(hidden_units, relu)
    -> Dense(hidden_units // 2, relu) -> Dense(num_classes, softmax).

    Args:
        input_dim: Length of each flattened input vector.
        learning_rate: Learning rate passed to the Adam optimizer.
        hidden_units: Width of the first hidden layer; the second hidden
            layer uses ``hidden_units // 2`` units.
        num_classes: Number of output classes (width of the softmax layer).
            Defaults to 10, matching the previous hard-coded value.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        (one-hot targets) and reporting accuracy.
    """
    model = Sequential(
        [
            # keras.Input declares the input shape with an explicit keyword.
            keras.Input(shape=(input_dim,)),
            layers.Dense(hidden_units, activation="relu"),
            layers.Dense(hidden_units // 2, activation="relu"),
            layers.Dense(num_classes, activation="softmax"),
        ]
    )
    opt = keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(
        optimizer=opt,
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


# Instantiate the network with the notebook defaults and show its layers.
model = build_mlp()
model.summary()

In [None]:
# Halt training once val_loss has gone 5 epochs without improving, then roll
# the model back to the weights from its best epoch.
early_stop = callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

# Write the model to disk only on epochs where val_loss improves.
# NOTE(review): ".h5" is the legacy HDF5 format; consider the native ".keras"
# extension once nothing else depends on this file name - confirm first.
checkpoint = callbacks.ModelCheckpoint(
    "best_mlp_mnist.h5",
    monitor="val_loss",
    save_best_only=True,
)

In [None]:
# Training settings gathered in one mapping so they are easy to scan and tweak.
fit_options = {
    "validation_split": 0.1,  # fraction of the training arrays held out for validation
    "epochs": 30,             # upper bound; early_stop may end training sooner
    "batch_size": 128,
    "verbose": 2,             # one summary line per epoch
    "callbacks": [early_stop, checkpoint],
}

# Train the model; `history` keeps the per-epoch metrics returned by fit().
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/30




422/422 - 2s - 6ms/step - accuracy: 0.8977 - loss: 0.3599 - val_accuracy: 0.9593 - val_loss: 0.1421
Epoch 2/30




422/422 - 1s - 3ms/step - accuracy: 0.9565 - loss: 0.1496 - val_accuracy: 0.9677 - val_loss: 0.1057
Epoch 3/30




422/422 - 1s - 3ms/step - accuracy: 0.9702 - loss: 0.1021 - val_accuracy: 0.9713 - val_loss: 0.0969
Epoch 4/30




422/422 - 1s - 3ms/step - accuracy: 0.9784 - loss: 0.0758 - val_accuracy: 0.9735 - val_loss: 0.0951
Epoch 5/30




422/422 - 1s - 3ms/step - accuracy: 0.9838 - loss: 0.0581 - val_accuracy: 0.9745 - val_loss: 0.0948
Epoch 6/30




422/422 - 1s - 3ms/step - accuracy: 0.9875 - loss: 0.0456 - val_accuracy: 0.9743 - val_loss: 0.0932
Epoch 7/30
422/422 - 1s - 3ms/step - accuracy: 0.9902 - loss: 0.0356 - val_accuracy: 0.9747 - val_loss: 0.0934
Epoch 8/30
422/422 - 1s - 3ms/step - accuracy: 0.9929 - loss: 0.0282 - val_accuracy: 0.9732 - val_loss: 0.1028
Epoch 9/30
422/422 - 1s - 3ms/step - accuracy: 0.9943 - loss: 0.0234 - val_accuracy: 0.9720 - val_loss: 0.1113
Epoch 10/30
422/422 - 1s - 3ms/step - accuracy: 0.9948 - loss: 0.0193 - val_accuracy: 0.9740 - val_loss: 0.1104
Epoch 11/30
422/422 - 1s - 3ms/step - accuracy: 0.9952 - loss: 0.0165 - val_accuracy: 0.9750 - val_loss: 0.1085


In [None]:
# Score the trained model on the test split without progress output.
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_metrics = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_acc = test_metrics

print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_acc:.4f}")

Test loss: 0.0935, Test accuracy: 0.9730
