### Manual Training Using `tf.GradientTape()`

In [15]:
import tensorflow as tf
import numpy as np

In [16]:
# Load MNIST data
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Convert both image arrays to float32 and divide by 255 so the network
# receives small floating-point inputs instead of raw pixel intensities.
x_train, x_test = [images.astype("float32") / 255.0 for images in (x_train, x_test)]

In [17]:
# Flatten every image into a single row of 784 features
x_train, x_test = [split.reshape(-1, 784) for split in (x_train, x_test)]

# One-hot encode the integer labels across 10 classes
y_train_onehot, y_test_onehot = [tf.one_hot(labels, 10) for labels in (y_train, y_test)]

In [18]:
# Hyperparameters for the manual GradientTape training loop
batch_size = 64         # examples per gradient step
epochs = 5              # full passes over the training set
learning_rate = 1e-3    # step size of the plain gradient-descent update

In [19]:
# Weights & Biases Initialization
# Seed Python, NumPy and TensorFlow before the first random draw so that a
# fresh "Restart Kernel -> Run All" starts from the same initial weights.
SEED = 42
tf.keras.utils.set_random_seed(SEED)


def init_dense(fan_in, fan_out, stddev=0.1):
    """Create the trainable parameters of one fully connected layer.

    Args:
        fan_in: number of input features (rows of the weight matrix).
        fan_out: number of output units (columns of the weight matrix).
        stddev: standard deviation of the normal distribution the weights
            are sampled from.

    Returns:
        A `(weights, bias)` pair of `tf.Variable`s: `weights` has shape
        `[fan_in, fan_out]` and is randomly initialised, `bias` has shape
        `[fan_out]` and starts at zero.
    """
    weights = tf.Variable(tf.random.normal([fan_in, fan_out], stddev=stddev))
    bias = tf.Variable(tf.zeros([fan_out]))
    return weights, bias


# 784 -> 256 -> 128 -> 64 -> 10 multilayer perceptron
W1, b1 = init_dense(784, 256)
W2, b2 = init_dense(256, 128)
W3, b3 = init_dense(128, 64)
W4, b4 = init_dense(64, 10)


In [20]:
# Create training dataset
# The shuffle buffer spans the whole training set: with a buffer smaller than
# the dataset, examples are only mixed within a sliding window, so the first
# batches of every epoch would come only from the first slice of the data.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train_onehot))
    .shuffle(buffer_size=len(x_train))
    .batch(batch_size)
)

# Every trainable tensor, listed once so that the gradient list and the
# update loop below can never get out of sync.
params = [W1, b1, W2, b2, W3, b3, W4, b4]

# Training loop using GradientTape
for epoch in range(epochs):
    epoch_loss = 0.0
    for step, (x_batch, y_batch) in enumerate(train_ds):
        with tf.GradientTape() as tape:
            # Forward pass: three ReLU hidden layers, then a linear output layer
            h1 = tf.nn.relu(tf.matmul(x_batch, W1) + b1)
            h2 = tf.nn.relu(tf.matmul(h1, W2) + b2)
            h3 = tf.nn.relu(tf.matmul(h2, W3) + b3)
            logits = tf.matmul(h3, W4) + b4

            # Loss: softmax cross-entropy averaged over the batch
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=y_batch, logits=logits)
            )

        # Gradients & parameter update (plain gradient descent step)
        gradients = tape.gradient(loss, params)
        for var, grad in zip(params, gradients):
            var.assign_sub(learning_rate * grad)

        epoch_loss += loss.numpy()

    # `step` is the zero-based index of the last batch, so step + 1 batches ran
    print(f"Epoch {epoch + 1}: Loss = {epoch_loss / (step + 1):.4f}")

Epoch 1: Loss = 2.1826
Epoch 2: Loss = 1.8023
Epoch 3: Loss = 1.2842
Epoch 4: Loss = 0.9104
Epoch 5: Loss = 0.7156


In [21]:
# Evaluation
def evaluate(x, y):
    """Compute classification accuracy of the hand-built network.

    Runs the same forward pass as the training loop using the module-level
    parameters `W1..W4` / `b1..b4`.

    Args:
        x: batch of flattened inputs with 784 features per row.
        y: one-hot labels, one row per input in `x`.

    Returns:
        Scalar float32 tensor: the fraction of rows whose highest-scoring
        class equals the class marked in `y`.
    """
    activations = x
    for weights, bias in ((W1, b1), (W2, b2), (W3, b3)):
        activations = tf.nn.relu(tf.matmul(activations, weights) + bias)
    logits = tf.matmul(activations, W4) + b4

    hits = tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1))
    return tf.reduce_mean(tf.cast(hits, tf.float32))

test_acc = evaluate(x_test, y_test_onehot)
print(f"Test Accuracy: {test_acc.numpy() * 100:.2f}%")

Test Accuracy: 82.32%


### Keras `model.fit()` Training


In [22]:
from tensorflow.keras import layers, models, Input

# Define input: one flattened image (784 features) per example
inputs = Input(shape=(784,))

# Hidden layers: stack the ReLU Dense layers in order 256 -> 128 -> 64
x = inputs
for units in (256, 128, 64):
    x = layers.Dense(units, activation='relu')(x)

# Output layer: 10 raw scores (no activation, so the model emits logits)
outputs = layers.Dense(10)(x)

model = models.Model(inputs=inputs, outputs=outputs)
model.summary(show_trainable=True)

In [23]:

# The model outputs raw logits, so the loss must apply softmax itself
# (from_logits=True).
# Note: this model is optimised with Adam, whereas the manual loop applies a
# plain `var -= learning_rate * grad` update, so the accuracy gap between the
# two sections also reflects the optimiser, not only the training API.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Reuse the shared hyperparameters instead of repeating the literals, so both
# training sections always run for the same number of epochs and batch size.
model.fit(x_train, y_train_onehot, epochs=epochs, batch_size=batch_size)

loss, acc = model.evaluate(x_test, y_test_onehot)
print(f"Test Accuracy: {acc * 100:.2f}%")


Epoch 1/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.8689 - loss: 0.4294
Epoch 2/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - accuracy: 0.9703 - loss: 0.0955
Epoch 3/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - accuracy: 0.9798 - loss: 0.0642
Epoch 4/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - accuracy: 0.9853 - loss: 0.0456
Epoch 5/5
938/938 ━━━━━━━━━━━━━━━━━━━━ 5s 5ms/step - accuracy: 0.9889 - loss: 0.0350
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.9734 - loss: 0.0943
Test Accuracy: 97.86%
