In [12]:
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import ops

# Load MNIST dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize input data
x_train, x_test = x_train / 255.0, x_test / 255.0

# Reshape input data (flattening 28x28 images into 784-dimensional vectors)
x_train = x_train.reshape(-1, 784).astype(np.float32)
x_test = x_test.reshape(-1, 784).astype(np.float32)

# Convert labels to one-hot encoding
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

# Define network parameters
input_size = 784
hidden1_size = 128
hidden2_size = 64
output_size = 10
learning_rate = 0.1
epochs = 50
batch_size = 100

# Initialize weights and biases
W1 = tf.Variable(tf.random.normal([input_size, hidden1_size], stddev=0.1))
b1 = tf.Variable(tf.zeros([hidden1_size]))
W2 = tf.Variable(tf.random.normal([hidden1_size, hidden2_size], stddev=0.1))
b2 = tf.Variable(tf.zeros([hidden2_size]))
W3 = tf.Variable(tf.random.normal([hidden2_size, output_size], stddev=0.1))
b3 = tf.Variable(tf.zeros([output_size]))

def forward_pass(x):
    hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1)
    hidden2 = tf.nn.relu(tf.matmul(hidden1, W2) + b2)
    output = tf.matmul(hidden2, W3) + b3
    return output

def compute_loss(logits, labels):
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))

# Optimizer
optimizer = tf.optimizers.SGD(learning_rate)

def train_step(x_batch, y_batch):
    with tf.GradientTape() as tape:
        logits = forward_pass(x_batch)
        loss = compute_loss(logits, y_batch)
    gradients = tape.gradient(loss, [W1, b1, W2, b2, W3, b3])
    optimizer.apply_gradients(zip(gradients, [W1, b1, W2, b2, W3, b3]))
    return loss

# Training loop
num_batches = x_train.shape[0] // batch_size
for epoch in range(epochs):
    avg_loss = 0
    for i in range(num_batches):
        batch_x = x_train[i * batch_size:(i + 1) * batch_size]
        batch_y = y_train[i * batch_size:(i + 1) * batch_size]
        loss = train_step(batch_x, batch_y)
        avg_loss += loss / num_batches
    print(f"Epoch {epoch + 1}, Loss: {avg_loss:.4f}")

# Evaluate model
logits_test = forward_pass(x_test)
predictions = tf.argmax(logits_test, axis=1)
y_true = tf.argmax(y_test, axis=1)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, y_true), tf.float32))
print(f"Test Accuracy: {accuracy.numpy() * 100:.2f}%")

Epoch 1, Loss: 0.4531
Epoch 2, Loss: 0.2127
Epoch 3, Loss: 0.1583
Epoch 4, Loss: 0.1273
Epoch 5, Loss: 0.1064
Epoch 6, Loss: 0.0909
Epoch 7, Loss: 0.0790
Epoch 8, Loss: 0.0694
Epoch 9, Loss: 0.0614
Epoch 10, Loss: 0.0547
Epoch 11, Loss: 0.0488
Epoch 12, Loss: 0.0435
Epoch 13, Loss: 0.0388
Epoch 14, Loss: 0.0347
Epoch 15, Loss: 0.0309
Epoch 16, Loss: 0.0275
Epoch 17, Loss: 0.0245
Epoch 18, Loss: 0.0218
Epoch 19, Loss: 0.0194
Epoch 20, Loss: 0.0173
Epoch 21, Loss: 0.0154
Epoch 22, Loss: 0.0137
Epoch 23, Loss: 0.0122
Epoch 24, Loss: 0.0109
Epoch 25, Loss: 0.0098
Epoch 26, Loss: 0.0087
Epoch 27, Loss: 0.0078
Epoch 28, Loss: 0.0070
Epoch 29, Loss: 0.0063
Epoch 30, Loss: 0.0057
Epoch 31, Loss: 0.0052
Epoch 32, Loss: 0.0047
Epoch 33, Loss: 0.0043
Epoch 34, Loss: 0.0039
Epoch 35, Loss: 0.0036
Epoch 36, Loss: 0.0033
Epoch 37, Loss: 0.0031
Epoch 38, Loss: 0.0028
Epoch 39, Loss: 0.0026
Epoch 40, Loss: 0.0025
Epoch 41, Loss: 0.0023
Epoch 42, Loss: 0.0022
Epoch 43, Loss: 0.0020
Epoch 44, Loss: 0.00