In [13]:
import numpy as np
from tensorflow.keras.datasets import mnist

# Fetch the MNIST train/test splits (images together with their integer labels)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a row of length 28*28 and divide by 255.0 to rescale the pixels
x_train, x_test = (
    images.reshape(-1, 28 * 28) / 255.0 for images in (x_train, x_test)
)

def one_hot(y, num_classes=10):
    """Encode integer class labels as one-hot rows.

    Args:
        y: Integer label, or array of integer labels, used to index the
            rows of an identity matrix.
        num_classes: Width of the encoding (number of distinct classes).

    Returns:
        Float array of shape ``y.shape + (num_classes,)`` holding a single
        1.0 per encoded label.
    """
    identity = np.eye(num_classes)
    # Row k of the identity matrix is exactly the one-hot vector for class k.
    return identity[y]

# One-hot encode both label vectors (one_hot's default of 10 classes is used)
y_train_onehot, y_test_onehot = one_hot(y_train), one_hot(y_test)


# Layer widths: 784 inputs -> 512 -> 256 -> 10 outputs
input_size, output_size = 28 * 28, 10
hidden1_size, hidden2_size = 512, 256

# L2 regularisation strength
lambda_reg = 1e-3


# Fix the global NumPy seed so the initial weights are reproducible
np.random.seed(42)

# He-style initialisation: standard-normal draws scaled by sqrt(2 / fan_in).
# The matrices are drawn in the order W1, W2, W3 so the random stream is consumed
# in a fixed sequence.
W1 = np.sqrt(2. / input_size) * np.random.randn(input_size, hidden1_size)
W2 = np.sqrt(2. / hidden1_size) * np.random.randn(hidden1_size, hidden2_size)
W3 = np.sqrt(2. / hidden2_size) * np.random.randn(hidden2_size, output_size)

# Biases start at zero, one entry per unit of the layer they feed
b1, b2, b3 = (np.zeros(width) for width in (hidden1_size, hidden2_size, output_size))


def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        Array with the same shape as ``x`` where negative entries are
        replaced by zero.
    """
    rectified = np.maximum(0, x)
    return rectified

def softmax(x):
    """Row-wise softmax over axis 1.

    Args:
        x: Array of scores with at least two dimensions; normalisation
            runs along axis 1.

    Returns:
        Array of the same shape as ``x`` whose entries along axis 1 are
        non-negative and sum to 1.
    """
    # Subtracting the per-row maximum keeps exp() from overflowing without
    # changing the resulting ratios.
    row_max = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_max)
    normaliser = np.sum(exps, axis=1, keepdims=True)
    return exps / normaliser

# Forward propagation
def forward(x):
    """Run a batch through the three-layer network.

    Reads the module-level parameters W1, b1, W2, b2, W3 and b3.

    Args:
        x: Input batch, one sample per row.

    Returns:
        Tuple ``(z1, a1, z2, a2, z3, a3)``: each ``z`` is a layer's affine
        output, ``a1``/``a2`` are the ReLU activations of the hidden layers
        and ``a3`` is the softmax of ``z3``.
    """
    cache = []
    signal = x
    # Both hidden layers follow the same pattern: affine map, then ReLU.
    for weights, bias in ((W1, b1), (W2, b2)):
        pre_activation = np.dot(signal, weights) + bias
        signal = relu(pre_activation)
        cache.extend((pre_activation, signal))

    # Output layer: affine map followed by a row-wise softmax.
    z3 = np.dot(signal, W3) + b3
    return (*cache, z3, softmax(z3))


def compute_loss(y_true, a3):
    """Mean cross-entropy of the predictions plus an L2 weight penalty.

    Reads the module-level ``lambda_reg`` and weight matrices W1, W2, W3.

    Args:
        y_true: Target matrix with one row per sample; the true class of a
            row is taken to be its argmax (i.e. one-hot targets).
        a3: Predicted class probabilities, one row per sample.

    Returns:
        Scalar equal to the average negative log-likelihood plus
        ``lambda_reg / (2 * m)`` times the summed squared weights, where
        ``m`` is the number of rows in ``y_true``.
    """
    m = y_true.shape[0]
    true_class = np.argmax(y_true, axis=1)

    # Probability assigned to the correct class of every sample; the 1e-10
    # offset keeps log() away from zero.
    picked = a3[range(m), true_class]
    log_likelihood = -np.log(picked + 1e-10)
    data_loss = np.sum(log_likelihood) / m

    squared_weights = np.sum(W1**2) + np.sum(W2**2) + np.sum(W3**2)
    penalty = (lambda_reg/(2*m)) * squared_weights
    return data_loss + penalty

# Backward propagation
def backward(x, y_true, z1, a1, z2, a2, a3):
    """Compute gradients of the regularised loss for one batch.

    Reads the module-level ``lambda_reg`` and weight matrices W1, W2, W3.

    Args:
        x: Input batch, one sample per row.
        y_true: One-hot targets matching ``a3`` in shape.
        z1, a1: Pre-activation and ReLU output of hidden layer 1.
        z2, a2: Pre-activation and ReLU output of hidden layer 2.
        a3: Softmax output of the network.

    Returns:
        Tuple ``(dw1, db1, dw2, db2, dw3, db3)`` of gradients, ordered like
        the parameters W1, b1, W2, b2, W3, b3.
    """
    m = x.shape[0]
    # Coefficient of the L2 term added to every weight gradient.
    decay = lambda_reg / m

    # Output layer: softmax + cross-entropy collapses to (prediction - target),
    # averaged over the batch.
    delta3 = (a3 - y_true) / m
    dw3 = np.dot(a2.T, delta3) + decay * W3
    db3 = np.sum(delta3, axis=0)

    # Hidden layer 2: push the error back through W3, then gate it with the
    # ReLU derivative (1 where z2 > 0, else 0).
    delta2 = np.dot(delta3, W3.T) * (z2 > 0)
    dw2 = np.dot(a1.T, delta2) + decay * W2
    db2 = np.sum(delta2, axis=0)

    # Hidden layer 1: same pattern, one layer further down.
    delta1 = np.dot(delta2, W2.T) * (z1 > 0)
    dw1 = np.dot(x.T, delta1) + decay * W1
    db1 = np.sum(delta1, axis=0)

    return dw1, db1, dw2, db2, dw3, db3


# Optimisation settings: passes over the training set, mini-batch size, step size
epochs, batch_size = 50, 128
learning_rate = 1e-2


n_samples = x_train.shape[0]
for epoch in range(epochs):
    # Draw a fresh random ordering so the mini-batches differ from epoch to epoch
    permutation = np.random.permutation(n_samples)
    x_train_shuffled, y_train_shuffled = x_train[permutation], y_train_onehot[permutation]

    # Mini-batch gradient descent (the last batch may be smaller than batch_size)
    for i in range(0, n_samples, batch_size):
        batch = slice(i, i + batch_size)
        x_batch, y_batch = x_train_shuffled[batch], y_train_shuffled[batch]

        # Forward pass, then gradients for every parameter
        z1, a1, z2, a2, z3, a3 = forward(x_batch)
        dw1, db1, dw2, db2, dw3, db3 = backward(x_batch, y_batch, z1, a1, z2, a2, a3)

        # In-place update: `-=` mutates the module-level arrays that forward() and
        # backward() read, so no rebinding is needed.
        for param, grad in zip((W1, b1, W2, b2, W3, b3), (dw1, db1, dw2, db2, dw3, db3)):
            param -= learning_rate * grad

    # End-of-epoch metrics on the full training set
    a3_train = forward(x_train)[-1]
    train_loss = compute_loss(y_train_onehot, a3_train)
    train_preds = a3_train.argmax(axis=1)
    train_acc = (train_preds == y_train).mean()

    # ...and on the held-out test set
    a3_test = forward(x_test)[-1]
    test_preds = a3_test.argmax(axis=1)
    test_acc = (test_preds == y_test).mean()

    print(f"Epoch {epoch+1}/{epochs}")
    print(f"Train Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}\n")

# Final evaluation: score the current parameters on the test set
a3_test = forward(x_test)[-1]
test_preds = a3_test.argmax(axis=1)
final_acc = (test_preds == y_test).mean()
print(f"Final Test Accuracy: {final_acc:.4f}")

Epoch 1/50
Train Loss: 0.5070, Accuracy: 0.8701
Test Accuracy: 0.8789

Epoch 2/50
Train Loss: 0.3689, Accuracy: 0.8994
Test Accuracy: 0.9047

Epoch 3/50
Train Loss: 0.3182, Accuracy: 0.9116
Test Accuracy: 0.9146

Epoch 4/50
Train Loss: 0.2872, Accuracy: 0.9192
Test Accuracy: 0.9222

Epoch 5/50
Train Loss: 0.2666, Accuracy: 0.9247
Test Accuracy: 0.9271

Epoch 6/50
Train Loss: 0.2496, Accuracy: 0.9306
Test Accuracy: 0.9320

Epoch 7/50
Train Loss: 0.2355, Accuracy: 0.9340
Test Accuracy: 0.9345

Epoch 8/50
Train Loss: 0.2219, Accuracy: 0.9379
Test Accuracy: 0.9364

Epoch 9/50
Train Loss: 0.2107, Accuracy: 0.9415
Test Accuracy: 0.9413

Epoch 10/50
Train Loss: 0.2010, Accuracy: 0.9435
Test Accuracy: 0.9427

Epoch 11/50
Train Loss: 0.1922, Accuracy: 0.9458
Test Accuracy: 0.9444

Epoch 12/50
Train Loss: 0.1846, Accuracy: 0.9479
Test Accuracy: 0.9452

Epoch 13/50
Train Loss: 0.1764, Accuracy: 0.9504
Test Accuracy: 0.9473

Epoch 14/50
Train Loss: 0.1700, Accuracy: 0.9526
Test Accuracy: 0.9486

E

KeyboardInterrupt: 