In [2]:
import numpy as np
from tensorflow.keras.datasets import mnist

In [3]:
# Fetch the MNIST train/test splits through the Keras dataset helper
# (the helper downloads the archive when it is not already cached).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Scale the raw values by 1/255, then reshape every sample into a
# 784-element row, one split at a time.
X_train = (X_train / 255.0).reshape(-1, 784)
X_test = (X_test / 255.0).reshape(-1, 784)

# One-hot encoding
def one_hot(y, num_classes=10):
    """Convert integer class labels into one-hot encoded rows.

    Parameters
    ----------
    y : array-like of int
        Class indices. Any shape is accepted; the labels are flattened to
        1-D first so that a column vector of shape (n, 1) is encoded the
        same way as a flat vector of shape (n,).
    num_classes : int, default 10
        Number of columns in the encoding. Every label must be a valid
        column index; NumPy raises ``IndexError`` for labels that are too
        large.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(y.size, num_classes)`` filled with 0.0 except for
        a single 1.0 per row at the column given by that row's label.
    """
    # Flatten so the fancy-index assignment below pairs row i with label i
    # instead of broadcasting a 2-D label array against the row indices.
    y = np.asarray(y).ravel()
    onehot = np.zeros((y.size, num_classes))
    onehot[np.arange(y.size), y] = 1
    return onehot

# Replace the integer label vectors with their one-hot encoded matrices.
# Note: the names are rebound, so the integer labels are no longer available
# under y_train / y_test after this point.
y_train = one_hot(y_train)
y_test = one_hot(y_test)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 2s 0us/step


In [4]:
def init_params(seed=None):
    """Create the initial weights and biases of the 784-128-64-10 network.

    Each weight matrix is drawn from a standard normal distribution and
    scaled by ``sqrt(2 / fan_in)`` (He-style scaling), where ``fan_in`` is
    the number of rows of that matrix. All biases start at zero.

    Parameters
    ----------
    seed : int or None, default None
        When ``None`` the values are drawn from NumPy's global legacy random
        stream, exactly as a plain ``np.random.randn`` call would. When an
        integer is given, a private ``RandomState(seed)`` is used instead, so
        the result is reproducible and the global stream is left untouched.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2, W3, b3)`` with shapes (784, 128), (1, 128),
        (128, 64), (1, 64), (64, 10) and (1, 10).
    """
    # np.random (module) and RandomState (instance) both expose randn(),
    # so the drawing code below is the same for either source.
    rng = np.random if seed is None else np.random.RandomState(seed)

    W1 = rng.randn(784, 128) * np.sqrt(2. / 784)
    b1 = np.zeros((1, 128))

    W2 = rng.randn(128, 64) * np.sqrt(2. / 128)
    b2 = np.zeros((1, 64))

    W3 = rng.randn(64, 10) * np.sqrt(2. / 64)
    b3 = np.zeros((1, 10))

    return W1, b1, W2, b2, W3, b3


In [5]:
def relu(Z):
    """Element-wise rectifier: the larger of 0 and each entry of ``Z``."""
    rectified = np.maximum(0, Z)
    return rectified

def relu_derivative(Z):
    """Return the mask ``Z > 0``: True where the ReLU slope is 1, False where it is 0.

    The result is boolean; multiplying it with a float array treats
    True/False as 1/0.
    """
    is_positive = Z > 0
    return is_positive

def softmax(Z):
    """Row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating so that the
    largest exponent is exp(0) = 1, which keeps large scores from
    overflowing. Every returned row sums to 1.
    """
    row_peak = np.max(Z, axis=1, keepdims=True)
    exps = np.exp(Z - row_peak)
    normalizer = np.sum(exps, axis=1, keepdims=True)
    return exps / normalizer


In [6]:
def forward(X, W1, b1, W2, b2, W3, b3):
    """Run a batch through the three-layer network.

    Two hidden layers apply an affine map followed by ``relu``; the output
    layer applies an affine map followed by ``softmax``.

    Returns
    -------
    tuple
        ``(Z1, A1, Z2, A2, Z3, A3)``: the pre-activation and activation of
        each layer in order, with ``A3`` being the softmax output.
    """
    # Hidden layer 1
    pre1 = X @ W1 + b1
    act1 = relu(pre1)

    # Hidden layer 2
    pre2 = act1 @ W2 + b2
    act2 = relu(pre2)

    # Output layer
    logits = act2 @ W3 + b3
    probs = softmax(logits)

    return pre1, act1, pre2, act2, logits, probs


In [7]:
def compute_loss(y_true, y_pred):
    """Mean cross-entropy between target rows and predicted probabilities.

    A constant 1e-8 is added to ``y_pred`` before the logarithm so that a
    predicted probability of exactly 0 does not produce ``log(0)``. The sum
    over all entries is divided by the number of rows of ``y_true``.
    """
    n_rows = y_true.shape[0]
    log_probs = np.log(y_pred + 1e-8)
    total = np.sum(y_true * log_probs)
    return -total / n_rows


In [8]:
def backward(X, y, Z1, A1, Z2, A2, A3, W2, W3):
    """Back-propagate through the three layers and return batch-averaged gradients.

    Parameters
    ----------
    X, y : batch inputs and their target rows.
    Z1, A1, Z2, A2, A3 : intermediate values produced by the forward pass.
    W2, W3 : weight matrices of the second and third layers, needed to push
        the error signal back to the previous layer.

    Returns
    -------
    tuple
        ``(dW1, db1, dW2, db2, dW3, db3)``, each divided by the number of
        rows in ``X``.
    """
    batch = X.shape[0]

    # Output layer: the error signal is prediction minus target.
    delta3 = A3 - y
    grad_W3 = A2.T @ delta3 / batch
    grad_b3 = np.sum(delta3, axis=0, keepdims=True) / batch

    # Second hidden layer: propagate through W3, then gate by the ReLU mask.
    delta2 = (delta3 @ W3.T) * relu_derivative(Z2)
    grad_W2 = A1.T @ delta2 / batch
    grad_b2 = np.sum(delta2, axis=0, keepdims=True) / batch

    # First hidden layer: same pattern, one layer further back.
    delta1 = (delta2 @ W2.T) * relu_derivative(Z1)
    grad_W1 = X.T @ delta1 / batch
    grad_b1 = np.sum(delta1, axis=0, keepdims=True) / batch

    return grad_W1, grad_b1, grad_W2, grad_b2, grad_W3, grad_b3


In [9]:
def update(params, grads, lr):
    """Apply one gradient-descent step to every entry of ``params``.

    Each ``params[k]`` is decremented by ``lr * grads[k]`` using ``-=``, so
    NumPy arrays in the list are modified in place. The same list object
    that was passed in is returned.
    """
    n_entries = len(params)
    for idx in range(n_entries):
        step = lr * grads[idx]
        params[idx] -= step
    return params


In [12]:
def train(X, y, epochs=20, lr=0.01, batch_size=64):
    """Train the network with mini-batch gradient descent.

    Parameters
    ----------
    X, y : training inputs and their target rows (indexed along axis 0).
    epochs : number of full passes over the data.
    lr : step size handed to ``update``.
    batch_size : number of rows per mini-batch; the final batch of an epoch
        may be smaller when the row count is not a multiple of it.

    Returns
    -------
    tuple
        The trained ``(W1, b1, W2, b2, W3, b3)``.

    After every epoch the loss over the whole of ``X`` is printed.
    """
    # One list holds all six parameters for the entire run.
    weights = list(init_params())
    n_samples = X.shape[0]

    for epoch in range(epochs):

        # Visit the rows in a fresh random order each epoch.
        order = np.random.permutation(n_samples)
        X_epoch, y_epoch = X[order], y[order]

        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            X_batch = X_epoch[start:stop]
            y_batch = y_epoch[start:stop]

            Z1, A1, Z2, A2, _, A3 = forward(X_batch, *weights)

            # backward() only needs the second and third weight matrices,
            # which sit at positions 2 and 4 of the parameter list.
            grads = backward(
                X_batch, y_batch,
                Z1, A1, Z2, A2, A3,
                weights[2], weights[4],
            )

            weights = update(weights, grads, lr)

        # Loss over the complete dataset with the current parameters.
        *_, A3_full = forward(X, *weights)
        loss = compute_loss(y, A3_full)

        print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

    W1, b1, W2, b2, W3, b3 = weights
    return W1, b1, W2, b2, W3, b3


In [13]:
# Train on the training split using train()'s default hyper-parameters
# (epochs=20, lr=0.01, batch_size=64) and keep the learned parameters as
# notebook-level variables for the cells below.
W1, b1, W2, b2, W3, b3 = train(X_train, y_train)

Epoch 1, Loss: 0.3914
Epoch 2, Loss: 0.3027
Epoch 3, Loss: 0.2647
Epoch 4, Loss: 0.2400
Epoch 5, Loss: 0.2161
Epoch 6, Loss: 0.2011
Epoch 7, Loss: 0.1856
Epoch 8, Loss: 0.1753
Epoch 9, Loss: 0.1635
Epoch 10, Loss: 0.1547
Epoch 11, Loss: 0.1472
Epoch 12, Loss: 0.1402
Epoch 13, Loss: 0.1317
Epoch 14, Loss: 0.1252
Epoch 15, Loss: 0.1202
Epoch 16, Loss: 0.1151
Epoch 17, Loss: 0.1126
Epoch 18, Loss: 0.1068
Epoch 19, Loss: 0.1012
Epoch 20, Loss: 0.1005


In [14]:
def accuracy(X, y, params=None):
    """Fraction of samples whose highest-probability class matches the label.

    Parameters
    ----------
    X : inputs to classify, one sample per row.
    y : one-hot target rows; the label of a row is the index of its maximum.
    params : sequence or None, default None
        The six network parameters ``(W1, b1, W2, b2, W3, b3)`` to evaluate.
        When ``None``, the notebook-level variables of those names are used,
        which keeps existing ``accuracy(X, y)`` calls working. Passing the
        parameters explicitly avoids depending on whichever values happen
        to be in the kernel at call time.

    Returns
    -------
    float
        Mean of the element-wise comparison between predicted and true
        class indices.
    """
    if params is None:
        # Backward-compatible fallback to the trained weights in the
        # notebook namespace.
        params = (W1, b1, W2, b2, W3, b3)

    # Only the final softmax output of the forward pass is needed here.
    *_, A3 = forward(X, *params)
    predictions = np.argmax(A3, axis=1)
    labels = np.argmax(y, axis=1)
    return np.mean(predictions == labels)

# Report the share of test samples whose arg-max prediction equals the label.
print("Test Accuracy:", accuracy(X_test, y_test))


Test Accuracy: 0.9652


In [15]:
# Write all six trained parameters into one .npz archive; each array is
# stored under the keyword name it is passed with.
np.savez(
    "model_weights.npz",
    **{"W1": W1, "b1": b1, "W2": W2, "b2": b2, "W3": W3, "b3": b3},
)

print("Model saved successfully.")

Model saved successfully.
