In [1]:
from tensorflow.keras.datasets import mnist
import numpy as np

In [2]:
# Load MNIST through Keras. The result is unpacked as a (train, test) pair,
# each of which is an (inputs, labels) pair.
# NOTE(review): presumably 28x28 integer-valued images; the later /255 scaling
# and reshape to 784 columns rely on that — confirm against the Keras docs.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [None]:
# Scale pixel intensities by 1/255 (assumes the raw values span 0-255 —
# TODO confirm against the dataset documentation).
# The integer-dtype guard keeps this cell idempotent: the division produces a
# float array, so re-running the cell will not divide by 255 a second time.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train / 255.0
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test / 255.0

In [None]:
# Flatten every sample into one row of 784 features; the -1 lets NumPy infer
# the number of rows from the array's total size.
X_train = X_train.reshape((-1, 784))
X_test = X_test.reshape((-1, 784))

In [None]:
def one_hot(y, num_classes=10):
    """Convert integer class labels to a one-hot encoded float matrix.

    Parameters
    ----------
    y : array-like of int
        Class labels. Any shape is accepted (e.g. a list, ``(n,)`` or
        ``(n, 1)``); the labels are flattened before encoding.
    num_classes : int, default 10
        Number of columns in the result; labels must lie in
        ``[0, num_classes)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(y.size, num_classes)`` with a single 1 per row.

    Raises
    ------
    IndexError
        If any label falls outside ``[0, num_classes)``. Negative labels are
        rejected explicitly because NumPy indexing would otherwise wrap them
        around silently and mark the wrong class.
    """
    labels = np.asarray(y).ravel()
    onehot = np.zeros((labels.size, num_classes))
    if labels.size == 0:
        # Nothing to encode; also avoids indexing with an empty float array,
        # which is what np.asarray([]) produces.
        return onehot

    if labels.min() < 0 or labels.max() >= num_classes:
        raise IndexError(
            f"labels must be in [0, {num_classes}); "
            f"got values in [{labels.min()}, {labels.max()}]"
        )

    # Fancy indexing: row i gets a 1 in column labels[i].
    onehot[np.arange(labels.size), labels] = 1
    return onehot


In [None]:
# One-hot encode the label vectors.
# The ndim guard keeps this cell idempotent: once the labels are a 2-D one-hot
# matrix, re-running the cell leaves them alone instead of passing the matrix
# back into one_hot().
if y_train.ndim == 1:
    y_train = one_hot(y_train)
if y_test.ndim == 1:
    y_test = one_hot(y_test)

In [None]:
# Layer widths: 784 input features, 128 hidden units, 10 output classes.
input_size, hidden_size, output_size = 784, 128, 10

# Seed NumPy's global RNG so the random initial weights are reproducible.
np.random.seed(42)

# Input -> hidden layer: small Gaussian weights (std 0.01) and zero biases.
W1 = 0.01 * np.random.randn(input_size, hidden_size)
b1 = np.zeros(shape=(1, hidden_size))

# Hidden -> output layer, initialised the same way. W2 is drawn after W1, so
# the order of these statements determines the values.
W2 = 0.01 * np.random.randn(hidden_size, output_size)
b2 = np.zeros(shape=(1, output_size))

In [None]:
def leaky_relu(Z, alpha=0.01):
    """Leaky ReLU activation, applied element-wise.

    Parameters
    ----------
    Z : array-like
        Pre-activation values.
    alpha : float, default 0.01
        Slope applied to non-positive inputs. Use the same value when
        computing the derivative for backpropagation.

    Returns
    -------
    numpy.ndarray
        ``Z`` where ``Z > 0`` and ``alpha * Z`` elsewhere.
    """
    Z = np.asarray(Z)  # lets plain lists/scalars be compared element-wise
    return np.where(Z > 0, Z, alpha * Z)

def leaky_relu_derivative(Z, alpha=0.01):
    """Element-wise derivative of the leaky ReLU with respect to its input.

    Parameters
    ----------
    Z : array-like
        Pre-activation values at which to evaluate the derivative.
    alpha : float, default 0.01
        Slope of the activation for non-positive inputs; it must match the
        slope used in the forward activation.

    Returns
    -------
    numpy.ndarray
        ``1.0`` where ``Z > 0`` and ``alpha`` elsewhere (including ``Z == 0``).
    """
    Z = np.asarray(Z)  # lets plain lists/scalars be compared element-wise
    return np.where(Z > 0, 1.0, alpha)
def softmax(Z):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating so that large
    scores do not overflow; this shift does not change the result.

    Returns an array of the same shape whose rows each sum to 1.
    """
    row_max = np.max(Z, axis=1, keepdims=True)
    exponentials = np.exp(Z - row_max)
    normalizer = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / normalizer

In [None]:
def compute_loss(y_true, y_pred):
    """Mean categorical cross-entropy over a batch.

    Parameters
    ----------
    y_true : numpy.ndarray
        Target matrix; its first dimension is taken as the batch size.
    y_pred : numpy.ndarray
        Predicted probabilities, broadcast-compatible with ``y_true``.

    Returns
    -------
    float
        ``-sum(y_true * log(y_pred + 1e-9))`` divided by the batch size.
    """
    epsilon = 1e-9  # keeps log() finite when a predicted probability is 0
    n_samples = y_true.shape[0]
    weighted_log_probs = y_true * np.log(y_pred + epsilon)
    return -np.sum(weighted_log_probs) / n_samples

In [None]:
def forward(X):
    """Run one forward pass of the two-layer network.

    Reads the module-level parameters ``W1``, ``b1``, ``W2`` and ``b2``.

    Returns
    -------
    tuple
        ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden leaky-ReLU
        activation, output pre-activation, and softmax output.
    """
    hidden_pre = X @ W1 + b1
    hidden_act = leaky_relu(hidden_pre)
    output_pre = hidden_act @ W2 + b2
    output_probs = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_probs

In [None]:
def backward(X, y, Z1, A1, A2, lr=0.1):
    """Backpropagate one batch and apply a gradient-descent step.

    Updates the module-level parameters ``W1``, ``b1``, ``W2`` and ``b2``
    in place; nothing is returned.

    Parameters
    ----------
    X : numpy.ndarray
        Input batch; its first dimension is the batch size.
    y : numpy.ndarray
        Targets with the same shape as ``A2``.
    Z1 : numpy.ndarray
        Hidden-layer pre-activations from the forward pass.
    A1 : numpy.ndarray
        Hidden-layer activations from the forward pass.
    A2 : numpy.ndarray
        Network output (softmax probabilities) from the forward pass.
    lr : float, default 0.1
        Learning rate for the gradient-descent update.
    """
    global W1, W2, b1, b2

    m = X.shape[0]

    # Output layer: for a softmax output trained with cross-entropy, the
    # gradient w.r.t. the output pre-activation reduces to (A2 - y).
    dZ2 = A2 - y
    dW2 = A1.T @ dZ2 / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    # Hidden layer: propagate through W2 (still the pre-update weights at
    # this point), then through the leaky-ReLU non-linearity.
    dA1 = dZ2 @ W2.T
    dZ1 = dA1 * leaky_relu_derivative(Z1)
    dW1 = X.T @ dZ1 / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    # Gradient-descent step; applied only after every gradient is computed.
    W2 -= lr * dW2
    b2 -= lr * db2
    W1 -= lr * dW1
    b1 -= lr * db1

In [None]:
def accuracy(X, y_true):
    """Fraction of samples whose predicted class matches the target class.

    Runs ``forward(X)`` and compares the arg-max of the network output with
    the arg-max of each row of ``y_true``.
    """
    _z1, _a1, _z2, probabilities = forward(X)
    predicted_classes = np.argmax(probabilities, axis=1)
    target_classes = np.argmax(y_true, axis=1)
    is_correct = predicted_classes == target_classes
    return np.mean(is_correct)

# Accuracy with the current parameters. When the notebook is run top to
# bottom this is the untrained baseline, since the training loop comes later.
baseline_train_acc = accuracy(X_train, y_train)
print("Train Accuracy:", baseline_train_acc)
baseline_test_acc = accuracy(X_test, y_test)
print("Test Accuracy:", baseline_test_acc)

In [None]:
epochs = 200

# Full-batch gradient descent: every epoch runs one forward and one backward
# pass over the entire training set.
for epoch in range(epochs):
    Z1, A1, Z2, A2 = forward(X_train)
    # Loss of the predictions made before this epoch's parameter update.
    loss = compute_loss(y_train, A2)
    backward(X_train, y_train, Z1, A1, A2)

    # Report every 10th epoch. The loss was previously computed but never
    # shown; the accuracies below are measured after this epoch's update.
    if epoch % 10 == 0:
        print("Epoch:", epoch)
        print("Loss:", loss)
        print("Train Acc:", accuracy(X_train, y_train))
        print("Test Acc:", accuracy(X_test, y_test))