In [37]:
import numpy as np
from keras.datasets import mnist

## Loading Data

In [38]:
# Load MNIST through keras and unpack the two (inputs, labels) pairs it
# returns into the training split and the held-out test split.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

## Reshaping and normalizing the data

In [39]:
# Flatten every sample into a single feature row, then divide by 255.0 to
# rescale the values. Both splits go through the same transformation.
X_train, X_test = (
    images.reshape(len(images), -1) / 255.0
    for images in (X_train, X_test)
)

## Applying one hot encoding

In [40]:
# Row k of the 10x10 identity matrix is the one-hot vector for class k, so
# indexing the identity with the integer labels one-hot encodes them.
y_train_new, y_test_new = (
    np.eye(10)[labels] for labels in (y_train, y_test)
)

## Activation functions

In [41]:
def relu(z):
    """Rectified linear unit: element-wise ``max(0, z)``.

    Negative entries of ``z`` become 0; all other entries pass through.
    """
    rectified = np.maximum(0, z)
    return rectified

def relu_derivative(z):
    """Return the ReLU gradient mask for ``z``.

    The result is True where ``z`` is strictly positive and False elsewhere
    (including at exactly 0). Multiplying a gradient by this boolean mask
    zeroes the entries where the ReLU was inactive.
    """
    positive_mask = 0 < z
    return positive_mask

def softmax(z, axis=1):
    """Numerically stable softmax along ``axis``.

    Parameters
    ----------
    z : array_like
        Scores (logits) to normalise.
    axis : int, default 1
        Axis over which the probabilities sum to 1. The default keeps the
        row-wise behaviour used for 2-D batches; pass ``axis=0`` or
        ``axis=-1`` to normalise a single 1-D vector of scores.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``z`` whose entries are non-negative
        and sum to 1 along ``axis``.
    """
    # Subtracting the per-slice maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    shifted = z - np.max(z, axis=axis, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=axis, keepdims=True)

## Loss function

In [42]:
def cross_entropy(y_true, y_pred):
    """Mean categorical cross-entropy over the rows of a batch.

    ``y_true`` and ``y_pred`` are multiplied element-wise, summed along
    axis 1 and averaged. A small constant (1e-9) is added to ``y_pred``
    before the logarithm so a predicted value of exactly 0 does not
    produce ``-inf``.
    """
    log_probs = np.log(y_pred + 1e-9)
    per_sample = np.sum(y_true * log_probs, axis=1)
    return -np.mean(per_sample)

## Weights initialization

In [43]:
# Fix the global NumPy seed so the random initial weights are reproducible.
np.random.seed(42)

# Layer widths: input features, hidden units, output classes.
input_size, hidden_size, output_size = 784, 128, 10

# Hidden layer: small Gaussian weights (scaled by 0.01) and zero biases.
W1 = 0.01 * np.random.randn(input_size, hidden_size)
b1 = np.zeros(shape=(1, hidden_size))

# Output layer: same initialisation scheme. W2 is drawn after W1, so the
# random stream is consumed in the same order on every run.
W2 = 0.01 * np.random.randn(hidden_size, output_size)
b2 = np.zeros(shape=(1, output_size))

## Neural network training

In [44]:
# Hyperparameters for plain mini-batch gradient descent.
learning_rate = 0.1
epochs = 10
batch_size = 64

for epoch in range(epochs):
    # Walk through the training set in consecutive slices of `batch_size` rows.
    for i in range(0, X_train.shape[0], batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train_new[i:i+batch_size]

        # The final slice is shorter than `batch_size` whenever the number of
        # training rows is not a multiple of it. Gradients are averaged over
        # the rows actually present so that last step is not under-scaled
        # and dW*/db* use the same normalisation.
        n_samples = X_batch.shape[0]

        # Forward direction
        z1 = X_batch @ W1 + b1
        a1 = relu(z1)

        z2 = a1 @ W2 + b2
        y_pred = softmax(z2)

        # Backpropagation
        # For softmax followed by cross-entropy, the gradient with respect to
        # the logits reduces to (prediction - one-hot target).
        dz2 = y_pred - y_batch
        dW2 = a1.T @ dz2 / n_samples
        db2 = np.mean(dz2, axis=0, keepdims=True)

        # Push the error back through W2, then gate it with the ReLU mask.
        da1 = dz2 @ W2.T
        dz1 = da1 * relu_derivative(z1)
        dW1 = X_batch.T @ dz1 / n_samples
        db1 = np.mean(dz1, axis=0, keepdims=True)

        # Updating weights
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1

    # Report the loss on the full training set after each epoch.
    train_loss = cross_entropy(y_train_new, softmax(relu(X_train @ W1 + b1) @ W2 + b2))
    print(f"Epoch {epoch+1}/{epochs} | Loss: {train_loss:.4f}")


Epoch 1/10 | Loss: 0.2921
Epoch 2/10 | Loss: 0.2096
Epoch 3/10 | Loss: 0.1629
Epoch 4/10 | Loss: 0.1332
Epoch 5/10 | Loss: 0.1121
Epoch 6/10 | Loss: 0.0965
Epoch 7/10 | Loss: 0.0845
Epoch 8/10 | Loss: 0.0754
Epoch 9/10 | Loss: 0.0678
Epoch 10/10 | Loss: 0.0618


## Accuracy calculation

In [45]:
def accuracy(X, y):
    """Fraction of rows in ``X`` whose predicted class equals ``y``.

    Runs the forward pass with the module-level parameters ``W1``, ``b1``,
    ``W2`` and ``b2``, takes the arg-max of the softmax output along axis 1
    as the predicted class, and returns the mean of the element-wise
    comparison with ``y``.
    """
    hidden = relu(X @ W1 + b1)
    probabilities = softmax(hidden @ W2 + b2)
    predicted = np.argmax(probabilities, axis=1)
    return np.mean(predicted == y)

# Compare predictions against the integer labels (not the one-hot targets)
# on the training split and on the held-out test split.
print("Train Accuracy:", accuracy(X=X_train, y=y_train))
print("Test Accuracy:", accuracy(X=X_test, y=y_test))


Train Accuracy: 0.9822833333333333
Test Accuracy: 0.9734
