<a href="https://colab.research.google.com/github/Meena-2826/Handwritten-Digit-Recognition/blob/main/Handwritten_Digit_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

# Load the MNIST dataset as plain NumPy arrays. fetch_openml may hand back
# pandas objects depending on the scikit-learn version/defaults; asking for
# as_frame=False and normalizing with np.asarray keeps the rest of the script
# independent of the container type and avoids re-converting a DataFrame to
# an array inside every mini-batch matrix product.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = np.asarray(mnist.data, dtype=np.float64)
y = np.asarray(mnist.target).astype(int)  # cast labels to integers, as before

# Normalize the input data (divide by the 8-bit maximum, 255)
X = X / 255.0

# One-hot encode the labels; the encoder expects a 2-D column of labels
encoder = OneHotEncoder(sparse_output=False)  # sparse_output needs scikit-learn >= 1.2
y_onehot = encoder.fit_transform(y.reshape(-1, 1))

# Split into training and testing sets (80% / 20%, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

# Layer widths of the fully connected network: input -> hidden -> output
input_size, hidden_size, output_size = 784, 128, 10  # 28x28 pixels, hidden units, digits 0-9

# He-style initialization: standard-normal draws scaled by sqrt(2 / fan_in).
# (Xavier/Glorot would use a different scale; the factor used here is the one
# usually paired with ReLU activations.)
# The seed is fixed and W1 is drawn before W2 so the weights are reproducible.
np.random.seed(42)
W1 = np.sqrt(2 / input_size) * np.random.randn(input_size, hidden_size)
b1 = np.zeros(shape=(1, hidden_size))
W2 = np.sqrt(2 / hidden_size) * np.random.randn(hidden_size, output_size)
b2 = np.zeros(shape=(1, output_size))

# Activation function: ReLU
def relu(x):
    """Return the element-wise rectified linear unit, max(0, x)."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Return the ReLU gradient mask: 1.0 where x > 0, 0.0 elsewhere.

    The value at exactly x == 0 is taken to be 0.
    """
    is_positive = x > 0
    return is_positive.astype(float)

# Activation function: Softmax
def softmax(x):
    """Return the row-wise softmax of a 2-D array of scores.

    Each row has its maximum subtracted before exponentiation so that large
    scores do not overflow; this shift does not change the result.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    unnormalized = np.exp(x - row_max)
    normalizer = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / normalizer

# Forward pass
def forward_pass(X):
    """Propagate a batch through the network and return class probabilities.

    Reads the module-level parameters W1, b1, W2, b2. The intermediate values
    are stored in the globals Z1, A1, Z2, A2, which backward_pass reads, so a
    call to backward_pass always refers to the most recent forward_pass.

    X is expected to have one sample per row.
    """
    global Z1, A1, Z2, A2
    hidden_pre = np.dot(X, W1) + b1      # hidden-layer pre-activation
    hidden_act = relu(hidden_pre)        # hidden-layer activation
    logits = np.dot(hidden_act, W2) + b2
    probabilities = softmax(logits)

    # Publish the cache used by the backward pass
    Z1, A1, Z2, A2 = hidden_pre, hidden_act, logits, probabilities
    return probabilities

# Loss function: Cross-entropy
def compute_loss(y_true, y_pred):
    """Return the mean cross-entropy between one-hot targets and predictions.

    The sum over all entries is divided by the number of rows in y_true.
    A small constant (1e-8) is added inside the logarithm so that a predicted
    probability of exactly zero does not produce -inf.
    """
    batch = y_true.shape[0]
    log_probs = np.log(y_pred + 1e-8)
    total = np.sum(y_true * log_probs)
    return -total / batch

# Backward pass
def backward_pass(X, y_true, y_pred):
    """Apply one gradient-descent update to W1, b1, W2, b2 in place.

    Uses the activations cached in the globals A1 and Z1 by the most recent
    forward_pass call, so X must be the same batch that produced y_pred.
    The step size is read from the module-level learning_rate. Returns None.
    """
    global W1, b1, W2, b2
    batch = X.shape[0]

    # Output layer: with softmax + cross-entropy, the gradient with respect
    # to the logits is simply (prediction - target).
    delta_out = y_pred - y_true
    grad_W2 = np.dot(A1.T, delta_out) / batch
    grad_b2 = np.sum(delta_out, axis=0, keepdims=True) / batch

    # Hidden layer: push the error back through W2 (still un-updated here)
    # and mask it with the ReLU derivative.
    delta_hidden = np.dot(delta_out, W2.T) * relu_derivative(Z1)
    grad_W1 = np.dot(X.T, delta_hidden) / batch
    grad_b1 = np.sum(delta_hidden, axis=0, keepdims=True) / batch

    # All gradients are computed above, so the update order is irrelevant.
    W2 -= learning_rate * grad_W2
    b2 -= learning_rate * grad_b2
    W1 -= learning_rate * grad_W1
    b1 -= learning_rate * grad_b1

# Training the model with mini-batch gradient descent
epochs = 50  # Increased epochs
batch_size = 64
learning_rate = 0.01  # Reduced learning rate; read as a global by backward_pass
log_every = 5  # Report the training loss once every this many epochs

n_train = X_train.shape[0]
for epoch in range(epochs):
    # Sweep the training set in consecutive mini-batches (fixed order, no
    # shuffling); the last batch may be smaller than batch_size.
    for i in range(0, n_train, batch_size):
        X_batch = X_train[i:i + batch_size]
        y_batch = y_train[i:i + batch_size]

        # forward_pass caches the activations that backward_pass consumes,
        # so the two calls must stay paired on the same batch.
        y_pred = forward_pass(X_batch)
        backward_pass(X_batch, y_batch, y_pred)

    # A forward pass over the whole training set is expensive and does not
    # influence training, so only evaluate the loss on the epochs that are
    # actually reported instead of after every epoch.
    if (epoch + 1) % log_every == 0:
        y_pred_train = forward_pass(X_train)
        loss = compute_loss(y_train, y_pred_train)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

# Evaluate the model on the held-out test split
y_test_pred = forward_pass(X_test)

# Collapse one-hot targets and predicted probability rows back to digit
# labels by taking the index of the largest entry in each row.
y_true_labels = np.argmax(y_test, axis=1)
y_test_labels = np.argmax(y_test_pred, axis=1)

# Fraction of test samples whose predicted digit matches the true digit
accuracy = accuracy_score(y_true_labels, y_test_labels)

print(f"Test Accuracy: {accuracy * 100:.2f}%")


Epoch 5/50, Loss: 0.2875
Epoch 10/50, Loss: 0.2264
Epoch 15/50, Loss: 0.1885
Epoch 20/50, Loss: 0.1612
Epoch 25/50, Loss: 0.1407
Epoch 30/50, Loss: 0.1248
Epoch 35/50, Loss: 0.1120
Epoch 40/50, Loss: 0.1015
Epoch 45/50, Loss: 0.0928
Epoch 50/50, Loss: 0.0855
Test Accuracy: 96.53%
