Name: Aditya Acharya   Roll No. 23AI005

Handwritten Character Recognition (MNIST) with ANN

In [3]:
pip install python-mnist

Note: you may need to restart the kernel to use updated packages.


In [4]:
# MNIST ANN from Scratch (NumPy Implementation)

import struct
import numpy as np
import matplotlib.pyplot as plt

# Load MNIST dataset (IDX format)
def load_images(filename):
    with open(filename, 'rb') as f:  
        magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
        data = np.frombuffer(f.read(), dtype=np.uint8)
        return data.reshape(num, rows*cols).astype(np.float32) / 255.0

def load_labels(filename):
    with open(filename, 'rb') as f:  
        magic, num = struct.unpack(">II", f.read(8))
        labels = np.frombuffer(f.read(), dtype=np.uint8)
        return labels

def one_hot(labels, num_classes=10):
    return np.eye(num_classes)[labels]

# Activation Functions
def relu(x): return np.maximum(0,x)
def relu_deriv(x): return (x>0).astype(np.float32)

def softmax(x):
    x = x - np.max(x, axis=1, keepdims=True)
    exp = np.exp(x)
    return exp / np.sum(exp, axis=1, keepdims=True)

def cross_entropy(y, yhat):
    m = y.shape[0]
    return -np.sum(y*np.log(yhat+1e-12)) / m

# ANN Training Function
def train(X_train, Y_train, X_test, Y_test, epochs=5, batch_size=64, lr=0.1):
    n_in, n_hidden, n_out = 784, 128, 10

    # He initialization
    W1 = np.random.randn(n_in,n_hidden)*np.sqrt(2/n_in)
    b1 = np.zeros((1,n_hidden))
    W2 = np.random.randn(n_hidden,n_out)*np.sqrt(2/n_hidden)
    b2 = np.zeros((1,n_out))

    history = {"loss":[], "acc":[]}

    for epoch in range(epochs):
        idx = np.arange(X_train.shape[0]); np.random.shuffle(idx)
        X_train, Y_train = X_train[idx], Y_train[idx]

        total_loss = 0; steps = 0
        for start in range(0, X_train.shape[0], batch_size):
            Xb = X_train[start:start+batch_size]
            Yb = Y_train[start:start+batch_size]

            # Forward
            Z1 = Xb.dot(W1)+b1
            A1 = relu(Z1)
            Z2 = A1.dot(W2)+b2
            Y_hat = softmax(Z2)

            loss = cross_entropy(Yb, Y_hat)
            total_loss += loss; steps+=1

            # Backprop
            m = Xb.shape[0]
            dZ2 = (Y_hat - Yb)/m
            dW2 = A1.T.dot(dZ2)
            db2 = np.sum(dZ2,axis=0,keepdims=True)
            dA1 = dZ2.dot(W2.T)
            dZ1 = dA1*relu_deriv(Z1)
            dW1 = Xb.T.dot(dZ1)
            db1 = np.sum(dZ1,axis=0,keepdims=True)

            # Update
            W1 -= lr*dW1; b1 -= lr*db1
            W2 -= lr*dW2; b2 -= lr*db2

        # Evaluate
        logits = relu(X_test.dot(W1)+b1).dot(W2)+b2
        preds = np.argmax(softmax(logits),axis=1)
        acc = np.mean(preds == np.argmax(Y_test,axis=1))

        history["loss"].append(total_loss/steps)
        history["acc"].append(acc)

        print(f"Epoch {epoch+1}, Loss={total_loss/steps:.4f}, Test Acc={acc:.4f}")

    return W1,b1,W2,b2,history

# Load Data
X_train = load_images("train-images.idx3-ubyte")
y_train = load_labels("train-labels.idx1-ubyte")
X_test = load_images("t10k-images.idx3-ubyte")
y_test = load_labels("t10k-labels.idx1-ubyte")

Y_train = one_hot(y_train)
Y_test = one_hot(y_test)

print("Data loaded:", X_train.shape, Y_train.shape)

# Train Model
W1,b1,W2,b2,history = train(X_train, Y_train, X_test, Y_test, epochs=20, batch_size=10, lr=0.1)

# Plot Training History
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
plt.plot(history["loss"], label="Loss")
plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.title("Training Loss")
plt.legend()

plt.subplot(1,2,2)
plt.plot(history["acc"], label="Test Accuracy")
plt.xlabel("Epoch"); plt.ylabel("Accuracy"); plt.title("Test Accuracy")
plt.legend()
plt.show()

# Sample Prediction
import random

# Number of samples to test
num_samples = 20  

# Pick random indices
indices = random.sample(range(X_test.shape[0]), num_samples)

for i in indices:
    img = X_test[i].reshape(28,28)
    logits = relu(X_test[i:i+1].dot(W1)+b1).dot(W2)+b2
    pred = np.argmax(softmax(logits))

    plt.imshow(img, cmap="Oranges")
    plt.title(f"Predicted: {pred}, Actual: {y_test[i]}")
    plt.axis('off')
    plt.show()


FileNotFoundError: [Errno 2] No such file or directory: '/Users/adityaacharya/Downloads/train-images.Cidx3-ubyte'