In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [3]:
# Fetch the 'mnist_784' dataset (version 1) from OpenML, then cast the
# features to float32 and the targets to int in a single step.
data = fetch_openml('mnist_784', version=1, parser='auto')
X, y = data.data.astype(np.float32), data.target.astype(int)

# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# split is the same on every run. Note that X / y are rebound to the
# training portion here.
X, X_test, y, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# One-hot encode the labels. The encoder learns its categories from the
# training labels only and is then applied to both label sets; each set is
# reshaped into a single column because the encoder is fed 2-D input.
encoder = OneHotEncoder(sparse_output=False)
encoder.fit(y.values.reshape(-1, 1))
y, y_test = (
    encoder.transform(labels.values.reshape(-1, 1))
    for labels in (y, y_test)
)

def sigmoid(X):
    """Element-wise logistic function 1 / (1 + exp(-X)).

    Computed as exp(-log(1 + exp(-X))) via ``np.logaddexp`` so that large
    negative inputs do not overflow inside ``exp`` (the naive form evaluates
    exp(+large) and emits an overflow RuntimeWarning before dividing).

    Args:
        X: Scalar or array of real values.

    Returns:
        Values in [0, 1] with the same shape as ``X``.
    """
    # logaddexp(0, -X) == log(exp(0) + exp(-X)) == log(1 + exp(-X)),
    # evaluated without ever forming exp(-X) for large |X|.
    return np.exp(-np.logaddexp(0, -X))

def relu(X):
    """Rectified linear unit: element-wise maximum of 0 and ``X``."""
    rectified = np.maximum(0, X)
    return rectified

def relu_derivative(x):
    """Return 1.0 where ``x`` is strictly positive and 0.0 elsewhere."""
    positive_mask = x > 0
    return positive_mask.astype(float)

In [5]:
def ann(layers, lr, epochs, X, Y, X_test, y_test, seed=None):
    """Train a fully connected network with full-batch gradient descent.

    Hidden layers use ReLU and the output layer uses a sigmoid. After
    training, the training accuracy, test accuracy and the predicted labels
    of the first five test rows are printed. Nothing is returned.

    Args:
        layers: Sequence of layer widths; ``layers[0]`` is the input width
            and ``layers[-1]`` the output width.
        lr: Learning rate applied to every weight and bias update.
        epochs: Number of full passes over ``X``.
        X: Training inputs, one row per sample with ``layers[0]`` columns.
        Y: One-hot training targets, one row per sample with ``layers[-1]``
            columns.
        X_test: Test inputs, same column layout as ``X``.
        y_test: One-hot test targets, same column layout as ``Y``.
        seed: Optional seed for weight initialisation. When ``None`` (the
            default) weights are drawn from NumPy's global random state,
            exactly as before; otherwise a dedicated generator seeded with
            ``seed`` is used so runs are reproducible.
    """
    # Work on plain ndarrays so matrix products and in-place updates behave
    # the same whether the caller passes arrays or DataFrames.
    X = np.asarray(X)
    Y = np.asarray(Y)
    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)

    if seed is None:
        draw = np.random.randn
    else:
        rng = np.random.default_rng(seed)

        def draw(rows, cols):
            return rng.standard_normal((rows, cols))

    # One (fan_in, fan_out) pair per weight matrix.
    layer_shapes = list(zip(layers[:-1], layers[1:]))
    weights = [draw(n_in, n_out) * 0.01 for n_in, n_out in layer_shapes]
    biases = [np.zeros((1, n_out)) for _, n_out in layer_shapes]

    def forward(inputs):
        """Return (activations, pre-activations) for every layer."""
        activations = [inputs]
        Z_values = []
        # Hidden layers: affine transform followed by ReLU.
        for W, b in zip(weights[:-1], biases[:-1]):
            Z = np.dot(activations[-1], W) + b
            Z_values.append(Z)
            activations.append(relu(Z))
        # Output layer: affine transform followed by sigmoid.
        Z_out = np.dot(activations[-1], weights[-1]) + biases[-1]
        Z_values.append(Z_out)
        activations.append(sigmoid(Z_out))
        return activations, Z_values

    def backward(activations, Z_values, y):
        """Back-propagate the error and update weights/biases in place."""
        m = y.shape[0]  # batch size taken from the targets actually used
        deltas = [activations[-1] - y]

        # Walk from the last weight matrix back to the second one, pushing
        # the error through each matrix and the ReLU of the layer below.
        for i in range(len(weights) - 1, 0, -1):
            dZ = (deltas[-1] @ weights[i].T) * relu_derivative(Z_values[i - 1])
            deltas.append(dZ)

        deltas.reverse()

        # All deltas were computed with the old weights; only now update.
        for i, delta in enumerate(deltas):
            dW = (activations[i].T @ delta) / m
            dB = np.sum(delta, axis=0, keepdims=True) / m

            weights[i] -= lr * dW
            biases[i] -= lr * dB

    def predict(X_new):
        """Return the index of the largest output unit for each row."""
        activations, _ = forward(X_new)
        return np.argmax(activations[-1], axis=1)

    for epoch in range(epochs):
        activations, Z_values = forward(X)
        backward(activations, Z_values, Y)
        if epoch % 10 == 0:
            # Reported from the activations computed before this epoch's
            # update; 1e-8 keeps log() away from zero.
            loss = -np.mean(Y * np.log(activations[-1] + 1e-8))
            print(f"Epoch {epoch}, Loss: {loss}")

    # Calculate Training Accuracy (against the Y passed in, not a global)
    y_train_pred = predict(X)
    y_train_true = np.argmax(Y, axis=1)
    train_accuracy = np.mean(y_train_pred == y_train_true)
    print(f"Training Accuracy: {train_accuracy * 100:.2f}%")

    # Calculate Test Accuracy
    y_test_pred = predict(X_test)
    y_test_true = np.argmax(y_test, axis=1)
    test_accuracy = np.mean(y_test_pred == y_test_true)
    print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

    print(f"Predicted Labels: {predict(X_test[:5])}")

# Network layout (input width, two hidden widths, output width) and the
# gradient-descent settings used for this run.
layers, lr, epochs = [784, 160, 100, 10], 0.01, 100

ann(
    layers=layers,
    lr=lr,
    epochs=epochs,
    X=X,
    Y=y,
    X_test=X_test,
    y_test=y_test,
)

Epoch 0, Loss: 0.07220720235260465
Epoch 10, Loss: 0.16477213735484578
Epoch 20, Loss: 0.12035822248956236
Epoch 30, Loss: 0.07472639312017305
Epoch 40, Loss: 0.09078593887481874
Epoch 50, Loss: 0.049184471285311136
Epoch 60, Loss: 0.04755757600746576
Epoch 70, Loss: 0.04236004335572294
Epoch 80, Loss: 0.038000457881497425
Epoch 90, Loss: 0.034442001819515976
Training Accuracy: 91.67%
Test Accuracy: 91.73%
Predicted Labels: [8 4 8 7 7]
