In [1]:
import numpy as np
from torchvision.datasets import MNIST

In [2]:
def one_hot_encode(labels, num_classes):
    return np.eye(num_classes)[labels]

def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',
                    transform=lambda x: np.array(x).flatten(),
                    download=True,
                    train=is_train)
    
    mnist_data = []
    mnist_labels = []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)
    
    mnist_data = np.array(mnist_data, dtype='float64')
    mnist_labels = np.array(mnist_labels)
    
    mnist_labels = one_hot_encode(mnist_labels, num_classes=10)
    
    return mnist_data, mnist_labels
    
train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

print(f"train_X shape: {train_X.shape}")
print(f"train_Y (one-hot) shape: {train_Y.shape}")
print(f"test_X shape: {test_X.shape}")
print(f"test_Y (one-hot) shape: {test_Y.shape}")

train_X shape: (60000, 784)
train_Y (one-hot) shape: (60000, 10)
test_X shape: (10000, 784)
test_Y (one-hot) shape: (10000, 10)


## Normalize the data

In [3]:
train_X /= 255.0
test_X /= 255.0

In [4]:
print(f"Min value: {min([min(t) for t in train_X])}")
print(f"Max value: {max([max(t) for t in train_X])}")

Min value: 0.0
Max value: 1.0


## Hyperparameters

In [11]:
input_size = train_X.shape[1]
output_size = train_Y.shape[1]
batch_size = 100
learning_rate = 0.05
epochs = 50

## Batches

In [12]:
def generate_batches(X, Y, batch_size):
    num_samples = X.shape[0]
    for i in range(0, num_samples, batch_size):
        X_batch = X[i:i+batch_size]
        Y_batch = Y[i:i+batch_size]
        yield X_batch, Y_batch

## Softmax

In [13]:
def softmax(x):
    return np.exp(x)/np.sum(np.exp(x),axis=1, keepdims=True)

## Main Loop

In [14]:
W = np.random.randn(input_size, output_size) * 0.1 # Avoid exploding gradients
b = np.zeros(output_size)  # Bias initialized to zero

for epoch in range(epochs):
 # Loop through batches
    for batch_X, batch_Y in generate_batches(train_X, train_Y, batch_size):
        #print(f"Batch_X shape:{batch_X.shape}")
        #print(f"Weights shape: {W.shape}")
        #print(f"Bias shape: {b.shape}")
        
        Z = np.dot(batch_X, W) + b 
        #print(f"Z shape: {Z.shape}")
        Y_pred = softmax(Z)
        #print(f"Y_pred shape: {Y_pred.shape}")
        Y_hat = np.argmax(Y_pred, axis=1)
        #print(f"Prediction shape: {Y_hat.shape}")

        loss = -np.sum(batch_Y * np.log(Y_pred + 1e-10)) / batch_size  # Cross-entropy loss
        
        # Backprop
        error = batch_Y - Y_pred

        dW = np.dot(batch_X.T, error) / batch_size
        db = np.sum(error, axis=0) / batch_size

        W += learning_rate * dW
        b += learning_rate * db
        
    print(f"Epoch {epoch+1}/{epochs} Loss: {loss:.4f}")


Epoch 1/50 Loss: 0.4923
Epoch 2/50 Loss: 0.4036
Epoch 3/50 Loss: 0.3696
Epoch 4/50 Loss: 0.3514
Epoch 5/50 Loss: 0.3397
Epoch 6/50 Loss: 0.3315
Epoch 7/50 Loss: 0.3252
Epoch 8/50 Loss: 0.3202
Epoch 9/50 Loss: 0.3161
Epoch 10/50 Loss: 0.3125
Epoch 11/50 Loss: 0.3094
Epoch 12/50 Loss: 0.3067
Epoch 13/50 Loss: 0.3042
Epoch 14/50 Loss: 0.3020
Epoch 15/50 Loss: 0.2999
Epoch 16/50 Loss: 0.2980
Epoch 17/50 Loss: 0.2962
Epoch 18/50 Loss: 0.2946
Epoch 19/50 Loss: 0.2931
Epoch 20/50 Loss: 0.2916
Epoch 21/50 Loss: 0.2902
Epoch 22/50 Loss: 0.2889
Epoch 23/50 Loss: 0.2877
Epoch 24/50 Loss: 0.2865
Epoch 25/50 Loss: 0.2854
Epoch 26/50 Loss: 0.2843
Epoch 27/50 Loss: 0.2833
Epoch 28/50 Loss: 0.2823
Epoch 29/50 Loss: 0.2814
Epoch 30/50 Loss: 0.2804
Epoch 31/50 Loss: 0.2796
Epoch 32/50 Loss: 0.2787
Epoch 33/50 Loss: 0.2779
Epoch 34/50 Loss: 0.2771
Epoch 35/50 Loss: 0.2763
Epoch 36/50 Loss: 0.2755
Epoch 37/50 Loss: 0.2748
Epoch 38/50 Loss: 0.2741
Epoch 39/50 Loss: 0.2734
Epoch 40/50 Loss: 0.2727
Epoch 41/

In [16]:
def evaluate(test_X, test_Y):
    z_test = np.dot(test_X, W) + b
    y_test_pred = softmax(z_test)
    y_test_hat = np.argmax(y_test_pred, axis=1)

    # Calculate accuracy
    correct_predictions = np.sum(y_test_hat == np.argmax(test_Y, axis=1))
    accuracy = correct_predictions / test_Y.shape[0]
    print(f"Test Accuracy: {accuracy:.4f}")

In [17]:
evaluate(test_X, test_Y)

Test Accuracy: 0.9243
