In [1]:
import numpy as np
from torchvision.datasets import MNIST
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',
                    transform=lambda x: np.array(x).flatten() / 255.0,
                    download=True,
                    train=is_train)
    mnist_data = []
    mnist_labels = []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)
    return np.array(mnist_data), np.array(mnist_labels)

In [2]:
def one_hot_encode(labels):
    encoder = OneHotEncoder(sparse_output=False)
    return encoder.fit_transform(labels.reshape(-1, 1))

In [3]:
def initialize_params(input_dim, output_dim):
    W = np.random.randn(input_dim, output_dim) * 0.01   # W -> 784, 10
    b = np.zeros((1, output_dim))                       # b -> 1, 10
    return W, b

In [4]:
def softmax(Z):
    expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
    return expZ / np.sum(expZ, axis=1, keepdims=True)

In [5]:
def forward_propagation(X, W, b):
    Z = np.dot(X, W) + b
    A = softmax(Z)
    return A

In [6]:
def compute_loss(A, Y):
    m = Y.shape[0]
    log_likelihood = -np.log(A[range(m), Y.argmax(axis=1)])
    return np.sum(log_likelihood) / m

In [7]:
def backward_propagation(X, Y, A):
    m = X.shape[0]
    dZ = A - Y
    dW = np.dot(X.T, dZ) / m 6
    db = np.sum(dZ, axis=0, keepdims=True) / m
    return dW, db

In [8]:
def update_params(W, b, dW, db, learning_rate):
    W -= learning_rate * dW
    b -= learning_rate * db
    return W, b

In [9]:
def train_model(train_X, train_Y, input_dim, output_dim, epochs=100, learning_rate=0.01, batch_size=100):
    W, b = initialize_params(input_dim, output_dim)
    
    for epoch in range(epochs):
        perm = np.random.permutation(train_X.shape[0])
        train_X = train_X[perm]
        train_Y = train_Y[perm]
        
        for i in range(0, train_X.shape[0], batch_size):
            X_batch = train_X[i:i+batch_size]
            Y_batch = train_Y[i:i+batch_size]
            
            # Forward propagation
            A = forward_propagation(X_batch, W, b)
            
            # Compute loss (optional for tracking)
            loss = compute_loss(A, Y_batch)
            
            # Backward propagation
            dW, db = backward_propagation(X_batch, Y_batch, A) # X -> 100x784, Y -> 100x10, A -> 100X10
            
            # Update parameters
            W, b = update_params(W, b, dW, db, learning_rate)
            
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}')
    
    return W, b

In [10]:
def predict(X, W, b):
    A = forward_propagation(X, W, b)
    return np.argmax(A, axis=1)

In [11]:
train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)
    
train_Y = one_hot_encode(train_Y)
test_Y_one_hot = one_hot_encode(test_Y)
    
input_dim = 784
output_dim = 10
epochs = 100
learning_rate = 0.01
batch_size = 100
    
W, b = train_model(train_X, train_Y, input_dim, output_dim, epochs, learning_rate, batch_size)
    
train_predictions = predict(train_X, W, b)
test_predictions = predict(test_X, W, b)
    
print(f'Training Accuracy: {accuracy_score(np.argmax(train_Y, axis=1), train_predictions) * 100:.2f}%')
print(f'Test Accuracy: {accuracy_score(test_Y, test_predictions) * 100:.2f}%')

Epoch 1/100, Loss: 0.7306
Epoch 2/100, Loss: 0.4533
Epoch 3/100, Loss: 0.4279
Epoch 4/100, Loss: 0.4009
Epoch 5/100, Loss: 0.4336
Epoch 6/100, Loss: 0.2544
Epoch 7/100, Loss: 0.3600
Epoch 8/100, Loss: 0.4100
Epoch 9/100, Loss: 0.3769
Epoch 10/100, Loss: 0.4769
Epoch 11/100, Loss: 0.3975
Epoch 12/100, Loss: 0.3613
Epoch 13/100, Loss: 0.3611
Epoch 14/100, Loss: 0.3559
Epoch 15/100, Loss: 0.3959
Epoch 16/100, Loss: 0.3565
Epoch 17/100, Loss: 0.4034
Epoch 18/100, Loss: 0.5589
Epoch 19/100, Loss: 0.3444
Epoch 20/100, Loss: 0.3341
Epoch 21/100, Loss: 0.4690
Epoch 22/100, Loss: 0.1757
Epoch 23/100, Loss: 0.1935
Epoch 24/100, Loss: 0.4857
Epoch 25/100, Loss: 0.3096
Epoch 26/100, Loss: 0.2985
Epoch 27/100, Loss: 0.3948
Epoch 28/100, Loss: 0.3299
Epoch 29/100, Loss: 0.2642
Epoch 30/100, Loss: 0.3175
Epoch 31/100, Loss: 0.3563
Epoch 32/100, Loss: 0.2842
Epoch 33/100, Loss: 0.2948
Epoch 34/100, Loss: 0.1987
Epoch 35/100, Loss: 0.3263
Epoch 36/100, Loss: 0.3748
Epoch 37/100, Loss: 0.3584
Epoch 38/1