In [2]:
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    """Load one MNIST split and return its samples and labels as two lists.

    The dataset is created under ``./data`` with ``download=True``. Every
    image goes through a transform that converts it to a NumPy array,
    flattens it to one dimension and divides the values by 255.0.

    Args:
        is_train: Forwarded to ``MNIST(train=...)``; selects which split
            is loaded.

    Returns:
        A ``(samples, labels)`` tuple of Python lists in dataset order.
        ``samples[i]`` is the transformed image and ``labels[i]`` is the
        label the dataset yields for it.
    """
    def _flatten_and_scale(picture):
        # Same work the transform has to do per image: array -> 1-D -> / 255.
        return np.array(picture).flatten() / 255.0

    split = MNIST(
        root='./data',
        train=is_train,
        download=True,
        transform=_flatten_and_scale,
    )

    # Walk the dataset exactly once so the transform runs once per sample.
    pairs = list(split)
    samples = [sample for sample, _ in pairs]
    targets = [target for _, target in pairs]
    return samples, targets

# Materialise both MNIST splits as (samples, labels) pairs of Python lists.
train_X, train_Y = download_mnist(is_train=True)
test_X, test_Y = download_mnist(is_train=False)



In [3]:
def one_hot_encode(labels, num_classes=10):
    """Turn a sequence of integer class labels into a one-hot matrix.

    Args:
        labels: Sequence (list or array) of integer class indices, each
            expected to lie in ``[0, num_classes)``.
        num_classes: Number of columns in the result.

    Returns:
        A float array of shape ``(len(labels), num_classes)`` that is zero
        everywhere except for a single 1 per row, placed in the column
        given by that row's label. An empty ``labels`` yields a
        ``(0, num_classes)`` array.

    Raises:
        IndexError: If a label is outside ``[-num_classes, num_classes)``
            (propagated from NumPy indexing).
    """
    label_array = np.asarray(labels, dtype=int).ravel()
    # Size the matrix from the labels themselves instead of hard-coding the
    # split sizes, so any subset or different split encodes correctly.
    encoded = np.zeros((label_array.size, num_classes))
    # Integer-array indexing: one (row, label) coordinate per sample.
    encoded[np.arange(label_array.size), label_array] = 1
    return encoded


train_y_one_hot = one_hot_encode(train_Y)
test_y_one_hot = one_hot_encode(test_Y)


In [4]:
import time

def softmax(arr):
    """Apply a row-wise softmax to a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating, which leaves
    the result unchanged mathematically but keeps ``np.exp`` from
    overflowing on large scores.

    Args:
        arr: 2-D array-like; the softmax is taken along axis 1.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    row_max = np.max(arr, axis=1, keepdims=True)
    shifted = arr - row_max
    exponentials = np.exp(shifted)
    normaliser = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / normaliser

def accuracy(y_true, y_pred):
    """Return the fraction of rows whose arg-max column matches.

    Args:
        y_true: 2-D array-like of reference scores (e.g. one-hot rows);
            the arg-max along axis 1 is taken as the true class.
        y_pred: 2-D array-like of predicted scores with the same number
            of rows; the arg-max along axis 1 is the predicted class.

    Returns:
        Mean of the element-wise equality between predicted and true
        class indices, as a NumPy float.
    """
    hits = np.argmax(y_pred, axis=1) == np.argmax(y_true, axis=1)
    return np.mean(hits)

# Mini-batch gradient descent for a single-layer softmax classifier:
# predictions are softmax(X @ weights + bias).

# Fixed seed so the random initialisation below is repeatable.
np.random.seed(69)
weights = np.random.rand(784, 10)
bias = np.random.rand(10)
learning_rate = 0.01
num_epochs = 100
batch_size = 100

# train_X / test_X may be Python lists of per-sample arrays. Stack them into
# 2-D arrays once here; otherwise every `list @ weights` below re-converts the
# entire list on each epoch. (Assumes all samples share the same length.)
train_features = np.asarray(train_X)
test_features = np.asarray(test_X)

start_time = time.time()

for epoch in range(num_epochs):

    for i in range(0, len(train_features), batch_size):
        batch_x = train_features[i:i+batch_size]
        batch_y = train_y_one_hot[i:i+batch_size]

        sums = batch_x @ weights + bias
        y_prob = softmax(sums)

        # (y_prob - y) is the gradient of the cross-entropy loss with respect
        # to the pre-softmax scores; it is shared by both parameter gradients.
        error = y_prob - batch_y
        # Average over the samples actually in this batch so a short final
        # batch (when the data size is not a multiple of batch_size) is not
        # under-weighted.
        scale = 1 / len(batch_x)

        gradient_weights = scale * batch_x.T @ error
        gradient_bias = scale * np.sum(error, axis=0)

        weights -= gradient_weights * learning_rate
        bias -= gradient_bias * learning_rate

    # Full-dataset training accuracy after each epoch.
    train_predictions = softmax(train_features @ weights + bias)
    train_acc = accuracy(train_y_one_hot, train_predictions)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Accuracy: {train_acc:.4f}")


test_predictions = softmax(test_features @ weights + bias)
test_acc = accuracy(test_y_one_hot, test_predictions)

# The measured time covers training, the per-epoch accuracy passes and the
# final test evaluation.
end_time = time.time()
print(f'Execution Time: {end_time - start_time}')
print(f"Test Accuracy: {test_acc:.4f}")

Epoch 1/100, Train Accuracy: 0.6382
Epoch 2/100, Train Accuracy: 0.7451
Epoch 3/100, Train Accuracy: 0.7863
Epoch 4/100, Train Accuracy: 0.8102
Epoch 5/100, Train Accuracy: 0.8250
Epoch 6/100, Train Accuracy: 0.8361
Epoch 7/100, Train Accuracy: 0.8447
Epoch 8/100, Train Accuracy: 0.8513
Epoch 9/100, Train Accuracy: 0.8563
Epoch 10/100, Train Accuracy: 0.8609
Epoch 11/100, Train Accuracy: 0.8650
Epoch 12/100, Train Accuracy: 0.8684
Epoch 13/100, Train Accuracy: 0.8715
Epoch 14/100, Train Accuracy: 0.8743
Epoch 15/100, Train Accuracy: 0.8768
Epoch 16/100, Train Accuracy: 0.8787
Epoch 17/100, Train Accuracy: 0.8803
Epoch 18/100, Train Accuracy: 0.8821
Epoch 19/100, Train Accuracy: 0.8840
Epoch 20/100, Train Accuracy: 0.8858
Epoch 21/100, Train Accuracy: 0.8871
Epoch 22/100, Train Accuracy: 0.8886
Epoch 23/100, Train Accuracy: 0.8898
Epoch 24/100, Train Accuracy: 0.8909
Epoch 25/100, Train Accuracy: 0.8918
Epoch 26/100, Train Accuracy: 0.8925
Epoch 27/100, Train Accuracy: 0.8931
Epoch 28/1