In [52]:
import numpy as np
from torchvision.datasets import MNIST
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',
                    transform=lambda x: np.array(x).flatten() / 255.0,
                    download=True,
                    train=is_train)
    mnist_data = []
    mnist_labels = []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)
    return np.array(mnist_data), np.array(mnist_labels)

In [53]:
def one_hot_encode(labels):
    encoder = OneHotEncoder(sparse_output=False)
    return encoder.fit_transform(labels.reshape(-1, 1))

In [54]:
def initialize_params(input_dim, output_dim):
    W = np.random.randn(input_dim, output_dim) * 0.01
    b = np.zeros((1, output_dim))
    return W, b

In [55]:
def softmax(Z):
    expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
    return expZ / np.sum(expZ, axis=1, keepdims=True)

In [56]:
def forward_propagation(X, W, b):
    Z = np.dot(X, W) + b
    A = softmax(Z)
    return A

In [57]:
def compute_loss(A, Y):
    m = Y.shape[0]
    log_likelihood = -np.log(A[range(m), Y.argmax(axis=1)])
    return np.sum(log_likelihood) / m

In [58]:
def backward_propagation(X, Y, A):
    m = X.shape[0]
    dZ = A - Y
    dW = np.dot(X.T, dZ) / m
    db = np.sum(dZ, axis=0, keepdims=True) / m
    return dW, db

In [59]:
def update_params(W, b, dW, db, learning_rate):
    W -= learning_rate * dW
    b -= learning_rate * db
    return W, b

In [60]:
def train_model(train_X, train_Y, input_dim, output_dim, epochs=100, learning_rate=0.01, batch_size=100):
    W, b = initialize_params(input_dim, output_dim)
    
    for epoch in range(epochs):
        perm = np.random.permutation(train_X.shape[0])
        train_X = train_X[perm]
        train_Y = train_Y[perm]
        
        for i in range(0, train_X.shape[0], batch_size):
            X_batch = train_X[i:i+batch_size]
            Y_batch = train_Y[i:i+batch_size]
            
            # Forward propagation
            A = forward_propagation(X_batch, W, b)
            
            # Compute loss (optional for tracking)
            loss = compute_loss(A, Y_batch)
            
            # Backward propagation
            dW, db = backward_propagation(X_batch, Y_batch, A)
            
            # Update parameters
            W, b = update_params(W, b, dW, db, learning_rate)
            
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}')
    
    return W, b

In [61]:
def predict(X, W, b):
    A = forward_propagation(X, W, b)
    return np.argmax(A, axis=1)

In [62]:
train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)
    
train_Y = one_hot_encode(train_Y)
test_Y_one_hot = one_hot_encode(test_Y)
    
input_dim = 784
output_dim = 10
epochs = 100
learning_rate = 0.01
batch_size = 100
    
W, b = train_model(train_X, train_Y, input_dim, output_dim, epochs, learning_rate, batch_size)
    
train_predictions = predict(train_X, W, b)
test_predictions = predict(test_X, W, b)
    
print(f'Training Accuracy: {accuracy_score(np.argmax(train_Y, axis=1), train_predictions) * 100:.2f}%')
print(f'Test Accuracy: {accuracy_score(test_Y, test_predictions) * 100:.2f}%')

Epoch 1/100, Loss: 0.6757
Epoch 2/100, Loss: 0.6648
Epoch 3/100, Loss: 0.5222
Epoch 4/100, Loss: 0.4759
Epoch 5/100, Loss: 0.4113
Epoch 6/100, Loss: 0.6114
Epoch 7/100, Loss: 0.4044
Epoch 8/100, Loss: 0.4378
Epoch 9/100, Loss: 0.4482
Epoch 10/100, Loss: 0.4473
Epoch 11/100, Loss: 0.3053
Epoch 12/100, Loss: 0.5141
Epoch 13/100, Loss: 0.4180
Epoch 14/100, Loss: 0.4805
Epoch 15/100, Loss: 0.3147
Epoch 16/100, Loss: 0.3753
Epoch 17/100, Loss: 0.3410
Epoch 18/100, Loss: 0.3624
Epoch 19/100, Loss: 0.4568
Epoch 20/100, Loss: 0.3700
Epoch 21/100, Loss: 0.4296
Epoch 22/100, Loss: 0.2702
Epoch 23/100, Loss: 0.3399
Epoch 24/100, Loss: 0.2417
Epoch 25/100, Loss: 0.5139
Epoch 26/100, Loss: 0.2240
Epoch 27/100, Loss: 0.3124
Epoch 28/100, Loss: 0.3614
Epoch 29/100, Loss: 0.2804
Epoch 30/100, Loss: 0.3677
Epoch 31/100, Loss: 0.3729
Epoch 32/100, Loss: 0.3363
Epoch 33/100, Loss: 0.2815
Epoch 34/100, Loss: 0.4348
Epoch 35/100, Loss: 0.4546
Epoch 36/100, Loss: 0.3034
Epoch 37/100, Loss: 0.3679
Epoch 38/1

In [63]:
result = predict(train_X[0],W,b)
print(result)
print(f'Training Accuracy: {accuracy_score(np.argmax(train_Y[0]), result) * 100:.2f}%')

[5]


AxisError: axis 1 is out of bounds for array of dimension 1