In [1]:
import numpy as np
from mnist.loader import MNIST

# Load the MNIST dataset
mndata = MNIST("C:\\Users\\muzyk\\Downloads\\mnist")  # Replace with the path to your MNIST data
mndata.gz = True

# Load the training and testing data
train_images, train_labels = mndata.load_training()
test_images, test_labels = mndata.load_testing()

# Convert to NumPy arrays for easier manipulation
train_images = np.array(train_images)
train_labels = np.array(train_labels)
test_images = np.array(test_images)
test_labels = np.array(test_labels)

# Normalize pixel values to be between 0 and 1
train_images = train_images / 255.0
test_images = test_images / 255.0

# One-hot encode the labels
num_classes = 10
train_labels = np.eye(num_classes)[train_labels]
test_labels = np.eye(num_classes)[test_labels]

# Define the neural network architecture
input_size = 784  # 28x28 image dimensions
hidden_size = 128
output_size = num_classes

# Initialize weights and biases with Xavier initialization
np.random.seed(0)
weights_input_hidden = np.random.normal(0, np.sqrt(2 / (input_size + hidden_size)), size=(input_size, hidden_size))
bias_hidden = np.zeros(hidden_size)
weights_hidden_output = np.random.normal(0, np.sqrt(2 / (hidden_size + output_size)), size=(hidden_size, output_size))
bias_output = np.zeros(output_size)

# Softmax activation function for the output layer
def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum()


# Learning rate
learning_rate = 0.01

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    for i in range(len(train_images)):
        # Forward propagation
        hidden_input = np.dot(train_images[i], weights_input_hidden) + bias_hidden
        hidden_output = 1 / (1 + np.exp(-hidden_input))  # Sigmoid activation for the hidden layer
        output_input = np.dot(hidden_output, weights_hidden_output) + bias_output
        output = softmax(output_input)

        # Calculate the cross-entropy loss
        loss = -np.sum(train_labels[i] * np.log(output))

        # Backpropagation
        d_output = -train_labels[i] * np.log(output)
        d_hidden = np.dot(d_output, weights_hidden_output.T) * hidden_output * (1 - hidden_output)

        # Update weights and biases
        weights_hidden_output -= np.outer(hidden_output, d_output) * learning_rate
        bias_output -= d_output * learning_rate
        weights_input_hidden -= np.outer(train_images[i], d_hidden) * learning_rate
        bias_hidden -= d_hidden * learning_rate

    # Calculate the test accuracy
    correct = 0
    for i in range(len(test_images)):
        hidden_input = np.dot(test_images[i], weights_input_hidden) + bias_hidden
        hidden_output = 1 / (1 + np.exp(-hidden_input))
        output_input = np.dot(hidden_output, weights_hidden_output) + bias_output
        output = softmax(output_input)
        if np.argmax(output) == np.argmax(test_labels[i]):
            correct += 1

    test_accuracy = correct / len(test_images)
    
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss:.4f}, Test Accuracy: {test_accuracy * 100:.2f}%")


  loss = -np.sum(train_labels[i] * np.log(output))
  loss = -np.sum(train_labels[i] * np.log(output))
  d_output = -train_labels[i] * np.log(output)
  d_output = -train_labels[i] * np.log(output)


KeyboardInterrupt: 