In [2]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

def load_mnist():
    """Fetch MNIST from OpenML (dataset ``mnist_784``, version 1).

    Returns:
        A ``(data, labels)`` pair: the dataset's ``data`` attribute as
        returned by ``fetch_openml`` and its ``target`` attribute cast
        to ``int``.
    """
    dataset = fetch_openml('mnist_784', version=1)
    return dataset.data, dataset.target.astype(int)

def preprocess_data(X, y):
    """Scale pixel values and one-hot encode labels for the network.

    Args:
        X: Pixel data whose total size is a multiple of 784 (one image per
            row). May be a pandas DataFrame or any array-like; values are
            divided by 255.0.
        y: Iterable of integer class labels in ``range(10)``.

    Returns:
        A pair ``(inputs, targets)`` where ``inputs`` has shape
        ``(n, 784, 1)`` (one column vector per image) and ``targets`` has
        shape ``(n, 10, 1)`` with a single 1.0 per sample at the label index.

    Raises:
        IndexError: If a label falls outside the 10 available positions.
    """
    # np.asarray accepts DataFrames and ndarrays alike, so the function no
    # longer depends on the input exposing a ``.values`` attribute.
    inputs = np.asarray(X, dtype=float) / 255.0
    inputs = inputs.reshape(-1, 784, 1)

    # Build every one-hot column vector in one indexed assignment instead of
    # one Python-level call per sample.
    labels = np.asarray(y, dtype=int).reshape(-1)
    targets = np.zeros((labels.shape[0], 10, 1))
    targets[np.arange(labels.shape[0]), labels, 0] = 1.0
    return inputs, targets

def vectorized_result(j):
    """Build the one-hot column vector for class index ``j``.

    The result has shape ``(10, 1)``: zeros everywhere except a 1.0 in
    row ``j``.
    """
    one_hot = np.zeros(shape=(10, 1))
    one_hot[j] = 1.0
    return one_hot

# Fetch the raw dataset, then normalize the pixels and one-hot encode the labels
X, y = preprocess_data(*load_mnist())

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Pair each input with its target; the arrays are used as-is (no transposition)
training_data = list(zip(X_train, y_train))
test_data = list(zip(X_test, y_test))

class NeuralNetwork:
    """Fully connected feedforward network with sigmoid activations.

    The network is trained with mini-batch stochastic gradient descent, with
    gradients computed by backpropagation. Inputs and targets are expected to
    be column vectors of shape ``(n, 1)``, matching the ``(n, 1)`` bias shape.
    """

    def __init__(self, sizes):
        """Initialize the network with random weights and biases.

        Args:
            sizes: Number of neurons per layer, input layer first. Weights
                and biases are drawn from a standard normal distribution;
                the input layer has no biases.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # weights[i] has shape (sizes[i + 1], sizes[i]) so that
        # np.dot(weights[i], a) maps layer i activations to layer i + 1.
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the network output for input column vector ``a``."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def backprop(self, x, y):
        """Return the gradient of the cost for a single training example.

        Args:
            x: Input column vector.
            y: Target column vector.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of per-layer gradient lists with
            the same shapes as ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: keep every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: error of the output layer first.
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].T)

        # Negative indices walk the layers from the back: layer=2 is the
        # second-to-last layer, and so on.
        for layer in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-layer])
            delta = np.dot(self.weights[-layer + 1].T, delta) * sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(delta, activations[-layer - 1].T)

        return (nabla_b, nabla_w)

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step using the given mini-batch.

        Args:
            mini_batch: Sequence of ``(x, y)`` training pairs.
            eta: Learning rate; the summed gradient is scaled by
                ``eta / len(mini_batch)``.
        """
        if len(mini_batch) == 0:
            # Nothing to learn from; also avoids dividing by zero below.
            return

        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        step = eta / len(mini_batch)
        self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    def train(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the network using mini-batch stochastic gradient descent.

        Args:
            training_data: Iterable of ``(x, y)`` pairs. It is copied into a
                private list, so the caller's object is never reordered.
            epochs: Number of passes over the training data.
            mini_batch_size: Number of examples per gradient step.
            eta: Learning rate.
            test_data: Optional iterable of ``(x, y)`` pairs. When it is
                given and non-empty, the accuracy on it is printed after
                every epoch.
        """
        # Shuffle a private copy: the caller's list keeps its order, and any
        # iterable of pairs (zip object, tuple, ...) is accepted.
        training_data = list(training_data)
        if test_data is not None:
            test_data = list(test_data)

        for epoch in range(epochs):
            np.random.shuffle(training_data)
            mini_batches = [
                training_data[k:k + mini_batch_size]
                for k in range(0, len(training_data), mini_batch_size)
            ]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                accuracy = self.evaluate(test_data)
                print(f"Epoch {epoch}: {accuracy:.2f}%")

    def evaluate(self, test_data):
        """Return the classification accuracy on ``test_data`` as a percentage.

        A sample counts as correct when the index of the largest output
        activation equals the index of the largest target entry. Returns
        ``0.0`` for empty ``test_data``.
        """
        if len(test_data) == 0:
            return 0.0

        correct_results = sum(
            int(np.argmax(self.feedforward(x)) == np.argmax(y))
            for (x, y) in test_data
        )
        return (correct_results / len(test_data)) * 100  # Convert to percentage

    def cost_derivative(self, output_activations, y):
        """Return the derivative of the cost w.r.t. the output activations.

        For the quadratic cost ``0.5 * ||a - y||^2`` this is ``a - y``.
        """
        return (output_activations - y)

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))``, element-wise.

    ``np.where`` evaluates both of its branches for every element, so
    selecting between ``exp(-z)`` and ``exp(z)`` still overflows on
    large-magnitude inputs. Taking a single ``exp(-|z|)`` (always in
    ``(0, 1]``) keeps the same two formulas without ever overflowing.

    Args:
        z: Scalar or array-like of weighted inputs.

    Returns:
        An ndarray of the same shape as ``z`` with values in ``[0, 1]``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # equals exp(-z) for z >= 0 and exp(z) for z < 0
    return np.where(z >= 0,
                    1.0 / (1.0 + decay),
                    decay / (decay + 1))

def sigmoid_prime(z):
    """Return the sigmoid's derivative at ``z``: ``s * (1 - s)`` with ``s = sigmoid(z)``."""
    s = sigmoid(z)
    complement = 1 - s
    return s * complement

# Build a network with 784 input, 100 hidden and 10 output neurons, then train
# it for 30 epochs, reporting the test-set accuracy after each epoch
network = NeuralNetwork(sizes=[784, 100, 10])
network.train(
    training_data,
    epochs=30,
    mini_batch_size=10,
    eta=3.0,
    test_data=test_data,
)


  warn(


Epoch 0: 74.92%
Epoch 1: 76.83%
Epoch 2: 85.51%
Epoch 3: 86.37%
Epoch 4: 86.47%
Epoch 5: 86.74%
Epoch 6: 87.01%
Epoch 7: 86.94%
Epoch 8: 87.12%
Epoch 9: 87.24%
Epoch 10: 87.29%
Epoch 11: 87.38%
Epoch 12: 87.36%
Epoch 13: 87.31%
Epoch 14: 87.44%
Epoch 15: 87.31%
Epoch 16: 87.61%
Epoch 17: 87.47%
Epoch 18: 87.56%
Epoch 19: 87.51%
Epoch 20: 87.56%
Epoch 21: 87.64%
Epoch 22: 87.56%
Epoch 23: 87.59%
Epoch 24: 87.55%
Epoch 25: 87.54%
Epoch 26: 87.58%
Epoch 27: 87.56%
Epoch 28: 87.69%
Epoch 29: 87.68%
