In [None]:
import numpy as np

class Network:
    """Fully connected feedforward network with sigmoid units.

    Parameters are kept as lists of NumPy arrays, one entry per non-input
    layer: ``biases[i]`` has shape ``(sizes[i+1], 1)`` and ``weights[i]`` has
    shape ``(sizes[i+1], sizes[i])``.  Training is mini-batch stochastic
    gradient descent; gradients come from backpropagation using the cost
    derivative defined in ``cost_derivative`` (``output - y``).

    The class relies on the module-level ``sigmoid`` and ``sigmoid_prime``
    helpers and on NumPy's global random state (``np.random``).
    """

    def __init__(self, sizes):
        """
        sizes: list of layer-sizes, e.g. [784, 30, 10]
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # biases for each non-input layer, drawn from a standard normal
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # weights connecting layer l-1 to l; dividing by sqrt(fan-in) keeps
        # the initial weighted sums from growing with the input width
        self.weights = [np.random.randn(y, x)/np.sqrt(x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return output of network if input is a."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """
        Train via mini-batch stochastic gradient descent.

        training_data: iterable of (x, y) pairs; it is copied into a private
            list, so the caller's sequence is never reordered.
        epochs: number of full passes over the training data.
        mini_batch_size: positive number of examples per gradient step.
        eta: learning rate.
        test_data: optional iterable of (x, label) pairs; when given and
            non-empty, the number of correct predictions is printed after
            every epoch.

        Raises ValueError if mini_batch_size is not positive.
        """
        if mini_batch_size <= 0:
            raise ValueError("mini_batch_size must be a positive integer")
        # Shuffle a shallow copy: the caller's list keeps its order, and
        # tuples / zip objects are accepted as well.
        training_data = list(training_data)
        if test_data is not None:
            # Materialise once so len() works and truthiness is unambiguous.
            test_data = list(test_data)
        n = len(training_data)
        for j in range(epochs):
            np.random.shuffle(training_data)
            # partition into mini-batches (the last one may be shorter)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)
            ]
            for batch in mini_batches:
                self.update_mini_batch(batch, eta)
            if test_data:
                accuracy = self.evaluate(test_data)
                print(f"Epoch {j}: {accuracy} / {len(test_data)}")
            else:
                print(f"Epoch {j} complete")

    def update_mini_batch(self, batch, eta):
        """Apply one step of gradient descent on a single mini-batch.

        batch: sequence of (x, y) pairs; an empty batch is a no-op.
        eta: learning rate; the summed gradient is scaled by eta/len(batch).
        """
        if len(batch) == 0:
            # Nothing to average over; avoids dividing by zero below.
            return
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in batch:
            # backprop returns gradients for this example
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            # accumulate
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b,
                                                   delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w,
                                                   delta_nabla_w)]
        # update parameters with the batch-averaged gradient
        step = eta/len(batch)
        self.weights = [w - step*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases  = [b - step*nb
                        for b, nb in zip(self.biases,  nabla_b)]

    def backprop(self, x, y):
        """Return (∂C/∂b, ∂C/∂w) for cost on input x with target y.

        Both returned values are lists of arrays shaped like ``self.biases``
        and ``self.weights`` respectively.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # forward pass, remembering every weighted input and activation
        activation = x
        activations = [x]            # list of activations layer by layer
        zs = []                      # list of z vectors layer by layer
        for b, w in zip(self.biases, self.weights):
            z = w.dot(activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # backward pass
        # delta for output layer
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = delta.dot(activations[-2].T)

        # Walk backwards using negative indices: layer = 2 is the
        # second-to-last layer, layer = 3 the one before it, etc.
        for layer in range(2, self.num_layers):
            z = zs[-layer]
            sp = sigmoid_prime(z)
            # propagate the error through the weights of the following layer
            delta = self.weights[-layer+1].T.dot(delta) * sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = delta.dot(activations[-layer-1].T)
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return # of inputs for which the network output is correct.

        The prediction is the index of the largest output activation.  Each
        label may be an integer class index or a one-hot vector (any array
        with more than one element), in which case its argmax is used.
        """
        correct = 0
        for x, y in test_data:
            pred = np.argmax(self.feedforward(x))
            label = np.argmax(y) if np.size(y) > 1 else y
            correct += int(pred == label)
        return correct

    def cost_derivative(self, output_activations, y):
        """∂C/∂a for output activations."""
        return (output_activations - y)

# activation funcs
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    Evaluated through exp(-|z|), which never exceeds 1, so large-magnitude
    inputs cannot overflow.  Scalars in give scalars out; arrays keep their
    shape.
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # np.where turns scalars into 0-d arrays; unwrap to keep scalar returns
    return out[()] if out.ndim == 0 else out
def sigmoid_prime(z):
    """Derivative of the sigmoid at z, i.e. sigmoid(z) * (1 - sigmoid(z))."""
    s = sigmoid(z)
    return s*(1-s)


In [None]:
from tensorflow.keras.datasets import mnist

# Load the MNIST arrays through Keras (downloads on first use).
(train_X, train_y), (test_X, test_y) = mnist.load_data()

# Flatten every image into a 784x1 column vector and rescale the pixel
# values by 1/255 so the inputs lie in [0, 1].
train_X = train_X.reshape(-1, 784, 1) / 255.0
test_X = test_X.reshape(-1, 784, 1) / 255.0

# convert labels to one-hot vectors
def one_hot(j, num_classes=10):
    """Return a (num_classes, 1) column vector with 1.0 at index j, 0 elsewhere.

    j: integer class index.
    num_classes: length of the vector; defaults to 10 (the digit classes).
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e
# Training pairs carry one-hot column-vector targets; test pairs keep the
# raw integer label, which Network.evaluate compares with the argmax output.
training_data = [(x, one_hot(y)) for x, y in zip(train_X, train_y)]
test_data     = [(x, y) for x, y in zip(test_X, test_y)]


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# 784 inputs -> 30 hidden sigmoid units -> 10 outputs, trained on MNIST.
# NOTE(review): no seed is set in the visible cells, and Network draws its
# initial weights and per-epoch shuffles from np.random, so the accuracies
# printed below will differ from run to run.
net = Network(sizes=[784, 30, 10])
net.SGD(
    training_data,
    epochs=100,
    mini_batch_size=20,
    eta=0.5,
    test_data=test_data,
)


Epoch 0: 9174 / 10000
Epoch 1: 9283 / 10000
Epoch 2: 9334 / 10000
Epoch 3: 9407 / 10000
Epoch 4: 9452 / 10000
Epoch 5: 9479 / 10000
Epoch 6: 9504 / 10000
Epoch 7: 9524 / 10000
Epoch 8: 9529 / 10000
Epoch 9: 9547 / 10000
Epoch 10: 9559 / 10000
Epoch 11: 9559 / 10000
Epoch 12: 9578 / 10000
Epoch 13: 9586 / 10000
Epoch 14: 9584 / 10000
Epoch 15: 9601 / 10000
Epoch 16: 9604 / 10000
Epoch 17: 9596 / 10000
Epoch 18: 9603 / 10000
Epoch 19: 9616 / 10000
Epoch 20: 9610 / 10000
Epoch 21: 9620 / 10000
Epoch 22: 9632 / 10000
Epoch 23: 9635 / 10000
Epoch 24: 9628 / 10000
Epoch 25: 9632 / 10000
Epoch 26: 9640 / 10000
Epoch 27: 9636 / 10000
Epoch 28: 9634 / 10000
Epoch 29: 9636 / 10000
Epoch 30: 9641 / 10000
Epoch 31: 9650 / 10000
Epoch 32: 9640 / 10000
Epoch 33: 9639 / 10000
Epoch 34: 9645 / 10000
Epoch 35: 9640 / 10000
Epoch 36: 9654 / 10000
Epoch 37: 9646 / 10000
Epoch 38: 9656 / 10000
Epoch 39: 9656 / 10000
Epoch 40: 9653 / 10000
Epoch 41: 9655 / 10000
Epoch 42: 9647 / 10000
Epoch 43: 9655 / 1000

In [None]:
# Score the first network on the MNIST test pairs.
n_test = len(test_data)                 # total examples
num_correct = net.evaluate(test_data)   # correctly classified examples

# Report the fraction correct as a percentage with two decimals.
accuracy = num_correct / n_test
print(f"Test set accuracy: {accuracy:.2%}")


Test set accuracy: 96.43%


In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

# Load the train and test splits of EMNIST 'balanced' (47 classes) as
# (image, label) pairs, together with the dataset metadata.
(ds_train, ds_test), ds_info = tfds.load(
    name='emnist/balanced',
    split=['train', 'test'],
    with_info=True,
    as_supervised=True,
)

def preprocess(image, label):
    """Turn one EMNIST (image, label) pair into network-ready tensors.

    Returns the image as a float32 tensor of shape [784, 1] scaled by 1/255,
    and the label as a one-hot vector of depth 47.
    """
    # Swap the first two axes (height/width); presumably needed because the
    # TFDS EMNIST images are stored transposed -- TODO confirm visually.
    upright = tf.transpose(image, perm=[1, 0, 2])
    scaled = tf.cast(upright, tf.float32) / 255.0
    column = tf.reshape(scaled, [784, 1])
    # NOTE(review): tf.one_hot yields shape (47,), whereas the MNIST targets
    # built by one_hot() are (10, 1) columns -- confirm what the consumer expects.
    target = tf.one_hot(label, depth=47)
    return column, target

# Apply preprocess to every example, then group examples into batches.
batch_size = 20
train_ds, test_ds = (
    split.map(preprocess).batch(batch_size)
    for split in (ds_train, ds_test)
)


In [None]:
# NOTE(review): this second run trains on the MNIST `training_data` /
# `test_data` with 10 outputs again; the EMNIST pipelines (`train_ds`,
# `test_ds`, 47 classes) prepared in the previous cell are not used here.
# Confirm whether that is intended.
net2 = Network(sizes=[784, 30, 10])
net2.SGD(
    training_data,
    epochs=75,
    mini_batch_size=20,
    eta=0.5,
    test_data=test_data,
)



Epoch 0: 9140 / 10000
Epoch 1: 9260 / 10000
Epoch 2: 9326 / 10000
Epoch 3: 9363 / 10000
Epoch 4: 9400 / 10000
Epoch 5: 9431 / 10000
Epoch 6: 9448 / 10000
Epoch 7: 9475 / 10000
Epoch 8: 9481 / 10000
Epoch 9: 9487 / 10000
Epoch 10: 9494 / 10000
Epoch 11: 9503 / 10000
Epoch 12: 9521 / 10000
Epoch 13: 9513 / 10000
Epoch 14: 9538 / 10000
Epoch 15: 9517 / 10000
Epoch 16: 9537 / 10000
Epoch 17: 9537 / 10000
Epoch 18: 9554 / 10000
Epoch 19: 9540 / 10000
Epoch 20: 9547 / 10000
Epoch 21: 9549 / 10000
Epoch 22: 9565 / 10000
Epoch 23: 9556 / 10000
Epoch 24: 9569 / 10000
Epoch 25: 9569 / 10000
Epoch 26: 9542 / 10000
Epoch 27: 9565 / 10000
Epoch 28: 9567 / 10000
Epoch 29: 9573 / 10000
Epoch 30: 9578 / 10000
Epoch 31: 9566 / 10000
Epoch 32: 9563 / 10000
Epoch 33: 9576 / 10000
Epoch 34: 9570 / 10000
Epoch 35: 9578 / 10000
Epoch 36: 9576 / 10000
Epoch 37: 9571 / 10000
Epoch 38: 9578 / 10000
Epoch 39: 9590 / 10000
Epoch 40: 9576 / 10000
Epoch 41: 9576 / 10000
Epoch 42: 9591 / 10000
Epoch 43: 9599 / 1000

In [None]:
# Score the second network on the same MNIST test pairs.
n_test = len(test_data)                  # total examples
num_correct = net2.evaluate(test_data)   # correctly classified examples

# Report the fraction correct as a percentage with two decimals.
accuracy = num_correct / n_test
print(f"Test set accuracy: {accuracy:.2%}")


Test set accuracy: 95.90%
