In [14]:
import random
import numpy as np
import time
import mnist_loader

In [None]:
# Load the three MNIST splits through the project-local loader module.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# Sanity checks: the size of every split, then the layout of the first
# training sample (input and label both expose .shape) and of the first test
# sample (input shape, label type and label value).
print(*map(len, (training_data, validation_data, test_data)))
print(training_data[0][0].shape, training_data[0][1].shape)
print(test_data[0][0].shape, type(test_data[0][1]), test_data[0][1])

50000 10000 10000
(784, 1) (10, 1)
(784, 1) <class 'numpy.int64'> 7


In [16]:
class Network(object):
    """Fully connected feed-forward network with sigmoid activations.

    The network is trained with mini-batch stochastic gradient descent, with
    gradients computed by backpropagation.  The output error used by
    ``back_prop`` is ``a - y`` (see ``cost_derivative``), i.e. the gradient of
    the quadratic cost ``0.5 * ||a - y||^2``.

    Attributes:
        num_layers: number of layers, input layer included.
        sizes: the layer sizes passed to the constructor.
        biases: one ``(n, 1)`` array per non-input layer.
        weights: one ``(m, n)`` array per pair of adjacent layers, where ``n``
            is the size of the earlier layer and ``m`` the size of the later.
    """

    def __init__(self, sizes):
        """Create a network whose parameters are standard-normal samples.

        Args:
            sizes: sequence of layer sizes, input layer first
                (e.g. ``[784, 30, 10]``).
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Biases are drawn before weights; keep this order so a seeded
        # np.random stream yields the same initial parameters as before.
        self.biases = [np.random.randn(n, 1) for n in sizes[1:]]
        self.weights = [np.random.randn(m, n) for n, m in zip(sizes[:-1], sizes[1:])]

    def feed_forward(self, a):
        """Return the output activation of the network for input ``a``.

        Args:
            a: input activation; it is combined with each layer as
                ``np.dot(w, a) + b`` where every ``b`` has shape ``(n, 1)``.
        """
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        One line is printed per epoch: the time spent on the parameter
        updates and, when ``test_data`` is given and non-empty, the accuracy
        on it (evaluation time is not included in the reported duration).

        Args:
            training_data: iterable of ``(x, y)`` pairs.  It is copied once,
                so the caller's container is never reordered.
            epochs: number of passes over the training data.
            mini_batch_size: number of samples per parameter update; the last
                batch of an epoch may be smaller.
            eta: learning rate.
            test_data: optional iterable of ``(x, label)`` pairs used to
                report accuracy after every epoch.

        Raises:
            ValueError: if ``mini_batch_size`` is not positive.
        """
        if mini_batch_size <= 0:
            raise ValueError("mini_batch_size must be a positive integer")

        # Shuffle a private copy: shuffling the caller's list in place made
        # every later use of that list depend on how often SGD had run.
        # Materialising also lets plain iterators (e.g. zip objects) be used.
        samples = list(training_data)
        n = len(samples)
        if test_data is not None:
            test_data = list(test_data)

        for j in range(epochs):
            time1 = time.time()
            random.shuffle(samples)
            for k in range(0, n, mini_batch_size):
                self.update_mini_batch(samples[k:k + mini_batch_size], eta)
            time2 = time.time()
            # An empty test set is treated like "no test set" instead of
            # failing with a division by zero after the epoch was trained.
            if test_data:
                acc = 100 * self.evaluate_network(test_data)
                print(f"Epoch {j}, acc: {acc:.2f}% took {time2 - time1:.2f} sec")
            else:
                print(f"Epoch {j} took {time2 - time1:.2f} sec")

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step computed from ``mini_batch``.

        The per-sample gradients from ``back_prop`` are summed and the
        parameters are moved by ``eta / len(mini_batch)`` times that sum.
        An empty batch leaves the parameters unchanged.

        Args:
            mini_batch: sized iterable of ``(x, y)`` pairs.
            eta: learning rate.
        """
        if len(mini_batch) == 0:
            return

        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.back_prop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        step = eta / len(mini_batch)
        self.weights = [w - step * gw for w, gw in zip(self.weights, nabla_w)]
        self.biases = [b - step * gb for b, gb in zip(self.biases, nabla_b)]

    def back_prop(self, x, y):
        """Return the gradient of the cost for a single sample.

        Args:
            x: input activation of the sample.
            y: desired output activation of the sample.

        Returns:
            ``(nabla_b, nabla_w)``: two lists of arrays laid out like
            ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass, remembering every weighted input z and activation.
        activation = x
        activations = [x]
        zs = []
        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: error of the output layer first ...
        delta = self.cost_derivative(activations[-1], y) * d_sigmoid(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # ... then propagated towards the input; l counts layers from the end
        # (l = 2 is the second-to-last layer).
        for l in range(2, self.num_layers):
            sp = d_sigmoid(zs[-l])
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def evaluate_network(self, test_data):
        """Return the fraction of samples in ``test_data`` classified correctly.

        The prediction for a sample is the index of the largest output
        activation; it is compared with ``int(label)``.

        Args:
            test_data: sized iterable of ``(x, label)`` pairs.

        Raises:
            ZeroDivisionError: if ``test_data`` is empty.
        """
        correct = sum(
            int(np.argmax(self.feed_forward(x))) == int(y) for x, y in test_data
        )
        return correct / len(test_data)

    def cost_derivative(self, activations, y):
        """Return ``activations - y``, the cost gradient w.r.t. the output."""
        return activations - y


def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise.

    The argument is clipped to [-500, 500] before exponentiation so that
    ``np.exp`` never overflows.
    """
    bounded = np.clip(x, -500, 500)
    return 1.0 / (1.0 + np.exp(-bounded))

def d_sigmoid(x):
    """Derivative of the logistic function: sigmoid(x) * (1 - sigmoid(x))."""
    activation = sigmoid(x)
    return activation * (1.0 - activation)

In [17]:
# Seed both random sources so that a fresh-kernel run is reproducible:
# np.random draws the initial weights and biases, and the stdlib `random`
# module shuffles the training samples at the start of every epoch.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Layer sizes: 784 inputs, one hidden layer of 30 units, 10 outputs.
net = Network([784, 30, 10])
net.SGD(training_data, epochs=10, mini_batch_size=10, eta=3.0, test_data=test_data)

Epoch 0, acc: 90.18%                       took 8.20 sec
Epoch 1, acc: 92.45%                       took 8.21 sec
Epoch 2, acc: 93.14%                       took 8.83 sec
Epoch 3, acc: 93.58999999999999%                       took 8.66 sec
Epoch 4, acc: 93.77%                       took 8.76 sec
Epoch 5, acc: 93.95%                       took 8.11 sec
Epoch 6, acc: 94.14%                       took 8.08 sec
Epoch 7, acc: 94.28%                       took 8.14 sec
Epoch 8, acc: 94.21000000000001%                       took 7.99 sec
Epoch 9, acc: 94.44%                       took 8.70 sec
