In [1]:
# Import libraries
import random
import time
import numpy as np

# Helper/miscellaneous functions
from src.mnist_loader import load_data_wrapper

def sigmoid(z):
    '''The sigmoid function.'''
    return 1.0/(1.0+np.exp(-z))

def dsigmoid(z):
    '''The derivative of the sigmoid function.'''
    return sigmoid(z)*(1-sigmoid(z))

In [None]:
class Network(object): # Inherits from object
    
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        # The biases are added to every layer but the first.
        self.biases = [np.random.randn(x, 1) for x in sizes[1:]] 
        # Weights connect the layers. The first layer's size is the width, the second layer's is the height.
        self.weights = [np.random.randn(x,y) for y,x in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        '''Return the output of the network if ``a`` is input.'''
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a
    
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent.  The ``training_data`` is a list of tuples
        ``(x, y)`` representing the training inputs and the desired
        outputs.  The other non-optional parameters are
        self-explanatory.  If ``test_data`` is provided then the
        network will be evaluated against the test data after each
        epoch, and partial progress printed out.  This is useful for
        tracking progress, but slows things down substantially."""
        if test_data: n_test = len(test_data)
        n = len(training_data)

        for j in range(epochs):
            time1 = time.time()

            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)

            time2 = time.time()

            if test_data:
                print(f"Epoch {j}: {self.evaluate(test_data)} / {n_test}, took {time2-time1:.2f} seconds")
            else:
                print(f"Epoch {j} complete in {time2-time1:.2f} seconds")
    
    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying
        gradient descent using backpropagation to a single mini batch.
        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
        is the learning rate."""
        # Hold updates to weights and biases
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Compute gradient for every training example and add it to nablas
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb # Why 1/len(minibatch)? - Makes it the average of the changes that each example wants to make
                       for b, nb in zip(self.biases, nabla_b)]

In [6]:
dummy_net = Network([2, 3, 1])
dummy_net.feedforward([2,5])


array([[0.71614034, 0.7944018 , 0.71613763]])

In [4]:
print(dummy_net.weights)
print(dummy_net.biases)

[array([[ 0.78518476, -0.36950739],
       [ 1.44081579,  0.08957805],
       [ 0.54986944,  0.75476107]]), array([[ 0.87810376,  0.66296562, -0.19915689]])]
[array([[-0.24724163],
       [ 1.46399988],
       [-0.50407173]]), array([[0.28837317]])]
