# Building a neural network from scratch

In this excercise I want to build a neural network from scratch to make sure I understand all the elements properly. 
It won't be particularly good but it should at least work. 
I will test it on the MNIST data set. 

## Import Libraries

In [1]:
import numpy as np
import jacobnet as jn
#from jacobnet import layer
#from keras.datasets import mnist
#(x_train, y_train), (x_test, y_test) = mnist.load_data()
# I use keras only to handle the MNIST data set. Not to make the NN.

In [4]:
jn.layer.Layer().name

'Layer'

## Some useful functions

In [4]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))
sigmoid = np.vectorize(sigmoid)

def sigmoid_prime(x):
    return np.exp(-x)/(1 + np.exp(-x))**2
sigmoid_prime = np.vectorize(sigmoid_prime)


def loss(output, label_vec):
    L = 1/10*mp.linalg.norm(output - label_vec)**2
    return L

def loss_derivative(output):
    return 2/10*output.T

def label_to_vec(number):
    v = np.zeros((10,1))
    v[number] = 1
    return v

## Initialise weights and biases of NN

In [46]:
# input size is 28x28=784 
# hidden layer size 50
# output layer size 10
W = [0, 0]
b = [0, 0]

# hidden layer
W[0] = np.random.rand(50, 784)/(784*50)
b[0] = np.zeros((50, 1))

# output layer
W[1] = np.random.rand(10, 50)/(50*10)
b[1] = np.zeros((10, 1))

def predict(v):
    v = sigmoid(np.dot(W[0], v) + b[0])
    v = sigmoid(np.dot(W[1], v) + b[1])
    return v



In [190]:
class MyNeuralNetwork():
    def __init__(self):
        self.num_layers = 2
        self.input_size = 784
        self.nodes_per_layer = [50, 10]
        self.W = [np.random.rand(50, 784)/(784*50), np.random.rand(10, 50)/(50*10)]
        self.b = [np.zeros((50, 1)), np.zeros((10, 1))]
        self.eta = 0.1
        
    def forward(self, input_vec):
        v = input_vec
        for n_layer in range(self.num_layers):
            v = sigmoid(np.dot(self.W[n_layer], v) + self.b[n_layer])
        output_vec = v
        return output_vec
        
    def backpropagate(self, input_vec, label):
        # includes a forward pass to record the outputs of each layer. 
        v = np.ndarray.flatten(input_vec/255).reshape(784,1)
        z_store = [0]  # z_0 doesn't exist so call it 0
        a_store = [v]  # a_0 is input
        dW = self.W # these will be updated.
        db = self.b
        for n_layer in range(self.num_layers):
            v = np.dot(self.W[n_layer], v) + self.b[n_layer]
            z_store.append(v)
            v = sigmoid(v)
            a_store.append(v)
            
        # now pass backward to calculate derivative. 
        da = 2/10*(a_store[-1] - label_to_vec(label)).T
        dz = np.dot(da, np.diag(sigmoid_prime(z_store[-1]).T[0]))
        db[-1] = dz
        #dW[-1] = np.dot(dz, )
        # update last layer
        #self.b[-1] += -self.eta*db[-1]**2
        #self.W[-1] += -self.eta*dW[-1]**2
        
        # pass back derivative
        da = np.dot(dz, self.W[-1])
        dz = np.dot(da, np.diag(sigmoid_prime(z_store[-2]).T[0]))
        db[-2] = dz
       # dW[-2] = np.dot(dz, )
        # update
        #self.b[-2] += -self.eta*db[-2]**2
        #self.W[-2] += -self.eta*dW[-2]**2
        
        return db, dW
    
    def batch_train(self, x_train, y_train):
        
        N_batch = len(y_train)
        
        db = [0 for n in range(N_batch)]
        dW = [0 for n in range(N_batch)]
        
        for n in range(N_batch):
            X = x_train[n]
            Y = y_train[n]
            db[n], dW[n] = self.backpropagate(X, Y)
    
        db = sum(db)/N_batch
        dW = sum(dW)/N_batch
    


In [195]:
nn = MyNeuralNetwork()
#nn.backpropagate(x_train[0],y_train[0])
nn.batch_train(x_train[:10],y_train[:10])

ValueError: operands could not be broadcast together with shapes (10,50) (1,10) 

In [156]:
db, dW = nn.backpropagate(np.ndarray.flatten(x_train[0]/255).reshape(784,1), 2)

In [175]:
label_to_vec(y_train[0])

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.]])