# Shallow Neural Network using `numpy`, Piece by Piece

This network is exactly the same as the one in [notebook 4c](https://github.com/sallamander/neural-networks-intro/blob/master/mini-books/shallow-neural-networks/04-shallow-neural-network/4c_nonlinear_nn_np.ipynb), except that each weight and bias has received its own variable, as opposed to being thrown into a vector with the other weights and biases within the same layer.

In [1]:
def learn_w_gradient_descent(xs, ys, hlayer_size=2):
    """Fit a one-input, two-hidden-node, one-output network by gradient descent.

    Every weight and bias is held in its own scalar variable. The hidden
    layer applies `sigmoid`; the output layer is linear. Training runs for a
    fixed 50000 full-batch iterations with a learning rate of 0.001, where
    each parameter is moved against the mean of its per-sample gradient of
    the squared error ``0.5 * (ys - yhats) ** 2``.

    Parameters
    ----------
    xs : inputs; assumed to be a numpy array (results of arithmetic on it
        must support ``.mean()``) -- TODO confirm shape against the caller.
    ys : targets, broadcast-compatible with `xs`.
    hlayer_size : accepted for interface compatibility but ignored; the
        hidden layer is hard-coded to two nodes.

    Returns
    -------
    The predictions computed during the final iteration, i.e. using the
    parameters as they were just before the last update.
    """
    learning_rate = 0.001

    #####################################################################
    ##### Step 1 - Initialize weights and biases with random values #####
    #####################################################################

    ##### Weights/Biases connecting Layer 1 to Layer 2 #####
    # Weights are drawn uniformly from [-1, 1); biases start at zero.
    w2_11 = 2 * np.random.random() - 1
    w2_12 = 2 * np.random.random() - 1
    b2_1, b2_2 = 0, 0

    ##### Weights/Biases connecting Layer 2 to Layer 3 #####
    w3_11 = 2 * np.random.random() - 1
    w3_21 = 2 * np.random.random() - 1
    b3_1 = 0

    for _ in range(50000):

        #####################################################
        ##### Step 2A - calculate our predicted values. #####
        #####################################################

        ##### Layer 2 calculations #####
        z2_1 = w2_11 * xs + b2_1  # weighted sum of the inputs to Node 1 in Layer 2
        z2_2 = w2_12 * xs + b2_2  # weighted sum of the inputs to Node 2 in Layer 2
        a2_1 = sigmoid(z2_1)  # activation of Node 1 in Layer 2
        a2_2 = sigmoid(z2_2)  # activation of Node 2 in Layer 2

        ##### Layer 3 (output layer) calculations #####
        # The output node is linear: no activation is applied.
        yhats = w3_11 * a2_1 + w3_21 * a2_2 + b3_1

        ###########################################
        ##### Step 2B - calculate our errors. #####
        ###########################################
        diffs = ys - yhats

        ######################################################################
        ##### Step 2C - calculate the gradient of the error with respect #####
        ##### to the weights/biases, and use it to update the coefficients. ##
        ######################################################################

        ##### Gradient/derivative calculations #####
        # All gradients are computed from the current parameters before any
        # of them is updated. a * (1 - a) is the derivative of the logistic
        # function, which `sigmoid` is assumed to compute.
        db3_1 = -diffs
        dw3_11 = db3_1 * a2_1
        dw3_21 = db3_1 * a2_2
        db2_1 = db3_1 * w3_11 * a2_1 * (1 - a2_1)
        dw2_11 = db2_1 * xs
        db2_2 = db3_1 * w3_21 * a2_2 * (1 - a2_2)
        dw2_12 = db2_2 * xs

        ##### Updates #####
        b3_1 -= learning_rate * db3_1.mean()
        w3_11 -= learning_rate * dw3_11.mean()
        w3_21 -= learning_rate * dw3_21.mean()
        # Each hidden-node bias must follow its own gradient.
        b2_1 -= learning_rate * db2_1.mean()
        w2_11 -= learning_rate * dw2_11.mean()
        b2_2 -= learning_rate * db2_2.mean()
        w2_12 -= learning_rate * dw2_12.mean()

    # This will hold our predictions from the final iteration
    return yhats