Making some an awesome neural network

In [69]:
import numpy as np
import random
import math

In [70]:
weights = [None]
biases = [None]
activations = []

layers = [5,4,5,3]
weighted_inputs = [None] # The layer[0] doesn't have weights or biases. Therefore not a weighted input.

e = 2.7182818284590452353602874713527

In [71]:
def create_random_matrix(rows, columns, max, min=0):
    matrix = np.random.uniform(min, max, size=(rows, columns))
    return matrix

def create_random_vector(rows, max, min=0):
    vector = np.random.uniform(min, max, size=(rows,))
    return vector

In [72]:
def generate_network():
    # We start at one, because this loop handles the previous layer as well.
    for i in range(1,len(layers)):
        
        previous_layer_size = layers[i-1]
        current_layer_size = layers[i]

        # Generate the weights for layer i
        matrix = create_random_matrix(
            current_layer_size,
            previous_layer_size, 
            1,
            -1
        )
        weights.append(matrix)

        # Generate the biases for layer i
        vector = create_random_vector(
            current_layer_size,
            1,
            -1
        )
        biases.append(vector)

In [73]:
import numpy as np
# My activation function of choice. Sigmoid returns a value between 1 and 0.
def sigmoid(x):
    return 1 / (1 + e**(-x))

# Applies the sigmoid function to every value in the vector.
def vector_sigmoid(vector):
    sig_vector = np.vectorize(sigmoid)(vector)
    return sig_vector

# Derivative of the sigmoid function.
def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))


In [74]:
# Calculates the values of a single layer.
def calculate_layer(layer):
    # z(L) = b(L) + W(L) * a(L-1)
    weighted_input = np.add(biases[layer], np.dot(weights[layer], activations[layer - 1]))
    weighted_inputs.append(weighted_input)
    return vector_sigmoid(weighted_input)


# Loops through every layer and calculates their activation values.
def calculate_output(input_activation):
    global activations
    activations.append(input_activation)

    for i in range(1,len(layers)):
        activations.append(calculate_layer(i))

    return activations[-1]

In [75]:
# Calculates the cost of the network.
def vector_cost(a, y):
    # C = sum((a-y)^2)
    sum = 0
    for i in range(len(a)-1):
        sum += (a[i]-y[i])**2
    return sum

In [76]:
# Calculates the derivative of the chain rule.
def calculate_chain_derivative(is_parent, layer, row, column):
    sum = 0
    if layer == len(activations) - 1:
        cost_derivative = 2 * (activations[layer][row] - expected_output[row])
        sum = cost_derivative
    else:
        for i in range(len(activations[layer+1])):
            sum += calculate_weight_derivative(False, layer + 1, i, row)
    weighted_input = weighted_inputs[layer][row]
    activation_derivative = sigmoid_derivative(weighted_input)
    if is_parent:
        return sum * activation_derivative
    else:
        weighted_input_derivative = weights[layer][row][column]
    return sum * activation_derivative * weighted_input_derivative

In [77]:
# Calculates the derivative of the bias.
def calculate_bias_derivative(is_parent, layer, row, column): # Literally just a useless function.
    chain_derivative = calculate_chain_derivative(is_parent, layer, row, column)
    return chain_derivative

# Calculates the derivative of the weight.
def calculate_weight_derivative(is_parent, layer, row, column):
    chain_derivative = calculate_chain_derivative(is_parent, layer, row, column)
    return chain_derivative * activations[layer-1][column]

In [78]:
weight_derivatives = []
for layer in range(1,len(weights)):
    # Matrix
    matrix = []
    for row in range(len(weights[layer])):
        # Vector
        vector = []
        for column in range(len(weights[layer][row])):
            weight_derivative = calculate_weight_derivative(
                True,
                layer,
                row,
                column
            )
            vector.append(weight_derivative)
        matrix.append(vector)
    weight_derivatives.append(matrix)
    gradient.append(weight_derivatives)


In [79]:
def backpropagate():
    gradient = []

    # calculate all the weight derivatives

    # List of matrices
    weight_derivatives = []
    for layer in range(1, len(weights)):
        # Matrix
        matrix = np.zeros_like(weights[layer])
        for row in range(len(weights[layer])):
            # Vector
            vector = np.zeros_like(weights[layer][row])
            for column in range(len(weights[layer][row])):
                weight_derivative = calculate_weight_derivative(
                    True,
                    layer,
                    row,
                    column
                )
                vector[column] = weight_derivative
            matrix[row] = vector
        weight_derivatives.append(matrix)
    gradient.append(weight_derivatives)

    # calculate all the bias derivatives

    # List of vectors
    bias_derivatives = []
    for layer in range(1, len(biases)):
        # Vector
        vector = np.zeros_like(biases[layer])
        for row in range(len(biases[layer])):
            bias_derivative = calculate_bias_derivative(
                True,
                layer,
                row,
                column
            )
            vector[row] = bias_derivative
        bias_derivatives.append(vector)
    gradient.append(bias_derivatives)

    return gradient

In [80]:
def update_parameters(gradient,learn_rate):
    for layer in range(1,len(weights)):
        np.add(weights[layer], learn_rate * gradient[0][layer-1])
        np.add(biases[layer], learn_rate * gradient[1][layer-1])

In [81]:
# running the program
generate_network()
print(biases)
output_activation =  calculate_output([0.5, 1, 0.2, 0, 0.1])
print("Output:")
print(output_activation)
expected_output = [1, 0, 0.5]
cost = vector_cost(output_activation, expected_output);
print("Cost: " + str(cost))
gradient = backpropagate()
print("Gradient:")
print(gradient)
update_parameters(gradient, 0.1)

[None, array([ 0.19112718, -0.79442128,  0.87420348,  0.48644452]), array([-0.3551773 , -0.06607465, -0.3449589 ,  0.57839001,  0.32339057]), array([-0.09459118, -0.6258225 , -0.30991598])]
Output:
[0.46611446 0.29759792 0.38559965]
Cost: 0.3735982945811435
Gradient:
[[-0.09056774 -0.11781622 -0.12092452 -0.08380387 -0.1155792 ]
 [ 0.04240628  0.05516476  0.05662015  0.03923925  0.05411733]
 [-0.01847567 -0.02403431 -0.0246684  -0.01709585 -0.02357796]]
