In [10]:
import numpy as np
from random import seed

In [40]:
def Linear(num_nodes_input_layer, num_nodes_output_layer):
    """Dimensions: output is rows, input is columns."""
    seed(1)
    weights = np.random.rand(num_nodes_output_layer, num_nodes_input_layer + 1)
    # biases = np.ones((weights.shape[0], 1))

    # Stack the original array and the column of ones
    # return np.column_stack((biases, weights ))
    return weights

In [42]:


def compute_forward(inputs, layer):
    ## linear combination w0x0 + w1x1 + w2x2
    linear_combinations = []
    # print(inputs, inputs.shape)
    # print(np.ones(inputs.shape[1]))
    inputs = np.c_[ np.ones(inputs.shape[0]), inputs  ]    ## add bias to the inputs
    # print(inputs)
    # print(layer)

    linear_combinations = np.matmul(inputs, layer.T) ## logits or z
    
    # print(linear_combinations)

    sigmoids = 1/(  1 + np.exp( -linear_combinations ) ) ## compute a
    # print(sigmoids)

    return linear_combinations, sigmoids

In [7]:
def binary_cross_entropy_loss(output_sigmoids, target):
    
    ## m is number of samples
    ## k is total number of output units

    log_sigmoids = np.log(output_sigmoids)
    y_times_log_sigmoids = np.matmul(log_sigmoids, target.T)
    # print(y_times_log_sigmoids)

    one_minus_log_sigmoids = 1 - np.log(output_sigmoids)
    one_minus_y_times_log_sigmoids = np.matmul(one_minus_log_sigmoids, (1 - target).T)
    # print(one_minus_y_times_log_sigmoids)
    
    return sum(sum((y_times_log_sigmoids + one_minus_y_times_log_sigmoids)))/len(target) ## sum across the output units and sum across the samples


In [43]:
def compute_backpropogation(sigmoids: list, layers: list, y, x):
    """"""
    # layers = [hidden_layer1, output_layer]
    layers.reverse()
    # sigmoids = [sigmoids2, output_sigmoids]
    sigmoids.insert(0, x)
    sigmoids.reverse()
    gradients = []


    for i, layer in enumerate(layers):
        if i == 0:
            delta = (sigmoids[i] - y)
            # print(delta.shape)
            previous_sigmoids = np.c_[ np.ones(sigmoids[i+1].shape[0]), sigmoids[i+1]  ]    ## add bias to the inputs
            # print(sigmoids[i])
            # print("__ __ __ __")
            # print(previous_sigmoids)

            ## delta has shape (samples x nodes)
            ## sigmoids has shape (samples x activations + 1)
            ## ex delta = sx2 (s = samples & nodes = 2 )
            ##    sigmoids = sx3 (s = samples & there are 2 sigmoids and 1 to multiple the bias by)
            gradients.append(np.matmul(delta.T, previous_sigmoids))


        else:
            # print("_______________________________")
            ## after the first delta, every previous delta is
            ## (delta * (weights from the previous layer)) * sigmoids or x
            delta = np.matmul(delta, layers[i-1][:,1:]) * sigmoids[i] * (1- sigmoids[i])
            previous_sigmoids = np.c_[ np.ones(sigmoids[i+1].shape[0]), sigmoids[i+1]  ]    ## add bias to the inputs
            # print(previous_sigmoids)
            gradients.append(np.matmul(delta.T, previous_sigmoids))


    # print(gradients)
    return gradients

def step(weights: list, gradients: list, lr = 0.001):
    gradients.reverse()
    for weight, gradient in zip(weights, gradients): ## so the first gradient is the input layers graidents
        yield weight - (gradient * lr)

In [46]:
### put it all together:
EPOCHS = 20
x = np.array([[0.1,0.2],
              [-.04,-0.6], 
              [0.1,0.2]]) # x1, x2

# # y = np.array([[1,0],[1,1],[0,1]])
y = np.array([[0],[1],[0]])


seed(1)
hidden_layer1 = Linear(2,2) 
seed(1)
output_layer = Linear(2,1) 

print(hidden_layer1, output_layer)
for epoch in range(EPOCHS):

    z2, sigmoids2 = compute_forward(x, hidden_layer1)

    z_output, output_sigmoids = compute_forward(sigmoids2, output_layer)

    loss = binary_cross_entropy_loss(output_sigmoids, y)


    # l2_gradients = compute_backpropogation(output_sigmoids, y, sigmoids2)


    # l1_gradients = compute_backpropogation(sigmoids2, y, x)

    # output_layer1 = step(hidden_layer1, l1_gradients, 0.1)

    gradients = compute_backpropogation(sigmoids = [sigmoids2, output_sigmoids], layers = [hidden_layer1, output_layer], y=y, x=x)
    hidden_layer1, output_layer = step(weights = [hidden_layer1, output_layer], gradients = gradients, lr = 0.05)
    # print(gradients)
    

    print(f"----------------------epoch: {epoch}-----------------------")
    print(f"predictions:\n{(output_sigmoids > 0.5).astype(int)}\nActual:\n{y}, \n {output_sigmoids}")
    print(f"error: {loss}")
    print(output_sigmoids)

[[0.55382914 0.85385375 0.19732555]
 [0.05187953 0.68335227 0.00962818]] [[0.72773258 0.49810198 0.82095659]]
----------------------epoch: 0-----------------------
predictions:
[[1]
 [1]
 [1]]
Actual:
[[0]
 [1]
 [0]], 
 [[0.81663454]
 [0.80852021]
 [0.81663454]]
error: 2.2058922656628943
[[0.81663454]
 [0.80852021]
 [0.81663454]]
----------------------epoch: 1-----------------------
predictions:
[[1]
 [1]
 [1]]
Actual:
[[0]
 [1]
 [0]], 
 [[0.79659073]
 [0.78865898]
 [0.79659073]]
error: 2.2307499203998145
[[0.79659073]
 [0.78865898]
 [0.79659073]]
----------------------epoch: 2-----------------------
predictions:
[[1]
 [1]
 [1]]
Actual:
[[0]
 [1]
 [0]], 
 [[0.77603028]
 [0.76836968]
 [0.77603028]]
error: 2.256870593427961
[[0.77603028]
 [0.76836968]
 [0.77603028]]
----------------------epoch: 3-----------------------
predictions:
[[1]
 [1]
 [1]]
Actual:
[[0]
 [1]
 [0]], 
 [[0.75516558]
 [0.74785476]
 [0.75516558]]
error: 2.284060992023609
[[0.75516558]
 [0.74785476]
 [0.75516558]]
----