In [128]:
import numpy as np

In [129]:
def fwb(W, B, X):

    return np.dot(W, X) + B

In [130]:
def sigmoid(z):
    
    exp = np.exp(-z)
    g = 1 / (1 + exp)
    return g

In [170]:
def compute_layer(W, B, X):

    z = fwb(W, B, X)
    return sigmoid(z), z

In [141]:
def cost_function(y_pred, y):

    m = y.shape[0]
    cost = -np.sum( y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)) / m
    return cost

In [198]:
def compute_derivatives_for_hidden_layer(a1, a2, z2, X, y, W2):

    dz2 = a2 - y
    
    dz1 = (W2.T @ dz2) * a1 * ( 1 - a1)

    dw1 = np.outer(dz1, X)
    db1 = dz1

    return dw1, db1


In [196]:
def compute_derivatives_for_output_layer(a1, a2, X, y):

    dz2 = a2 - y
    dw2 = np.outer(dz2, a1)
    db2 = dz2
    return dw2, dz2

In [234]:
def compute_model(W1, B1, W2, B2, X):

    a1, z1 = compute_layer(W1, B1, X)
    a2, z2 = compute_layer(W2, B2, a1)

    return a1, a2, z1, z2

In [206]:
def compute_graient_descnet(a1, a2, z1, z2, W1, W2, B1, B2, X, y, alpha):

    dw1, db1 = compute_derivatives_for_hidden_layer(a1, a2, z2, X, y, W2)
    dw2, db2 = compute_derivatives_for_output_layer(a1, a2, X, y)

    W1 -= alpha * dw1
    B1 -= alpha * db1
    W2 -= alpha * dw2
    B2 -= alpha * db2
    
    
    return W1, B1, W2, B2

In [364]:
X = np.array([
    [1, 0],
    [1, 1],
    [0, 1],
    [0, 0]
], dtype=float)

y = np.array([
    [1],
    [0],
    [1],
    [0]
], dtype=float)

W1 = np.array([

    [1, 1],
    [1, 1],
], dtype=float)

B1 = np.array([0, 0], dtype=float)

W2 = np.array([
    [1, 1]
], dtype=float)

B2 = np.array([0], dtype=float)

alpha = 0.01

In [365]:

epochs = 10000

for i in range(0, epochs):

    print("Loss == ", cost_function(a2, y), "------", "epochs == ", epochs - i -1)

    for i in range(0, 4):

        a1, a2, z1, z2 = compute_model(W1, B1, W2, B2, X[i])

        compute_graient_descnet(a1, a2, z1, z2, W1, W2, B1, B2, X[i], y[i], alpha=0.001)


Loss ==  1.105691708121604 ------ epochs ==  9999
Loss ==  0.8130396846208665 ------ epochs ==  9998
Loss ==  0.8125445557707238 ------ epochs ==  9997
Loss ==  0.8120510381391017 ------ epochs ==  9996
Loss ==  0.8115591284814361 ------ epochs ==  9995
Loss ==  0.8110688235495225 ------ epochs ==  9994
Loss ==  0.8105801200915772 ------ epochs ==  9993
Loss ==  0.8100930148522962 ------ epochs ==  9992
Loss ==  0.8096075045729161 ------ epochs ==  9991
Loss ==  0.8091235859912732 ------ epochs ==  9990
Loss ==  0.8086412558418639 ------ epochs ==  9989
Loss ==  0.8081605108559046 ------ epochs ==  9988
Loss ==  0.8076813477613902 ------ epochs ==  9987
Loss ==  0.8072037632831556 ------ epochs ==  9986
Loss ==  0.8067277541429334 ------ epochs ==  9985
Loss ==  0.8062533170594142 ------ epochs ==  9984
Loss ==  0.8057804487483051 ------ epochs ==  9983
Loss ==  0.8053091459223903 ------ epochs ==  9982
Loss ==  0.8048394052915889 ------ epochs ==  9981
Loss ==  0.8043712235630145 ----

In [356]:
def compute_prediction(X):

    a1, a2, z1, z2 = compute_model(W1, B1, W2, B2, X)

    print("a2", a2, "z2", z2)

    # print("W1", W1, "W2", W2)
    # print("B1", B1, "B2", B2)

    if (a2 >= 0.5):

        a2 = 1
    else:

        a2 = 0

    return a2

In [358]:
compute_prediction([1, 1])

a2 [0.61491055] z2 [0.46800116]


1