In [128]:
import numpy as np

In [129]:
def fwb(W, B, X):

    return np.dot(W, X) + B

In [130]:
def sigmoid(z):
    
    exp = np.exp(-z)
    g = 1 / (1 + exp)
    return g

In [170]:
def compute_layer(W, B, X):

    z = fwb(W, B, X)
    return sigmoid(z), z

In [141]:
def cost_function(y_pred, y):

    m = y.shape[0]
    cost = -np.sum( y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)) / m
    return cost

In [198]:
def compute_derivatives_for_hidden_layer(a1, a2, z2, X, y, W2):

    dz2 = a2 - y
    
    dz1 = (W2.T @ dz2) * a1 * ( 1 - a1)

    dw1 = np.outer(dz1, X)
    db1 = dz1

    return dw1, db1


In [196]:
def compute_derivatives_for_output_layer(a1, a2, X, y):

    dz2 = a2 - y
    dw2 = np.outer(dz2, a1)
    db2 = dz2
    return dw2, dz2

In [234]:
def compute_model(W1, B1, W2, B2, X):

    a1, z1 = compute_layer(W1, B1, X)
    a2, z2 = compute_layer(W2, B2, a1)

    return a1, a2, z1, z2

In [206]:
def compute_graient_descnet(a1, a2, z1, z2, W1, W2, B1, B2, X, y, alpha):

    dw1, db1 = compute_derivatives_for_hidden_layer(a1, a2, z2, X, y, W2)
    dw2, db2 = compute_derivatives_for_output_layer(a1, a2, X, y)

    W1 -= alpha * dw1
    B1 -= alpha * db1
    W2 -= alpha * dw2
    B2 -= alpha * db2
    
    
    return W1, B1, W2, B2

In [221]:
X = np.array([0, 1], dtype=float)
y = np.array([1], dtype=float)

W1 = np.array([

    [1, 1],
    [1, 1],
], dtype=float)

B1 = np.array([0, 0], dtype=float)

W2 = np.array([
    [1, 1]
], dtype=float)

B2 = np.array([0], dtype=float)

alpha = 0.01

In [257]:

epochs = 40000

for i in range(0, epochs):

    a1, a2, z1, z2 = compute_model(W1, B1, W2, B2, X)

    compute_graient_descnet(a1, a2, z1, z2, W1, W2, B1, B2, X, y, alpha=0.001)

    print(cost_function(a2, y), "------", epochs - i -1)


0.0026038182095427173 ------ 39999
0.002603799196341924 ------ 39998
0.0026037801834149854 ------ 39997
0.002603761170762124 ------ 39996
0.0026037421583832284 ------ 39995
0.002603723146278076 ------ 39994
0.002603704134447112 ------ 39993
0.0026036851228898907 ------ 39992
0.0026036661116066355 ------ 39991
0.002603647100597457 ------ 39990
0.00260362808986191 ------ 39989
0.0026036090794006633 ------ 39988
0.002603590069212937 ------ 39987
0.002603571059299287 ------ 39986
0.002603552049659603 ------ 39985
0.002603533040293773 ------ 39984
0.0026035140312017976 ------ 39983
0.002603495022383565 ------ 39982
0.002603476013839409 ------ 39981
0.002603457005568885 ------ 39980
0.0026034379975723265 ------ 39979
0.002603418989849845 ------ 39978
0.0026033999824009945 ------ 39977
0.002603380975225887 ------ 39976
0.0026033619683248567 ------ 39975
0.0026033429616974576 ------ 39974
0.0026033239553440238 ------ 39973
0.0026033049492644443 ------ 39972
0.0026032859434583847 ------ 39971
0

In [265]:
def compute_prediction(X):

    a1, a2, z1, z2 = compute_model(W1, B1, W2, B2, X)

    print(a2, z2)

    if (a2 >= 0.5):

        a2 = 1
    else:

        a2 = 0

    return a2

In [274]:
compute_prediction([1, 0])

[0.99727061] [5.90094457]


1