In [1]:
import numpy as np

def sigmoid(z):
    return 1/(1 + np.exp(-z))

def initialize_parameters_zeros(n_x, n_h, n_y):
    W1 = np.zeros((n_h, n_x))
    b1 = np.zeros((n_h, 1))
    W2 = np.zeros((n_y, n_h))
    b2 = np.zeros((n_y, 1))

    parameters = {
        "W1": W1,
        "b1" : b1,
        "W2": W2,
        "b2" : b2
    }
    
    return parameters

def initialize_parameters(n_x, n_h, n_y):
    W1 = np.random.randn(n_h, n_x)
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h)
    b2 = np.zeros((n_y, 1))

    parameters = {
        "W1": W1,
        "b1" : b1,
        "W2": W2,
        "b2" : b2
    }
    return parameters

def forward_prop(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    cache = {
        "A1": A1,
        "A2": A2
    }
    return A2, cache

def calculate_mse_cost(A2, Y):
    m = Y.shape[1]
    cost = (1/m) * np.sum((A2 - Y) ** 2)
    cost = np.squeeze(cost)
    
    return cost

def backward_prop_mse(X, Y, cache, parameters):
    A1 = cache["A1"]
    A2 = cache["A2"]

    W2 = parameters["W2"]
    m = X.shape[1]

    dA2 = 2 * (A2 - Y) / m
    dZ2 = dA2 * (A2 * (1 - A2))
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2
    }

    return grads

def update_parameters(parameters, grads, learning_rate):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]

    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1
    W2 = W2 - learning_rate*dW2
    b2 = b2 - learning_rate*db2
    
    new_parameters = {
        "W1": W1,
        "W2": W2,
        "b1" : b1,
        "b2" : b2
    }

    return new_parameters

def model_general(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate, initialization_function):
    parameters = initialization_function(n_x, n_h, n_y)

    for i in range(0, num_of_iters+1):
        a2, cache = forward_prop(X, parameters)

        cost = calculate_mse_cost(a2, Y)

        grads = backward_prop_mse(X, Y, cache, parameters)

        parameters = update_parameters(parameters, grads, learning_rate)

        if(i%100 == 0):
            print('MSE cost after iteration# {:d}: {:f}'.format(i, cost))

    return parameters

def predict(X, parameters):
    a2, cache = forward_prop(X, parameters)
    yhat = a2
    yhat = np.squeeze(yhat)
    if(yhat >= 0.5):
        y_predict = 1
    else:
        y_predict = 0

    return y_predict

np.random.seed(2)

X = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
Y = np.array([[0, 1, 1, 0]])
m = X.shape[1]
n_x = 2
n_h = 2
n_y = 1
num_of_iters = 1000
learning_rate = 0.3

print("Training with parameters initialized to zeros:")
trained_parameters_zeros = model_general(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate, initialize_parameters_zeros)
print("---")
print("Training with parameters initialized to random numbers:")
trained_parameters_random = model_general(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate, initialize_parameters)


Training with parameters initialized to zeros:
MSE cost after iteration# 0: 0.250000
MSE cost after iteration# 100: 0.250000
MSE cost after iteration# 200: 0.250000
MSE cost after iteration# 300: 0.250000
MSE cost after iteration# 400: 0.250000
MSE cost after iteration# 500: 0.250000
MSE cost after iteration# 600: 0.250000
MSE cost after iteration# 700: 0.250000
MSE cost after iteration# 800: 0.250000
MSE cost after iteration# 900: 0.250000
MSE cost after iteration# 1000: 0.250000
---
Training with parameters initialized to random numbers:
MSE cost after iteration# 0: 0.326659
MSE cost after iteration# 100: 0.224119
MSE cost after iteration# 200: 0.194774
MSE cost after iteration# 300: 0.176018
MSE cost after iteration# 400: 0.160702
MSE cost after iteration# 500: 0.145844
MSE cost after iteration# 600: 0.129975
MSE cost after iteration# 700: 0.112459
MSE cost after iteration# 800: 0.093598
MSE cost after iteration# 900: 0.075077
MSE cost after iteration# 1000: 0.059148
