In [56]:
import numpy as np

In [57]:
class Layer:
    """Plain record describing one network layer.

    Attributes:
        input_dim: number of inputs the layer consumes.
        output_dim: number of outputs the layer produces.
        activation: name of the activation applied by the layer
            (looked up by name elsewhere in the notebook).
    """

    def __init__(self, input_dim, output_dim, activation):
        # Values are stored exactly as given; no validation happens here.
        self.input_dim, self.output_dim = input_dim, output_dim
        self.activation = activation

In [58]:
# Layer stack from input to output. Each row is (input_dim, output_dim, activation);
# every layer's input_dim equals the previous layer's output_dim.
NN_ARCHITECTURE = [
    Layer(input_dim=n_in, output_dim=n_out, activation=act_name)
    for n_in, n_out, act_name in (
        (2, 25, "relu"),
        (25, 50, "relu"),
        (50, 50, "relu"),
        (50, 25, "relu"),
        (25, 1, "sigmoid"),
    )
]

In [59]:
def init_layers(nn_architecture, seed=99):
    """Create the initial weights and biases for every layer.

    Args:
        nn_architecture: sequence of layer descriptions exposing
            ``input_dim`` and ``output_dim`` attributes.
        seed: seed for the random generator used for initialisation.

    Returns:
        dict mapping ``"W<k>"`` to an array of shape
        ``(output_dim, input_dim)`` and ``"b<k>"`` to an array of shape
        ``(output_dim, 1)``, where ``k`` is the 1-based layer index. All
        entries are standard-normal draws scaled by 0.1.
    """
    # A private RandomState yields exactly the same stream as
    # np.random.seed(seed) followed by np.random.randn(...), but does not
    # reseed the process-wide generator as a side effect.
    rng = np.random.RandomState(seed)
    params_values = {}
    for layer_idx, layer in enumerate(nn_architecture, 1):
        params_values[f"W{layer_idx}"] = rng.randn(layer.output_dim, layer.input_dim) * 0.1
        params_values[f"b{layer_idx}"] = rng.randn(layer.output_dim, 1) * 0.1
    return params_values

In [60]:
def sigmoid(Z):
    """Element-wise logistic function ``1 / (1 + exp(-Z))``.

    Evaluated in a numerically stable way: the exponential is only ever
    taken of a non-positive number, so large-magnitude negative inputs do
    not overflow.

    Args:
        Z: scalar or array-like of pre-activation values.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``Z`` (a NumPy scalar
        for scalar input).
    """
    Z = np.asarray(Z)
    decay = np.exp(-np.abs(Z))  # always in (0, 1], cannot overflow
    # For Z >= 0 use 1 / (1 + e^-Z); for Z < 0 use the algebraically
    # identical e^Z / (1 + e^Z). Both branches are finite everywhere.
    out = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as arrays.
    return out[()]


def sigmoid_backward(dA, Z):
    """Push the gradient ``dA`` back through a sigmoid activation.

    Multiplies ``dA`` by the sigmoid derivative ``s * (1 - s)``, where
    ``s = sigmoid(Z)``.
    """
    s = sigmoid(Z)
    scaled = dA * s
    return scaled * (1 - s)


def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    rectified = np.maximum(0, Z)
    return rectified


def relu_backward(dA, Z):
    """Push the gradient ``dA`` back through a ReLU activation.

    Returns a copy of ``dA`` in which every position where ``Z <= 0`` is
    set to zero; ``dA`` itself is left unmodified.
    """
    grad = np.array(dA, copy=True)
    inactive = Z <= 0  # units that ReLU clamped in the forward pass
    grad[inactive] = 0
    return grad


# Lookup tables from an activation name to its forward function and to the
# matching backward (gradient) function.
activation_func = dict(relu=relu, sigmoid=sigmoid)
backward_activation_func = dict(relu=relu_backward, sigmoid=sigmoid_backward)

In [61]:
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation='relu'):
    """Run one layer forward.

    Computes ``Z = W_curr . A_prev + b_curr`` and applies the activation
    registered under ``activation`` in ``activation_func``.

    Returns:
        Tuple ``(A, Z)``: the activated output and the pre-activation values.

    Raises:
        Exception: if ``activation`` is not a registered activation name.
    """
    pre_activation = np.dot(W_curr, A_prev) + b_curr
    if activation in activation_func:
        apply_activation = activation_func[activation]
        return apply_activation(pre_activation), pre_activation
    raise Exception('Non-supported activation function')

In [62]:
def full_forward_propagation(X, params_values, nn_architecture):
    """Run the input through every layer in order.

    Args:
        X: network input, fed to the first layer as-is.
        params_values: dict holding ``"W<k>"`` / ``"b<k>"`` for each 1-based
            layer index ``k``.
        nn_architecture: sequence of layers exposing an ``activation`` name.

    Returns:
        Tuple ``(output, cache)``. ``cache["A<k-1>"]`` is the input that was
        fed to layer ``k`` and ``cache["Z<k>"]`` is that layer's
        pre-activation.
    """
    cache = {}
    activations = X

    for layer_no, layer in enumerate(nn_architecture, start=1):
        layer_input = activations
        act_name = layer.activation
        weights = params_values[f'W{layer_no}']
        bias = params_values[f'b{layer_no}']
        activations, pre_activation = single_layer_forward_propagation(
            layer_input, weights, bias, act_name)
        # Inputs are stored under the 0-based index, pre-activations under the 1-based one.
        cache[f'A{layer_no - 1}'] = layer_input
        cache[f'Z{layer_no}'] = pre_activation
    return activations, cache

In [63]:
def get_cost_value(Y_hat, Y, eps=1e-15):
    """Mean binary cross-entropy between predictions and labels.

    Args:
        Y_hat: predicted probabilities, one sample per column.
        Y: labels with the same layout as ``Y_hat``.
        eps: predictions are clipped to ``[eps, 1 - eps]`` before the
            logarithm so that saturated outputs (exactly 0 or 1) give a
            finite cost instead of ``inf`` / ``nan``. Predictions strictly
            inside that interval are unaffected.

    Returns:
        The cost with length-1 axes squeezed away (a 0-d array when
        ``Y_hat`` has a single row).
    """
    m = Y_hat.shape[1]
    safe_Y_hat = np.clip(Y_hat, eps, 1 - eps)
    cost = -1 / m * (np.dot(Y, np.log(safe_Y_hat).T) + np.dot(1 - Y, np.log(1 - safe_Y_hat).T))
    return np.squeeze(cost)

In [64]:
def convert_prob_into_class(probs):
    """Threshold probabilities at 0.5 into class labels.

    Works on a copy of ``probs``: entries strictly above 0.5 become 1,
    entries at or below 0.5 become 0. The input is not modified.
    """
    labels = np.copy(probs)
    above = labels > 0.5
    at_or_below = labels <= 0.5
    labels[above] = 1
    labels[at_or_below] = 0
    return labels

In [65]:
def get_accuracy_value(Y_hat, Y):
    """Fraction of columns whose thresholded prediction matches ``Y``.

    A column counts as correct only if every row in it matches.
    """
    predicted = convert_prob_into_class(Y_hat)
    column_is_correct = (predicted == Y).all(axis=0)
    return column_is_correct.mean()

In [66]:
def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    """Backpropagate through one layer.

    Args:
        dA_curr: gradient of the cost with respect to this layer's output.
        W_curr: this layer's weight matrix.
        b_curr: this layer's bias (accepted but not used in the computation).
        Z_curr: this layer's cached pre-activation.
        A_prev: cached input that was fed to this layer.
        activation: name of the layer's activation.

    Returns:
        Tuple ``(dA_prev, dW_curr, db_curr)``: the gradient with respect to
        the layer input, the weights and the bias.

    Raises:
        Exception: if ``activation`` has no registered backward function.
    """
    n_columns = A_prev.shape[1]  # divisor used to average the gradients

    if activation in backward_activation_func:
        backward_fn = backward_activation_func[activation]
    else:
        raise Exception('Non-supported activation function')

    dZ = backward_fn(dA_curr, Z_curr)

    weight_grad = np.dot(dZ, A_prev.T) / n_columns
    bias_grad = np.sum(dZ, axis=1, keepdims=True) / n_columns
    input_grad = np.dot(W_curr.T, dZ)

    return input_grad, weight_grad, bias_grad

In [67]:

def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    """Backpropagate the binary cross-entropy loss through all layers.

    Args:
        Y_hat: network output from the forward pass.
        Y: labels; reshaped to the shape of ``Y_hat``.
        memory: forward-pass cache holding ``"A<k-1>"`` (input of layer
            ``k``) and ``"Z<k>"`` (pre-activation of layer ``k``).
        params_values: dict holding ``"W<k>"`` / ``"b<k>"`` per layer.
        nn_architecture: sequence of layers exposing an ``activation`` name.

    Returns:
        dict mapping ``"dW<k>"`` and ``"db<k>"`` to the gradients of layer
        ``k`` (1-based).
    """
    grads_values = {}

    Y = Y.reshape(Y_hat.shape)

    # Keep predictions strictly inside (0, 1): a saturated output of exactly
    # 0 or 1 would otherwise divide by zero and put inf/nan into every
    # gradient. Predictions already inside the interval are unchanged.
    eps = 1e-15
    Y_hat = np.clip(Y_hat, eps, 1 - eps)

    # Derivative of the cross-entropy loss with respect to the output.
    dA_prev = -(np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))

    # Walk the layers from last to first; layer_idx_prev is the 0-based
    # index, which is also the cache key of the layer's input.
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer.activation

        dA_curr = dA_prev

        A_prev = memory[f"A{layer_idx_prev}"]
        Z_curr = memory[f"Z{layer_idx_curr}"]

        W_curr = params_values[f"W{layer_idx_curr}"]
        b_curr = params_values[f"b{layer_idx_curr}"]

        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)

        grads_values[f"dW{layer_idx_curr}"] = dW_curr
        grads_values[f"db{layer_idx_curr}"] = db_curr

    return grads_values

In [68]:
def update(params_values, grads_values, nn_architecture, learning_rate):
    """Apply one gradient-descent step to every layer's parameters.

    The arrays stored in ``params_values`` are modified in place and the
    same dict is returned.
    """
    for layer_no, _ in enumerate(nn_architecture, start=1):
        for kind in ("W", "b"):
            params_values[f"{kind}{layer_no}"] -= learning_rate * grads_values[f"d{kind}{layer_no}"]
    return params_values

In [69]:
def train(X, Y, nn_architecture, epochs, learning_rate, verbose=False, callback=None,
          seed=2, log_every=50, return_history=False):
    """Train the network with full-batch gradient descent.

    Args:
        X: training inputs, passed to the forward pass as-is.
        Y: training labels.
        nn_architecture: sequence of layer descriptions.
        epochs: number of gradient-descent iterations.
        learning_rate: step size for the parameter update.
        verbose: when true, print cost and accuracy on logging iterations.
        callback: optional callable invoked as ``callback(i, params_values)``
            on logging iterations.
        seed: seed forwarded to ``init_layers`` (default matches the
            previously hard-coded value).
        log_every: logging / callback interval in iterations; must be
            non-zero.
        return_history: when true, also return the per-iteration cost and
            accuracy lists.

    Returns:
        ``params_values`` by default. With ``return_history=True``, the tuple
        ``(params_values, cost_history, accuracy_history)``.
    """
    params_values = init_layers(nn_architecture, seed)
    cost_history = []
    accuracy_history = []

    for i in range(epochs):
        Y_hat, cache = full_forward_propagation(X, params_values, nn_architecture)
        # Metrics describe the parameters *before* this iteration's update.
        cost = get_cost_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)
        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)

        if i % log_every == 0:
            if verbose:
                print("Iteration: {:05} - cost: {:.5f} - accuracy: {:.5f}".format(i, cost, accuracy))
            if callback is not None:
                callback(i, params_values)

    if return_history:
        return params_values, cost_history, accuracy_history
    return params_values

In [70]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

In [71]:
# Size of the generated data set (number of samples).
N_SAMPLES = 1_000
# Fraction of the samples held out as the test set.
TEST_SIZE = 0.1

In [72]:
# Generate the two-moons data set, then split it into train and test parts.
# Both calls use fixed random_state values.
X, y = make_moons(n_samples=N_SAMPLES, noise=0.2, random_state=100)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=42,
)

In [None]:
# Training
# Inputs are transposed so samples run along columns; labels become a single row.
params_values = train(
    X_train.T,
    y_train.reshape(-1, 1).T,
    NN_ARCHITECTURE,
    10000,
    0.01,
)

In [None]:
# Prediction
# Forward pass on the test inputs (samples along columns); the cache is discarded.
Y_test_hat, _ = full_forward_propagation(X_test.T, params_values, NN_ARCHITECTURE)

In [None]:
# Accuracy achieved on the test set
# Labels are reshaped to a single row to match the layout of the predictions.
y_test_row = y_test.reshape(-1, 1).T
acc_test = get_accuracy_value(Y_test_hat, y_test_row)
print("Test set accuracy: {:.2f} - David".format(acc_test))