In [1]:
import numpy as np
import copy

In [2]:
def load_planar_dataset():
    """Generate the deterministic two-class "flower" dataset.

    The global NumPy RNG is re-seeded with 1 on every call, so the result is
    always the same (and the global random state is reset as a side effect).

    Returns:
        X: float array of shape (2, 400); each column is one 2-D point.
        Y: uint8 array of shape (1, 400); label 0 (red) for the first 200
           columns and 1 (blue) for the last 200.
    """
    np.random.seed(1)

    m = 400  # total number of examples
    D = 2  # dimensionality of each point
    a = 4  # maximum ray of the flower
    N = m // 2  # number of points per class

    X = np.zeros((m, D))  # one example per row until the final transpose
    Y = np.zeros((m, 1), dtype='uint8')  # labels (0 for red, 1 for blue)

    for label in range(2):
        rows = slice(N * label, N * (label + 1))
        # Draw order matters for reproducibility: angle noise, then radius noise.
        theta = np.linspace(label * 3.12, (label + 1) * 3.12, N) + np.random.randn(N) * 0.2
        radius = a * np.sin(4 * theta) + np.random.randn(N) * 0.2
        X[rows] = np.column_stack((radius * np.sin(theta), radius * np.cos(theta)))
        Y[rows] = label

    # Transpose so that examples are columns.
    return X.T, Y.T


X, Y = load_planar_dataset()

In [3]:
# Sanity check of the dataset layout before building the network.
for label, array in (("X shape: ", X), ("Y shape: ", Y)):
    print(label, array.shape)

X shape:  (2, 400)
Y shape:  (1, 400)


In [47]:
def layer_sizes(X, Y):
    """Return the input and output layer sizes as a tuple ``(n_x, n_y)``.

    n_x is the number of rows of X and n_y is the number of rows of Y.
    """
    input_size, output_size = X.shape[0], Y.shape[0]
    return input_size, output_size

In [48]:
def initialize_parameters(n_x, n_h, n_y):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from the global NumPy RNG (standard normal scaled by
    0.01); biases start at zero. W1 is drawn before W2.

    Args:
        n_x: size of the input layer.
        n_h: size of the hidden layer.
        n_y: size of the output layer.

    Returns:
        dict with 'W1' (n_h, n_x), 'b1' (n_h, 1), 'W2' (n_y, n_h), 'b2' (n_y, 1).
    """
    scale = 0.01
    # Dict values are evaluated in order, so W1 is still sampled before W2.
    return {
        'W1': scale * np.random.randn(n_h, n_x),
        'b1': np.zeros((n_h, 1)),
        'W2': scale * np.random.randn(n_y, n_h),
        'b2': np.zeros((n_y, 1)),
    }

In [49]:
def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-Z)), computed stably.

    The direct formula evaluates exp(-Z), which overflows (and emits a
    RuntimeWarning) for large negative Z. This version only ever evaluates
    exp(-|Z|), which lies in [0, 1] and cannot overflow.

    Args:
        Z: scalar or array-like of real numbers.

    Returns:
        Array (or NumPy scalar for scalar input) of the same shape as Z with
        values in [0, 1].
    """
    Z = np.asarray(Z)
    if not np.issubdtype(Z.dtype, np.floating):
        # Integer inputs: switch to float so negation cannot wrap around.
        Z = Z.astype(float)

    decay = np.exp(-np.abs(Z))
    # Z >= 0: 1 / (1 + exp(-Z));  Z < 0: exp(Z) / (1 + exp(Z)).
    numerator = np.where(Z >= 0, 1.0, decay)
    A = numerator / (1.0 + decay)
    return A

In [50]:
def forward_propagation(X, parameters):
    """Run one forward pass: tanh hidden layer followed by a sigmoid output.

    Args:
        X: input matrix passed to the first layer (left-multiplied by W1).
        parameters: dict holding 'W1', 'b1', 'W2' and 'b2'.

    Returns:
        A2: the sigmoid activation of the output layer.
        cache: dict with the intermediate values 'Z1', 'A1', 'Z2', 'A2'.
    """
    W1, b1, W2, b2 = (parameters[key] for key in ('W1', 'b1', 'W2', 'b2'))

    # Hidden layer: affine transform then tanh.
    hidden_linear = W1 @ X + b1
    hidden_activation = np.tanh(hidden_linear)

    # Output layer: affine transform then sigmoid.
    output_linear = W2 @ hidden_activation + b2
    output_activation = sigmoid(output_linear)

    cache = {
        "Z1": hidden_linear,
        "A1": hidden_activation,
        "Z2": output_linear,
        "A2": output_activation,
    }
    return output_activation, cache

In [51]:
def compute_cost(A2, Y):
    """Mean binary cross-entropy between activations A2 and labels Y.

    A2 is clipped to [eps, 1 - eps] before taking logarithms. Without this, a
    saturated activation (exactly 0 or 1) produces log(0) = -inf and then
    0 * -inf = nan, even when the prediction is perfectly correct.

    Args:
        A2: array of shape (1, m) with predicted probabilities.
        Y: array of 0/1 labels broadcastable against A2.

    Returns:
        The cost averaged over the m columns of A2, as a NumPy float scalar.
    """
    eps = 1e-15
    m = A2.shape[1]
    # Work in float64 so that 1 - eps stays distinguishable from 1.
    A2 = np.clip(np.asarray(A2, dtype=float), eps, 1 - eps)
    cost = -np.sum((Y * np.log(A2)) + ((1 - Y) * np.log(1 - A2))) / m
    return cost

In [52]:
def backward_propagation(parameters, cache, X, Y):
    """Compute gradients for the tanh-hidden / sigmoid-output network.

    Args:
        parameters: dict; only 'W2' is read.
        cache: dict; 'A1' and 'A2' from the forward pass are read.
        X: input matrix; its number of columns is used as the example count m.
        Y: labels, subtracted element-wise from A2.

    Returns:
        dict with the gradients 'dW1', 'db1', 'dW2', 'db2', each averaged
        over the m examples.
    """
    n_examples = X.shape[1]
    hidden_activation, output_activation = cache['A1'], cache['A2']
    output_weights = parameters['W2']

    # Output layer error and its weight/bias gradients.
    output_delta = output_activation - Y
    grad_W2 = (output_delta @ hidden_activation.T) / n_examples
    grad_b2 = output_delta.sum(axis=1, keepdims=True) / n_examples

    # Back-propagate through tanh, whose derivative is 1 - A1^2.
    tanh_derivative = 1 - np.power(hidden_activation, 2)
    hidden_delta = (output_weights.T @ output_delta) * tanh_derivative
    grad_W1 = (hidden_delta @ X.T) / n_examples
    grad_b1 = hidden_delta.sum(axis=1, keepdims=True) / n_examples

    return {"dW1": grad_W1, "db1": grad_b1, "dW2": grad_W2, "db2": grad_b2}

In [53]:
def update_parameters(parameters, grads, learning_rate=1.2):
    """Apply one gradient-descent step and return the new parameters.

    Each of 'W1', 'b1', 'W2', 'b2' is replaced by
    ``value - learning_rate * gradient`` where the gradient is stored in
    ``grads`` under the same name prefixed with 'd'. The input dicts and
    arrays are not modified.

    Args:
        parameters: dict with 'W1', 'b1', 'W2', 'b2'.
        grads: dict with 'dW1', 'db1', 'dW2', 'db2'.
        learning_rate: step size of the update.

    Returns:
        A new dict with the updated 'W1', 'b1', 'W2', 'b2'.
    """
    names = ('W1', 'b1', 'W2', 'b2')
    return {
        name: parameters[name] - learning_rate * grads['d' + name]
        for name in names
    }

In [54]:
def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False, learning_rate=1.2):
    """Train the two-layer network with batch gradient descent.

    The global NumPy RNG is re-seeded with 3 on every call, so the initial
    weights (and therefore the whole run) are deterministic for a given
    input; note that this also resets the caller's global random state.

    Args:
        X: input matrix; rows are features, columns are examples.
        Y: label matrix with one column per example.
        n_h: number of units in the hidden layer.
        num_iterations: number of gradient-descent steps to run.
        print_cost: if True, print the cost every 1000 iterations.
        learning_rate: step size forwarded to ``update_parameters``. The
            default of 1.2 matches that function's own default.

    Returns:
        dict with the learned 'W1', 'b1', 'W2', 'b2'.
    """
    np.random.seed(3)
    n_x, n_y = layer_sizes(X, Y)

    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        grads = backward_propagation(parameters, cache, X, Y)

        # The cost is only needed for reporting, so compute it on demand. It
        # is taken from A2, i.e. before this iteration's parameter update.
        if print_cost and i % 1000 == 0:
            cost = compute_cost(A2, Y)
            print(f"cost at iteration {i} = {cost}")

        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

    return parameters

In [55]:
def predict(parameters, X):
    """Classify each column of X with the trained network.

    Runs a forward pass and thresholds the output activation: values
    strictly greater than 0.5 become 1, everything else becomes 0.

    Args:
        parameters: dict of network parameters as used by forward_propagation.
        X: input matrix.

    Returns:
        Integer array of 0/1 predictions with the same shape as the output
        activation.
    """
    probabilities, _ = forward_propagation(X, parameters)
    is_positive = np.greater(probabilities, 0.5)
    return is_positive.astype(int)

In [56]:
# Train a network with 4 hidden units and report the cost while training.
parameters = nn_model(
    X,
    Y,
    n_h=4,
    num_iterations=10000,
    print_cost=True,
)

cost at iteration 0 = 0.6931621661402946
cost at iteration 1000 = 0.2586250682869045
cost at iteration 2000 = 0.23933351654583135
cost at iteration 3000 = 0.23080163442975743
cost at iteration 4000 = 0.22552802708176348
cost at iteration 5000 = 0.22184467595022694
cost at iteration 6000 = 0.21909438354878738
cost at iteration 7000 = 0.22080588811611293
cost at iteration 8000 = 0.21944639423275003
cost at iteration 9000 = 0.21849633619133751


In [57]:
def f1(y_hat, y):
    """F1 score of binary predictions y_hat against binary labels y.

    Both arguments are arrays of 0/1 values with matching (broadcastable)
    shapes; class 1 is treated as the positive class.

    When there are no true positives, precision, recall or their sum would
    be 0/0. The score is defined as 0.0 in that case instead of returning
    NaN with a division warning.

    Returns:
        The F1 score as a float in [0, 1].
    """
    tp = np.sum((y_hat == 1) & (y == 1))
    fp = np.sum((y_hat == 1) & (y == 0))
    fn = np.sum((y_hat == 0) & (y == 1))

    # tp == 0 covers every degenerate case: no predicted positives, no
    # actual positives, or precision + recall == 0.
    if tp == 0:
        return 0.0

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    score = 2 * precision * recall / (precision + recall)
    return float(score)

In [58]:
# Score the trained network on the same data it was fitted on, as a percentage.
predictions = predict(parameters, X)
f1_score = f1(y_hat=predictions, y=Y)
100 * f1_score

90.63291139240506

In [61]:
# Sweep the width (number of units) of the single hidden layer and report the
# F1 score of each trained network. Scores are measured on the same X, Y that
# the network was trained on.
hidden_layer_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

for n_h in hidden_layer_sizes:
    # Sweep-specific names keep the `parameters`, `predictions` and `f1_score`
    # produced by the earlier cells intact.
    sweep_parameters = nn_model(X, Y, n_h, num_iterations=10000)
    sweep_predictions = predict(sweep_parameters, X)
    sweep_f1 = f1(sweep_predictions, Y)
    print(f"The F1 score for {n_h} hidden units = {sweep_f1 * 100} %")

The F1 score for 1 hidden layer = 67.5 %
The F1 score for 2 hidden layer = 67.00251889168767 %
The F1 score for 3 hidden layer = 91.044776119403 %
The F1 score for 4 hidden layer = 90.63291139240506 %
The F1 score for 5 hidden layer = 91.1764705882353 %
The F1 score for 6 hidden layer = 90.81885856079404 %
The F1 score for 7 hidden layer = 88.8888888888889 %
The F1 score for 8 hidden layer = 91.0891089108911 %
The F1 score for 9 hidden layer = 90.67357512953367 %
The F1 score for 10 hidden layer = 91.23711340206185 %
