In [237]:
import numpy as np
import matplotlib.pyplot as plt

In [303]:
def initialize_parameters(layer_dims):
    """Build the weight and bias arrays for every layer of the network.

    Args:
        layer_dims: Sequence of layer sizes, input layer first.

    Returns:
        dict mapping 'W<l>' to a (layer_dims[l], layer_dims[l-1]) array drawn
        with np.random.randn and 'b<l>' to a (layer_dims[l], 1) array of zeros,
        for l = 1 .. len(layer_dims) - 1.
    """
    parameters = {}
    # Pair each layer's size with the size of the layer feeding into it.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for layer, (n_in, n_out) in enumerate(size_pairs, start=1):
        parameters[f'W{layer}'] = np.random.randn(n_out, n_in)
        parameters[f'b{layer}'] = np.zeros((n_out, 1))
    return parameters

In [304]:
def sigmoid(Z):
    """Apply the logistic function element-wise.

    Returns:
        (A, Z): the activation 1 / (1 + exp(-Z)) and the untouched input Z,
        which callers keep as the activation cache.
    """
    exp_neg = np.exp(-Z)
    activation = 1 / (1 + exp_neg)
    return activation, Z
def relu(Z):
    """Apply the rectified linear unit element-wise.

    Returns:
        (A, Z): max(Z, 0) and the untouched input Z, which callers keep as
        the activation cache.
    """
    rectified = np.maximum(Z, 0)
    return rectified, Z

In [305]:
def forward_pass(A, W, b):
    """Compute the affine part of a layer: Z = W . A + b.

    Returns:
        (Z, cache) where cache is the tuple (A, W, b) of the inputs, kept for
        the backward pass.
    """
    linear_cache = (A, W, b)
    pre_activation = np.add(np.dot(W, A), b)
    return pre_activation, linear_cache
def linear_activation_forward(A_prev, W, b, activation):
    """Run one full layer: affine transform followed by an activation.

    Args:
        A_prev: Activations coming from the previous layer (or the input).
        W, b: Weight matrix and bias of this layer.
        activation: Either 'sigmoid' or 'relu'.

    Returns:
        (A, cache) where A is the layer output and cache is
        (linear_cache, activation_cache) for use in the backward pass.

    Raises:
        ValueError: If `activation` is not one of the supported names.
    """
    # Validate first: previously an unknown name fell through both branches
    # and surfaced as a confusing UnboundLocalError on `A`.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'sigmoid' or 'relu'"
        )

    Z, linear_cache = forward_pass(A_prev, W, b)
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    else:
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)
    return A, cache

In [306]:
def L_model_forward(X, parameters):
    """Forward-propagate X through the whole network.

    Every layer except the last uses 'relu'; the last layer uses 'sigmoid'.

    Args:
        X: Network input.
        parameters: dict holding 'W<l>' and 'b<l>' for each layer l.

    Returns:
        (AL, caches): the output of the final layer and the list of per-layer
        caches, in layer order.
    """
    n_layers = len(parameters) // 2  # one W and one b per layer
    caches = []
    activation = X

    # Hidden layers: 1 .. n_layers - 1.
    for layer in range(1, n_layers):
        activation, layer_cache = linear_activation_forward(
            activation, parameters['W' + str(layer)], parameters['b' + str(layer)], 'relu'
        )
        caches.append(layer_cache)

    # Output layer.
    AL, output_cache = linear_activation_forward(
        activation, parameters['W' + str(n_layers)], parameters['b' + str(n_layers)], 'sigmoid'
    )
    caches.append(output_cache)
    return AL, caches

In [307]:
def compute_cost(AL, Y):
    """Binary cross-entropy cost averaged over the examples.

    Args:
        AL: Predicted probabilities; compared element-wise with Y.
        Y: True labels as an array whose second axis indexes the examples
            (m = Y.shape[1]).

    Returns:
        The cost as a 0-d NumPy value (result of np.squeeze).
    """
    m = Y.shape[1]

    # Keep probabilities strictly inside (0, 1). Without this, a saturated
    # prediction of exactly 0 or 1 yields 0 * log(0) = NaN in the sum below.
    AL = np.asarray(AL)
    if np.issubdtype(AL.dtype, np.floating):
        eps = np.finfo(AL.dtype).eps
    else:
        eps = np.finfo(float).eps
    AL = np.clip(AL, eps, 1 - eps)

    cost = (-1/m) * np.sum(np.multiply(Y, np.log(AL)) + np.multiply(1-Y, np.log(1-AL)))
    cost = np.squeeze(cost)
    return cost

In [308]:
def relu_backward(dA, activation_cache):
    """Back-propagate through a ReLU.

    Args:
        dA: Gradient with respect to the ReLU output.
        activation_cache: The pre-activation Z stored during the forward pass.

    Returns:
        A copy of dA with entries zeroed wherever Z <= 0.
    """
    pre_activation = activation_cache
    grad = np.array(dA, copy=True)  # copy so the caller's dA is left intact
    inactive = pre_activation <= 0
    grad[inactive] = 0
    return grad

def sigmoid_backward(dA, activation_cache):
    """Back-propagate through a sigmoid.

    Args:
        dA: Gradient with respect to the sigmoid output.
        activation_cache: The pre-activation Z stored during the forward pass.

    Returns:
        dZ = dA * s * (1 - s), where s is the sigmoid of Z.
    """
    pre_activation = activation_cache
    sig = 1 / (1 + np.exp(-pre_activation))
    local_slope = 1 - sig
    return dA * sig * local_slope

def linear_backward(dZ, cache):
    """Back-propagate through the affine part of a layer.

    Args:
        dZ: Gradient with respect to the layer's pre-activation.
        cache: The (A_prev, W, b) tuple saved by the forward pass.

    Returns:
        (dA_prev, dW, db): gradients with respect to the previous activations,
        the weights and the bias. dW and db are divided by the number of
        columns of A_prev; dA_prev is not.
    """
    prev_activations, weights, _ = cache
    n_examples = prev_activations.shape[1]

    grad_prev = np.dot(weights.T, dZ)
    grad_bias = np.sum(dZ, axis=1, keepdims=True) / n_examples
    grad_weights = np.dot(dZ, prev_activations.T) / n_examples

    return grad_prev, grad_weights, grad_bias
def linear_activation_backward(dA, cache, activation):
    """Back-propagate through one full layer (activation, then affine part).

    Args:
        dA: Gradient with respect to this layer's output.
        cache: (linear_cache, activation_cache) saved by the forward pass.
        activation: Either 'relu' or 'sigmoid'; must match the forward pass.

    Returns:
        (dA_prev, dW, db) as produced by linear_backward.

    Raises:
        ValueError: If `activation` is not one of the supported names.
    """
    # Validate first: previously an unknown name skipped both branches and
    # failed later with an UnboundLocalError on `dZ`.
    if activation not in ('relu', 'sigmoid'):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'relu' or 'sigmoid'"
        )

    linear_cache, activation_cache = cache
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [309]:
def L_model_backward(AL, Y, caches):
    """Back-propagate the cross-entropy loss through the whole network.

    Args:
        AL: Output of the final (sigmoid) layer from the forward pass.
        Y: True labels; reshaped to AL's shape.
        caches: Per-layer caches returned by L_model_forward, in layer order.

    Returns:
        dict with 'dA<l>', 'dW<l+1>' and 'db<l+1>' for l = 0 .. L-1, where L
        is the number of layers (len(caches)).
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Keep AL strictly inside (0, 1) before dividing. A saturated sigmoid
    # output of exactly 0 or 1 would otherwise give inf or 0/0 = NaN here and
    # turn every downstream gradient into NaN.
    if np.issubdtype(AL.dtype, np.floating):
        eps = np.finfo(AL.dtype).eps
    else:
        eps = np.finfo(float).eps
    AL_safe = np.clip(AL, eps, 1 - eps)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = - (np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe))
    current_cache = caches[-1]

    # Output layer uses the sigmoid activation.
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, 'sigmoid')

    grads['dA'+str(L-1)] = dA_prev_temp
    grads['dW'+str(L)] = dW_temp
    grads['db'+str(L)] = db_temp

    # Remaining layers, last hidden layer first; all use relu.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads['dA'+str(l+1)], current_cache, 'relu')

        grads['dA'+str(l)] = dA_prev_temp
        grads['dW'+str(l+1)] = dW_temp
        grads['db'+str(l+1)] = db_temp
    return grads


In [310]:
def update_parameters(params, grads, learning_rate):
    """Take one gradient-descent step on every weight and bias.

    Args:
        params: dict with 'W<l>' and 'b<l>' entries.
        grads: dict with matching 'dW<l>' and 'db<l>' entries.
        learning_rate: Step size multiplying each gradient.

    Returns:
        A new dict (shallow copy of `params`) holding the updated arrays; the
        entries of `params` itself are not reassigned.
    """
    updated = params.copy()
    n_layers = len(updated) // 2  # one W and one b per layer

    for layer in range(1, n_layers + 1):
        for prefix in ('W', 'b'):
            key = prefix + str(layer)
            updated[key] = updated[key] - learning_rate * grads['d' + key]
    return updated

In [322]:
def L_layer_model(X, Y, layer_dims, learning_rate = 0.001, num_iterations = 3000, print_cost= False):
    """Train the network with plain gradient descent.

    Args:
        X: Training inputs, passed unchanged to L_model_forward.
        Y: Training labels, passed to compute_cost and L_model_backward.
        layer_dims: Layer sizes handed to initialize_parameters.
        learning_rate: Step size for update_parameters.
        num_iterations: Number of gradient-descent steps.
        print_cost: If True, print the cost every 1000 iterations.

    Returns:
        (parameters, cost): the trained parameters and the cost computed in
        the final iteration (before that iteration's update). `cost` is None
        when num_iterations is 0.
    """
    parameters = initialize_parameters(layer_dims)
    # Defined up front so the return below works even if the loop never runs
    # (previously num_iterations=0 raised UnboundLocalError).
    cost = None

    for i in range(num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)

        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 1000 == 0:
            print(cost, f'at iter {i}')
    return parameters, cost
def predict(X, params, threshold=0.9):
    """Turn the network's output into hard 0/1 predictions.

    Args:
        X: Inputs, passed unchanged to L_model_forward.
        params: Trained parameters dict.
        threshold: Outputs strictly greater than this become 1, everything
            else 0. Defaults to 0.9, the cut-off this function has always
            used; pass 0.5 for the conventional decision boundary.

    Returns:
        Array with the same shape and dtype as the network output, holding
        only 0s and 1s.
    """
    AL, _ = L_model_forward(X, params)
    return (AL > threshold).astype(AL.dtype)
def acc(predict, label):
    """Fraction of predictions that equal their labels.

    Args:
        predict: Predicted values; each is truncated to an integer before
            comparison. Any shape is accepted and flattened.
        label: True labels with the same number of elements as `predict`.

    Returns:
        Accuracy as a Python float in [0, 1].

    Raises:
        ValueError: If the two inputs hold a different number of elements.
        ZeroDivisionError: If the inputs are empty.
    """
    # ravel (not squeeze) so a single-sample input stays 1-D instead of
    # collapsing to a 0-d array that has no len().
    predicted = np.ravel(predict).astype(int)
    expected = np.ravel(label)
    if predicted.size != expected.size:
        raise ValueError(
            f'predict has {predicted.size} elements but label has {expected.size}'
        )

    n_correct = int(np.count_nonzero(predicted == expected))
    return n_correct / predicted.size

In [323]:
from sklearn.datasets import make_moons
# Fix random_state so the generated dataset is identical on every run.
features, true_labels = make_moons(n_samples=1000, noise=0.05, random_state=42)

In [324]:
# Seed NumPy's global RNG so the np.random.randn weight initialisation, and
# therefore the whole training run, is repeatable after a kernel restart.
np.random.seed(42)

X = features
Y = np.expand_dims(true_labels, axis=0)  # add a leading axis so labels form a single row
layer_dims = [2, 4, 4, 1]  # layer sizes, input first, single output unit last
params, cost = L_layer_model(X.T, Y, layer_dims, learning_rate=0.1, num_iterations=10000, print_cost=True)

1.1711699442160648 at iter 0
0.1996401699175948 at iter 1000
0.05121364979027261 at iter 2000
0.01490712667482803 at iter 3000
0.007923017056800827 at iter 4000
0.005164007925247137 at iter 5000
0.003794581299415803 at iter 6000
0.002983631096035685 at iter 7000
0.002452317273818909 at iter 8000
0.0020784388939241102 at iter 9000


In [327]:
# Peek at the first row of X to sanity-check the feature values.
X[0]

array([-0.95098421,  0.06225272])

In [325]:
# Score the trained network on the same X / Y it was fitted on, so the value
# shown is training accuracy rather than a held-out estimate.
y_hat = predict(X.T, params)
acc(y_hat, Y)

1.0

In [326]:
# Display the learned weights and biases returned by L_layer_model.
params

{'W1': array([[ 0.66599498, -2.07048096],
        [-1.95961655, -0.02695126],
        [-1.0793301 , -0.32075754],
        [ 3.13020064,  0.54817241]]),
 'b1': array([[ 2.0205077 ],
        [-0.06599005],
        [-0.53239532],
        [-0.5984702 ]]),
 'W2': array([[ 2.34531671, -0.07713586,  0.37112487, -3.71449736],
        [-0.09171985,  1.64925663,  0.20430544,  0.02073711],
        [-0.61211549,  1.51523819,  0.97927221, -0.34545541],
        [ 1.82240833, -1.37416432,  2.12484406,  1.35656784]]),
 'b2': array([[ 2.71383523],
        [ 4.1669077 ],
        [ 0.37319252],
        [-1.65317042]]),
 'W3': array([[ 4.7522303 , -4.5553676 , -0.65272086,  2.41732295]]),
 'b3': array([[-1.40511813]])}