In [1]:
import numpy as np

In [5]:
def initialize_params(dimensions):
    """Create the initial weights and biases for every layer of the network.

    Args:
        dimensions: sequence of layer sizes; entry 0 is the input size and
            each following entry is the number of units in that layer.

    Returns:
        dict with, for each layer ``i`` (starting at 1), ``"W<i>"`` of shape
        ``(dimensions[i], dimensions[i-1])`` drawn from a standard normal and
        scaled by 0.01, and ``"b<i>"`` of shape ``(dimensions[i], 1)`` filled
        with zeros.
    """
    parameters = {}

    # Walk consecutive (previous layer, current layer) size pairs; layers are
    # numbered from 1 so the keys line up with W1/b1, W2/b2, ...
    layer_pairs = zip(dimensions, dimensions[1:])
    for layer, (fan_in, fan_out) in enumerate(layer_pairs, start=1):
        weights = np.random.randn(fan_out, fan_in)
        parameters["W%d" % layer] = weights * 0.01
        parameters["b%d" % layer] = np.zeros((fan_out, 1))

    return parameters

In [6]:
# Smoke test of initialize_params: layer sizes are listed input-first, so this
# builds W1 of shape (10, 4), W2 of shape (10, 10) and W3 of shape (1, 10).
dims = [4,10,10,1]
params = initialize_params(dims)
# Dumps every weight/bias array in full so the shapes and values can be inspected.
print(params)

{'W1': array([[-0.30973412, -0.41944514,  0.27516686,  1.05675313],
       [-1.22598812,  0.30653189, -0.62169419,  0.88698853],
       [-1.95463266, -0.40737625,  1.33604203, -0.12435826],
       [-1.55602302, -0.42086504, -1.1119003 , -1.73394389],
       [-0.95666562,  0.28567931, -0.55731953,  0.35714735],
       [ 1.38909714, -0.20420179,  0.14837665, -1.30697474],
       [-0.45353906, -0.52658264, -0.73650632, -0.39726784],
       [ 0.57264296, -0.57144762,  0.09953035,  0.75342218],
       [-0.13868832, -0.07349183,  1.89818268,  0.87977535],
       [-0.27659495, -0.49225184, -0.60775574, -0.62072667]]), 'b1': array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]]), 'W2': array([[-1.23229028, -0.71065805,  0.39320312,  0.47882018, -2.06741748,
         0.77450411, -0.46446971, -0.83014464,  0.38726214,  0.34305973],
       [ 0.69525392, -0.46131151,  0.39236465, -0.59573155, -0.05322578,
        -0.171918

In [12]:
def relu(Z):
    """Apply the rectified linear unit element-wise.

    Args:
        Z: array of pre-activation values.

    Returns:
        Tuple ``(A, Z)`` where ``A`` is ``max(0, z)`` for every element of
        ``Z`` and the second item is ``Z`` itself, kept as the activation
        cache for the backward pass.
    """
    # np.maximum is the element-wise maximum; np.max(0, Z) would treat Z as
    # the `axis` argument of a reduction over the scalar 0.
    A = np.maximum(0, Z)
    return A, Z

def sigmoid(Z):
    """Apply the logistic sigmoid element-wise.

    Args:
        Z: array of pre-activation values.

    Returns:
        Tuple ``(A, Z)`` where ``A = 1 / (1 + exp(-Z))`` and the second item
        is ``Z`` itself, kept as the activation cache for the backward pass.
    """
    # Parentheses are required: `1 / 1 + np.exp(-Z)` evaluates as
    # `(1 / 1) + np.exp(-Z)`, which is not a sigmoid.
    A = 1 / (1 + np.exp(-Z))
    return A, Z

def linear_forward(Aprev, W, b):
    """Compute the affine part of a layer: ``W . Aprev + b``.

    Args:
        Aprev: activations coming from the previous layer (or the input).
        W: weight matrix of the layer.
        b: bias of the layer, added to the matrix product.

    Returns:
        Tuple ``(Z, linear_cache)`` where ``Z`` is the pre-activation value
        and ``linear_cache`` is ``(Aprev, W, b)``, stored for backpropagation.
    """
    inputs_for_backprop = (Aprev, W, b)
    pre_activation = np.dot(W, Aprev) + b
    return pre_activation, inputs_for_backprop

def linear_activation_forward(Aprev, W, b, activation):
    """Run one full layer forward: affine step followed by an activation.

    Args:
        Aprev: activations coming from the previous layer (or the input).
        W: weight matrix of the layer.
        b: bias of the layer.
        activation: ``'relu'`` or ``'sigmoid'``.

    Returns:
        Tuple ``(A, cache)`` where ``A`` is the layer output and ``cache`` is
        ``(linear_cache, activation_cache)`` as produced by ``linear_forward``
        and the chosen activation function.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """
    # The affine step is implemented by linear_forward (there is no
    # forward_prop function in this notebook).
    Z, linear_cache = linear_forward(Aprev, W, b)

    if activation == 'relu':
        A, activation_cache = relu(Z)
    elif activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    else:
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError("Unsupported activation: %r" % (activation,))

    cache = (linear_cache, activation_cache)

    return A, cache

In [10]:
def L_model_forward(X, parameters):
    """Forward pass through the whole network.

    Layers 1..L-1 use ReLU and the final layer L uses sigmoid, where L is
    the number of ``W``/``b`` pairs in ``parameters``.

    Args:
        X: input data fed to the first layer.
        parameters: dict holding ``"W1".."WL"`` and ``"b1".."bL"``.

    Returns:
        Tuple ``(AL, caches)`` where ``AL`` is the output of the last layer
        and ``caches`` is a list with one cache per layer, in layer order.
    """
    caches = []

    A = X
    # Each layer contributes one W and one b entry, hence the halving of the
    # dict length (not of the dict itself).
    L = len(parameters) // 2

    # Hidden layers: LINEAR -> RELU.
    for i in range(1, L):
        A, cache = linear_activation_forward(
            A, parameters["W" + str(i)], parameters["b" + str(i)], "relu"
        )
        caches.append(cache)

    # Output layer: LINEAR -> SIGMOID, fed with the last hidden activation
    # (or X itself when the network has a single layer).
    AL, cache = linear_activation_forward(
        A, parameters["W" + str(L)], parameters["b" + str(L)], "sigmoid"
    )
    caches.append(cache)

    return AL, caches

In [13]:
def compute_cost(AL, Y):
    """Compute the binary cross-entropy cost averaged over the examples.

    Args:
        AL: predicted probabilities; must lie strictly between 0 and 1 for
            the logarithms to be finite.
        Y: true labels, same shape as ``AL``; ``Y.shape[1]`` is used as the
            number of examples.

    Returns:
        The cost ``-1/m * sum(Y*log(AL) + (1-Y)*log(1-AL))`` with all
        length-one axes squeezed out.
    """
    m = Y.shape[1]

    # Element-wise products are required here. np.product(a, b) is a
    # reduction that interprets b as an axis (and no longer exists in
    # NumPy 2.0), so np.multiply is used instead.
    log_likelihood = np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL))
    cost = -1 / m * np.sum(log_likelihood)
    cost = np.squeeze(cost)

    return cost

In [23]:
def relu_backwards(dA, activation_cache):
    """Backpropagate a gradient through a ReLU activation.

    Args:
        dA: gradient of the cost with respect to the activation output.
        activation_cache: the pre-activation values ``Z`` stored during the
            forward pass; same shape as ``dA``.

    Returns:
        ``dZ``, the gradient of the cost with respect to ``Z``.
    """
    Z = np.asarray(activation_cache)
    # Element-wise ReLU slope: 0 where Z is negative, 1 elsewhere. A Python
    # conditional cannot be used here because Z is an array.
    derivative = np.where(Z < 0, 0.0, 1.0)
    dZ = np.multiply(dA, derivative)
    return dZ
    
def sigmoid_backwards(dA, activation_cache):
    """Backpropagate a gradient through a sigmoid activation.

    This was previously defined under the name ``relu_backwards``, which
    shadowed the ReLU version, while ``linear_activation_backward`` calls
    ``sigmoid_backwards`` for the sigmoid case.

    Args:
        dA: gradient of the cost with respect to the activation output.
        activation_cache: the pre-activation values ``Z`` stored during the
            forward pass; same shape as ``dA``.

    Returns:
        ``dZ = dA * s * (1 - s)`` with ``s = 1 / (1 + exp(-Z))``.
    """
    Z = np.asarray(activation_cache)
    # The sigmoid is recomputed inline from the cached Z.
    sig = 1 / (1 + np.exp(-Z))
    derivative = sig * (1 - sig)
    dZ = np.multiply(dA, derivative)
    return dZ

In [24]:
def linear_backward(dZ, linear_cache):
    """Backpropagate through the affine step ``Z = W . A_prev + b``.

    Args:
        dZ: gradient of the cost with respect to ``Z``.
        linear_cache: tuple ``(A_prev, W, b)`` stored by the forward pass.

    Returns:
        Tuple ``(dA_prev, dW, db)``: gradients with respect to the previous
        activations, the weights and the bias. ``dW`` and ``db`` are averaged
        over the number of columns of ``A_prev``.
    """
    A_prev, W, b = linear_cache
    # Number of examples = number of columns of the cached activations
    # (previously `m` was read from an undefined name).
    m = A_prev.shape[1]

    dW = 1 / m * np.dot(dZ, A_prev.T)
    db = 1 / m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db
    
    
def linear_activation_backward(dA, cache, activation):
    """Backpropagate through one full layer (activation, then affine step).

    Args:
        dA: gradient of the cost with respect to this layer's output.
        cache: tuple ``(linear_cache, activation_cache)`` stored for this
            layer during the forward pass.
        activation: ``'relu'`` or ``'sigmoid'``, matching the forward pass.

    Returns:
        Tuple ``(dA_prev, dW, db)`` as produced by ``linear_backward``.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """
    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = relu_backwards(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backwards(dA, activation_cache)
    else:
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError("Unsupported activation: %r" % (activation,))

    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    # Callers unpack three gradients, so they must actually be returned.
    return dA_prev, dW, db

In [27]:
def L_model_backward(AL, Y, caches):
    """Backward pass through the whole network.

    The last layer is treated as sigmoid and all earlier layers as ReLU.

    Args:
        AL: output of the forward pass; values must not be exactly 0 or 1,
            since the initial gradient divides by ``AL`` and ``1 - AL``.
        Y: true labels; reshaped to the shape of ``AL``.
        caches: list of per-layer caches in layer order.

    Returns:
        dict of gradients: ``"dW<l>"`` and ``"db<l>"`` for each layer
        ``l = 1..L`` and ``"dA<l>"`` for ``l = 0..L-1`` (the gradient with
        respect to the input of layer ``l + 1``).
    """
    L = len(caches)
    grads = dict()
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = - np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)

    # Output layer (sigmoid).
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, current_cache, 'sigmoid')

    # Hidden layers (ReLU), walked from the last one back to the first.
    # caches[i] belongs to layer i + 1, whose output gradient is dA<i+1>.
    for i in reversed(range(L - 1)):
        current_cache = caches[i]
        grads["dA" + str(i)], grads["dW" + str(i + 1)], grads["db" + str(i + 1)] = \
            linear_activation_backward(grads["dA" + str(i + 1)], current_cache, 'relu')

    return grads

In [26]:
def update_params(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every layer's weights and biases.

    Args:
        parameters: dict holding ``"W1".."WL"`` and ``"b1".."bL"``; its
            entries are replaced in place.
        grads: dict holding the matching ``"dW<l>"`` and ``"db<l>"`` entries.
        learning_rate: step size multiplied into each gradient.

    Returns:
        The same ``parameters`` dict, after the update.
    """
    L = len(parameters) // 2

    # Layers are keyed from 1 to L inclusive; range(L) would look up the
    # non-existent "W0" and never reach layer L.
    for i in range(1, L + 1):
        layer = str(i)
        parameters["W" + layer] = parameters["W" + layer] - (learning_rate * grads["dW" + layer])
        parameters["b" + layer] = parameters["b" + layer] - (learning_rate * grads["db" + layer])

    return parameters

In [28]:
def model(X, Y, dims, num_iterations=1000, learning_rate=0.01):
    """Train the network with plain gradient descent.

    Each iteration runs a forward pass, computes the cost, runs a backward
    pass and applies one parameter update. The cost is printed and recorded
    every 100 iterations.

    Args:
        X: input data passed to ``L_model_forward``.
        Y: labels passed to ``compute_cost`` and ``L_model_backward``.
        dims: layer sizes passed to ``initialize_params``.
        num_iterations: number of gradient-descent iterations.
        learning_rate: step size passed to ``update_params``.

    Returns:
        The parameters dict after the final update.
    """
    parameters = initialize_params(dims)
    costs = []
    for i in range(num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_params(parameters, grads, learning_rate)

        if i % 100 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
            costs.append(cost)

    # Without a return value the trained parameters are lost to the caller.
    return parameters