In [1]:
import numpy as np
import matplotlib.pyplot as pllt
from utils import sigmoid, relu, relu_backward, sigmoid_backward
from testCase import *
import h5py

In [2]:
def initialize_parameters(layersize):
    
    
    np.random.seed(2)
    L = len(layersize)
    parameters = dict()
    for i in range(1, L):
        parameters['W' + str(i)] = np.random.randn(layersize[i], layersize[i - 1]) * 0.01
        parameters['b' + str(i)] = np.zeros((layersize[i], 1))
    return parameters

In [3]:
param = initialize_parameters([20, 10, 5, 1])

In [4]:
def linear_forward(A,W,b):
    Z = np.dot(W,A) + b
    cache = (A, W,b)
    return Z, cache

In [5]:
def linear_forward_activation(AL_1,W,b, activation):

    Z, linear_cache = linear_forward(AL_1,W,b)
    if activation == "relu":
        AL, activation_cache = relu(Z)
    if activation == "sigmoid":
        AL, activation_cache = sigmoid(Z)
        
    cache = (linear_cache, activation_cache)    
    return AL , cache

In [6]:

AL_1, W, b = linear_forward_activation_testcase()
Z ,cache = linear_forward_activation(AL_1, W, b,"relu")
Z

array([[ 0.        ,  0.        ,  0.18524692,  3.25156647]])

In [7]:
def L_model_forward(X,parameters ,hidden_layers):
    
    caches = []
    A_prev = X
    
    
    for i in range(1,hidden_layers):
        A,cache = linear_forward_activation(A_prev,parameters['W' + str(i)],
                                                                         parameters['b' + str(i)], "relu")
        A_prev = A
        caches.append(cache)
    #output layer
    AL, cache = linear_forward_activation(A_prev,parameters['W' + str(hidden_layers)],
                                   parameters['b' + str(hidden_layers)], "sigmoid")
    caches.append(cache)
    
    
    print AL

In [8]:
X,parameters ,hidden_layers = L_model_forward_testCase()
L_model_forward(X,parameters ,hidden_layers)


[[ 1.2870236   2.90357109  2.14481915  5.93033737]]


In [9]:
def cost_function(AL, Y):
    m = Y.shape[1]
    
    cost = -1/m * np.sum(np.dot(Y.T, np.log(AL))  + np.sum(np.dot((1 - Y).T, np.log(1 - AL))))
    
    
    cost = np.squeeze(cost) 
    
    return cost

In [10]:
AL , y =  cost_function_testCase()
print cost_function(AL, y)

32.1054217993


In [11]:
def linear_backward(dZ, linear_cache):
    m = dZ.shape[1]
    
    AL_1 , W , b = linear_cache
    print AL_1.shape
    dw = 1/ m * np.dot(dZ,AL_1.T)
    db = 1 / m * np.sum(dZ, axis=1,keepdims=True)
    dAL_1 = np.dot(W.T, dZ)
    
    return dAL_1, dw , db  

In [12]:
dz , linear_cache = linear_backward_testCase()
print linear_backward(dz , linear_cache)

(3, 4)
(array([[ 1.04232747, -0.16475573, -0.44525808,  0.38297651],
       [-2.61586929,  2.07450183,  1.32425729, -2.87828788],
       [ 3.34845405, -2.06325402, -1.62138188,  3.00082321]]), array([[-0.,  0., -0.],
       [-0.,  0., -0.]]), array([[-0.],
       [-0.]]))


In [16]:
def linear_activation_backward(dA, cache, activation):
   
    (linear_cache, activation_cache) = cache   
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA,activation_cache)
        
        dAL_1, dw, db = linear_backward(dZ, linear_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dAL_1, dw, db = linear_backward(dZ, linear_cache)
        
    return dAL_1, dw, db 

In [22]:
def L_model_backward(AL, Y, cache):
    
    grad = {}
    L = len(cache)
    dAL = - np.divide(Y, AL) - np.divide((1 - Y), (1 - AL))
    current_cache = cache[L - 1]
    
    grad["dA" + str(L)] , grad["dw" + str(L)] , grad["db" + str(L)] = linear_activation_backward(dAL, current_cache,"sigmoid")
    
    
    for l in reversed(range(L - 1)):
        current_cache = cache[l]
        grad["dA" + str(l + 1)] , grad["dw" + str( l + 1)] , grad["db" + str(l + 1)] = linear_activation_backward(grad["dA" + str(l + 2)],
        current_cache,'relu')
    return grad

In [21]:
AL, Y, cache =  L_model_backward_testCase()
print L_model_backward(AL, Y, cache)

dz shappe  (1, 3)
(2, 3)
(5, 3)
None


In [3]:
def update_parameters(parameters, grad, learning_rate):
    for i in range(1,len(parameters) / 2 + 1):
        parameters['W' + str(i)] =  parameters['W' + str(i)] - grad['dw' + str(i)] * learning_rate
        parameters['b' + str(i)] =  parameters['b' + str(i)] - grad['db' + str(i)] * learning_rate
        
    return parameters    

In [6]:
parameters , grads = update_parameters_testCase()
print update_parameters(parameters, grads , 0.1)

KeyError: 'W1'