# Author: Anukool Purohit
### Neural Network Practice

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Notebook-wide plotting configuration: draw figures inline and set matplotlib defaults.
%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'  # default interpolation used when showing images
plt.rcParams['image.cmap'] = 'gray'  # default colormap used when showing images

# Load IPython's autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2

### Helper Functions

##### Relu Function

In [3]:
def relu(Z):
    """Apply the rectified linear unit element-wise.

    Parameters
    ----------
    Z : numpy.ndarray
        Pre-activation values.

    Returns
    -------
    A : numpy.ndarray
        ``max(0, Z)`` computed element-wise; same shape as ``Z``.
    cache : numpy.ndarray
        ``Z`` itself, returned so the backward pass can reuse it.
    """
    # np.maximum avoids two artefacts of the mask-multiply form (Z > 0) * Z:
    # negative inputs came out as -0.0, and -inf came out as NaN (0 * -inf).
    A = np.maximum(0, Z)
    cache = Z
    return A, cache

In [4]:
# Smoke test: run relu on a random (5, 1) column vector and inspect the results.
Z = np.random.randn(5,1)
print("Z:  ")
print(Z)
print("Relu: ")
A, cache = relu(Z)
print(A)
print(Z.shape == A.shape)  # the activation should keep the input's shape
print(cache)  # relu returns its input Z as the cache

Z:  
[[ 1.43479746]
 [ 0.24563544]
 [ 1.25322491]
 [-0.69529393]
 [ 1.91073356]]
Relu: 
[[ 1.43479746]
 [ 0.24563544]
 [ 1.25322491]
 [-0.        ]
 [ 1.91073356]]
True
[[ 1.43479746]
 [ 0.24563544]
 [ 1.25322491]
 [-0.69529393]
 [ 1.91073356]]


##### Sigmoid Function

In [5]:
def sigmoid(Z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-Z))`` element-wise.

    Parameters
    ----------
    Z : numpy.ndarray
        Pre-activation values.

    Returns
    -------
    A : numpy.ndarray
        Sigmoid of ``Z``; same shape as ``Z``.
    cache : numpy.ndarray
        ``Z`` itself, returned so the backward pass can reuse it.
    """
    # exp is only ever evaluated on non-positive arguments, so it cannot
    # overflow for large-magnitude negative Z (the naive exp(-Z) does).
    decay = np.exp(-np.abs(Z))
    # Z >= 0: 1 / (1 + exp(-Z));  Z < 0: exp(Z) / (1 + exp(Z)) -- same function.
    A = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
    assert Z.shape == A.shape
    cache = Z
    return A, cache

In [6]:
# Smoke test: run sigmoid on a random (5, 1) column vector and inspect the results.
eg = np.random.randn(5,1)
print("E.g:  ")
print(eg)
print("Sig: ")
sig, cache = sigmoid(eg)
print(sig)
print(eg.shape == sig.shape)  # the activation should keep the input's shape
print(cache)  # sigmoid returns its input as the cache

E.g:  
[[-1.10818989]
 [-0.35323388]
 [ 0.72687068]
 [ 0.71237052]
 [ 0.644083  ]]
Sig: 
[[0.2482085 ]
 [0.41259843]
 [0.67411819]
 [0.67092475]
 [0.65567584]]
True
[[-1.10818989]
 [-0.35323388]
 [ 0.72687068]
 [ 0.71237052]
 [ 0.644083  ]]


##### Relu Gradient

In [43]:
def relu_gradient(dA, cache):
    """Backward step through a ReLU activation.

    Parameters
    ----------
    dA : array_like
        Gradient of the cost with respect to the ReLU output.
    cache : numpy.ndarray
        The pre-activation ``Z`` saved during the forward pass.

    Returns
    -------
    numpy.ndarray
        A copy of ``dA`` with entries set to 0 wherever ``Z <= 0``.
    """
    pre_activation = cache
    # Work on a copy so the caller's dA is left untouched.
    grad = np.array(dA, copy=True)
    inactive_units = pre_activation <= 0
    grad[inactive_units] = 0
    assert grad.shape == pre_activation.shape
    return grad

In [44]:
# Smoke test: random upstream gradient and random cached pre-activation, both (4, 1).
dA = np.random.randn(4,1)
cache = np.random.randn(4,1)
dZ = relu_gradient(dA, cache)
print(dZ)  # entries are zero wherever the cached value is <= 0

[[-1.01486686]
 [ 0.        ]
 [ 0.        ]
 [ 0.77105174]]


##### Sigmoid Gradient

In [45]:
def sigmoid_gradient(dA, cache):
    """Backward step through a sigmoid activation.

    Parameters
    ----------
    dA : numpy.ndarray
        Gradient of the cost with respect to the sigmoid output.
    cache : numpy.ndarray
        The pre-activation ``Z`` saved during the forward pass.

    Returns
    -------
    numpy.ndarray
        ``dA * s * (1 - s)`` where ``s = sigmoid(Z)``; same shape as ``Z``.
    """
    Z = cache

    # Numerically stable sigmoid: exp only sees non-positive arguments, so a
    # large-magnitude negative Z no longer overflows as the naive exp(-Z) does.
    decay = np.exp(-np.abs(Z))
    s = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))

    dZ = dA * s * (1 - s)
    assert dZ.shape == Z.shape
    return dZ

In [46]:
# Smoke test: random upstream gradient and random cached pre-activation, both (4, 1).
dA = np.random.randn(4,1)
cache = np.random.randn(4,1)
dZ = sigmoid_gradient(dA, cache)
print(dZ)

[[-0.25744408]
 [ 0.46807984]
 [-0.38191486]
 [-0.06783684]]


## Functions for NN arch

In [7]:
def initialize_parameters(layer_dims, scale=0.01):
    """Create randomly initialised weights and zero biases for every layer.

    Parameters
    ----------
    layer_dims : sequence of int
        Number of units per layer, input layer first.
    scale : float, optional
        Factor applied to the standard-normal weight draws. Defaults to 0.01.

    Returns
    -------
    dict
        ``'W1', 'b1', ..., 'W<n>', 'b<n>'`` with ``n = len(layer_dims) - 1``;
        ``W<l>`` has shape ``(layer_dims[l], layer_dims[l-1])`` and ``b<l>``
        has shape ``(layer_dims[l], 1)``.
    """
    parameters = {}
    for l in range(1, len(layer_dims)):
        units, inputs = layer_dims[l], layer_dims[l - 1]
        W = np.random.randn(units, inputs) * scale
        b = np.zeros((units, 1))
        assert W.shape == (units, inputs)
        assert b.shape == (units, 1)
        parameters['W' + str(l)] = W
        parameters['b' + str(l)] = b
    return parameters

In [8]:
# Smoke test: layer sizes 5 -> 4 -> 2, so W1 is (4, 5) and b1 is (4, 1).
parameters = initialize_parameters([5,4,2])
print(parameters['W1'])
print(parameters['b1'])

[[-0.00576118 -0.00745127  0.01388894  0.00514156 -0.00992654]
 [ 0.00343834  0.00729609 -0.00677551  0.00559101 -0.01244712]
 [-0.00908862  0.01085418 -0.00407766  0.00475849 -0.00043449]
 [-0.01178733 -0.0084548  -0.00187678  0.00924892  0.0052422 ]]
[[0.]
 [0.]
 [0.]
 [0.]]


#### Forward pass

In [9]:
def linear_forward(A, W, b):
    """Compute the affine part of a layer, ``Z = W . A + b``.

    Parameters
    ----------
    A : numpy.ndarray
        Input activations.
    W : numpy.ndarray
        Weight matrix.
    b : numpy.ndarray
        Bias, added to the product ``W . A``.

    Returns
    -------
    Z : numpy.ndarray
        Pre-activation of shape ``(W.shape[0], A.shape[1])``.
    cache : tuple
        ``(A, W, b)``, kept for the backward pass.
    """
    weighted_input = np.dot(W, A)
    Z = weighted_input + b

    n_units, n_columns = W.shape[0], A.shape[1]
    assert Z.shape == (n_units, n_columns)

    return Z, (A, W, b)

In [10]:
# Smoke test: push a random (3, 1) input through the first layer of a 3 -> 4 -> 5 network.
parameters = initialize_parameters([3,4,5])
A = np.random.randn(3,1)
print("A: ")
print(A)
W = parameters['W1']
b = parameters['b1']
Z, cache = linear_forward(A, W, b)
print("Z: ")
print(Z)
# linear_forward returns cache = (A, W, b); print each element in turn.
print("Cache: ")
print("    A: ")
print((cache[0]))
print("    W: ")
print((cache[1]))
print("    b: ")
print((cache[2]))

A: 
[[ 0.76345615]
 [-2.20648138]
 [ 1.22391736]]
Z: 
[[-0.00619592]
 [ 0.01568658]
 [ 0.01808807]
 [-0.00821288]]
Cache: 
    A: 
[[ 0.76345615]
 [-2.20648138]
 [ 1.22391736]]
    W: 
[[ 0.00480866 -0.0020054  -0.01167727]
 [-0.00210598 -0.00211688  0.01031406]
 [ 0.00082289 -0.01540136 -0.01350008]
 [-0.00314927  0.01165996  0.01627473]]
    b: 
[[0.]
 [0.]
 [0.]
 [0.]]


In [11]:
def linear_activation_forward(A_prev, W, b, activation):
    """Run one layer forward: the linear step followed by its activation.

    Parameters
    ----------
    A_prev : numpy.ndarray
        Activations coming from the previous layer (or the input data).
    W : numpy.ndarray
        Weight matrix of this layer.
    b : numpy.ndarray
        Bias of this layer.
    activation : str
        Either ``'relu'`` or ``'sigmoid'``.

    Returns
    -------
    A : numpy.ndarray
        Layer output of shape ``(W.shape[0], A_prev.shape[1])``.
    cache : tuple
        ``(linear_cache, activation_cache)`` for the backward pass.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # Pick the activation first so an unsupported name fails with a clear
    # message instead of an unbound-variable error further down.
    if activation == 'relu':
        activate = relu
    elif activation == 'sigmoid':
        activate = sigmoid
    else:
        raise ValueError(
            "Unsupported activation %r; expected 'relu' or 'sigmoid'" % (activation,)
        )

    # The linear step is the same for every activation.
    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activate(Z)

    assert A.shape == (W.shape[0], A_prev.shape[1])
    cache = (linear_cache, activation_cache)
    return A, cache
        

In [12]:
# Smoke test: one ReLU layer of a 3 -> 4 -> 5 network applied to a random (3, 1) input.
parameters = initialize_parameters([3,4,5])
A_prev = np.random.randn(3,1)
W = parameters['W1']
b = parameters['b1']
A, cache = linear_activation_forward(A_prev, W, b, activation='relu')
print("A_prev: ")
print(A_prev)
print("Z: ")
print(cache[1])  # cache[1] is the activation cache, i.e. the pre-activation Z
print("A: ")
print(A)
print("Cache: ")
print(cache)
# Disabled alternative: print the cache field by field instead of as one tuple.
# print("    A: ")
# print((cache[0][0]))
# print("    W: ")
# print((cache[0][1]))
# print("    b: ")
# print((cache[0][2]))
# print("    Z: ")
# print((cache[1]))

A_prev: 
[[-1.01538002]
 [-1.36762589]
 [ 0.72259254]]
Z: 
[[ 0.00235079]
 [-0.01272416]
 [-0.00199401]
 [ 0.00022851]]
A: 
[[ 0.00235079]
 [-0.        ]
 [-0.        ]
 [ 0.00022851]]
Cache: 
((array([[-1.01538002],
       [-1.36762589],
       [ 0.72259254]]), array([[-3.03515004e-02,  1.40313670e-02, -1.28396723e-02],
       [ 5.52227312e-03,  1.09084145e-03, -7.78460027e-03],
       [ 1.80848074e-03,  9.14709493e-05, -4.51389186e-05],
       [-5.25879580e-03,  1.00833974e-02,  1.20111295e-02]]), array([[0.],
       [0.],
       [0.],
       [0.]])), array([[ 0.00235079],
       [-0.01272416],
       [-0.00199401],
       [ 0.00022851]]))


In [13]:
def full_forward_pass(X, parameters):
    """Forward-propagate ``X`` through the whole network.

    Every layer except the last uses ReLU; the last layer uses sigmoid.

    Parameters
    ----------
    X : numpy.ndarray
        Input fed to the first layer.
    parameters : dict
        Holds ``'W<l>'`` and ``'b<l>'`` for each layer ``l``; the layer count
        is taken as ``len(parameters) // 2``.

    Returns
    -------
    AL : numpy.ndarray
        Output of the final (sigmoid) layer.
    caches : list
        One cache per layer, in forward order.
    """
    num_layers = len(parameters) // 2  # one W and one b per layer
    caches = []
    A_curr = X

    # Hidden layers 1 .. num_layers - 1 use ReLU.
    for idx in range(1, num_layers):
        tag = str(idx)
        A_curr, layer_cache = linear_activation_forward(
            A_curr, parameters['W' + tag], parameters['b' + tag], activation='relu'
        )
        caches.append(layer_cache)

    # Output layer uses sigmoid.
    last = str(num_layers)
    AL, output_cache = linear_activation_forward(
        A_curr, parameters['W' + last], parameters['b' + last], activation='sigmoid'
    )
    caches.append(output_cache)
    return AL, caches

In [14]:
# Smoke test: five-layer network (3 inputs, 2 outputs) on a single random example.
layer_dims = [3,5,8,6,3,2]
parameters = initialize_parameters(layer_dims)
X = np.random.randn(3,1)
AL, caches = full_forward_pass(X, parameters)
print("AL:  ")
print(AL)
print("caches:")
print(caches)  # prints every cached array for every layer, so the output is long

AL:  
[[0.5]
 [0.5]]
caches:
[((array([[ 0.22128863],
       [ 0.01460948],
       [-1.73562033]]), array([[-0.01079376,  0.00559298, -0.00841057],
       [-0.01379845, -0.00410019, -0.02194486],
       [-0.01298333, -0.01743026, -0.01620041],
       [-0.00287286, -0.00293349,  0.01160664],
       [-0.0025644 , -0.00965737, -0.00845371]]), array([[0.],
       [0.],
       [0.],
       [0.],
       [0.]])), array([[ 0.01229074],
       [ 0.0349746 ],
       [ 0.02499005],
       [-0.02082331],
       [ 0.01396387]])), ((array([[ 0.01229074],
       [ 0.0349746 ],
       [ 0.02499005],
       [-0.        ],
       [ 0.01396387]]), array([[-0.00191052,  0.02363347,  0.00208426, -0.00907032,  0.00159932],
       [ 0.00208367, -0.00015643, -0.00479322, -0.00405261,  0.0159644 ],
       [ 0.00378811,  0.0084982 ,  0.00020738,  0.01366372, -0.00703907],
       [-0.00570633, -0.00240464, -0.00407209,  0.00059905,  0.00034705],
       [ 0.00409462,  0.02296791,  0.00872095,  0.00371393, -0.0101

#### Compute Cost

In [15]:
def compute_cost(AL, Y):
    """Compute the cross-entropy cost between predictions and labels.

    Parameters
    ----------
    AL : numpy.ndarray
        Predicted probabilities.
    Y : numpy.ndarray
        Target values; ``Y.shape[1]`` is used as the number of examples.

    Returns
    -------
    numpy.ndarray
        0-dimensional array holding
        ``-(1/m) * sum(Y*log(AL) + (1-Y)*log(1-AL))``.
    """
    m = Y.shape[1]

    # Keep predictions strictly inside (0, 1): an exact 0 or 1 would make
    # log() return -inf and 0 * -inf would turn the whole cost into NaN.
    eps = np.finfo(float).eps
    AL_safe = np.clip(np.asarray(AL, dtype=float), eps, 1 - eps)

    cost = (-1/m) * np.sum(np.multiply(Y, np.log(AL_safe)) + np.multiply((1 - Y), np.log(1 - AL_safe)))
    cost = np.squeeze(cost)
    assert cost.shape == ()
    return cost

In [19]:
# Smoke test for compute_cost on random (4, 1) inputs.
# NOTE(review): Y is drawn as |randn|, so it is not a 0/1 label vector; the printed
# cost only exercises the code path. Also, with shape (4, 1), Y.shape[1] is 1, so
# compute_cost treats this as a single example -- confirm whether (1, 4) was intended.
AL = np.absolute(np.random.randn(4,1)*0.1)
Y = np.absolute(np.random.randn(4,1))
print("AL: ")
print(AL)
print("Y:  ")
print(Y)
cost = compute_cost(AL,Y)
print("Cost:")
print(cost)

AL: 
[[0.0294794 ]
 [0.04141099]
 [0.02221734]
 [0.11293798]]
Y:  
[[0.27483815]
 [0.80474374]
 [1.80326858]
 [0.99958699]]
Cost:
12.587824480229388


#### Backward Propagation

In [20]:
def linear_backward(dZ, cache):
    """Backward step through the linear part of a layer.

    Parameters
    ----------
    dZ : numpy.ndarray
        Gradient of the cost with respect to this layer's pre-activation.
    cache : tuple
        ``(A_prev, W, b)`` as stored by the forward linear step.

    Returns
    -------
    dA_prev : numpy.ndarray
        Gradient with respect to ``A_prev``; same shape as ``A_prev``.
    dW : numpy.ndarray
        Gradient with respect to ``W``; same shape as ``W``.
    db : numpy.ndarray
        Gradient with respect to ``b``; same shape as ``b``.
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]  # number of columns (examples) in A_prev

    dW = (1/m) * np.dot(dZ, A_prev.T)
    db = (1/m) * np.sum(dZ, axis=1, keepdims=True)

    dA_prev = np.dot(W.T, dZ)

    assert dW.shape == W.shape
    assert db.shape == b.shape
    # Compare shapes; comparing the array itself to a tuple raised an error.
    assert dA_prev.shape == A_prev.shape
    return dA_prev, dW, db

In [48]:
def linear_activation_backward(dA, cache, activation):
    """Backward step through one layer: activation first, then the linear part.

    Parameters
    ----------
    dA : numpy.ndarray
        Gradient of the cost with respect to this layer's output.
    cache : tuple
        ``(linear_cache, activation_cache)`` stored by the forward step.
    activation : str
        Either ``'relu'`` or ``'sigmoid'``.

    Returns
    -------
    tuple
        ``(dA_prev, dW, db)`` as returned by ``linear_backward``.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # Pick the gradient function first so an unsupported name fails with a
    # clear message instead of an unbound-variable error further down.
    if activation == 'relu':
        activation_gradient = relu_gradient
    elif activation == 'sigmoid':
        activation_gradient = sigmoid_gradient
    else:
        raise ValueError(
            "Unsupported activation %r; expected 'relu' or 'sigmoid'" % (activation,)
        )

    linear_cache, activation_cache = cache
    dZ = activation_gradient(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [49]:
def full_backward_prop(AL, Y, caches):
    """Back-propagate through the whole network.

    The last layer is treated as sigmoid and every earlier layer as ReLU,
    mirroring the forward pass.

    Parameters
    ----------
    AL : numpy.ndarray
        Output of the forward pass.
    Y : numpy.ndarray
        Target values; reshaped to ``AL.shape``.
    caches : list
        Per-layer caches from the forward pass, in forward order.

    Returns
    -------
    dict
        With ``L = len(caches)``: ``'dW<l>'`` and ``'db<l>'`` for
        ``l = 1 .. L``, and ``'dA<l>'`` for ``l = 0 .. L-1`` (the gradient
        with respect to the input of layer ``l + 1``).
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Initialize backprop with dAL, the derivative of the cost w.r.t. AL.
    # NOTE(review): this divides by AL and 1 - AL, so an AL entry of exactly
    # 0 or 1 produces inf/NaN here.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Final layer (sigmoid). Its weight/bias gradients belong to layer L;
    # storing them under L-1 let the loop below overwrite them.
    current_cache = caches[L-1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, activation= 'sigmoid')

    # Remaining layers (ReLU), from cache index L-2 down to 0.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA"+ str(l+1)], current_cache, activation= 'relu')
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l+1)] = dW_temp
        grads["db" + str(l+1)] = db_temp

    return grads

#### Updating Parameters

In [50]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every layer's weights and biases.

    Parameters
    ----------
    parameters : dict
        Holds ``'W<l>'`` and ``'b<l>'`` arrays for each layer; the arrays are
        updated in place.
    grads : dict
        Holds the matching ``'dW<l>'`` and ``'db<l>'`` gradients.
    learning_rate : float
        Step size multiplying each gradient.

    Returns
    -------
    dict
        The same ``parameters`` dict, after the update.
    """
    # One W and one b per layer; the dict itself cannot be floor-divided.
    L = len(parameters) // 2

    for l in range(L):
        parameters["W" + str(l+1)] -= learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] -= learning_rate * grads["db" + str(l+1)]

    return parameters