# Related Libraries Importing

In [1]:
import numpy as np
import os 

# Helper Functions Defination

In [2]:
# Activation functions defination
def activation_fun(x,activation_type= 'sigmoid'):
    
    if activation_type not in ['sigmoid','tanh', 'relu','lrelu']:
        raise ValueError(" activation type must be in ['sigmoid','tanh', 'relu','lrelu']")
    
    if activation_type == 'sigmoid':
        return (1/(1+np.exp(-x)))
    
    elif activation_type == 'tanh':
        return np.tanh(x)
    
    elif activation_type == 'relu':
        return np.maximum(0.0,x)
    
    elif activation_type == 'lrelu':
        return np.maximum(0.01 * x, x)

In [3]:
# Activation fumnctions derivatives
def act_derivative(x, activation_type= 'sigmoid'):

    if activation_type not in ['sigmoid','tanh', 'relu','lrelu']:
        raise ValueError(" activation type must be in ['sigmoid','tanh', 'relu','lrelu']")    
    
    if activation_type == 'sigmoid':
        return activation_fun(x, activation_type= activation_type)\
        * (1- activation_fun(x, activation_type=activation_type))
    
    elif activation_type == 'tanh':
        return 1- (activation_fun(x,activation_type= activation_type))**2
    
    elif activation_type == 'relu':
        return np.where(x <= 0.0, 0.0,1.0)
    
    elif activation_type == 'lrelu':
        return np.where(x <= 0.0, 0.01 * x,1.0)

In [4]:
# Random data generation for function
def data_generator(num_features=10, num_examples=1000,train_p=0.8):
    
    dataset = np.random.randint(0,20,size=(num_features,num_examples))
    labels = np.random.randint(0,2,(1,num_examples))
    
    x_train = dataset[:,:int(num_examples * train_p)]
    y_train = labels[:,:int(num_examples * train_p)]
    x_test = dataset[:,int(num_examples * train_p) :]
    y_test = labels[:,int(num_examples * train_p):]
    return x_train, y_train, x_test, y_test

# Deep Neural Network Formulation

In [5]:
x_train, y_train, x_test, y_test = data_generator(num_features=5, num_examples=10, train_p=0.8)

In [6]:
# parameters initiation
def initialize_parameters(layer_dims):
    
    parameters ={}
    L = len(layer_dims)
    for l in range(1,L):
        parameters['W'+ str(l)] =  np.random.randn(layer_dims[l],layer_dims[l-1])*0.1
        parameters['b'+ str(l)] =  np.zeros((layer_dims[l],1))
    
    return parameters

In [7]:
dims = [5,3,3,1]
parameters = initialize_parameters(dims)
parameters

{'W1': array([[ 0.08087733,  0.0664122 , -0.12097279,  0.03025323, -0.09484008],
        [ 0.18561727, -0.16058276,  0.10141466,  0.03732837, -0.02113285],
        [-0.0697803 ,  0.11311399,  0.14349171,  0.01416696, -0.00130067]]),
 'b1': array([[0.],
        [0.],
        [0.]]),
 'W2': array([[-0.19898727, -0.03565914,  0.07408392],
        [ 0.01240134,  0.13237066, -0.08097276],
        [-0.09831711, -0.04112334, -0.00549575]]),
 'b2': array([[0.],
        [0.],
        [0.]]),
 'W3': array([[ 0.05263844, -0.1264592 , -0.10143666]]),
 'b3': array([[0.]])}

In [8]:
def forward_prop(A_prev, parameters, activation_order):
    
    cache = {}
    
    for  l,activation in enumerate(activation_order):
        
        W = parameters['W' + str(l+1)]
        b = parameters['b' + str(l+1)]
       
        Z = np.dot(W,A_prev) + b
        A = activation_fun(Z,activation_type= activation)
        
        cache['Z' + str(l+1)]= Z
        cache['A' + str(l+1)]= A
        
        A_prev = A
    
    return cache

In [9]:
activations = ['relu','relu','sigmoid']
cache = forward_prop(x_train,parameters,activations)
cache

{'Z1': array([[-1.00132224,  0.21216916,  1.15334123, -0.53510064,  1.53005562,
         -0.37912496, -1.46926466,  0.24713178],
        [ 1.79620196,  1.05677128,  2.15841101,  2.53568404,  3.55308581,
          2.96615513,  4.83402986,  3.76568277],
        [ 1.81835641,  0.874745  ,  0.42301606,  1.55134248, -0.33169235,
          0.04865744,  1.9165236 ,  0.13493678]]),
 'A1': array([[0.        , 0.21216916, 1.15334123, 0.        , 1.53005562,
         0.        , 0.        , 0.24713178],
        [1.79620196, 1.05677128, 2.15841101, 2.53568404, 3.55308581,
         2.96615513, 4.83402986, 3.76568277],
        [1.81835641, 0.874745  , 0.42301606, 1.55134248, 0.        ,
         0.04865744, 1.9165236 , 0.13493678]]),
 'Z2': array([[ 0.07065996, -0.01509797, -0.27512861,  0.02450923, -0.43116156,
         -0.1021658 , -0.03039375, -0.17346044],
        [ 0.0905271 ,  0.07168618,  0.26576049,  0.21003369,  0.48929906,
          0.38869199,  0.48469752,  0.49060448],
        [-0.083859

In [10]:
def compute_cost(Y, cache):
    
    AL = cache[(list(cache.keys())[-1])]
    m = Y.shape[1]
    
    cost = np.squeeze((-1/m)*(np.dot(Y,np.log(AL).T)+ np.dot((1-Y),np.log(1-AL).T)))
    assert(cost.shape == ())
    
    return cost

In [11]:
cost = compute_cost(y_train,cache)

In [12]:
def backward_prop(X,Y,parameters,cache, activation_order):
    
    AL = cache['A'+str(len(activation_order))]
    grads = {}
    dA =  - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    m = Y.shape[1]
    
    
    for l, activation in reversed(list(enumerate(activation_order))):
        
        Z = cache['Z' + str(l+1)]
        
        if l == 0:
            A_prev = X
        else:
            A_prev = cache['A' + str(l)]
            
        W = parameters['W' + str(l+1)]
        
        dZ = dA * act_derivative(Z, activation_type= activation)
        dW = (1/m) * np.dot(dZ,A_prev.T)
        db =  (1/m) * np.sum(np.dot(dZ, A_prev.T), axis = 1, keepdims =True)
        dA = np.dot(W.T,dZ)
        
        grads['dW'+ str(l+1)] = dW
        grads['db'+ str(l+1)] = db
    
    return grads

In [13]:
grads = backward_prop(x_train,y_train, parameters,cache,activations)

In [14]:
def optimize(parameters, grads, learning_rate = 0.01):
    
    L = int(len(grads)/2)+1
    
    for l in range(1,L):
        
        W = parameters['W' + str(l)]
        b = parameters['b' + str(l)]
        
        dW = grads['dW' + str(l)]
        db = grads['db' + str(l)]
        
        parameters['W' + str(l)] = W - learning_rate * dW
        parameters['b' + str(l)] = b - learning_rate * db
    
    return parameters

In [15]:
parameters = optimize(parameters,grads, learning_rate=0.02)

In [16]:
def model(X,Y, layer_dims, activation_order, num_iterations=5000,learning_rate = 0.01,print_cost=False):
    
    parameters = initialize_parameters(layer_dims)
    
    for i in range(num_iterations):
    
        cache = forward_prop(X, parameters, activation_order)
        
        cost = compute_cost(Y,cache)
        
        grads = backward_prop(X,Y,parameters,cache,activation_order)
        
        parameters = optimize(parameters,grads,learning_rate)
        
        if print_cost and i % 1000 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
    
    return parameters

In [17]:
parameters = model(x_train,y_train,dims,activations,print_cost=True, learning_rate=0.1)

Cost after iteration 0: 0.696959
Cost after iteration 1000: 0.637997
Cost after iteration 2000: 2.437065
Cost after iteration 3000: 0.566539
Cost after iteration 4000: 2.473683


In [18]:
def predict(X, y, parameters, activation_order):
    
    m = X.shape[1]
    p = np.zeros((1,m))
    
    # Forward propagation
    cache = forward_prop(X,parameters,activation_order)
    probas = cache[(list(cache.keys())[-1])]

    # convert probas to 0/1 predictions
    for i in range(0, probas.shape[1]):
        if probas[0,i] > 0.5:
            p[0,i] = 1
        else:
            p[0,i] = 0
    
    print("Accuracy: "  + str(np.sum((p == y)/m)))

# Real example

In [2]:
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
# for windows os flip the forward slash (/) into backward slash (\)
os.chdir('../utils')
from lr_utils import load_dataset

%matplotlib inline

ModuleNotFoundError: No module named 'h5py'

In [20]:
train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()

NameError: name 'load_dataset' is not defined

In [21]:
index = 19
plt.imshow(train_x_orig[index])
print ("y = " + str(train_y[0,index]) + ". It's a " + classes[train_y[0,index]].decode("utf-8") +  " picture.")

NameError: name 'train_x_orig' is not defined

In [25]:
m_train = train_x_orig.shape[0]
num_px = train_x_orig.shape[1]
m_test = test_x_orig.shape[0]

print ("Number of training examples: " + str(m_train))
print ("Number of testing examples: " + str(m_test))
print ("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print ("train_x_orig shape: " + str(train_x_orig.shape))
print ("train_y shape: " + str(train_y.shape))
print ("test_x_orig shape: " + str(test_x_orig.shape))
print ("test_y shape: " + str(test_y.shape))

Number of training examples: 209
Number of testing examples: 50
Each image is of size: (64, 64, 3)
train_x_orig shape: (209, 64, 64, 3)
train_y shape: (1, 209)
test_x_orig shape: (50, 64, 64, 3)
test_y shape: (1, 50)


In [26]:
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten/255.
test_x = test_x_flatten/255.

print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))

train_x's shape: (12288, 209)
test_x's shape: (12288, 50)


In [27]:
dims = [12288,20,7,5,1]
activations = ['relu','relu','relu','sigmoid']
parameters = model(train_x,train_y,dims,activations,print_cost=True, learning_rate=0.0075,num_iterations= 6100)

Cost after iteration 0: 0.704070
Cost after iteration 1000: 0.640192
Cost after iteration 2000: 0.600308
Cost after iteration 3000: 0.461689
Cost after iteration 4000: 0.142571
Cost after iteration 5000: 0.030624
Cost after iteration 6000: 0.004066


In [28]:
predict(train_x,train_y,parameters,activations)

Accuracy: 0.9999999999999998


In [29]:
predict(test_x,test_y,parameters,activations)

Accuracy: 0.74


# This is the end of the story