# Designing the Model

In [20]:
import numpy as np

<img src="DNN.png" style="width:800px;height:500px;">


**X -> *[Linear -> ReLU] (L-1 times)* -> Linear -> Sigmoid -> Yhat**

In [2]:
def sigmoid(Z):
    """
    Apply the logistic sigmoid element-wise.

    Arguments:
    Z -- numpy array (or scalar) of pre-activation values

    Returns:
    A -- 1 / (1 + exp(-Z)), same shape as Z
    cache -- Z itself, handed back so the backward pass can reuse it
    """
    denominator = 1 + np.exp(-Z)
    return 1 / denominator, Z

In [5]:
def sigmoid_backward(dA, cache):
    """
    Backward pass through a sigmoid activation.

    Arguments:
    dA -- gradient of the cost with respect to the sigmoid output
    cache -- the pre-activation Z that was stored by the forward pass

    Returns:
    dZ -- gradient with respect to Z, i.e. dA * s * (1 - s) with s = sigmoid(Z)
    """
    # Recompute the activation from the cached pre-activation.
    sig = 1 / (1 + np.exp(-cache))
    return dA * sig * (1 - sig)

In [3]:
def relu(Z):
    """
    Apply the rectified linear unit element-wise.

    Arguments:
    Z -- numpy array of pre-activation values

    Returns:
    A -- max(0, Z), element-wise
    cache -- Z itself, handed back so the backward pass can reuse it
    """
    rectified = np.maximum(0, Z)
    return rectified, Z

In [4]:
def relu_backward(dA, cache):
    """
    Backward pass through a ReLU activation.

    Arguments:
    dA -- gradient of the cost with respect to the ReLU output
    cache -- the pre-activation Z that was stored by the forward pass

    Returns:
    dZ -- a copy of dA with entries zeroed wherever Z <= 0
    """
    # Work on a copy so the caller's dA is left untouched.
    grad = np.array(dA, copy=True)

    # The ReLU derivative is 0 wherever the unit was not active (Z <= 0).
    inactive = cache <= 0
    grad[inactive] = 0

    return grad

In [68]:
def initialize_parameters_dnn(layer_dims):
    """
    Create the initial weights and biases for every layer of the network.

    Arguments:
    layer_dims -- sequence of layer sizes, input layer first

    Returns:
    parameters -- dict with, for each layer l = 1 .. len(layer_dims) - 1:
                  "W<l>" -- standard-normal samples scaled by 0.01,
                            shape (layer_dims[l], layer_dims[l-1])
                  "b<l>" -- zeros, shape (layer_dims[l], 1)
    """
    parameters = {}

    # Walk over consecutive (fan-in, fan-out) pairs; layer numbering starts at 1.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for layer, (n_in, n_out) in enumerate(size_pairs, start=1):
        parameters[f'W{layer}'] = np.random.randn(n_out, n_in) * 0.01
        parameters[f'b{layer}'] = np.zeros((n_out, 1))

    return parameters

In [69]:
def linear_forward(A, W, b):
    """
    Linear part of a layer's forward pass: Z = W . A + b.

    Arguments:
    A -- activations coming into the layer
    W -- weight matrix of the layer
    b -- bias of the layer (added to the product, broadcasting as needed)

    Returns:
    Z -- pre-activation values, shape (W.shape[0], A.shape[1])
    cache -- the tuple (A, W, b), kept for the backward pass
    """
    pre_activation = np.dot(W, A) + b

    expected_shape = (W.shape[0], A.shape[1])
    assert pre_activation.shape == expected_shape  # check the dims

    return pre_activation, (A, W, b)

In [70]:
def linear_activation_forward(A_prev, W, b, activation):
    """
    Forward pass for one LINEAR -> ACTIVATION layer.

    Arguments:
    A_prev -- activations from the previous layer (or the input data)
    W -- weight matrix of this layer
    b -- bias of this layer
    activation -- name of the activation to apply: "sigmoid" or "relu"

    Returns:
    A -- output of the activation, shape (W.shape[0], A_prev.shape[1])
    cache -- tuple (linear_cache, activation_cache) kept for the backward pass

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    # Reject unknown names up front; otherwise neither branch would run and the
    # function would fail later with an unhelpful UnboundLocalError on A.
    if activation not in ("sigmoid", "relu"):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'sigmoid' or 'relu'"
        )

    # The linear step is identical for both activations, so do it once.
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    else:
        A, activation_cache = relu(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache


In [71]:
def dnn_model_forward(X, parameters):
    """
    Full forward pass: [LINEAR -> RELU] * (L-1) -> LINEAR -> SIGMOID.

    Arguments:
    X -- input data; its second dimension is the number of examples
    parameters -- dict holding "W<l>" / "b<l>" for l = 1 .. L

    Returns:
    AL -- output of the final sigmoid layer, shape (1, X.shape[1])
    caches -- list with one cache per layer, in layer order
    """
    # Each layer contributes one W and one b entry.
    num_layers = len(parameters) // 2
    caches = []
    A_current = X

    # Hidden layers 1 .. L-1 use ReLU.
    for layer in range(1, num_layers):
        A_current, layer_cache = linear_activation_forward(
            A_current,
            parameters['W' + str(layer)],
            parameters['b' + str(layer)],
            activation = "relu",
        )
        caches.append(layer_cache)

    # The output layer L uses a sigmoid.
    AL, output_cache = linear_activation_forward(
        A_current,
        parameters['W' + str(num_layers)],
        parameters['b' + str(num_layers)],
        activation = "sigmoid",
    )
    caches.append(output_cache)

    assert(AL.shape == (1, X.shape[1]))

    return AL, caches

In [72]:
def compute_cost(AL, Y):
    """
    Binary cross-entropy cost averaged over the examples.

    Arguments:
    AL -- predicted probabilities, laid out like Y
    Y -- true labels; Y.shape[1] is taken as the number of examples

    Returns:
    cost -- 0-d numpy array holding the cross-entropy cost
    """
    m = Y.shape[1]

    # Keep the probabilities strictly inside (0, 1). When the sigmoid saturates
    # to exactly 0 or 1, log(0) = -inf and the 0 * -inf products inside the dot
    # would turn the whole cost into NaN.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)

    cost = (1./m) * (-np.dot(Y, np.log(AL_safe).T) - np.dot(1-Y, np.log(1-AL_safe).T))

    cost = np.squeeze(cost) # make sure cost is just a number
    assert(cost.shape == ())

    return cost

In [73]:
def linear_backward(dZ, cache):
    """
    Backward pass through the linear part of one layer.

    Arguments:
    dZ -- gradient of the cost with respect to this layer's linear output
    cache -- tuple (A_prev, W, b) stored by the forward pass

    Returns:
    dA_prev -- gradient with respect to the previous activations, shaped like A_prev
    dW -- gradient with respect to W, shaped like W
    db -- gradient with respect to b, shaped like b
    """
    A_prev, W, b = cache

    # Averaging factor over the examples (second dimension of A_prev).
    inv_m = 1. / A_prev.shape[1]

    grad_W = inv_m * np.dot(dZ, A_prev.T)
    grad_b = inv_m * np.sum(dZ, axis=1, keepdims=True)
    grad_A_prev = np.dot(W.T, dZ)

    assert grad_A_prev.shape == A_prev.shape
    assert grad_W.shape == W.shape
    assert grad_b.shape == b.shape

    return grad_A_prev, grad_W, grad_b

In [74]:
def linear_activation_backward(dA, cache, activation):
    """
    Backward pass for one LINEAR -> ACTIVATION layer.

    Arguments:
    dA -- gradient of the cost with respect to this layer's activation output
    cache -- tuple (linear_cache, activation_cache) stored by the forward pass
    activation -- name of the activation used in this layer: "relu" or "sigmoid"

    Returns:
    dA_prev -- gradient with respect to the previous layer's activations
    dW -- gradient with respect to this layer's weights
    db -- gradient with respect to this layer's bias

    Raises:
    ValueError -- if activation is neither "relu" nor "sigmoid"
    """
    # Reject unknown names up front; otherwise neither branch would run and the
    # return statement would fail with an unhelpful UnboundLocalError.
    if activation not in ("relu", "sigmoid"):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'relu' or 'sigmoid'"
        )

    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    # The linear step is the same for both activations.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [75]:
def dnn_model_backward(AL, Y, caches):
    """
    Full backward pass for [LINEAR -> RELU] * (L-1) -> LINEAR -> SIGMOID.

    Arguments:
    AL -- output of the forward pass (predicted probabilities)
    Y -- true labels; reshaped here to AL's shape
    caches -- list of per-layer caches from the forward pass, in layer order

    Returns:
    grads -- dict with, for each layer l = 1 .. L:
             "dW<l>", "db<l>" -- gradients of layer l's weights and bias
             "dA<l>" -- the dA_prev value returned when back-propagating
                        through layer l (i.e. the gradient fed into layer l-1)
    """
    grads = {}
    L = len(caches) # the number of layers
    Y = Y.reshape(AL.shape) # Make AL and Y the same shape

    # Initializing the backpropagation.
    # Keep the probabilities strictly inside (0, 1): a saturated sigmoid yields
    # AL exactly 0 or 1, and the divisions below would then produce inf/NaN
    # gradients that poison every parameter on the next update.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)
    dAL = - (np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe))

    # Output layer (sigmoid).
    current_cache = caches[L-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, activation="sigmoid")

    # Hidden layers (ReLU), from layer L-1 down to layer 1. The gradient fed
    # into layer l+1 was stored under key "dA<l+2>" by the step before.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 2)], current_cache, activation="relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

In [76]:
def update_parameters(parameters, grads, learning_rate):
    """
    Apply one gradient-descent step to every layer's weights and biases.

    Arguments:
    parameters -- dict holding "W<l>" / "b<l>" for l = 1 .. L; its entries are
                  reassigned in place
    grads -- dict holding the matching "dW<l>" / "db<l>" gradients
    learning_rate -- step size of the update

    Returns:
    parameters -- the same dict object, now holding the updated values
    """
    # Each layer contributes one W and one b entry.
    num_layers = len(parameters) // 2

    for layer in range(1, num_layers + 1):
        for kind in ("W", "b"):
            key = f"{kind}{layer}"
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]

    return parameters

In [77]:
def predict(parameters, X):
    """
    Turn the network's output probabilities into hard 0/1 predictions.

    Arguments:
    parameters -- dict of trained "W<l>" / "b<l>" values
    X -- input data; its second dimension is the number of examples

    Returns:
    p -- float array of shape (1, number of examples): 1 where the predicted
         probability is above 0.5, 0 otherwise
    """
    probas, _ = dnn_model_forward(X, parameters)

    # convert probas to 0/1 predictions
    return np.where(probas > 0.5, 1.0, 0.0)

In [80]:
def dnn_model(X, Y, layers_dims, learning_rate = 0.001, num_iterations = 3000, print_cost=False):
    """
    Train an L-layer network with batch gradient descent.

    Arguments:
    X -- training inputs; the second dimension is the number of examples
    Y -- training labels
    layers_dims -- list of layer sizes, input layer first
    learning_rate -- initial step size; multiplied by 0.9 every 1000 iterations,
                     never going below 0.00001
    num_iterations -- maximum number of gradient-descent iterations
    print_cost -- if True, print the cost every 100 iterations

    Returns:
    parameters -- dict of learned "W<l>" / "b<l>" values
    costs -- list of the costs recorded every 100 iterations

    Note: reseeds NumPy's global random generator with 1 so that the
    initialization is reproducible.
    """
    np.random.seed(1)
    costs = []

    parameters = initialize_parameters_dnn(layers_dims)

    for i in range(1, num_iterations+1):

        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
        AL, caches = dnn_model_forward(X, parameters)

        # Compute cost.
        cost = compute_cost(AL, Y)

        # Backward propagation.
        grads = dnn_model_backward(AL, Y, caches)

        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)

        if i % 100 == 0:
            # Stop early once the cost no longer changes between two checkpoints.
            # The guard must test the history list `costs`: comparing the scalar
            # `cost` with [] yields an empty array whose truth value is either
            # falsy or an error, so the early stop could never trigger.
            if costs and cost == costs[-1]:
                break
            costs.append(cost)
            # Print the cost every 100 iterations
            if print_cost:
                print (f"Cost after iteration {i}: {cost}")

        if i % 1000 == 0:
            # Decay the learning rate, with a lower bound.
            learning_rate = max(0.00001, learning_rate*.9)

    return parameters, costs

# Training

In [81]:
import h5py

# Load the train / test splits. The context managers close the HDF5 files as
# soon as the arrays have been copied into memory, so no handle is left open.
with h5py.File('datasets/train_cat.h5', "r") as train_dataset:
    train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

with h5py.File('datasets/test_cat.h5', "r") as test_dataset:
    test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels

# Reshaping the target labels to row vectors of shape (1, number of examples)
train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

classes = np.array(['not-cat', 'cat']) # the list of classes

m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = train_set_x_orig[0].shape[0]

# flattening: one row per example
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1)
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1)

# correct dim: transpose so that each column is one example
train_set_x_flatten = train_set_x_flatten.T
test_set_x_flatten = test_set_x_flatten.T

# normalizing
# values were from 0 to 255
train_set_x = train_set_x_flatten/255.
test_set_x = test_set_x_flatten/255.

In [82]:
# Report the shape of every prepared array as a quick sanity check.
for set_name, set_array in (
    ("train_set_x", train_set_x),
    ("train_set_y_orig", train_set_y_orig),
    ("test_set_x", test_set_x),
    ("test_set_y_orig", test_set_y_orig),
):
    print (set_name + " shape ->", set_array.shape)

train_set_x shape -> (12288, 209)
train_set_y_orig shape -> (1, 209)
test_set_x shape -> (12288, 50)
test_set_y_orig shape -> (1, 50)


In [86]:
# Train a network with 12288 inputs, one hidden layer of 32 units and a single
# output unit, printing the cost as training progresses.
parameters, costs = dnn_model(
    X=train_set_x,
    Y=train_set_y_orig,
    layers_dims=[12288, 32, 1],
    learning_rate=0.01,
    num_iterations=100_000,
    print_cost=True,
)



Cost after iteration 100: 0.6408847211634631
Cost after iteration 200: 0.6051086742888109
Cost after iteration 300: 0.5372169320055384
Cost after iteration 400: 0.4560830383165973
Cost after iteration 500: 0.4343077275505305
Cost after iteration 600: 0.39201072328050396
Cost after iteration 700: 0.3478189823031774
Cost after iteration 800: 0.29618850570553235
Cost after iteration 900: 0.27782304506739613
Cost after iteration 1000: 0.16085157188896052
Cost after iteration 1100: 0.12416958357355942
Cost after iteration 1200: 0.1029317993012278
Cost after iteration 1300: 0.08055348297939466
Cost after iteration 1400: 0.06811042412572198
Cost after iteration 1500: 0.05680514327621626
Cost after iteration 1600: 0.046853102393372995
Cost after iteration 1700: 0.04021407814607501
Cost after iteration 1800: 0.03512772141956908
Cost after iteration 1900: 0.03098579397129617
Cost after iteration 2000: 0.027520288417277435
Cost after iteration 2100: 0.02490403958816167
Cost after iteration 2200: 

In [88]:
def _accuracy_percent(predictions, labels):
    """Return 100 minus the mean absolute difference (in percent) between predictions and labels."""
    return 100 - np.mean(np.abs(predictions - labels)) * 100


# Evaluate the trained network on both splits.
train_predictions = predict(parameters, train_set_x)
test_predictions = predict(parameters, test_set_x)
train_acc = _accuracy_percent(train_predictions, train_set_y_orig)
test_acc = _accuracy_percent(test_predictions, test_set_y_orig)
print(f"train accuracy: {train_acc}")
print(f"test accuracy: {test_acc}")

train accuracy: 100.0
test accuracy: 76.0
