## Deep Neural Network (L hidden layers)

In [1]:
# Importing libraries
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

In [2]:
# Loading the data (cat/non-cat)
def load_dataset():
    """Loads the Cat vs Non-Cat dataset

    Returns
    -------
    X_train, y_train, X_test, y_test, classes: Arrays
    Dataset splitted into train and test with classes
    """
    train_dataset = h5py.File('../datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    test_dataset = h5py.File('../datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])

    classes = np.array(test_dataset["list_classes"][:])
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [3]:
# Calculating sigmoid
def sigmoid(Z):
    return 1/(1+np.exp(-Z))

In [4]:
# Calculating tanh
def tanh(Z):
    return np.tanh(Z)

In [5]:
# Calculating relu
def relu(Z):
    return np.maximum(Z, 0)

In [6]:
# Calculating leaky-relu
def leaky_relu(Z, alpha = 0.01):
    np.where(Z > 0, Z, Z * alpha)

In [7]:
# Calculating first derivative of sigmoid
def dif_sigmoid(Z):
    return (1-np.power(Z, 2))

In [8]:
# Calculating first derivative of tanh
def dif_tanh(Z):
    return 1-(tanh(Z)**2)

In [9]:
# Calculating first derivative of relu
def dif_relu(Z):
    return (Z>0).astype(Z.dtype)

In [10]:
# Calculating first derivative of leaky relu
def dif_leaky_relu(Z, alpha = 0.01):
    dz = np.ones_like(Z)
    dz[Z < 0] = alpha
    return dz

In [11]:
#Get activation function
def activation_fun(name):
    if name=='relu':
        return relu
    elif name=='sigmoid':
        return sigmoid
    elif name=='leaky_relu':
        return leaky_relu
    elif name=='tanh':
        return tanh
    else:
        return tanh

In [12]:
#Get first derivative of activation function
def derivative_activation(name):
    if name=='relu':
        return dif_relu
    elif name=='sigmoid':
        return dif_sigmoid
    elif name=='leaky_relu':
        return dif_leaky_relu
    elif name=='tanh':
        return dif_tanh
    else:
        return dif_tanh

In [13]:
# Printing the shape of the training and testing data
train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes = load_dataset()
print('train_set_x_orig shape', train_set_x_orig.shape)
print('train_set_y_orig',train_set_y_orig.shape)
print("test_set_x_orig",test_set_x_orig.shape)
print("test_set_y_orig",test_set_y_orig.shape)
print('classes',classes.shape)

train_set_x_orig shape (209, 64, 64, 3)
train_set_y_orig (209,)
test_set_x_orig (50, 64, 64, 3)
test_set_y_orig (50,)
classes (2,)


In [14]:
# Reshape the training and test examples
def preprocess(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig):
    train_x = train_set_x_orig.reshape(train_set_x_orig.shape[0], train_set_x_orig.shape[1]*train_set_x_orig.shape[2]*train_set_x_orig.shape[3])/255.
    test_x = test_set_x_orig.reshape(test_set_x_orig.shape[0],test_set_x_orig.shape[1]*test_set_x_orig.shape[2]*test_set_x_orig.shape[3])/255.
    train_y = train_set_y_orig.reshape(-1,1)
    test_y = test_set_y_orig.reshape(-1,1)
    print('train_x shape', train_x.shape)
    print('train_y',train_y.shape)
    print("test_x",test_x.shape)
    print("test_y",test_y.shape)
    return train_x,test_x,train_y,test_y

In [38]:
#Defining initail weights and bias to dictanary w and b
def initial_weights(X,Y,h_nodes,hidden_layer):
    """
    This function creates a vector of zeros of shape (X.shape[1], 1) for w and initializes b to 0.
    
    Argument:
    X -- training dataset
    h_nodes -- number of hidden nodes in each hidden laayer, list
    hidden_layer -- number of hidden layers
    
    Returns:
    w -- dict of initialized vector of shape (X.shape[1], hidden_nodes)
    b -- dict of initialized scalar (corresponds to the bias)
    """
    np.random.seed(42) 
    x_nodes = X.shape[1]
    y_nodes = Y.shape[1]

    w = {}
    b = {} 
    
    for i in range(len(h_nodes)):
        if i==0:
            n_l_1 = x_nodes
        else:
            n_l_1 = h_nodes[i-1]
        
        w[i] = np.random.randn(n_l_1 ,h_nodes[i])*np.sqrt(2/n_l_1)
        b[i] = np.random.randn(1,h_nodes[i]) * np.sqrt(2/n_l_1)
 
    return w,b

In [39]:
# FORWARD PROPAGATION
def forward_propagate(X,w,b, activation,hidden_layer,h_nodes):
    """
    This functions performs forward propagation and calculates output value
    
    Argument:
    X -- training dataset
    w -- dict of weights
    b -- dict of bias
    activationion -- list of actiations used at particular hidden layer
    hidden_layer -- number of hidden layers, integer
    h_nodes -- number of hidden nodes in each hidden laayer, list
    
    Returns:
    A -- yhat for the training data, dict
    Z -- Dot product between X and w , dict
    """
    m = X.shape[0]
    Z = {}
    A = {}    
    for l in range(len(h_nodes)):
#         print(l)
        if l == 0:
            input_X = X
        else:
            input_X = A[l-1]
        Z[l] = (np.dot(input_X,w[l])+b[l])
        A[l] = activation_fun(activation[l])(Z[l])
    
    return Z, A

In [40]:
def costfunction(Y,A):   
    m = Y.shape[0]
    last_index = len(A)-1
    cost = np.nansum(-1/m*np.sum(Y*np.log(A[last_index]) + (1-Y)*np.log(1-A[last_index])))
    return cost

In [41]:
def back_prpagate(X,Y,Z,A,w,b,activation, hidden_layer,h_nodes):

    m = X.shape[0]
    L= hidden_layer 
#     dz2 = (A2-Y)
#     dw2 = 1/m*(np.dot(A1.T,dz2))
#     db2 = 1/m*(np.sum(dz2, axis=0, keepdims=True))
    
#     dz1 = np.dot(dz2, w2.T) * activation_fun(activation)(A1)
#     dw1 = 1/m*(np.dot(X.T,dz1))
#     db1 = 1/m*(np.sum(dz1, axis=0, keepdims=True))    
    dz = {}
    da = {}
    dw = {}
    db = {}
    for l in range(len(h_nodes)-1, -1, -1):
#         print('A shape', len(A))
        if l==len(h_nodes)-1:
            dz[l] = (A[l] - Y)
            dw[l] = (1./m * np.dot(A[l-1].T, dz[l]))
            db[l] = (1./m * np.sum(dz[l]))

        else:
            dz[l] = ((np.dot(dz[l+1], w[l+1].T)) * derivative_activation(activation[l])(Z[l]))
            if l!=0:
                input_X = A[l-1]
            else:
                input_X = X
            dw[l] = (1./m * np.dot(input_X.T, dz[l]))
            db[l] = (1./m * np.sum(dz[l]))
                
    return dz,dw,db

In [42]:
def update_weights(w,b,dw,db,learning_rate,hidden_layer,h_nodes):
    L=hidden_layer
    for i in range(len(h_nodes)):
        w[i] = w[i] - learning_rate*dw[i] 
        b[i] = b[i] - learning_rate*db[i]
    return w,b

In [46]:
def train_model(X,Y,test_x,test_y,w,b, learning_rate,num_iterations,activation,hidden_layer,h_nodes):
    for i in range(num_iterations):
        
        Z,A = forward_propagate(X,w,b,activation,hidden_layer,h_nodes)
        cost = costfunction(Y,A)
#         print('cost', cost)
        dz,dw,db = back_prpagate(X,Y,Z,A,w,b,activation,hidden_layer,h_nodes)
        w,b = update_weights(w,b,dw,db,learning_rate,hidden_layer,h_nodes)
        
        if i%(num_iterations/10) == 0:
#             print(cost)
            _,A_train = forward_propagate(X,w,b, activation,hidden_layer,h_nodes)
            A_train1 = np.where(A_train[len(A_train)-1]>0.5 , 1, 0)
        
            _,A_test = forward_propagate(test_x,w,b,activation,hidden_layer,h_nodes)
#             print('A_train len', len(A_train))
#             print('A_test len', len(A_test))
            A_test1 = np.where(A_test[len(A_test)-1]>0.5 , 1, 0)
#             print('A_train1',A_train1)
#             print('A_test1', A_test1)
            acc_train = accuracy_score(Y, A_train1)
#             print('shape of A_train1',A_train1.shape)
#             print('shape of A_test1',A_test1.shape)
#             print('shape of y_test',test_y.shape)
            acc_test = accuracy_score(test_y , A_test1)
        
            print('Iteration: ', i, end = '')
            print('\tLoss {:.4f}\t'.format(cost), end = '')
            print('\tTraining Accuracy: {:.4f}\t'.format(acc_train),end = '')
            print('Testing Accuracy: {:.4f}'.format(acc_test))
    return w,b,dw,db, cost

In [47]:
def model(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes, num_iterations, learning_rate):
    train_x,test_x,train_y,test_y = preprocess(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig)
    hidden_layer = 4
    h_nodes = [20,16,8,4,train_y.shape[1]]
    activation = ['relu','relu','relu','relu','sigmoid']
    w,b = initial_weights(train_x,train_y,h_nodes,hidden_layer)
    w,b,dw,db,cost = train_model(train_x,train_y,test_x,test_y,w,b, learning_rate = learning_rate,
                                 num_iterations = num_iterations,activation = activation, hidden_layer = hidden_layer,h_nodes = h_nodes )

In [50]:
train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes = load_dataset()
# num_iterations = int(input("Enter iterations : "))
# learning_rate = float(input("Enter learning rate : "))
num_iterations = 1000
learning_rate = 0.005
model(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes, num_iterations, learning_rate)

train_x shape (209, 12288)
train_y (209, 1)
test_x (50, 12288)
test_y (50, 1)
Iteration:  0	Loss 0.6441		Training Accuracy: 0.6555	Testing Accuracy: 0.3400
Iteration:  100	Loss 0.6080		Training Accuracy: 0.6555	Testing Accuracy: 0.3400
Iteration:  200	Loss 0.5654		Training Accuracy: 0.6555	Testing Accuracy: 0.3600
Iteration:  300	Loss 0.4946		Training Accuracy: 0.7129	Testing Accuracy: 0.4600
Iteration:  400	Loss 0.4383		Training Accuracy: 0.7416	Testing Accuracy: 0.4600
Iteration:  500	Loss 0.3816		Training Accuracy: 0.7799	Testing Accuracy: 0.5200
Iteration:  600	Loss 0.3093		Training Accuracy: 0.8373	Testing Accuracy: 0.5600
Iteration:  700	Loss 0.2454		Training Accuracy: 0.8900	Testing Accuracy: 0.6800
Iteration:  800	Loss 0.1615		Training Accuracy: 0.9522	Testing Accuracy: 0.7000
Iteration:  900	Loss 0.0984		Training Accuracy: 0.9904	Testing Accuracy: 0.6800
