## DroupOut Reguralization

In [1]:
# Importing libraries
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

In [19]:
# Loading the data (cat/non-cat)
def load_dataset():
    """Loads the Cat vs Non-Cat dataset

    Returns
    -------
    X_train, y_train, X_test, y_test, classes: Arrays
    Dataset splitted into train and test with classes
    """
    train_dataset = h5py.File('train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    test_dataset = h5py.File('test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])

    classes = np.array(test_dataset["list_classes"][:])
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [20]:
# Calculating sigmoid
def sigmoid(Z):
    return 1/(1+np.exp(-Z))

In [21]:
# Calculating tanh
def tanh(Z):
    return np.tanh(Z)

In [22]:
# Calculating relu
def relu(Z):
    return np.maximum(Z, 0)

In [23]:
# Calculating leaky-relu
def leaky_relu(Z, alpha = 0.01):
    np.where(Z > 0, Z, Z * alpha)

In [24]:
# Calculating first derivative of sigmoid
def dif_sigmoid(Z):
    return (1-np.power(Z, 2))

In [25]:
# Calculating first derivative of tanh
def dif_tanh(Z):
    return 1-(tanh(Z)**2)

In [26]:
# Calculating first derivative of relu
def dif_relu(Z):
    return (Z>0).astype(Z.dtype)

In [27]:
# Calculating first derivative of leaky relu
def dif_leaky_relu(Z, alpha = 0.01):
    dz = np.ones_like(Z)
    dz[Z < 0] = alpha
    return dz

In [28]:
#Get activation function
def activation_fun(name):
    if name=='relu':
        return relu
    elif name=='sigmoid':
        return sigmoid
    elif name=='leaky_relu':
        return leaky_relu
    elif name=='tanh':
        return tanh
    else:
        return tanh

In [29]:
#Get first derivative of activation function
def derivative_activation(name):
    if name=='relu':
        return dif_relu
    elif name=='sigmoid':
        return dif_sigmoid
    elif name=='leaky_relu':
        return dif_leaky_relu
    elif name=='tanh':
        return dif_tanh
    else:
        return dif_tanh

In [30]:
# Printing the shape of the training and testing data
train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes = load_dataset()
print('train_set_x_orig shape', train_set_x_orig.shape)
print('train_set_y_orig',train_set_y_orig.shape)
print("test_set_x_orig",test_set_x_orig.shape)
print("test_set_y_orig",test_set_y_orig.shape)
print('classes',classes.shape)

train_set_x_orig shape (209, 64, 64, 3)
train_set_y_orig (209,)
test_set_x_orig (50, 64, 64, 3)
test_set_y_orig (50,)
classes (2,)


In [31]:
# Reshape the training and test examples
def preprocess(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig):
    train_x = train_set_x_orig.reshape(train_set_x_orig.shape[0], train_set_x_orig.shape[1]*train_set_x_orig.shape[2]*train_set_x_orig.shape[3])/255.
    test_x = test_set_x_orig.reshape(test_set_x_orig.shape[0],test_set_x_orig.shape[1]*test_set_x_orig.shape[2]*test_set_x_orig.shape[3])/255.
    train_y = train_set_y_orig.reshape(-1,1)
    test_y = test_set_y_orig.reshape(-1,1)
    print('train_x shape', train_x.shape)
    print('train_y',train_y.shape)
    print("test_x",test_x.shape)
    print("test_y",test_y.shape)
    return train_x,test_x,train_y,test_y

In [32]:
#Defining initail weights and bias to dictanary w and b
def initial_weights(X,Y,h_nodes,hidden_layer):
    """
    This function creates a vector of zeros of shape (X.shape[1], 1) for w and initializes b to 0.
    
    Argument:
    X -- training dataset
    h_nodes -- number of hidden nodes in each hidden laayer, list
    hidden_layer -- number of hidden layers
    
    Returns:
    w -- dict of initialized vector of shape (X.shape[1], hidden_nodes)
    b -- dict of initialized scalar (corresponds to the bias)
    """
    np.random.seed(42) 
    x_nodes = X.shape[1]
    y_nodes = Y.shape[1]

    w = {}
    b = {} 
    
    for i in range(len(h_nodes)):
        if i==0:
            n_l_1 = x_nodes
        else:
            n_l_1 = h_nodes[i-1]
        
        w[i] = np.random.randn(n_l_1 ,h_nodes[i])*np.sqrt(2/n_l_1)
        b[i] = np.random.randn(1,h_nodes[i]) * np.sqrt(2/n_l_1)
 
    return w,b

In [33]:
# FORWARD PROPAGATION
def forward_propagate(X,w,b, activation,hidden_layer,h_nodes,drop_prob):
    """
    This functions performs forward propagation and calculates output value
    
    Argument:
    X -- training dataset
    w -- dict of weights
    b -- dict of bias
    activationion -- list of actiations used at particular hidden layer
    hidden_layer -- number of hidden layers, integer
    h_nodes -- number of hidden nodes in each hidden laayer, list
    
    Returns:
    A -- yhat for the training data, dict
    Z -- Dot product between X and w , dict
    """
    m = X.shape[0]
    keep_prob = 1 - drop_prob
    Z = {}
    A = {}    
    random_A = {}
    for l in range(len(h_nodes)):
#         print(l)
        if l == 0:
            input_X = X
        else:
            input_X = A[l-1]
        Z[l] = (np.dot(input_X,w[l])+b[l])
        A[l] = activation_fun(activation[l])(Z[l])
        
        if l!=len(h_nodes)-1:
            d = np.random.rand(A[l].shape[0],A[l].shape[1]) < keep_prob
#             random_A[l] = d
            A[l] = np.multiply(A[l] , d)
            A[l] = A[l]/keep_prob
    
    return Z, A

In [34]:
# Calculating loss using the cost function
def costfunction(Y,A):   
    """
    This function calculates the loss between the predicted and actual output
    
    Argument:
    Y -- actual output
    A -- predicted output, dict
    
    Returns:
    cost -- loss between the predicted and actual output
    """
    m = Y.shape[0]
    last_index = len(A)-1
    cost = np.nansum(-1/m*np.sum(Y*np.log(A[last_index]) + (1-Y)*np.log(1-A[last_index])))
    return cost

In [41]:
# BACKWARD PROPAGATION (TO FIND GRADIENT)
def back_prpagate(X,Y,Z,A,w,b,activation, hidden_layer,h_nodes,drop_prob):
    """Performs backward propagation and calculates derivative value for a layer

    Arguments:
    X -- array_like Data
    Y -- array_like True labels
    A -- predicted output, dict
    Z -- intermidiate dot product , dict
    w -- dict of weights
    b -- dict of bias
    activationion -- list of actiations used at particular hidden layer
    hidden_layer -- number of hidden layers, integer
    h_nodes -- number of hidden nodes in each hidden laayer, list    

    Returns:
    dw -- derivative of weight, dict
    db -- derivative of bias,dict
    dz -- cache,dict
    """

    m = X.shape[0]
    L= hidden_layer 
    keep_prob = 1-drop_prob
#     dz2 = (A2-Y)
#     dw2 = 1/m*(np.dot(A1.T,dz2))
#     db2 = 1/m*(np.sum(dz2, axis=0, keepdims=True))
    
#     dz1 = np.dot(dz2, w2.T) * activation_fun(activation)(A1)
#     dw1 = 1/m*(np.dot(X.T,dz1))
#     db1 = 1/m*(np.sum(dz1, axis=0, keepdims=True))    
    dz = {}
    da = {}
    dw = {}
    db = {}
    for l in range(len(h_nodes)-1, -1, -1):
#         print('A shape', len(A))
        if l==len(h_nodes)-1:
            dz[l] = (A[l] - Y)
            dw[l] = (1./m * np.dot(A[l-1].T, dz[l]))
            db[l] = (1./m * np.sum(dz[l]))

        else:
#             model[i].da *= dropped_neurons[i]
#       model[i].da /= keep_prob
#       model[i].dz = np.multiply(np.int64(model[i].A>0), model[i].da) * get_derivative_activation_function(model[i].activation)(model[i].Z)
            da[l] = (np.dot(dz[l+1], w[l+1].T))
            dz[l] = (da[l] * derivative_activation(activation[l])(Z[l]))
            if l!=0:
                input_X = A[l-1]
            else:
                input_X = X
            dw[l] = (1./m * np.dot(input_X.T, dz[l]))
            db[l] = (1./m * np.sum(dz[l]))
                
    return dz,dw,db

In [42]:
#Update weights and bias
def update_weights(w,b,dw,db,learning_rate,hidden_layer,h_nodes):
    """
    This function updates the weight and bias
    
    Argument:
    w -- weight
    b -- bias
    dw -- derivative of weight
    db -- derivative of bias
    learning_rate --learning_rate , float
    hidden_layer -- number of hidden layers, integer
    h_nodes -- number of hidden nodes in each hidden laayer, list     
    
    Returns:
    w -- weight , dict
    b -- bias , dict
    """
    L=hidden_layer
    for i in range(len(h_nodes)):
        w[i] = w[i] - learning_rate*dw[i] 
        b[i] = b[i] - learning_rate*db[i]
    return w,b

In [43]:
#Training the model
def train_model(X,Y,test_x,test_y,w,b, learning_rate,num_iterations,activation,hidden_layer,h_nodes,drop_prob):
    """
    This function  trains the model with the number of iterations
    
    Arguments:
    w -- weight , dict
    b -- bias, dict
    X -- training data 
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient descent update rule
    activationion -- list of actiations used at particular hidden layer
    hidden_layer -- number of hidden layers, integer
    h_nodes -- number of hidden nodes in each hidden laayer, list 
    
    Returns:
    w -- weight
    b -- bias
    dw -- derivative of weight
    db -- derivative of bias
    cost -- loss     
    """
    for i in range(num_iterations):
        
        Z,A = forward_propagate(X,w,b,activation,hidden_layer,h_nodes,drop_prob)
        cost = costfunction(Y,A)
        dz,dw,db = back_prpagate(X,Y,Z,A,w,b,activation,hidden_layer,h_nodes,drop_prob)
        w,b = update_weights(w,b,dw,db,learning_rate,hidden_layer,h_nodes)
        
        if i%(num_iterations/10) == 0:

            _,A_train = forward_propagate(X,w,b, activation,hidden_layer,h_nodes,drop_prob)
            A_train1 = np.where(A_train[len(A_train)-1]>0.5 , 1, 0)
        
            _,A_test = forward_propagate(test_x,w,b,activation,hidden_layer,h_nodes,drop_prob)
            A_test1 = np.where(A_test[len(A_test)-1]>0.5 , 1, 0)
            acc_train = accuracy_score(Y, A_train1)
            acc_test = accuracy_score(test_y , A_test1)
        
            print('Iteration: ', i, end = '')
            print('\tLoss {:.4f}\t'.format(cost), end = '')
            print('\tTraining Accuracy: {:.4f}\t'.format(acc_train),end = '')
            print('Testing Accuracy: {:.4f}'.format(acc_test))
    return w,b,dw,db, cost

In [44]:
def model(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes, num_iterations, learning_rate,drop_prob):
    train_x,test_x,train_y,test_y = preprocess(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig)
    hidden_layer = 4
    h_nodes = [20,16,8,4,train_y.shape[1]]
    activation = ['relu','relu','relu','relu','sigmoid']
    w,b = initial_weights(train_x,train_y,h_nodes,hidden_layer)
    w,b,dw,db,cost = train_model(train_x,train_y,test_x,test_y,w,b,learning_rate=learning_rate,num_iterations=num_iterations,
                                 activation = activation, hidden_layer = hidden_layer,h_nodes = h_nodes,drop_prob = drop_prob)

In [45]:
train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes = load_dataset()
# num_iterations = int(input("Enter iterations : "))
# learning_rate = float(input("Enter learning rate : "))
num_iterations = 10
learning_rate = 0.005
drop_prob = 0.2
model(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes, num_iterations, learning_rate,drop_prob)

train_x shape (209, 12288)
train_y (209, 1)
test_x (50, 12288)
test_y (50, 1)
Iteration:  0	Loss 0.6436		Training Accuracy: 0.6555	Testing Accuracy: 0.3200
Iteration:  1	Loss 0.6550		Training Accuracy: 0.6555	Testing Accuracy: 0.3400
Iteration:  2	Loss 0.6495		Training Accuracy: 0.6555	Testing Accuracy: 0.3400
Iteration:  3	Loss 0.6485		Training Accuracy: 0.6507	Testing Accuracy: 0.3400
Iteration:  4	Loss 0.6502		Training Accuracy: 0.6555	Testing Accuracy: 0.3400
Iteration:  5	Loss 0.6519		Training Accuracy: 0.6555	Testing Accuracy: 0.3400
Iteration:  6	Loss 0.6394		Training Accuracy: 0.6555	Testing Accuracy: 0.3400
Iteration:  7	Loss 0.6322		Training Accuracy: 0.6555	Testing Accuracy: 0.3400
Iteration:  8	Loss 0.6485		Training Accuracy: 0.6555	Testing Accuracy: 0.3400
Iteration:  9	Loss 0.6553		Training Accuracy: 0.6555	Testing Accuracy: 0.3400


#### Note - We can try different hyper-parameters to get better testing accuracy