## Shallow Neural Network (1 hidden layer)

In [1]:
# Importing libraries
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

In [2]:
# Loading the data (cat/non-cat)
def load_dataset():
    """Loads the Cat vs Non-Cat dataset

    Returns
    -------
    X_train, y_train, X_test, y_test, classes: Arrays
    Dataset splitted into train and test with classes
    """
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])

    classes = np.array(test_dataset["list_classes"][:])
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [3]:
# Calculating sigmoid
def sigmoid(Z):
    return 1/(1+np.exp(-Z))

In [4]:
# Calculating tanh
def tanh(Z):
    return np.tanh(Z)

In [5]:
# Calculating relu
def relu(Z):
    return np.maximum(Z, 0)

In [6]:
# Calculating leaky-relu
def leaky_relu(Z, alpha = 0.01):
    np.where(Z > 0, Z, Z * alpha)

In [7]:
# Calculating first derivative of sigmoid
def dif_sigmoid(Z):
    return (1-np.power(Z, 2))

In [8]:
# Calculating first derivative of tanh
def dif_tanh(Z):
    return 1-(tanh(Z)**2)

In [9]:
# Calculating first derivative of relu
def dif_relu(Z):
    return (Z>0).astype(Z.dtype)

In [10]:
# Calculating first derivative of leaky relu
def dif_leaky_relu(Z, alpha = 0.01):
    dz = np.ones_like(Z)
    dz[Z < 0] = alpha
    return dz

In [11]:
#Get activation function
def activation_fun(name):
    if name=='relu':
        return relu
    elif name=='sigmoid':
        return sigmoid
    elif name=='leaky_relu':
        return leaky_relu
    elif name=='tanh':
        return tanh
    else:
        return tanh

In [12]:
#Get first derivative of activation function
def derivative_activation(name):
    if name=='relu':
        return dif_relu
    elif name=='sigmoid':
        return dif_sigmoid
    elif name=='leaky_relu':
        return dif_leaky_relu
    elif name=='tanh':
        return dif_tanh
    else:
        return dif_tanh

In [13]:
# Printing the shape of the training and testing data
train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes = load_dataset()
print('train_set_x_orig shape', train_set_x_orig.shape)
print('train_set_y_orig',train_set_y_orig.shape)
print("test_set_x_orig",test_set_x_orig.shape)
print("test_set_y_orig",test_set_y_orig.shape)
print('classes',classes.shape)

train_set_x_orig shape (209, 64, 64, 3)
train_set_y_orig (209,)
test_set_x_orig (50, 64, 64, 3)
test_set_y_orig (50,)
classes (2,)


In [14]:
# Reshape the training and test examples
def preprocess(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig):
    train_x = train_set_x_orig.reshape(train_set_x_orig.shape[0], train_set_x_orig.shape[1]*train_set_x_orig.shape[2]*train_set_x_orig.shape[3])/255.
    test_x = test_set_x_orig.reshape(test_set_x_orig.shape[0],test_set_x_orig.shape[1]*test_set_x_orig.shape[2]*test_set_x_orig.shape[3])/255.
    train_y = train_set_y_orig.reshape(-1,1)
    test_y = test_set_y_orig.reshape(-1,1)
    print('train_x shape', train_x.shape)
    print('train_y',train_y.shape)
    print("test_x",test_x.shape)
    print("test_y",test_y.shape)
    return train_x,test_x,train_y,test_y

In [15]:
#Defining initail weights to w1,b1 and w2,b2
def initial_weights(X,hidden_nodes):
    """
    This function creates a vector of zeros of shape (X.shape[1], 1) for w and initializes b to 0.
    
    Argument:
    X -- training dataset
    hidden_nodes -- number of hidden nodes , integer
    
    Returns:
    w1 -- initialized vector of shape (X.shape[1], hidden_nodes)
    b1 -- initialized scalar (corresponds to the bias)
    w2 -- initialized vector of shape (hidden_nodes, 1)
    b2 -- initialized scalar (corresponds to the bias)
    """

    np.random.seed(2) # we set up a seed so that your output matches ours although the initialization is random.
    w1 = np.random.randn(X.shape[1] ,hidden_nodes)
    b1 = np.zeros([1,hidden_nodes])
    w2 = np.random.randn(hidden_nodes ,1)
    b2 = np.zeros([1,1])
    return w1,b1,w2,b2

In [16]:
# FORWARD PROPAGATION
def forward_propagate(X,w1,b1,w2,b2, activation='relu'):
    """
    This functions performs forward propagation and calculates output value
    
    Argument:
    X -- training dataset
    w1 -- weight between input and hidden layer
    b1 -- bias between input and hidden layer
    w2 -- weight between hidden and output layer
    b2 -- bias between hidden and output layer
    
    Returns:
    A1 -- yhat for the training data
    Z1 -- Dot product between X and w
    A2 -- yhat(predicted output) for the training data
    Z2 -- Dot product between A1, w
    """
    m = X.shape[0]
    
    Z1 = (np.dot(X,w1)+b1)
    A1 = activation_fun(activation)(Z1)
    Z2 = (np.dot(A1,w2) +b2)
    A2 = activation_fun('sigmoid')(Z2)
    
    return Z1,Z2,A1,A2

In [17]:
# Calculating loss using the cost function
def costfunction(Y,A2):
    """
    This function calculates the loss between the predicted and actual output
    
    Argument:
    Y -- actual output
    A2 -- predicted output
    
    Returns:
    cost -- loss between the predicted and actual output
    """
    m = Y.shape[0]
    cost = -1/m*np.sum(Y*np.log(A2) + (1-Y)*np.log(1-A2))
    return cost

In [18]:
# BACKWARD PROPAGATION (TO FIND GRADIENT)
def back_prpagate(X,Y,Z1,Z2,A1,A2,w1,b1,w2,b2,activation):
    """Performs backward propagation and calculates derivative value for a layer

    Arguments:
    X -- array_like Data
    Y -- array_like True labels
    A -- predicted output

    Returns:
    dw -- derivative of weight
    db -- derivative of bias
    dz -- cache
    """
    m = X.shape[0]
               
    dz2 = (A2-Y)
    dw2 = 1/m*(np.dot(A1.T,dz2))
    db2 = 1/m*(np.sum(dz2, axis=0, keepdims=True))
    
    dz1 = np.dot(dz2, w2.T) * activation_fun(activation)(A1)
    dw1 = 1/m*(np.dot(X.T,dz1))
    db1 = 1/m*(np.sum(dz1, axis=0, keepdims=True))
    
    return dz2,dw2,db2,dz1,dw1,db1

In [19]:
def update_weights(w1, b1,w2,b2,dw1,db1,dw2,db2,learning_rate):
    """
    This function updates the weight and bias
    
    Argument:
    w -- weight
    b -- bias
    dw -- derivative of weight
    db -- derivative of bias
    
    Returns:
    w1 -- weight between input and hidden layer
    b1 -- bias between input and hidden layer
    w2 -- weight between hidden and output layer
    b2 -- bias between hidden and output layer
    """
    w1 = w1-learning_rate*dw1
    b1 = b1-learning_rate*db1
    
    w2 = w2-learning_rate*dw2
    b2 = b2-learning_rate*db2
    return w1,b1,w2,b2

In [20]:
def train_model(X,Y,test_x,test_y,w1,b1,w2,b2, learning_rate = 0.01, num_iterations = 100,activation = 'relu'):
    """
    This function  trains the model with the number of iterations
    
    Arguments:
    w -- weight
    b -- bias, a scalar
    X -- training data 
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient descent update rule
    
    Returns:
    w -- weight
    b -- bias
    dw -- derivative of weight
    db -- derivative of bias
    cost -- loss     
    """
    for i in range(num_iterations):
        
        Z1,Z2,A1,A2 = forward_propagate(X,w1,b1,w2,b2,activation)
        cost = costfunction(Y,A2)
        dz2,dw2,db2,dz1,dw1,db1 = back_prpagate(X,Y,Z1,Z2,A1,A2,w1,b1,w2,b2,activation)
        w1,b1,w2,b2 = update_weights(w1, b1,w2,b2,dw1,db1,dw2,db2,learning_rate)
        
        if i%(num_iterations/10) == 0:
#             print(cost)
            _,_,_,A_train = forward_propagate(X,w1,b1,w2,b2)
            A_train = np.where(A_train>0.5 , 1, 0)
        
            _,_,_,A_test = forward_propagate(test_x,w1,b1,w2,b2)
            A_test = np.where(A_test>0.5 , 1, 0)
            acc_train = accuracy_score(Y, A_train)
            acc_test = accuracy_score(test_y , A_test)
        
            print('Iteration: ', i, end = '')
            print('\t\tTraining Accuracy: {:.4f}\t'.format(acc_train),end = '')
            print('Testing Accuracy: {:.4f}'.format(acc_test))

    return w1,b1,w2,b2,dw1,db1,dw2,db2, cost

In [21]:
def model(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes, num_iterations, learning_rate):
    train_x,test_x,train_y,test_y = preprocess(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig)
    hidden_nodes = 100
    activation = 'relu'
    w1,b1,w2,b2 = initial_weights(train_x,hidden_nodes)
    w1,b1,w2,b2,dw1,db1,dw2,d2, cost = train_model(train_x,train_y,test_x,test_y,w1,b1,w2,b2, learning_rate = learning_rate, num_iterations = num_iterations,activation = activation)

In [23]:
train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes = load_dataset()
num_iterations = int(input("Enter iterations : "))
learning_rate = float(input("Enter learning rate : "))

model(train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes, num_iterations, learning_rate)

Enter iterations : 1000
Enter learning rate : 0.01
train_x shape (209, 12288)
train_y (209, 1)
test_x (50, 12288)
test_y (50, 1)


  
  


Iteration:  0		Training Accuracy: 0.3445	Testing Accuracy: 0.6600
Iteration:  100		Training Accuracy: 0.8086	Testing Accuracy: 0.6400
Iteration:  200		Training Accuracy: 0.8278	Testing Accuracy: 0.6800
Iteration:  300		Training Accuracy: 0.8373	Testing Accuracy: 0.6800
Iteration:  400		Training Accuracy: 0.8469	Testing Accuracy: 0.7200
Iteration:  500		Training Accuracy: 0.8708	Testing Accuracy: 0.7200
Iteration:  600		Training Accuracy: 0.8995	Testing Accuracy: 0.7200
Iteration:  700		Training Accuracy: 0.9139	Testing Accuracy: 0.7000
Iteration:  800		Training Accuracy: 0.9378	Testing Accuracy: 0.6800
Iteration:  900		Training Accuracy: 0.9474	Testing Accuracy: 0.6800
