# Related Libraries Importing

In [14]:
import numpy as np
import os 

# Helper Functions Defination

In [15]:
# Activation functions defination
def activation(x,activation_type= 'sigmoid'):
    
    if activation_type not in ['sigmoid','tanh', 'relu','lrelu']:
        raise ValueError(" activation type must be in ['sigmoid','tanh', 'relu','lrelu']")
    
    if activation_type == 'sigmoid':
        return (1/(1+np.exp(-x)))
    
    elif activation_type == 'tanh':
        return np.tanh(x)
    
    elif activation_type == 'relu':
        return np.maximum(0.0,x)
    
    elif activation_type == 'lrelu':
        return np.maximum(0.01 * x, x)

In [16]:
# Activation fumnctions derivatives
def act_derivative(x, activation_type= 'sigmoid'):

    if activation_type not in ['sigmoid','tanh', 'relu','lrelu']:
        raise ValueError(" activation type must be in ['sigmoid','tanh', 'relu','lrelu']")    
    
    if activation_type == 'sigmoid':
        return activation(x, activation_type= activation_type)\
        * (1- activation(x, activation_type=activation_type))
    
    elif activation_type == 'tanh':
        return 1- (activation(x,activation_type= activation_type))**2
    
    elif activation_type == 'relu':
        return np.where(x <= 0.0, 0.0,1.0)
    
    elif activation_type == 'lrelu':
        return np.where(x <= 0.0, 0.01 * x,1.0)

In [17]:
# Random data generation for function
def data_generator(num_features=10, num_examples=500,train_p=0.8):
    
    dataset = np.random.randint(0,20,size=(num_features,num_examples))
    labels = np.random.randint(0,2,(1,num_examples))
    
    x_train = dataset[:,:int(num_examples * train_p)]
    y_train = labels[:,:int(num_examples * train_p)]
    x_test = dataset[:,int(num_examples * train_p) :]
    y_test = labels[:,int(num_examples * train_p):]
    return x_train, y_train, x_test, y_test

# Shallow Neural Network Formulation

In [18]:
#random data generation
x_train, y_train, x_test, y_test = data_generator(10,1000,0.75)

In [19]:
print('the number of training examples is: '+ str(x_train.shape[1]))
print('the number of testing examples is : '+ str(x_test.shape[1]))
print('the number input fetures is       : '+ str(x_train.shape[0]))
print('-----------------------------------------')
print('the training datset shape is : ' + str(x_train.shape))
print('the training labels shape is : ' + str(y_train.shape))
print('the testing datset shape is : ' + str(x_test.shape))
print('the testing labels shape is : ' + str(y_test.shape))

the number of training examples is: 750
the number of testing examples is : 250
the number input fetures is       : 10
-----------------------------------------
the training datset shape is : (10, 750)
the training labels shape is : (1, 750)
the testing datset shape is : (10, 250)
the testing labels shape is : (1, 250)


In [20]:
# parameters initiation
def initialize_parameters(X, Y, hidden_size= 5):
    input_size =  X.shape[0]
    output_size = Y.shape[0]
    
    W1 = np.random.randn(hidden_size,input_size)*.01
    b1 = np.zeros((hidden_size,1)) 
    W2 = np.random.randn(output_size,hidden_size)*.01
    b2 = np.zeros((output_size,1))
    
    assert (W1.shape == (hidden_size, input_size))
    assert (b1.shape == (hidden_size, 1))
    assert (W2.shape == (output_size, hidden_size))
    assert (b2.shape == (output_size, 1))
    
    parameters ={'W1':W1,
                 'b1':b1,
                 'W2':W2,
                 'b2':b2} 
    
    return parameters

In [21]:
# Forward propagation function
def forward_prop(X, Y, parameters, hidden_activation= 'tanh'):
       
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']   
       
    Z1 = np.dot(W1,X) + b1
    A1 = activation(Z1,activation_type=hidden_activation)
    Z2 = np.dot(W2,A1) + b2
    A2 = activation(Z2, activation_type= 'sigmoid')
        
    m = X.shape[1]
    
    cost = np.squeeze((-1/m)*(np.dot(Y,np.log(A2).T)+ np.dot((1-Y),np.log(1-A2).T)))
    assert(cost.shape == ())
    
    cache = {'Z1':Z1,
             'A1':A1,
             'Z2':Z2,
             'A2':A2}
    
    return cost, cache

In [22]:
# Backward propagation function
def backward_prop(X, Y, parameters,cache, hidden_activation = 'tanh'):
        
    m = x_train.shape[1]
    
    W1 = parameters['W1']
    W2 = parameters['W2']
    
    A1 = cache['A1']
    A2 = cache['A2']
    
    Z1 =cache['Z1']
    
    dZ2 =  A2-y_train
    dW2 = (1/m) * np.dot(dZ2,A1.T)
    db2 = (1/m) * np.sum(dZ2, axis= 1,keepdims=True)
    
    dZ1 = np.dot(W2.T,dZ2) * act_derivative(Z1, activation_type= hidden_activation)
    dW1 = (1/m) * np.dot(dZ1,X.T)
    db1 = np.sum(dZ1, axis= 1, keepdims= True)
    
    derivatives= {'dW2':dW2,
                  'db2':db2,
                  'dW1':dW1,
                  'db1':db1}
    return derivatives

In [23]:
# Weights optimization 
def optimize(parameters,derivatives, lr =0.01):

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    dW1 = derivatives['dW1']
    db1 = derivatives['db1']
    dW2 = derivatives['dW2']
    db2 = derivatives['db2']
    
    parameters = {'W1': W1 - lr * dW1, 
                 'b1': b1 - lr * db1, 
                 'W2': W2 - lr * dW2, 
                 'b2': b2 - lr * db2}
    
    return parameters 
    

In [39]:
# Merge all function into a single function
def model(X, Y, num_iterations= 10000, hidden_size = 5, hidden_activation='tanh',lr = 0.01 ,print_cost = True):
    
    parameters = initialize_parameters(X,Y, hidden_size= hidden_size)
    
    for i in range(num_iterations):
        
        cost, cache = forward_prop(X,Y,parameters, hidden_activation= hidden_activation)
        
        derivatives = backward_prop(X,Y,parameters,cache,hidden_activation=hidden_activation)
        
        parameters = optimize(parameters,derivatives,lr=lr)
        
        if print_cost and i % 1000 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
            
    print('\nthe ultimate cost value is: ' + str(cost))
    return parameters

In [40]:
# Prediction function
def predict(X,Y,parameters, hidden_activation='tanh'):
    
    _ , cache = forward_prop(X, Y,parameters, hidden_activation = hidden_activation)
    predictions = (cache['A2']>0.5)
    print ('Accuracy: %d' % float((np.dot(Y,predictions.T) + np.dot(1-Y,1-predictions.T))/float(Y.size)*100) + '%')
    

In [41]:
parameters = model(x_train,y_train, num_iterations=1000,hidden_size=50,hidden_activation='tanh',lr=0.03, print_cost=True)

Cost after iteration 0: 0.693035

the ultimate cost value is: 0.6863694428272646


In [30]:
predict(x_train,y_train,parameters, hidden_activation = 'tanh')

Accuracy: 99%


In [31]:
predict(x_test,y_test,parameters, hidden_activation = 'tanh')

Accuracy: 47%
