In [21]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [22]:
iris_df = load_iris()

In [23]:
dataframe = pd.DataFrame(iris_df['data'])

In [24]:
dataframe['output'] = iris_df['target']


In [25]:
dataframe = dataframe[dataframe['output'].isin([0,1])]

In [26]:
dataframe['output'].value_counts()

1    50
0    50
Name: output, dtype: int64

In [27]:
X = dataframe.drop('output', axis=1)
y = dataframe['output']

In [28]:
train_x, test_x, train_y, test_y = train_test_split(X,y, test_size = 0.3, random_state =1)

In [29]:
train_x = np.array(train_x.T)
train_y = np.array(train_y).reshape(1,train_y.shape[0])
test_x = np.array(test_x.T)
test_y = np.array(test_y).reshape(1, test_y.shape[0])

In [30]:
#initialize parameters based on the number of hidden layers
def intialize_parameters(layer_dims):
    np.random.seed(1)
    L = layer_dims
    l = len(layer_dims)
    parameters = {}
    for i in range(1, l):
        #The randomly assigned weights are multiplied by np.sqrt(2/previous layer dimension)
        weight = np.random.randn(L[i],L[i-1]) * np.sqrt(2/L[i-1])
#         * (np.sqrt(2/L[i-1]))
        bias = np.zeros([L[i],1]) 
        parameters['W' + str(i)] = weight
        parameters['b' + str(i)] = bias
    return parameters    

In [31]:
#for feed_forward process, the relu activation is used for all hidden layers
#for the final layer, sigmoid activation function is used
def feed_forward(parameters, train_x, layer_dims,keep_probs):
    l = len(layer_dims)
    Aprev = train_x    
    #The order of the data stored in cache is A,W,b
    caches = []
    
    #calculate the feed forward process for all the layers except last layer
    for i in range(1,l-1):
        weight = parameters['W' + str(i)]
        bias   = parameters['b' + str(i)]
        z = np.dot(weight, Aprev) + bias
        Aprev = np.maximum(z,0)
        drop = np.random.rand(Aprev.shape[0],Aprev.shape[1])
        drop = (drop < keep_probs).astype(int)
        Aprev = np.multiply(Aprev, drop)
        Aprev = np.divide(Aprev,keep_probs)
        cache = Aprev, weight, bias,drop
        caches.append(cache) 
        
    #calculate the feed forward process for the last layer
    weight = parameters['W' + str(l-1)]
    bias   = parameters['b' + str(l-1)]
    
    z = np.dot(weight, Aprev) + bias
    Aprev = 1/(1 + np.exp(-z))
    final_output = Aprev
    cache = Aprev, weight, bias,drop
    caches.append(cache)  
    return(caches, final_output)

In [32]:
#calculate the cost using log error function
def calculate_cost(final_output, train_y):
    m = train_y.shape[1]
    cost = -(np.dot(train_y, np.log(final_output).T) +
             np.dot((1 - train_y), np.log(1-final_output).T)) /m
    return cost

In [33]:
def backpropagation(caches, layer_dims, train_x, train_y, final_output,parameters, learning_rate,keep_probs):
    
    l = len(layer_dims) - 1
    m = train_y.shape[1]
    
    #calculate the backpropagation for last layer seperaterly
    Acurr,Wcurr,bcurr,dcurr = caches[l-1]
    Aprev,Wprev,bprev,dprev = caches[l-2]    
    dz = (final_output - train_y)
    dw = np.dot(dz,Aprev.T)/m
    db = np.sum(dz,keepdims=True,axis = 1)/m
    parameters['W' + str(l)] -= learning_rate * dw
    parameters['b' + str(l)] -= learning_rate * db    
    dzprev = dz
    
    #calculate backpropagation for the remaining hidden layers
    for i in reversed(range(l-1)):
        
        Anext,Wnext,bnext,dnext = caches[i+1]
        Acurr,Wcurr,bcurr,dcurr = caches[i]
                
        if i == 0:
            Aprev = train_x
        else:
            Aprev,Wprev,bprev,dprev = caches[i-1]  
        
        dzcurr =  np.where(Acurr > 0, 1, Acurr)   
        da = np.dot(Wnext.T, dzprev)
        da = da * dcurr
        da = da / keep_probs
        dz = dzcurr * da
        dw = np.dot(dz,Aprev.T)/m
        db = np.sum(dz,keepdims = True, axis =1)/m
        parameters['W' + str(i+1)] -= learning_rate * dw
        parameters['b' + str(i+1)] -= learning_rate * db
        dzprev = dz
    return parameters

In [34]:
def deep_neural_model(layer_dims,train_x,train_y,iteration,learning_rate,keep_probs):
    #intialization of the parameters such as weights and bias
    parameters = intialize_parameters(layer_dims)
    
    for i in range(iteration):
        #calculate the activation function values for each layer
        caches, final_output = feed_forward(parameters, train_x, layer_dims,keep_probs)
            
        #calculate the cost
        cost = calculate_cost(final_output, train_y) 
        if i%1000 == 0:
            print('The cost of iteration {} is: {}'.format(i, np.squeeze(cost)))
    
        #calcualte the backpropagation
        parameters = backpropagation(caches, layer_dims, train_x,train_y, final_output,parameters,learning_rate,keep_probs)           
    return parameters    

In [35]:
layer_dims = [4,4,1]
iteration = 14900
learning_rate = 0.001
keep_probs = 0.8
parameters = deep_neural_model(layer_dims, train_x, train_y, iteration, learning_rate,keep_probs)

The cost of iteration 0 is: 1.4634168605933904
The cost of iteration 1000 is: 0.43630193165092296
The cost of iteration 2000 is: 0.2615794012937735
The cost of iteration 3000 is: 0.221025508214681
The cost of iteration 4000 is: 0.21233365880086333
The cost of iteration 5000 is: 0.1322389902594298
The cost of iteration 6000 is: 0.11842787880563288
The cost of iteration 7000 is: 0.13547109614400168
The cost of iteration 8000 is: 0.08377178600533916
The cost of iteration 9000 is: 0.10235635722662778
The cost of iteration 10000 is: 0.10115806692156314
The cost of iteration 11000 is: 0.09141450320281655
The cost of iteration 12000 is: 0.10238646772024368
The cost of iteration 13000 is: 0.05456101167151232
The cost of iteration 14000 is: 0.06789361618041652


In [36]:
#create a predict method which actually estimate the outcome based on the parameters
def predict_target(test_x, parameters,layer_dims):
    l = len(layer_dims)
    Aprev = test_x    
        
    #calculate the feed forward process for all the layers except last layer
    for i in range(1,l-1):
        weight = parameters['W' + str(i)]
        bias   = parameters['b' + str(i)]
        z = np.dot(weight, Aprev) + bias
        Aprev = np.maximum(z,0)        
        
    #calculate the feed forward process for the last layer
    weight = parameters['W' + str(l-1)]
    bias   = parameters['b' + str(l-1)]
    
    z = np.dot(weight, Aprev) + bias
    final_output = 1/(1 + np.exp(-z))  
    final_output =  (final_output > 0.5) * 1
    return(final_output)    

In [37]:
y_predict = predict_target(train_x, parameters,layer_dims)

In [38]:
print(y_predict)

[[1 1 1 1 1 0 0 1 0 0 1 1 1 0 1 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 0 1 1 0 0 1
  0 1 0 1 1 0 1 1 0 1 0 1 1 0 0 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 0 1 0 0]]


In [39]:
accuracy_score(y_predict, train_y)

1.0