In [0]:
import numpy as np
import sklearn
from sklearn import datasets


# Load the Iris data and lay it out column-wise (one column per example),
# which is the (n_x, m) layout forward_prop documents for its input.
iris = datasets.load_iris()
X, y = iris.data, iris.target
X = X.T                 # (n_samples, n_features) -> (n_features, n_samples)
y = y.reshape(1, -1)    # row vector; -1 infers the sample count instead of hard-coding 150
# NOTE(review): per the scikit-learn docs, load_iris targets take three values
# (0, 1, 2), while the network below ends in a single sigmoid unit trained with
# binary cross-entropy -- confirm whether the labels should be binarized first.





In [0]:
# Sanity check of the layout: features along rows, examples along columns.
print("The shape of X: {}".format(X.shape))
print("The shape of y: {}".format(y.shape))

The shape of X: (4, 150)
The shape of y: (1, 150)


In [0]:

def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-Z)).

    Evaluated in a numerically stable form: only exp(-|Z|) is ever computed,
    so inputs of large magnitude cannot overflow the exponential.

    Args:
        Z: scalar or array-like of pre-activation values.

    Returns:
        Values in [0, 1] with the same shape as Z (a NumPy scalar when Z is
        a scalar).
    """
    Z = np.asarray(Z)
    decay = np.exp(-np.abs(Z))      # always in [0, 1], never overflows
    # Z >= 0: the textbook form 1 / (1 + exp(-Z)).
    # Z <  0: the algebraically equal exp(Z) / (1 + exp(Z)).
    out = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return out[()]                  # unwrap 0-d results to a scalar; arrays pass through
    

def relu(Z):
    """Rectified linear unit.

    Args:
        Z: scalar or array of pre-activation values.

    Returns:
        Element-wise maximum of 0 and Z.
    """
    rectified = np.maximum(0, Z)
    return rectified

def tanh(Z):
    """Element-wise hyperbolic tangent.

    Delegates to np.tanh. Evaluating (e^Z - e^-Z) / (e^Z + e^-Z) literally
    overflows to inf / inf = NaN once |Z| is large, whereas np.tanh
    saturates cleanly at +/-1.

    Args:
        Z: scalar or array of pre-activation values.

    Returns:
        tanh(Z), with the same shape as Z.
    """
    return np.tanh(Z)


In [0]:
def dSigmoid(Z):
    """Derivative of the logistic function: sigmoid(Z) * (1 - sigmoid(Z)).

    Args:
        Z: scalar or array of pre-activation values.

    Returns:
        The element-wise derivative, same shape as sigmoid(Z).
    """
    activation = sigmoid(Z)     # evaluate once, reuse for both factors
    return activation * (1 - activation)

def drelu(Z):
    """Derivative of ReLU, expressed as a mask.

    Args:
        Z: scalar or array of pre-activation values.

    Returns:
        The result of ``Z > 0``: True where Z is strictly positive, False
        elsewhere (including at exactly 0). Multiplying by this mask keeps
        gradients only for active units.
    """
    is_active = Z > 0
    return is_active

def dtanh(Z):
    """Derivative of the hyperbolic tangent: 1 - tanh(Z)^2.

    Args:
        Z: scalar or array of pre-activation values.

    Returns:
        The element-wise derivative, same shape as tanh(Z).
    """
    squared = np.power(tanh(Z), 2)
    return 1 - squared

In [0]:
def initialize_parameters(n_x,n_h,n_y):
    """Create starting weights and biases for a one-hidden-layer network.

    Weights are drawn from a standard normal (NumPy's global RNG) and scaled
    by 0.01; the small scale is meant to keep the activations away from
    saturation at the start of training. Biases start at zero.

    Args:
        n_x: number of input features.
        n_h: number of hidden units.
        n_y: number of output units.

    Returns:
        dict with "W1" (n_h, n_x), "b1" (n_h, 1), "W2" (n_y, n_h) and
        "b2" (n_y, 1).
    """
    weight_scale = 0.01

    # Dict entries are evaluated in order, so W1 is still sampled before W2.
    return {
        "W1": np.random.randn(n_h, n_x) * weight_scale,
        "b1": np.zeros((n_h, 1)),
        "W2": np.random.randn(n_y, n_h) * weight_scale,
        "b2": np.zeros((n_y, 1)),
    }

In [0]:
def cross_entropy_cost(A2,Y,m):
    """Mean binary cross-entropy between predictions and labels.

    Args:
        A2: final-layer activations, shape [1, m].
        Y:  ground-truth labels, shape [1, m].
        m:  number of training examples (the divisor of the summed loss).

    Returns:
        The cost J as a single number.
    """
    # Keep predictions strictly inside (0, 1): a sigmoid that saturates to
    # exactly 0 or 1 would otherwise produce log(0) = -inf, and 0 * -inf = NaN.
    eps = 1e-15
    A2 = np.clip(A2, eps, 1 - eps)

    logprobs = Y*np.log(A2) + (1-Y)*np.log(1-A2)   # element-wise log-likelihood
    cost = -np.sum(logprobs) / m                   # average over the m examples
    return np.squeeze(cost)                        # ensure cost is a single number
       

In [0]:
def forward_prop(X,parameters):
    """Run one forward pass through the two-layer network.

    The hidden layer uses ReLU and the output layer uses a sigmoid.

    Args:
        X: feature matrix of shape (n_x, m), one column per example.
        parameters: dict holding the weights and biases under the keys
            'W1', 'b1', 'W2' and 'b2'.

    Returns:
        cache: dict with the intermediate values 'Z1', 'A1', 'Z2', 'A2'
        and the input 'X'. The prediction is cache['A2'].

    Raises:
        ValueError: if X is not two-dimensional.
    """
    # A 1-D X would silently broadcast against the (n_h, 1) bias into a
    # wrongly shaped result, so reject anything that is not (n_x, m).
    if len(X.shape) != 2:
        raise ValueError(
            "X must be 2-D with shape (n_x, m), got shape " + str(X.shape)
        )

    W1, b1 = parameters['W1'], parameters['b1']
    W2, b2 = parameters['W2'], parameters['b2']

    # Hidden layer
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    # Output layer
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    return {
        "Z1" : Z1,
        "A1" : A1,
        "Z2" : Z2,
        "A2" : A2,
        "X"  : X,
    }

In [0]:
def back_prop(cache,parameters,Y):
    """Backward pass: gradients of the cost w.r.t. every weight and bias.

    Args:
        cache: dict from forward_prop; the keys 'X', 'Z1', 'A1' and 'A2'
            are read here.
        parameters: dict of network parameters; only 'W2' is read here.
        Y: labels with one column per example (Y.shape[1] is taken as the
            number of training examples).

    Returns:
        gradients: dict with the keys 'dW2', 'db2', 'dW1' and 'db1'.
    """
    n_examples = Y.shape[1]
    scale = 1/n_examples                 # averages the per-example gradients

    W2 = parameters['W2']
    A1, A2 = cache['A1'], cache['A2']
    Z1, X = cache['Z1'], cache['X']

    # Output layer: A2 - Y is the error term used with the sigmoid output
    # of forward_prop and the cross-entropy cost.
    delta_out = A2 - Y
    grad_W2 = scale*np.dot(delta_out, A1.T)
    grad_b2 = scale*np.sum(delta_out, axis=1, keepdims=True)

    # Hidden layer: push the error back through W2, then gate it with the
    # ReLU derivative so inactive units receive no gradient.
    delta_hidden = np.dot(W2.T, delta_out)*drelu(Z1)
    grad_W1 = scale*np.dot(delta_hidden, X.T)
    grad_b1 = scale*np.sum(delta_hidden, axis=1, keepdims=True)

    return {
        "dW2"   : grad_W2,
        "db2"   : grad_b2,
        "dW1"   : grad_W1,
        "db1"   : grad_b1,
    }

In [0]:
def update_parameters(parameters,gradients,learning_rate):
    """Apply one gradient-descent step to every parameter.

    The arrays inside `parameters` are left untouched; the step is written
    into freshly allocated arrays, so a caller that keeps a reference to
    the old parameters (e.g. for comparison or a checkpoint) still sees the
    pre-update values.

    Args:
        parameters: dict with the keys 'W1', 'b1', 'W2' and 'b2'.
        gradients: dict with the keys 'dW1', 'db1', 'dW2' and 'db2'.
        learning_rate: step size multiplying each gradient.

    Returns:
        updated_params: dict with the keys 'W1', 'W2', 'b1' and 'b2'
        holding parameter - learning_rate * gradient.
    """
    # `a - lr * g` (rather than `a += -lr * g`) allocates a new array
    # instead of mutating the caller's array in place.
    updated_params = {
        "W1" : parameters['W1'] - learning_rate*gradients['dW1'],
        "W2" : parameters['W2'] - learning_rate*gradients['dW2'],
        "b1" : parameters['b1'] - learning_rate*gradients['db1'],
        "b2" : parameters['b2'] - learning_rate*gradients['db2'],
    }

    return updated_params


In [0]:
def grad_des(X,num_iterations,learning_rate,Y,n_h=4,print_every=1):
    """Train the two-layer network with batch gradient descent.

    Args:
        X: feature matrix of shape (n_x, m), one column per example.
        num_iterations: number of full-batch gradient steps to take.
        learning_rate: step size passed to update_parameters.
        Y: labels of shape (n_y, m).
        n_h: number of hidden units. Defaults to 4, the value that used to
            be hard-coded here.
        print_every: print the cost every `print_every` iterations. The
            default of 1 prints on every iteration; pass 0 or None to
            silence the log.

    Returns:
        parameters: dict of trained weights and biases
        ('W1', 'b1', 'W2', 'b2').
    """
    n_x = X.shape[0]
    n_y = Y.shape[0]
    m = X.shape[1]
    parameters = initialize_parameters(n_x,n_h,n_y)

    for iteration in range(num_iterations):
        cache = forward_prop(X,parameters)

        # The reported cost belongs to the parameters *before* this step's
        # update, because it is computed from this iteration's forward pass.
        if print_every and iteration % print_every == 0:
            print("Cost: " + str(cross_entropy_cost(cache['A2'],Y,m)))

        gradients = back_prop(cache,parameters,Y)
        parameters = update_parameters(parameters,gradients,learning_rate)

    return parameters

In [0]:
# Seed NumPy's global RNG so the random weight initialisation (and therefore
# the whole cost trace below) is the same on every Restart & Run All.
np.random.seed(0)
parameters=grad_des(X,1000,0.05,y)

# Run the trained network on the first three rows of the dataset; they are
# transposed to the (n_features, n_examples) layout forward_prop expects.
print(iris.data[:3].shape)
cache=forward_prop(iris.data[:3].T,parameters)
A2=cache['A2']
A2=np.round(A2)          # threshold the sigmoid outputs to hard 0/1 predictions

print(A2)


Cost: 0.6934776434463045
Cost: 0.6808615896803832
Cost: 0.6686538857010822
Cost: 0.6567824438300727
Cost: 0.6452239691042235
Cost: 0.6339676388199517
Cost: 0.6230048003516644
Cost: 0.6123145022433355
Cost: 0.6018876112187853
Cost: 0.5917171050836135
Cost: 0.5817961007077118
Cost: 0.5721178105505718
Cost: 0.5626756019537344
Cost: 0.5534630285985113
Cost: 0.5444738474654612
Cost: 0.5357018843500725
Cost: 0.5271408374503768
Cost: 0.5187853458032718
Cost: 0.5106291323628814
Cost: 0.5026669199762456
Cost: 0.4948933880050836
Cost: 0.48730262565862864
Cost: 0.4798901009292087
Cost: 0.47265057702544094
Cost: 0.4655790804339714
Cost: 0.45867089544784667
Cost: 0.4519213473942518
Cost: 0.4453259348136128
Cost: 0.4388802726088949
Cost: 0.43258005326298116
Cost: 0.42642108786186533
Cost: 0.42039934626841324
Cost: 0.41451082216659174
Cost: 0.40875193234711904
Cost: 0.4031186086166062
Cost: 0.39760759175092425
Cost: 0.39221514024737164
Cost: 0.3869378924970687
Cost: 0.3817724187767773
Cost: 0.3767154