In [0]:
import numpy as np
import sklearn
from sklearn import datasets
from sklearn.datasets import load_breast_cancer
from sklearn import preprocessing
from sklearn.model_selection import train_test_split


dataset=load_breast_cancer()
X, y = dataset.data, dataset.target
m=X.shape[1]






In [0]:
#Dataset preprocessing
X_scaled = preprocessing.scale(X)             #Now all the features are standardized in this (m,n_x) matrix
X_train,X_test,y_train,y_test = train_test_split(X_scaled,y,test_size=0.33,random_state=0)
X_train,X_test = X_train.T,X_test.T                                        #Reshaping it to be (n_x,num example) matrices
y_train,y_test = y_train.reshape([1,y_train.shape[0]]),y_test.reshape([1,y_test.shape[0]])   #Reshaping it to be (1, num_example) matrices


In [187]:
print("The shape of X: " + str(X.shape))
print("The shape of y: " + str(y.shape))


The shape of X: (569, 30)
The shape of y: (569,)


In [0]:

def sigmoid(Z):
    return 1/(1+np.exp(-Z))
    

def relu(Z):
    return np.maximum(0,Z)

def tanh(Z):
    return  (np.exp(Z) - np.exp(-Z))/(np.exp(Z) + np.exp(-Z))


In [0]:
def dSigmoid(Z):
        return sigmoid(Z)*(1-sigmoid(Z))

def drelu(Z):
        return Z > 0

def dtanh(Z):
        return 1 - np.power(tanh(Z),2)

In [0]:
def initialize_parameters(n_x,n_h,n_y):


        W1 = np.random.randn(n_h,n_x)*0.01                                   # The values of weight is put small to prevent saturation of gradients. Since the network is smaller we need not worry about exploding or vanishing
        b1 = np.zeros([n_h,1])
        W2 = np.random.randn(n_y,n_h)*0.01                                      
        b2 = np.zeros([n_y,1])
        
        assert (W1.shape == (n_h, n_x))
        assert (b1.shape == (n_h, 1))
        assert (W2.shape == (n_y, n_h))
        assert (b2.shape == (n_y, 1))

        parameters = {
                "W1" : W1,
                "b1" : b1,
                "W2" : W2,
                "b2" : b2
        }

        return parameters

In [0]:
def cross_entropy_cost(A2,Y,m):
        """ Cost function
                input: A2 (final activation) [1,m]
                        Y(ground truth)      [1,m]
                        m(no. of training examples)

                output: J cost function """ 




        logprobs = Y*np.log(A2) + (1-Y)*np.log(1-A2)   ##Calculates the element wise matrix multiplication 
        cost = -1/m*np.sum(logprobs)                   ##Sums them and returns cost
        cost = np.squeeze(cost)                        ##Ensure cost is a single number
        return cost
       

In [0]:
def forward_prop(X,parameters):
    """Computes forward propagation 
        
       input: X of shape(n_x,m) (feature space)
       parameters: dictionary containing Weights and biases
       
       return : (A2)"""


    m = X.shape[1]
    W1=parameters['W1']
    b1=parameters['b1']
    W2=parameters['W2']
    b2=parameters['b2']
    
    #print("Shape of W1 is : " + str(W1.shape))
    #print("Shape of b1 is : " + str(b1.shape))
    #print("Shape of W2 is : " + str(W2.shape))
    #print("Shape of b2 is : " + str(b2.shape))

    Z1= np.dot(W1,X) + b1
    A1=relu(Z1)
    Z2 =np.dot(W2,A1) + b2
    A2= sigmoid(Z2)
    
    
    
    cache = {
        "Z1" : Z1,
        "Z2" : Z2,
        "A1" : A1,
        "A2" : A2
    }
    
    return A2,cache

In [0]:
def back_prop(X,cache,parameters,Y):
        """ Algorithm for backward propagation 
                input : cache, dictionary contain X,Z1,Z2,A1,A2
                output: gradients, dictionary containing dZ1,db1,dZ2,db2 """
       
       
        m = Y.shape[1]                       # No. of training examples
       
        
        # Initialization
        W2=parameters['W2']                  
        A1=cache['A1']  
        A2=cache['A2']
        Z1=cache['Z1']
        
        
        
        

    

 
        dZ2 = A2 - Y
        dW2 = 1/m *np.dot(dZ2,A1.T)
        db2 = 1/m*np.sum(dZ2,axis=1, keepdims = True)


        dZ1  = np.dot(W2.T,dZ2)*drelu(Z1)
        dW1 = 1/m* np.dot(dZ1,X.T)
        db1 = 1/m*np.sum(dZ1,axis=1, keepdims = True)

        gradients = {
                
                "dW2"   : dW2,
                "db2"   : db2,
                "dW1"   : dW1,
                "db1"   : db1
        }

        return gradients

In [0]:
def update_parameters(parameters,gradients,learning_rate):
        """ Updates all the parameters per iteration """
        


        ## Extracting values from dictionary
        W1=parameters['W1']
        b1=parameters['b1']
        W2=parameters['W2']
        b2=parameters['b2']

        dW1=gradients['dW1']
        dW2=gradients['dW2']
        db1=gradients['db1']
        db2=gradients['db2']

        #Update

        W1 += -learning_rate*dW1
        W2 += -learning_rate*dW2
        b1 += -learning_rate*db1
        b2 += -learning_rate*db2

        updated_params={
                "W1" : W1,
                "W2" : W2,
                "b1" : b1,
                "b2" : b2
        }

        return updated_params


In [0]:
def grad_des(X,num_iterations,learning_rate,Y):

    n_x = X.shape[0]
    n_h = 4                                                             ##Hard coding this to be 4 units hidden layer
    n_y = Y.shape[0]
    m=X.shape[1]
    parameters = initialize_parameters(n_x,n_h,n_y)
   
    for _ in range(num_iterations):

        A2,cache = forward_prop(X,parameters)
        gradients=back_prop(X,cache,parameters,Y)
        parameters=update_parameters(parameters,gradients,learning_rate)
        
        
        #print("Cost: " + str(cross_entropy_cost(A2,Y,m)))
        

    
    print("Cost: " + str(cross_entropy_cost(A2,Y,m)))
    return parameters

In [0]:
def predict(X,parameters):
    
     A2,_=forward_prop(X,parameters)
     return A2

In [197]:
parameters=grad_des(X_train,15000,0.005,y_train)

prediction_train=predict(X_train,parameters)
prediction_train=np.round(prediction_train)

prediction_test=predict(X_test,parameters)
prediction_test=np.round(prediction_test)

m_train= X_train.shape[1]
m_test=X_test.shape[1]
print(X_train.shape)
print("Training Accuracy is: " +str(np.sum(prediction_train == y_train)*1/m_train*100 )+"%")
print("Test Accuracy is: " +str(np.sum(prediction_test == y_test)*1/m_test*100 )+"%")




Cost: 0.051938078743140985
(30, 381)
Training Accuracy is: 98.68766404199475%
Test Accuracy is: 98.40425531914893%
