In [1]:
import numpy as np

In [2]:
# RANDOM INITIALIZATION FOR THE PARAMETERS.

def randInitNN(dimLayers):
    """Create the initial parameters of the network.

    Args:
        dimLayers: Sequence of layer sizes. Entry 0 is the size of the input
            and every following entry is the number of units of that layer.

    Returns:
        List with one dict per layer (the input layer has none). Each dict
        holds:
            "Weights": (dimLayers[l], dimLayers[l-1]) matrix sampled from a
                standard normal and divided by sqrt(dimLayers[l-1]).
            "bias": (dimLayers[l], 1) column vector of zeros.
    """
    # Walk over consecutive (previous size, current size) pairs.
    layerPairs = zip(dimLayers[:-1], dimLayers[1:])

    return [
        {
            "Weights": np.random.randn(nOut, nIn) / np.sqrt(nIn),
            "bias": np.zeros((nOut, 1)),
        }
        for nIn, nOut in layerPairs
    ]

In [3]:
# THE FUNCTION CALCULATES THE LINEAR STEP FOR A NEURAL NETWORK LAYER.

def linearNN(X,W,b):
    """Linear (affine) step of one layer: Z = W . X + b.

    Args:
        X: Matrix of examples (or activations of the previous layer).
        W: Matrix of weights of the layer.
        b: Vector of bias of the layer; added to the product by broadcasting.

    Returns:
        Matrix Z, the output of the linear step.
    """
    return np.dot(W, X) + b

In [4]:
# THE FUNCTION COMPUTES THE SIGMOID OF THE MATRIX Z, OUTPUT OF THE LINEAR STEP.

def sigmoidForeward(Z):
    """Apply the sigmoid activation element-wise.

    Args:
        Z: Matrix Z, output of the linear step.

    Returns:
        Post-activation matrix A = 1 / (1 + exp(-Z)).
    """
    expNegZ = np.exp(-Z)
    return 1 / (1 + expNegZ)

In [5]:
# THE FUNCTION COMPUTES THE RELU OF THE MATRIX Z, OUTPUT OF THE LINEAR STEP.

def reluForeward(Z):
    """Apply the ReLU activation element-wise.

    Args:
        Z: Matrix Z, output of the linear step.

    Returns:
        Post-activation matrix A, i.e. the element-wise maximum of 0 and Z.
    """
    return np.maximum(0, Z)

In [6]:
# THE FUNCTION CALCULATES THE FORWARD MODEL OF A NEURAL NETWORK.

def forewardProp(X,parameters):
    """Run the forward pass through every layer of the network.

    All layers except the last use ReLU; the last layer uses the sigmoid.

    Args:
        X: Matrix of examples, fed to the first layer.
        parameters: List of per-layer dicts with keys "Weights" and "bias".

    Returns:
        List with one dict per layer, in layer order, holding "Z" (output of
        the linear step) and "A" (post-activation matrix).
    """
    caches = []
    activation = X

    # Hidden layers: linear step followed by ReLU.
    for layer in parameters[:-1]:
        Z = linearNN(activation, layer["Weights"], layer["bias"])
        activation = reluForeward(Z)
        caches.append({"Z": Z, "A": activation})

    # Output layer: linear step followed by sigmoid.
    outputLayer = parameters[-1]
    ZL = linearNN(activation, outputLayer["Weights"], outputLayer["bias"])
    caches.append({"Z": ZL, "A": sigmoidForeward(ZL)})

    return caches

In [7]:
# THE FUNCTION CALCULATES THE COST FUNCTION OF A NEURAL NETWORK.

def costfunctionNN(AL,Y,parameters,regu,lambd):
    """Cross-entropy cost of the network, optionally regularized.

    Args:
        AL: Vector of activations A of the last layer L.
        Y: Vector of labels; np.shape(Y)[1] is taken as the number of examples.
        parameters: List of per-layer dicts; only "Weights" is read, and only
            when regularization is enabled.
        regu: Regularization selector: 1 adds an L1 penalty, 2 adds an L2
            penalty, any other value adds none.
        lambd: Hyper-parameter lambda, the trade-off between regularization
            and entropy.

    Returns:
        The cost with singleton dimensions squeezed out (a 0-d array for
        row-vector inputs).
    """
    m = np.shape(Y)[1]

    # A saturated sigmoid yields activations of exactly 0 or 1, which turns
    # the cross-entropy into 0 * log(0) = NaN. Clipping keeps the logarithms
    # finite and leaves every value inside [eps, 1 - eps] untouched.
    eps = 1e-15
    ALsafe = np.clip(AL, eps, 1 - eps)

    cost = (1./m) * (-np.dot(Y,np.log(ALsafe).T) - np.dot(1-Y,np.log(1-ALsafe).T))

    if regu == 1:
        # (lambd/m) * sum|W|: its derivative is (lambd/m) * sign(W), which is
        # the L1 term that gradientsNN adds to dW.
        costL1 = sum(np.sum(np.abs(layer["Weights"])) for layer in parameters)
        cost = cost + (lambd/m)*costL1
    elif regu == 2:
        # (lambd/(2m)) * sum(W^2): its derivative is (lambd/m) * W.
        costL2 = sum(np.sum(np.square(layer["Weights"])) for layer in parameters)
        cost = cost + (lambd/(2*m))*costL2

    return np.squeeze(cost)

In [8]:
# THE FUNCTION COMPUTES THE DERIVATIVE OF THE COST FUNCTION WITH RESPECT TO THE MATRIX Z WHEN USING SIGMOID FUNCTION.

def sigmoidBackward(dA,Z):
    """Backward step through a sigmoid activation.

    Args:
        dA: Post-activation gradient.
        Z: Matrix Z, output of the linear step.

    Returns:
        Gradient of the cost with respect to Z: dA * s * (1 - s), where
        s = sigmoid(Z).
    """
    sig = 1 / (1 + np.exp(-Z))
    sigDerivative = 1 - sig
    return dA * sig * sigDerivative

In [9]:
# THE FUNCTION COMPUTES THE DERIVATIVE OF THE COST FUNCTION WITH RESPECT TO THE MATRIX Z WHEN USING RELU FUNCTION.

def reluBackward(dA,Z):
    """Backward step through a ReLU activation.

    Args:
        dA: Post-activation gradient.
        Z: Matrix Z, output of the linear step.

    Returns:
        Gradient of the cost with respect to Z: a copy of dA in which every
        entry whose Z is <= 0 has been set to 0. dA itself is not modified.
    """
    # Work on a copy so the caller's gradient stays untouched.
    dZ = np.copy(dA)

    # ReLU passes no gradient where its input was not positive.
    inactive = Z <= 0
    dZ[inactive] = 0

    return dZ

In [10]:
# THE FUNCTION COMPUTES THE DERIVATIVE OF THE COST FUNCTION WITH RESPECT TO THE PARAMETERS W AND b.

def gradientsNN(dZ,A_prev,m,W,regu,lambd):
    """Gradients of the cost with respect to one layer's parameters.

    Args:
        dZ: Gradient of the cost with respect to Z of this layer.
        A_prev: Activations A of the previous layer.
        m: Number of examples.
        W: Weights of this layer (only used by the regularization term).
        regu: Regularization selector: 1 adds lambd*sign(W)/m to dW, 2 adds
            lambd*W/m, any other value adds nothing.
        lambd: Hyper-parameter lambda, the trade-off between regularization
            and entropy.

    Returns:
        Tuple (dW, db): gradient with respect to the weights matrix W and
        gradient with respect to the bias vector b (summed over axis 1 with
        the dimension kept).
    """
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dW = np.dot(dZ, A_prev.T) / m

    # Optional regularization contribution to the weight gradient.
    if regu == 1:
        penalty = lambd * np.sign(W)
        dW = dW + penalty / m
    elif regu == 2:
        penalty = lambd * W
        dW = dW + penalty / m

    return dW, db

In [11]:
# THE FUNCTION CALCULATES THE BACKWARD MODEL OF A NEURAL NETWORK.

def backwardProp(X,Y,parameters,caches,regu,lambd):
    """Run the backward pass and collect the gradients of every layer.

    Args:
        X: Matrix of examples; X.shape[1] is taken as the number of examples.
        Y: Vector of labels.
        parameters: List of per-layer dicts with keys "Weights" and "bias".
        caches: List of per-layer dicts with keys "Z" and "A", as produced by
            the forward pass.
        regu: Regularization selector, forwarded to gradientsNN.
        lambd: Hyper-parameter lambda, forwarded to gradientsNN.

    Returns:
        List with one dict per layer, in layer order, holding "dW" and "db":
        the derivatives of the cost with respect to that layer's W and b.
    """
    gradients = []

    m = X.shape[1]
    numLayers = len(parameters)

    AL = caches[numLayers-1]["A"]

    # With a single layer there is no previous activation in the cache: the
    # layer is fed by X directly. Indexing caches[numLayers-2] would wrap
    # around to caches[-1] and wrongly use the output activation instead.
    if numLayers == 1:
        A_prev = X
    else:
        A_prev = caches[numLayers-2]["A"]

    # For a sigmoid output with cross-entropy cost, dAL * sigmoid'(ZL)
    # simplifies to AL - Y. Using the closed form avoids the divisions by AL
    # and (1 - AL), which produce inf/NaN once the sigmoid saturates.
    dZL = AL - Y
    W = parameters[numLayers-1]["Weights"]
    dWL, dbL = gradientsNN(dZL,A_prev,m,W,regu,lambd)

    gradients.append({"dW": dWL,"db": dbL})

    dZ = dZL

    # Walk the hidden (ReLU) layers from last to first.
    for i in reversed(range(numLayers-1)):
        if i == 0:
            A_prev = X
        else:
            A_prev = caches[i-1]["A"]

        Z = caches[i]["Z"]
        # Propagate the gradient back through the weights of the next layer.
        W = parameters[i+1]["Weights"]
        dA = np.dot(W.T,dZ)
        dZ = reluBackward(dA,Z)

        W = parameters[i]["Weights"]
        dW, db = gradientsNN(dZ,A_prev,m,W,regu,lambd)

        gradients.append({"dW": dW,"db": db})

    # Gradients were collected output-first; return them in layer order.
    gradients.reverse()

    return gradients

In [12]:
# THE FUNCTION UPDATES THE PARAMETERS.

def updateParametersNN(parameters,gradients,learningRate):
    """Apply one gradient-descent step to every layer.

    Args:
        parameters: List of per-layer dicts with keys "Weights" and "bias".
            The dict entries are replaced in place with the updated arrays.
        gradients: List of per-layer dicts with keys "dW" and "db", indexed
            like parameters.
        learningRate: Learning rate hyper-parameter.

    Returns:
        The same parameters list, now holding the updated W and b.
    """
    for idx, layer in enumerate(parameters):
        grad = gradients[idx]
        layer["Weights"] = layer["Weights"] - learningRate * grad["dW"]
        layer["bias"] = layer["bias"] - learningRate * grad["db"]

    return parameters

In [13]:
# THE FUNCTION CREATES A NEURAL NETWORK MODEL.

def NNmodel(X,Y,dimLayers,learningRate,numIterations,regu=0,lambd=0):
    """Train a neural network with batch gradient descent.

    Args:
        X: Matrix of examples.
        Y: Vector of labels.
        dimLayers: Layer sizes, passed to randInitNN.
        learningRate: Learning rate hyper-parameter.
        numIterations: Number of gradient-descent iterations.
        regu: Regularization selector, forwarded to the cost and gradient
            computations.
        lambd: Hyper-parameter lambda, the trade-off between regularization
            and entropy.

    Returns:
        Tuple (parameters, costs): the trained per-layer parameters, and a
        list with the cost of every iteration (measured before that
        iteration's parameter update).
    """
    parameters = randInitNN(dimLayers)
    costs = []

    for _ in range(numIterations):
        # Forward pass; the last cache entry holds the network output.
        caches = forewardProp(X, parameters)
        AL = caches[len(parameters) - 1]["A"]

        iterationCost = costfunctionNN(AL, Y, parameters, regu, lambd)

        # Backward pass followed by the gradient-descent update.
        gradients = backwardProp(X, Y, parameters, caches, regu, lambd)
        parameters = updateParametersNN(parameters, gradients, learningRate)

        costs.append(iterationCost)

    return parameters, costs

In [14]:
# THE FUNCTION CLASSIFIES THE TEST/VALIDATION EXAMPLES.

def classificatorNN(X,parameters):
    """Predict binary labels for the given examples.

    Args:
        X: Matrix of test/validation examples.
        parameters: List of per-layer dicts with keys "Weights" and "bias".

    Returns:
        The output activations of the network thresholded at 0.5: entries
        >= 0.5 become 1 and entries < 0.5 become 0.
    """
    caches = forewardProp(X, parameters)
    Y_predict = caches[len(parameters) - 1]["A"]

    # Decide both classes from the raw activations, then overwrite in place.
    isPositive = Y_predict >= 0.5
    isNegative = Y_predict < 0.5
    Y_predict[isPositive] = 1
    Y_predict[isNegative] = 0

    return Y_predict

In [15]:
# THE FUNCTION GIVES DIFFERENT EVALUATION METRICS.

def evalModelNN(y_predicted,y_gt):
    """Compute precision, recall, F1 and accuracy for binary predictions.

    Both inputs are expected to be NumPy arrays of 0/1 labels with matching
    shapes (the counts rely on element-wise arithmetic between them).

    Args:
        y_predicted: Vector of labels predicted by the network.
        y_gt: Vector of ground-truth labels.

    Returns:
        Tuple (Precision, Recall, F1, Accuracy), each a percentage rounded to
        two decimals. A metric whose denominator is zero (for example the
        precision when nothing was predicted positive) is reported as 0.0
        instead of NaN.
    """
    TP = (y_predicted * y_gt == 1).sum()
    FP = (y_predicted - y_gt == 1).sum()
    TN = (y_predicted + y_gt == 0).sum()
    FN = (y_predicted - y_gt == -1).sum()

    def _percentage(numerator, denominator):
        # Guard the 0/0 case, which would otherwise yield NaN and a warning.
        if denominator == 0:
            return np.float64(0.0)
        return np.round((numerator / denominator) * 100, decimals=2)

    Precision = _percentage(TP, TP + FP)
    Recall = _percentage(TP, TP + FN)
    # F1 from the raw counts (2TP / (2TP + FP + FN)) rather than from the
    # already-rounded precision and recall, so rounding errors do not compound.
    F1 = _percentage(2 * TP, 2 * TP + FP + FN)
    Accuracy = _percentage(TP + TN, TP + TN + FP + FN)

    return Precision, Recall, F1, Accuracy