In [1]:
import numpy as np
import math

def initialise_params(n_h,n_prev):
    #He initialisation
    W = np.random.randn(n_h,n_prev) * math.sqrt(2/n_prev)
    
    b = np.zeros((n_h,1))
    
    return W, b

def Leaky_ReLU(x):
    return np.where(x > 0, x, x * 0.01)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

class Layer:
    def __init__(self, n_hidden, n_prev, activation = "relu"):
        
        self.n_h = n_hidden
        
        self.W , self.b = initialise_params(self.n_h,n_prev)
        
        self.activation_func = activation

        self.grads = tuple()

        self.layer_cache = tuple()

    # Computes forward pass through this layer
    def forward(self, A_prev):
        
        Z = np.dot(self.W,A_prev) + self.b

        A = 0
        
        if self.activation_func == "relu":
            A = Leaky_ReLU(Z)

        elif self.activation_func == "tanh":
            A = np.tanh(Z)

        elif self.activation_func == "sigmoid":
            A = sigmoid(Z)
        
        self.layer_cache = (Z,A,)

        return A

    # Computes backward pass through this layer
    def backward(self, dA, A_prev):
        m = A_prev.shape[1]
        
        dZ = 0
        
        Z = self.layer_cache[0]

        if self.activation_func == "relu":
            dZ = dA * np.where(Z > 0, 1, 0.01)

        if self.activation_func == "tanh":
            
            dZ = dA * (1 - (np.tanh(Z) ** 2))

        if self.activation_func == "sigmoid":

            dZ = dA * (sigmoid(Z) * (1 - sigmoid(Z)))

        dW = (1/m)*(np.dot(dZ, A_prev.T))

        db = (1/m)*(np.sum(dZ, axis=1, keepdims=True))
              
        dA_prev = np.dot(self.W.T, dZ)
               
        self.grads = (dW, db, dZ, dA,)
        
        return dA_prev



class Model:
    """
    L - layer model with each layer having adjustable number of  units
    Only one output possible
    Cross Entropy cost
    tanh and sigmoid activations used, relu also available
    
    """
    def __init__(self, layers_dims, activation_funcs):
        
        self.n_layers = len(layers_dims)

        # List of layers of the model
        self.layers = []
        
        for i in range(1,self.n_layers):
            
            self.layers.append(Layer(layers_dims[i], layers_dims[i-1], activation_funcs[i-1]))

        self.layers.append(Layer(1,layers_dims[i], "sigmoid")) # Final output layer



    # Computes forward pass of model        
    def forward_model(self, X):
        
        A = self.layers[0].forward(X)
            
        for i in range(1,len(self.layers)):
            
            A = self.layers[i].forward(A)

        return A

    # Computes backward pass of model
    def backward_model(self, X, Y, AL):
        
        dA = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
        

        for i in reversed(range(1,len(self.layers))):

            dA = self.layers[i].backward(dA, self.layers[i-1].layer_cache[1])


        dA = self.layers[0].backward(dA, X)

    # Perform gradient descent
    def update_params(self, learning_rate = 0.05):
        for layer in self.layers:

            layer.W = layer.W - (learning_rate * layer.grads[0])
            
            layer.b = layer.b - (learning_rate * layer.grads[1])

    
    def train(self, X_train, Y_train, iterations = 1000, learning_rate = 0.05):

        # Take one step of gradient descent per iteration
        for i in range(1, iterations+1):
            AL = self.forward_model(X_train)

            self.backward_model(X_train,Y_train,AL)

            self.update_params(learning_rate)

            if i%10 == 0:
                cost = self.compute_cost(AL, Y_train)
                print(f"Cost at iteration {i} is ", cost)

    # Computes cross entropy cost
    def compute_cost(self, ypred, y):
        Y = y.reshape(1,y.shape[0])

        m = Y.shape[1] # number of training examples
        
        logprobs = (np.multiply(np.log(ypred),y)) + (np.multiply(np.log(1-ypred),1-y))
        
        cost = -(1/m) * np.sum(logprobs)
        
        cost = float(np.squeeze(cost)) # Ensures that 12.35 is returned as opposed to [[12.35]]

        return cost

    # Evaluate model on test set
    def evaluate(self, X_test, Y_test):
        A = self.forward_model(X_test)

        cost = self.compute_cost(A, Y_test)

        print("Accuracy on test set is: ", (1 - cost) * 100, "%")



In [2]:
from sklearn.model_selection import train_test_split
import pandas as pd

In [3]:
bank_data = pd.read_csv("./data_banknote_authentication.csv")
X_bank = pd.concat([bank_data["Attr1"],bank_data["Attr2"],bank_data["Attr3"],bank_data["Attr4"]], axis = 1)
Y_bank = bank_data["target"]

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X_bank, Y_bank, test_size=0.2)
X_train_array = X_train.to_numpy().T
Y_train_array = Y_train.to_numpy().T
X_test_array = X_test.to_numpy().T
Y_test_array = Y_test.to_numpy().T

In [5]:
# 4 at index 0 is the number of input features
# 6,7,4 are the number of units in the 1, 2, 3rd layers
layers_dims = [4,32,32,8,]
activations = ["relu", "tanh", "relu",]
DeepNN = Model(layers_dims, activations)

In [6]:
DeepNN.train(X_train_array, Y_train_array, 150, 0.1)

Cost at iteration 10 is  0.39828025236458914
Cost at iteration 20 is  0.20616919747564386
Cost at iteration 30 is  0.12327593878123255
Cost at iteration 40 is  0.0808872807054035
Cost at iteration 50 is  0.057458719848715464
Cost at iteration 60 is  0.04357815365708165
Cost at iteration 70 is  0.034501237569183885
Cost at iteration 80 is  0.02816275217657873
Cost at iteration 90 is  0.02350456350417371
Cost at iteration 100 is  0.019973283312060316
Cost at iteration 110 is  0.017226313189122395
Cost at iteration 120 is  0.015071972724693168
Cost at iteration 130 is  0.013344010609645966
Cost at iteration 140 is  0.011929768749921762
Cost at iteration 150 is  0.010755900326575448


In [7]:
DeepNN.evaluate(X_test_array, Y_test_array)

Accuracy on test set is:  98.82822786267155 %
