Building a neural network from scratch.

In [8]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

Our Activation function will be the relu function throughout.

In [9]:
def relu_fun(z):
    a = max(0, z)
    return a

The neural_network(layer_sizes) takes a list/ nparray which has the number of neurons it wants in the ith layer as its ith element.

In [10]:
def neural_network(layer_sizes):
    params = {} #an empty dictionary

    for i in range (1, len(layer_sizes)):
        params['W' + str(i)] = np.random.rand(layer_sizes[i], layer_sizes[i-1])*0.01
        params['B' + str(i)] = np.random.rand(layer_sizes[i]).reshape(layer_sizes[i], 1)*0.01

    return params

Forward Propogation: Going from ith to (i+1)th layer; i.e., calculating A(i+1) using A(i), W(i) and B(i).

In [11]:
def forward_propogation(X_train, params):
    layers=len(params)//2
    values = {}

    for i in range(1, layers+1):
        if i == 1:
            values['Z' + str(i)] = params['W'+str(i)]@X_train + params['B'+str(i)]
            values['A' + str(i)] = relu_fun(values['Z' + str(i)])
        else:
            values['Z' + str(i)] = params['W'+str(i)]@values['A' + str(i-1)] + params['B'+str(i)]
            if i==layers:
                values['A' + str(i)] = values['Z' + str(i)]
            else:
                values['A' + str(i)] = relu_fun(values['Z' + str(i)])
    return values

The compute_cost(values, Y_train) function to find the error.

In [12]:
def compute_cost(values, Y_train):
    #mean square error = (1/2*m)(summation((y_pred-y_actual)^2))
    y_pred = values['A' + str(len(values)/2)]
    cost = (np.sum(np.square(y_pred-Y_train)))/2/len(Y_train)
    return cost

Now we find all the gradients using backward propogation.

In [13]:

def backward_propagation(params, values, X_train, Y_train): #takes parameters, activations, training set as input and returns gradients wrt parameters
    layers = len(params)//2
    m = len(Y_train)
    grads = {}
    for i in range(layers,0,-1):
        if i==layers:
            dA = (1/m )* (values['A' + str(i)] - Y_train)
            dZ = dA
        else:
            dA = np.dot(params['W' + str(i+1)].T, dZ)
            dZ = np.multiply(dA, np.where(values['A' + str(i)]>=0, 1, 0))
        if i==1:
            grads['W' + str(i)] = 1/m * np.dot(dZ, X_train.T)
            grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
        else:
            grads['W' + str(i)] = 1/m * np.dot(dZ,values['A' + str(i-1)].T)
            grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
    return grads

updating_params uses the gradients calculated and the initial values of the various parameters to find their final value.

In [14]:
def updating_params(params, learning_rate, grads):
    updated_params = {}
    
    for i in range(1, len(params)//2+1):
        updated_params['W' + str(i)] = params['W' + str(i)] - learning_rate*grads['W' + str(i)]
        updated_params['B' + str(i)] = params['B' + str(i)] - learning_rate*grads['B' + str(i)]
    
    return updated_params

By now we have completed the work of any neural network. We just have to stitch it all together so that the user gives an input of training data, and the hyperparameters, and we can perform all these functions in the right order.

In [15]:
def fit(X_train, Y_train, learning_rate, layer_sizes, num_iters):
    params = neural_network(layer_sizes)
    for i in range(num_iters):
        values = forward_propogation(X_train, params)
        grads = backward_propagation(params, values, X_train, Y_train)
        
        params = updating_params(params, learning_rate, grads)
        print("iteration :", i+1)
    
    return params

A function to find the accuracy and another to predict.

In [16]:
def compute_accuracy(X_train, X_test, Y_train, Y_test, params):
    values_train=forward_propogation(X_train, params)
    values_test=forward_propogation(X_test, params)
    train_acc=np.sqrt(mean_squared_error(Y_train, values_train['A'+str(len(params)//2)].T))
    test_acc=np.sqrt(mean_squared_error(Y_test, values_test['A'+str(len(params)//2)].T))
    return train_acc, test_acc

def predict(X_test, params):
    values=forward_propogation(X_test, params, relu_fun)
    pred=values['A'+str(len(params)//2)].T
    return pred

Let's import the Boston Housing dataset.

In [17]:
df_boston=pd.read_csv('database/BostonHousing.csv')
df_boston_X=df_boston.drop(['medv'], axis=1)
df_boston_Y=df_boston['medv']
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test=train_test_split(df_boston_X, df_boston_Y, test_size=0.2, random_state=4)
X_train=X_train.T
X_test=X_test.T
Y_train=Y_train.T
Y_test=Y_test.T

In [18]:
arr_X_train=X_train.values
arr_X_test=X_test.values
arr_Y_train=Y_train.values
arr_Y_test=Y_test.values

layer_sizes=[13, 64, 128, 64, 1]
relu_fun=np.vectorize(relu_fun)
params=fit(arr_X_train, arr_Y_train, 0.01, layer_sizes, 100)

iteration : 1
iteration : 2
iteration : 3
iteration : 4
iteration : 5
iteration : 6
iteration : 7
iteration : 8
iteration : 9
iteration : 10
iteration : 11
iteration : 12
iteration : 13
iteration : 14
iteration : 15
iteration : 16
iteration : 17
iteration : 18
iteration : 19
iteration : 20
iteration : 21
iteration : 22
iteration : 23
iteration : 24
iteration : 25
iteration : 26
iteration : 27
iteration : 28
iteration : 29
iteration : 30
iteration : 31
iteration : 32
iteration : 33
iteration : 34
iteration : 35
iteration : 36
iteration : 37
iteration : 38
iteration : 39
iteration : 40
iteration : 41
iteration : 42
iteration : 43
iteration : 44
iteration : 45
iteration : 46
iteration : 47
iteration : 48
iteration : 49
iteration : 50
iteration : 51
iteration : 52
iteration : 53
iteration : 54
iteration : 55
iteration : 56
iteration : 57
iteration : 58
iteration : 59
iteration : 60
iteration : 61
iteration : 62
iteration : 63
iteration : 64
iteration : 65
iteration : 66
iteration : 67
iter

In [19]:
train_acc, test_acc=compute_accuracy(arr_X_train, arr_X_test, arr_Y_train, arr_Y_test, params)
print(train_acc, test_acc)

9.776763095703163 9.911634095608784
