In [42]:
#importing necessary libraries 
import numpy as np
import pandas as pd

In [62]:
#creating class layer with forward and backward propagation
class Layer():
    # input_size = number of input neurons (number of features of the input sample)
    # output_size = number of output neurons
    def __init__(self, input_size, output_size):
        #initialising the weights and biases randomly
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    # returns output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data.reshape(1,input_data.size)
        #the output for layer forward pass is w.T*X + b 
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    # computes dL/dW, and input error dL/dX for a given output_error=dL/dY
    def backward_propagation(self, output_error, learning_rate):
        #using derived input error dL/dX = output_error*w.T
        input_error = np.dot(output_error, self.weights.T)
        #using derived weights error dL/dW = X.T*output_error
        weights_error = np.dot(self.input.T, output_error)

        # updated parameters using gradient descent 
        # del(w) = learning_rate* dL/dW and w_new = w_old - del(w)
        self.weights -= learning_rate * weights_error
        
        #dL/db = dL/dY (output error)
        #so, del(bias) = Learning_rate* output_error and b_new = b_old - del(bias)
        self.bias -= learning_rate * output_error
        return input_error

In [63]:
# creating an Activation where we can pass our preferrable or given activation function for each layer
class ActivationLayer():
    def __init__(self, activation, activation_prime):
        #input given will be activation function to be used and its derivative function i.e activation_prime
        self.activation = activation
        self.activation_prime = activation_prime

    # returns the activated input through forward pass
    def forward_propagation(self, input_data):
        self.input = input_data
        #just passing the input data through the given activation function
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        #In backward pass, returning the gradient (or input error to be fed to backward pass of other layer)
        #gradient = activation function derivative for given input * output_error (dL/dY)
        return self.activation_prime(self.input) * output_error

In [64]:
# Defining activation function and its derivative for further use 
def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1-np.tanh(x)**2

def sigmoid(x):
    return 1/(1+np.exp(-1.0*x))

def sigmoid_prime(x):
    return sigmoid(x)*(1-sigmoid(x))

def linear(x):
    return x 

def linear_prime(x):
    return 1.0

In [81]:
# Defining Mean sqaure loss function and its derivative
def mean_square_loss(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2))

def mean_square_loss_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size

In [66]:
#creating a Network class for conneting all the layers and carring out forward and backward propagation
class Network:
    def __init__(self):
        #initialising array for appending layers later
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # adding layer to network
    def add(self, layer):
        self.layers.append(layer)

    # specifying the loss to get used for further processinf like mean square or cross entropy etc.
    def use_loss(self, loss, loss_prime):
        # defined loss function in use
        self.loss = loss
        # loss function derivating in use 
        self.loss_prime = loss_prime

    # predicting the output for given input data
    def prediction(self, input_data):
        samples = len(input_data)  #number of samples in input data
        result = []

        # running network over all the samples
        for i in range(samples):
            # forward propagation
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)

        return result

    # train the network
    def fitting(self, x_train, y_train, epochs, learning_rate):
        samples = len(x_train)

        # iterations loop
        for i in range(epochs):
            # iteration over all the input samples
            for j in range(samples):
                # forward pass
                output = x_train[j]
                # passing the output of one layer as the input to other layer
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                # backward pass
                error = self.loss_prime(y_train[j], output)
                #passing the gradient/ error to reversed layers in backward pass which in turn updates our weights and biases finally
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

In [67]:
# reading the Boston Dataset from sklearn
from sklearn import datasets

boston = datasets.load_boston()

In [68]:
#input dataset 
X = boston.data
X = X.reshape(X.shape[0],1,X.shape[1])
print(X.shape)

(506, 1, 13)


In [69]:
#normalizing the X data using min-max 
for i in range(X.shape[2]):
    for j in range(X.shape[0]):
        X[:,:,i][j,0] = (X[:,:,i][j,0] - min(X[:,:,i]))/(max(X[:,:,i]) - min(X[:,:,i]))

In [70]:
# true output y 
y = boston.target
y = y.reshape(y.size, 1)
print(y.shape)

(506, 1)


In [71]:
# normalising y using min-max
for i in range(y.size):
    y[i,0] = (y[i,0] - min(y[:,0]))/(max(y[:,0])-min(y[:,0]))

In [72]:
from sklearn.model_selection import train_test_split
# splitting dataset into train test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

(a) Part

In [91]:
# creating a network for (a) part
net1 = Network()
net1.add(Layer(13, 1)) # adding layer1 with input neuron same as number of features in each sample (i.e 13) and output neurons as mentioned in the ques(i.e. 1)
net1.add(ActivationLayer(linear, linear_prime)) # activating our previous layer1 with linear activation function

In [92]:
# training the model using training dataset
net1.use_loss(mean_square_loss, mean_square_loss_prime) # Mean square loss to be used as Loss in this case
net1.fitting(X_train, y_train, epochs=1200, learning_rate=1e-10) #fitting our training data to network model

In [93]:
# testing our model using test dataset
y_pred_a = net1.prediction(X_test)
print(y_pred_a)  #predicted output

[array([[-0.4072189]]), array([[-0.1160333]]), array([[-0.08069582]]), array([[-0.31608353]]), array([[0.16411197]]), array([[-0.43269683]]), array([[-0.37499409]]), array([[-0.26955179]]), array([[0.09235116]]), array([[-0.39450549]]), array([[-0.41993058]]), array([[-0.58715146]]), array([[-0.35249193]]), array([[-0.48042762]]), array([[-0.1954203]]), array([[-0.43893545]]), array([[-0.52223876]]), array([[-0.38349842]]), array([[-0.35087838]]), array([[-0.44439491]]), array([[-0.49344794]]), array([[0.12634462]]), array([[0.22432972]]), array([[-0.31089985]]), array([[-0.37477409]]), array([[-0.33121564]]), array([[-0.07442016]]), array([[-0.42672223]]), array([[-0.13267866]]), array([[0.01834049]]), array([[-0.10359189]]), array([[-0.12049787]]), array([[-0.28504253]]), array([[-0.21177302]]), array([[-0.48650579]]), array([[-0.15646634]]), array([[-0.40214155]]), array([[0.13004726]]), array([[-0.52813207]]), array([[-0.07258155]]), array([[-0.21018066]]), array([[-0.39035634]]), 

In [94]:
mean_square_loss(y_test, y_pred_a)  #calculating mean square loss for out predicted output of (a) part

0.7139387046743337

(b) Part

In [95]:
# creating a network for (b) part
net2 = Network()
net2.add(Layer(13,13)) # adding layer1 with input neuron same as number of features in each sample (i.e 13) and output neurons as mentioned in the ques(i.e. 13)
net2.add(ActivationLayer(sigmoid, sigmoid_prime)) # activating our previous layer1 with sigmoid activation function
net2.add(Layer(13, 1)) # adding layer2 with input neuron same as number of output neurons in layer1 (i.e 13) and output neurons as mentioned in the ques(i.e. 1)
net2.add(ActivationLayer(linear, linear_prime)) # activating our previous layer2 with linear activation function

In [96]:
# training the model using training dataset
net2.use_loss(mean_square_loss, mean_square_loss_prime) # Mean square loss to be used as Loss in this case
net2.fitting(X_train, y_train, epochs=1200, learning_rate=1e-10) #fitting our training data to network model

In [97]:
# testing our model using test dataset
y_pred_b = net2.prediction(X_test)
print(y_pred_b) #predicted output

[array([[0.37927085]]), array([[0.37904826]]), array([[0.32987756]]), array([[0.39571931]]), array([[0.25940893]]), array([[0.41929285]]), array([[0.35263419]]), array([[0.38781685]]), array([[0.25408689]]), array([[0.34555893]]), array([[0.33602521]]), array([[0.4088051]]), array([[0.37538963]]), array([[0.41849416]]), array([[0.34331027]]), array([[0.38384299]]), array([[0.40997477]]), array([[0.37525566]]), array([[0.4215443]]), array([[0.42468084]]), array([[0.4231319]]), array([[0.34943254]]), array([[0.33655202]]), array([[0.41255628]]), array([[0.36921957]]), array([[0.42326478]]), array([[0.42288841]]), array([[0.35527183]]), array([[0.32627633]]), array([[0.38137645]]), array([[0.38612713]]), array([[0.33082052]]), array([[0.34454472]]), array([[0.3245009]]), array([[0.36521835]]), array([[0.36034427]]), array([[0.37864827]]), array([[0.3173999]]), array([[0.39996547]]), array([[0.39121282]]), array([[0.32381166]]), array([[0.36982535]]), array([[0.26973385]]), array([[0.38819

In [98]:
mean_square_loss(y_test, y_pred_b) #calculating mean square loss for out predicted output of (b) part

0.06745321656745663

(c) part

In [99]:
# creating a network for (c) part
net3 = Network()
net3.add(Layer(13,13)) # adding layer1 with input neuron same as number of features in each sample (i.e 13) and output neurons as mentioned in the ques(i.e. 13)
net3.add(ActivationLayer(sigmoid, sigmoid_prime)) # activating our pervious layer1 with sigmoid activation function
net3.add(Layer(13,13)) # adding a layer2 with input neuron same as number of output neurons in layer1 (i.e 13) and output neurons as mentioned in the ques(i.e. 13)
net3.add(ActivationLayer(sigmoid, sigmoid_prime)) # activating our pervious layer2 with sigmoid activation function
net3.add(Layer(13, 1)) # adding a layer3 with input neuron same as number of output neurons in layer2 (i.e 13) and output neurons as mentioned in the ques(i.e. 1)
net3.add(ActivationLayer(linear, linear_prime)) # activating our pervious layer3 with linear activation function

In [100]:
# training the model using training dataset
net3.use_loss(mean_square_loss, mean_square_loss_prime) # Mean square loss to be used as Loss in this case
net3.fitting(X_train, y_train, epochs=1200, learning_rate=1e-10) #fitting our training data to network model

In [101]:
# testing our model using test dataset
y_pred_c = net3.prediction(X_test)
print(y_pred_c) #predicted output

[array([[0.50573778]]), array([[0.51293537]]), array([[0.50905894]]), array([[0.51146517]]), array([[0.53723796]]), array([[0.51254498]]), array([[0.51502823]]), array([[0.51463652]]), array([[0.50300632]]), array([[0.51686137]]), array([[0.51632813]]), array([[0.51543154]]), array([[0.51469238]]), array([[0.52003883]]), array([[0.54202211]]), array([[0.51594184]]), array([[0.51360211]]), array([[0.51146043]]), array([[0.51177527]]), array([[0.51911398]]), array([[0.52240692]]), array([[0.54269787]]), array([[0.54156275]]), array([[0.51201405]]), array([[0.50506791]]), array([[0.51200786]]), array([[0.51556929]]), array([[0.51705685]]), array([[0.50902537]]), array([[0.54473579]]), array([[0.5142907]]), array([[0.50685059]]), array([[0.51598694]]), array([[0.54139268]]), array([[0.50600121]]), array([[0.51515065]]), array([[0.51686145]]), array([[0.54198788]]), array([[0.51619074]]), array([[0.51397842]]), array([[0.51146068]]), array([[0.5138457]]), array([[0.50064496]]), array([[0.51

In [102]:
mean_square_loss(y_test, y_pred_c) #calculating mean square loss for out predicted output of (c) part

0.040580350409270605

### References:
1. https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65
2. https://youtube.com/playlist?list=PLQVvvaa0QuDcjD5BAw2DxE6OF2tius3V3