In [1]:
import pandas as analytics
import numpy as maths
from math import exp
from numba import cuda , jit
import warnings 
warnings.filterwarnings("ignore")

In [2]:
df_data = analytics.read_csv('data1.csv',header = None , names = ['x0','x1','y'])
df_data

Unnamed: 0,x0,x1,y
0,22.930,-20.9020,1
1,12.410,2.0033,1
2,-20.686,-33.3030,1
3,46.974,-13.5550,1
4,41.965,20.3680,1
...,...,...,...
195,-29.232,28.4410,-1
196,-10.159,7.3065,-1
197,-32.026,14.9380,-1
198,-49.533,-11.1280,-1


In [36]:
class neural_network :
    
    def __init__(self,df_data , number_of_neurons = [2,1], epsilon = 5e-2, alpha = 0.2, hidden_function = 'relu', output_function = 'sigmoid') :

        # if number_of_hidden_layers + 1 != len(number_of_neurons) :
        #     raise ValueError("Given structure is not correct ! Number of Hidden Layers stated and Number of neurons in hidden layers are not matching")
        
        self.number_of_hidden_layers = len(number_of_neurons) - 1
        self.number_of_neurons = number_of_neurons
        self.df_data = df_data
        self.epsilon = epsilon 
        self.N = df_data.shape[0]
        self.alpha = alpha

        functions_list = {'relu':self.relu , 'sigmoid' : self.sigmoid}
        self.hidden = functions_list[hidden_function]
        self.output = functions_list[output_function]


    def relu(self,x) :
        """Hidden Activation Function"""
        return max(0,x)
    
    def sigmoid(self,x):
        """Output Function"""
        return 1 / (1 + exp(-x))

    def calculate_derivative(self, data, func):
        if func == self.relu :
            if data > 0 : data = 1
            derivative = func(data)

        if func == self.sigmoid :
            derivative = func(data) * ( 1- func(data))

        return derivative

    
    def initialisation(self):
        weights = []
        bias = []
        for l in range(self.number_of_hidden_layers + 1):
            if l == 0 : previous_layer = df_data.shape[1] - 1
            else : previous_layer = self.number_of_neurons[l-1]
            present_layer = self.number_of_neurons[l]
            weights.append(maths.random.random((present_layer,previous_layer)))
            bias.append(maths.random.random((present_layer,1)))
        self.weights = weights
        self.bias = bias


    def forward_propagation(self,x):
        activations = []
        activations.append(x)   # inputdata point is the initial activation
        
        hidden_layers = []
        
        for l in range(self.number_of_hidden_layers):
            neurons = self.weights[l] @ activations[l] + self.bias[l]   # it actually should be activations[l+1] but because indexing in python starts from 0, so it is activations[l]
            activation = maths.matrix([self.hidden(float(neuron)) for neuron in neurons]).reshape(-1,1)
            hidden_layers.append(neurons)
            activations.append(activation)
   
        data = self.weights[-1] @ activations[-1] + self.bias[-1]
        activations.append(self.output(data))
        
        self.hidden_layers = hidden_layers
        self.activations = activations


    def backward_propagation(self) :
      
        deltas = []
        data = self.weights[-1] @ self.activations[-2] + self.bias[-1]
        diff_loss_wrt_output = self.activations[-1]
        delta = maths.matrix(maths.multiply(diff_loss_wrt_output,  self.calculate_derivative(data,self.output))).reshape(-1,1)
        deltas.append(delta)
        
        grad_weights = []
        grad_biases = []
        for l in range(self.number_of_hidden_layers , -1, -1 ): # it is actually running from Output layer to the first hidden layer. Due to indexing convention, loop starts from number_of_hidden_layers and goes till 0
            grad_weight = deltas[0] @ self.activations[l].T
            grad_bias = deltas[0]

            d = self.weights[l].T @ deltas[0]
            der = maths.matrix([self.calculate_derivative(float(i),self.hidden) for i in self.hidden_layers[l-1]]).reshape(-1,1)
            delta = maths.multiply( d , der ).reshape(-1,1)
            deltas.insert(0,delta)

            grad_weights.insert(0,grad_weight)
            grad_biases.insert(0,grad_bias)
    
        return grad_weights, grad_biases


    def has_converged(self, prev_weights, prev_bias) :
        self._converged = (maths.linalg.norm(maths.matrix(self.weights[0]) - maths.matrix(prev_weights[0])) < self.epsilon) and (maths.linalg.norm(maths.matrix(self.bias[0]) - maths.matrix(prev_bias[0])) < self.epsilon)


    def update_weights(self):
        
        prev_weights = [w + 1 for w in self.weights]
        prev_bias = [b + 1 for b in self.bias]
        
        self._converged = False
        
        while not self._converged:
            prev_weights = self.weights.copy()
            prev_bias = self.bias.copy()
            
            for m in range(self.N) :
                x = maths.matrix(self.df_data.iloc[m][:-1]).reshape(-1,1)
                y = self.df_data.iloc[m][-1]
                self.forward_propagation(x)
                grad_weights, grad_bias = self.backward_propagation()

                for l in range(self.number_of_hidden_layers + 1) :
                    self.weights[l] = self.weights[l] - self.alpha/self.N * grad_weights[l]
                    self.bias[l] = self.bias[l] - self.alpha/self.N * grad_bias[l]
                    
            
            self.has_converged(prev_weights, prev_bias)

In [37]:
neurons_in_each_layer = [5,3,5,2,1]
epsilon = 5e-2
alpha = 0.2
hidden_function = 'relu'
output_function = 'sigmoid'




ann = neural_network(df_data ,
                     number_of_neurons = neurons_in_each_layer,
                     epsilon = epsilon,
                     alpha = alpha,
                     hidden_function = hidden_function,
                     output_function = output_function
                    )

ann.initialisation()
ann.update_weights()

for i in range(len(neurons_in_each_layer)):
    print("LAYER",i+1)
    print("Weights")
    print(ann.weights[i])
    print("Bias")
    print(ann.bias[i].T)
    print()

LAYER 1
Weights
[[0.93844702 0.84165307]
 [0.49173185 0.4141079 ]
 [0.56771668 0.23704826]
 [0.72262624 0.09342932]
 [0.3993574  0.01305375]]
Bias
[[0.67198154 0.61487117 0.30290067 0.83504981 0.99293454]]

LAYER 2
Weights
[[0.12872233 0.48148709 0.14455145 0.10016372 0.63148733]
 [0.43418351 0.36104668 0.7122866  0.51279804 0.77343812]
 [0.17235311 0.71760145 0.12182123 0.91683855 0.86996805]]
Bias
[[0.69267888 0.13546792 0.27159113]]

LAYER 3
Weights
[[0.87515209 0.18885136 0.8931342 ]
 [0.77698471 0.52808508 0.50576448]
 [0.14259524 0.30623374 0.9653051 ]
 [0.65597982 0.66142878 0.48521389]
 [0.5623395  0.55239742 0.37136989]]
Bias
[[0.11754939 0.33368122 0.28752836 0.74304545 0.70793896]]

LAYER 4
Weights
[[0.56486568 0.18096658 0.65559309 0.92291038 0.84322655]
 [0.95786665 0.98938328 0.68856124 0.17394717 0.14821441]]
Bias
[[0.38841874 0.61866499]]

LAYER 5
Weights
[[0.79615602 0.08760867]]
Bias
[[0.64117347]]

