In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

Activation Functions

In [238]:
# Heaviside step function 

def heavyside(x):
    if (x >= 0):
        return 1
    else :
        return 0
        
# Rectified error Linear Unit function

def relu(x):
    return max(0,x)

# Sigmoid function

def activation_sigmoid(t):
        return 1/(1+np.exp(-t))

def sigmoid(x):
    return 1/(1+np.exp(-x))

def softmax(t):
    return np.exp(t)/np.sum(np.exp(t))

# Gassian error Linear Unit GeLU function

Neural Network

In [35]:
data = load_diabetes()
X = data['data']
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

In [276]:
class NeuralNetwork :

    def __init__(self, X_train, y_train):
        self.layers_wieghts = []
        self.layers_bias = []
        self.X_train = X_train
        self.y_train = y_train
        n = X_train.shape[1]
        self.layers_wieghts.append(np.random.rand(n))
        self.layers_bias.append(np.random.rand(1))

    def activation_sigmoid(self, t):
        return 1/(1+np.exp(-t))

    def activation_sigmoid_derivative(self, t):
        return np.exp(-t)/(1+np.exp(-t))**2

    def loss(self, X_train, y_train):
        y_hat = self.out(X_train)
        return mean_squared_error(y_hat, y_train)

    def loss_for_a_sample(self,x,y):
        y_hat = self.out(x)
        return np.mean((y_hat-y)**2)
                                  
    def integrate_layer(self,layer_index, x):
        out = self.layers_wieghts[layer_index]@x + self.layers_bias[layer_index]
        return  self.activation_sigmoid(out)
    
    def add_layer(self, layer_size):
        self.layers_wieghts.append(np.random.rand(layer_size, len(self.layers_bias[-1])))
        self.layers_bias.append(np.random.rand(layer_size))
        
    def update_weights(self, layer_index, delta_w):
        self.layers_wieghts[layer_index]  = self.layers_wieghts[layer_index]  + delta_w

    def update_bias(self, layer_index, delta_b):
        self.layers_bias[layer_index]  = self.layers_bias[layer_index] + delta_b 

    def delete_layer(self):
        if len(self.layers) > 1:
            self.layers.pop()
        else :
            print("No Hidden Layers to delete")

    def backward(self, X_):
        alpha = 100000
        activations = self.get_activations(X_)
        l = len(self.layers_bias)

        # Inititaion Output Layer
        E_d = (y_train[0] - self.layers_wieghts[l-1] @ activations[l-2] - self.layers_bias[l-1])
        E_d = E_d*self.activation_sigmoid_derivative(E_d)
        delta_w = -alpha*2* E_d @ activations[l-2].T.reshape(-1, len(activations[l-2]))
        delta_b = -alpha*2* E_d
        delta_a_l = -alpha*2*E_d @ self.layers_wieghts[l-1]
        self.update_weights(l-1, delta_w)
        self.update_bias(l-1, delta_b)
        delta_a_l = -alpha*2*E_d @ self.layers_wieghts[l-1]
        
        # Hidden Layers
        for i in range(l-2,0,-1):
            delta_w = -(alpha*2* delta_a_l).reshape(len(delta_a_l),1) @ activations[i-1].T.reshape(-1, len(activations[i-1]))
            delta_b = -alpha*2* delta_a_l
            self.update_weights(i, delta_w)
            self.update_bias(i, delta_b)
            delta_a_l = -alpha*2*delta_a_l @ self.layers_wieghts[i]
            delta_a_l = delta_a_l*self.activation_sigmoid_derivative(delta_a_l)
        
        # Input Layer
        delta_w = -alpha*2* delta_a_l.reshape(len(delta_a_l),1) @ X_.T.reshape(-1,len(X_))
        delta_b = -alpha*2* delta_a_l
        self.update_weights(0, delta_w)
        self.update_bias(0, delta_b)
    
    def get_activations(self, X_):
        l = len(self.layers_bias)
        activations = []
        previous_out = self.integrate_layer(0, X_).copy()
        activations.append(previous_out)
        for i in range(1,l):
            current_out = self.integrate_layer(i,previous_out).copy()
            previous_out = current_out.copy()
            activations.append(previous_out)
        return activations

    def forward(self, X_):
        l = len(self.layers_bias)
        previous_out = self.integrate_layer(0, X_).copy()
        for i in range(1,l):
            current_out = self.integrate_layer(i,previous_out).copy()
            previous_out = current_out.copy()
        return previous_out
    
    def train(self):
        max_epochs = 100
        for e in range(max_epochs):
            for i in range(len(self.X_train)):
                self.backward(X_train[i])
            print(mean_squared_error(activation_sigmoid(y_test),self.predict(X_test)))

    def predict(self, X_test):
        n = len(X_test)
        m = len(self.layers_bias[-1])
        prediction = np.empty((n,m))
        for i in range(n):
            out = self.forward(X_test[i])
            for j in range(m):
                prediction[i][j] = out[j]
        return prediction


In [277]:
nn = NeuralNetwork(X_train,y_train)
nn.add_layer(9)
nn.add_layer(8)
nn.add_layer(7)
nn.add_layer(6)
nn.add_layer(5)
nn.add_layer(3)
nn.add_layer(1)

In [278]:
nn.train()

0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.018537004985477622
0.01853700498

In [279]:
y_hat = nn.predict(X_test)

In [280]:
y_hat

array([[0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384932],
       [0.86384934],
       [0.86384934],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384932],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384933],
       [0.86384934],
       [0.863

In [281]:
mean_squared_error(activation_sigmoid(y_test),y_hat)

0.018537004985477622