In [1]:
import numpy as np
import random
import pandas as pd


def dataset(data):
    random.seed(32)
    data_ = list(zip( data['idx'], data['forecasted_performance'],data['hs_sleep'], data['hs_study'], data['prev_scores'], data['practice'], data['extracurricular_activities']))
    random.shuffle(data_)
    idx, y, x1, x2, x3, x4, x5 = zip(*data_)
    x1 = list(x1)
    x2 = list(x2)
    x3 = list(x3)
    x4 = list(x4)
    x5= list(x5)
    y = list(y)
    for i in range(len(x5)):
        if x5[i] == False:
            x5[i] = 0
        else:
            x5[i] = 1

    X = np.zeros((len(x1), 5))
    for i in range(len(X)):
        X[i][0] = x1[i]
        X[i][1] = x2[i]
        X[i][2] = x3[i]
        X[i][3] = x4[i]
        X[i][4] = x5[i]
    
    Y = np.zeros((len(x1), 1))
    for i in range(len(Y)):
        Y[i] = y[i]

    return X, Y


class Network:
    def __init__(self, layers_, activation_functions, activation_grads_):
        np.random.seed(42)
        self.layers = layers_
        self.W = [np.random.normal(0, 0.2, (layers_[i+1], layers_[i])) for i in range(len(layers_)-1)]
        self.B = [np.random.normal(0, 0.2, (layers_[i], 1)) for i in range(1, len(layers_))]
        self.activations = activation_functions
        self.activation_grads = activation_grads_
        

    def evaluate(self, X):
        values_a = [np.zeros((self.layers[i], 1)) for i in range(len(self.layers))]
        values_z = [np.zeros((self.layers[i], 1)) for i in range(len(self.layers))]

        for layer in range(len(values_z)):
            if layer == 0:
                for neuron in range(len(values_z[layer])):
                    values_z[layer][neuron] = X[neuron]
            else:
                values_a[layer] = self.W[layer-1] @ values_z[layer-1] + self.B[layer-1]
                values_z[layer] = self.activations[layer-1](np.copy(values_a[layer]))
        return values_a, values_z

    def backpropagation(self, values_a, values_z, Y, loss_grad):
        W_grad = [np.zeros((len(self.W[i]), len(self.W[i][0]))) for i in range(len(self.W))]
        B_grad = [np.zeros((len(self.B[i]), 1)) for i in range(len(self.B))]
        delta = [np.zeros((len(values_z[i]), 1)) for i in range(len(values_z))]
        err = loss_grad(Y, values_z[-1])  
        delta[-1] = err @ self.activation_grads[-1](values_a[-1])
        
        W_grad[-1] = delta[-1] @ np.transpose(values_z[-2]) 
        B_grad[-1] = delta[-1]
        for layer in range(len(self.layers)-1, 1, -1):
            delta[layer-1] =  np.diag((self.activation_grads[layer-1](values_a[layer-1])).flatten()) @ np.transpose(self.W[layer-1]) @ delta[layer]
            W_grad[layer - 2] = delta[layer-1] @ np.transpose(values_z[layer-2])
            B_grad[layer-2] = delta[layer-1]
        return W_grad, B_grad
    
    def gradient_descent(self, data_train, learning_rate, epochs): 
        for epoch in range(epochs):
            W_grad_total = [np.zeros_like(w) for w in self.W]
            B_grad_total = [np.zeros_like(b) for b in self.B]
            for X, Y in data_train:
                a, z  = self.evaluate(X)
                W_grad, B_grad = self.backpropagation(a, z, Y, MSE_grad)
                for i in range(len(W_grad_total)):
                    W_grad_total[i] += W_grad[i]
                    B_grad_total[i] += B_grad[i]
            
            for layer in range(len(self.W)):
                self.W[layer] -= learning_rate * (W_grad_total[layer]/len(data_train))
                self.B[layer] -= learning_rate * B_grad_total[layer]


    def stochastic_gradient_descent(self, data_train, learning_rate, epochs):
        for epoch in epochs:
            for X, Y in data_train:
                a, z  = self.evaluate(X)
                W_grad, B_grad = self.backpropagation(a, z, Y, MSE_grad)
                for layer in range(len(self.W)):
                    self.W[layer] -= learning_rate * W_grad[layer]
                    self.B[layer] -= learning_rate * B_grad[layer]


    
    def mini_baches(self, data_train, learning_rate, epochs, batch_size):
        for epoch in epochs:
            np.random.shuffle(data_train)
            mini_batches = [data_train[k:k+batch_size] for k in range(0, len(data_train), batch_size)]
            for mini_batch in mini_batches:
                W_grad_total = [np.zeros_like(w) for w in self.W]
                B_grad_total = [np.zeros_like(b) for b in self.B]
                for X, Y in mini_batch:
                    a, z  = self.evaluate(X)
                    W_grad, B_grad = self.backpropagation(a, z, Y, MSE_grad)
                    for i in range(len(W_grad_total)):
                        W_grad_total[i] += W_grad[i]
                        B_grad_total[i] += B_grad[i]
                
                for layer in range(len(self.W)):
                    self.W[layer] -= learning_rate * W_grad_total[layer]
                    self.B[layer] -= learning_rate * B_grad_total[layer]



def relu(value):
    for i in range(len(value)):
        value[i] = max(0, value[i])
    return  value

def linear(x):
    return x

def MSE_grad(real_y, predict):
    return 2*(predict - real_y)


def relu_grad(x):
    return np.where(x < 0, 0, 1)


def linear_grad(x):
    return np.ones_like(x)

def error(data, predict):
    return np.mean((data - predict)**2)

def main():
    path = 'datasets/Student_Performance_DEV.csv'
    y_predict = []
    data = pd.read_csv(path)
    X, Y = dataset(data)
    NN = Network([5, 2, 1], [relu, linear], [relu_grad, linear_grad])
    data_train = [[X[i], Y[i]] for i in range(int(0.8*len(X)))]
    NN.gradient_descent(data_train, 0.01, 3)



    for i in range(int(0.8 * len(X)), len(X)):
        y_predict.append(NN.evaluate(X[i])[1][-1][0])
    err = error(Y[int(0.8 * len(X)):], y_predict)

    print("error", err)
    
        

main()

error 9123360941.633387
