In [8]:
import numpy as np


# Using fullbatch size (The whole dataset in one batch)
class NeuralNetwork():

    def __init__(self,X,y,n_hidden_neurons,output_act_fn = 'linear',error_fn = 'mse'):
        self.X = X
        self.y = y
        self.input_neurons = X.shape[1] #number of cols
        self.n_hidden_neurons = n_hidden_neurons
        self.output_act_fn = output_act_fn
        self.error_fn = error_fn

        # Initialize weights  and biases with random values
        np.random.seed(42)
        self.input_hidden_weights = np.random.randn(self.input_neurons, self.n_hidden_neurons)
        self.n_hidden_biases = np.zeros(self.n_hidden_neurons)
        self.hidden_output_weights = np.random.randn(n_hidden_neurons,1)
        self.output_bias = np.zeros(1)

    def activation_fn(self,x,act_fn): # x->refers to the input values
        if act_fn == 'sigmoid':
            return 1/(1+np.exp(-x))
        elif act_fn == 'relu':
            return np.maximum(0,x)
        elif act_fn == 'linear':
            return x
        else:
            return Exception("Unknown activation function")
    
    def activation_derv(self,x,act_fn):# x->refers to the input values
        '''
        derv(Sigmoid(x)) = sigmoid(x) * 1 - sigmoid(x)
        derv(relu(x)) = 1  if x > 0, 0  if x ≤ 0

        derv with respect to (x)

        '''

        if act_fn == 'sigmoid':
            s = self.activation_fn(x, act_fn)
            return s * (1-s)
        if act_fn == 'relu':
            return np.where(x>0,1,0)
        elif act_fn == 'linear':
            return 1

    def forward_pass(self,X): # Returns the first prediction without any optimization

        # Hidden layer
        self.hidden = self.activation_fn((X @ self.input_hidden_weights) + self.n_hidden_biases, 'relu')
        # Output layer
        self.output = self.activation_fn((self.hidden @ self.hidden_output_weights) + self.output_bias, self.output_act_fn)

        return self.output

    def error_estimation(self,y_true,y_pred):

        if self.error_fn=='mse':
            return np.mean((y_true - y_pred)**2)
        
        elif self.error_fn=='log loss':
            eps = 1e-15 # to prevent log(0) -> inf
            return -np.mean(y_true * np.log(y_pred + eps) + (1 - y_true) * np.log(1 - y_pred + eps))
    
    def error_estimation_derv(self, y_pred, y_true): # derv with respect to y_pred
        
        if self.error_fn=='mse':
            return -2 * (y_true - y_pred)/y_true.size
        
        elif self.error_fn == 'log loss':
            eps = 1e-15 # to avoid division by 0
            return (y_pred - y_true + eps) / ((y_pred+eps) * (1 - y_pred + eps))

    def backpropagation(self,X,y_true,y_pred,learning_rate):
        
        # Calculate Gradient using the chain rule
        # d(error)/d(z) = d(error)/d(pred) * d(pred)/d(z) -> where z is the input to the activation function (weight*x + b)
        self.output_error = self.error_estimation_derv(y_pred, y_true) * self.activation_derv(y_pred,self.output_act_fn)

        # Update output layer Weight and bias
        self.output_bias -= learning_rate * np.sum(self.output_error,axis=0)
        self.hidden_output_weights -= learning_rate * (self.hidden.T @ self.output_error) #(self.hidden.T @ self.output) How each hidden neuron should change
        #(self.hidden.T @ self.output) -> d(error)/wij then multiply it by learning rate, .T to make it work (Incompatible sizes) 

        # Update hidden layer weights and biases
        self. hidden_error = (self.output_error @ self.hidden_output_weights.T)  * self.activation_derv(self.hidden,'relu')# *self.activation_derv(self.hidden,'relu') because we are backpropagating through the activation function
        self.n_hidden_biases -= learning_rate*np.sum(self.hidden_error,axis=0)
        self.input_hidden_weights -= learning_rate*(X.T @ self.hidden_error) # d(error)/d(input_hidden_weights)
        #(self.output_error @ self.hidden_output_weights.T) means how much did the hidden neuron contributed to the output error (Scale of each weight to be updated)
        #d(error)/d(hidden_neuron_weights)

    def train(self,X,y,learning_rate,epochs):

        for epoch in range(epochs):
            y_pred = self.forward_pass(X)
            self.backpropagation(X,y,y_pred,learning_rate)
            if epoch%1000 == 0:
                print(f"Epoch: {epoch}, Loss: {self.error_estimation(y,y_pred)}")

    def predict(self, X):
        if self.error_fn == 'mse':
            return self.forward_pass(X)
        elif self.error_fn == 'log loss':
            return np.where(self.forward_pass(X)>0.5,1,0)

In [9]:
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=5000, n_features=5, noise=50, random_state=42)
y = y.reshape(-1,1)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y , random_state=42)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
nn = NeuralNetwork(X_train,y_train,128)
nn.train(X_train, y_train,learning_rate=0.001,epochs=5000)

Epoch: 0, Loss: 26410.43126798775
Epoch: 1000, Loss: 2342.38964105498
Epoch: 2000, Loss: 2304.659287732356
Epoch: 3000, Loss: 2278.4600310271503
Epoch: 4000, Loss: 2255.8950609560025


In [19]:
from sklearn.metrics import r2_score

y_pred = nn.predict(X_test)

print(r2_score(y_pred, y_test))

0.8808559784087395


In [20]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print(r2_score(y_pred, y_test))

0.8837168363777849


- The Hand made NN explains 88.08% of the variance in the data while LinearRegression model explains 88.37% of the data. Pretty close ratio