## Building a Neural Network from Scratch for a **Regression** dataset



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Synthetic regression data: two uniform features and a noisy quadratic target.
# A seeded generator makes the sample (and everything trained on it) reproducible
# when the notebook is re-run from a fresh kernel.
rng = np.random.default_rng(42)

X = rng.random((1000, 2)) * 100  # Features between 0 and 100
y = 3 * X[:, 0] + 2 * X[:, 1]**2 + rng.standard_normal(1000) * 50  # Quadratic relationship + noise

df = pd.DataFrame(X, columns=['Feature_1', 'Feature_2'])
df['Target'] = y

df.head()

Unnamed: 0,Feature_1,Feature_2,Target
0,90.384068,84.076766,14390.132624
1,88.994777,61.341452,7701.691884
2,88.45763,0.764275,320.497111
3,56.73186,27.814022,1695.168244
4,92.171436,22.585454,1375.717101


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the two features and the target in one pass, then rebuild the
# frame with short column names.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split that follows, so test rows contribute to the scaling statistics.
scalar = StandardScaler()
standard_df = scalar.fit(df).transform(df)
df = pd.DataFrame(data=standard_df, columns=['x1', 'x2', 'y'])


In [None]:
# First two columns are the features, the third column is the target.
input_cols, output_cols = df.iloc[:, :2], df.iloc[:, 2]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    input_cols,
    output_cols,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Convert the pandas splits into plain NumPy arrays for the hand-written network.
X_tr, X_te = (np.array(part.iloc[:, :2]) for part in (X_train, X_test))
y_tr, y_te = (np.array(part.iloc[:]) for part in (y_train, y_test))

In [None]:
# Turn both target vectors into single-column matrices so they line up with the
# network's (n_samples, 1) output.
y_tr, y_te = (target.reshape(-1, 1) for target in (y_tr, y_te))

In [None]:
class NeuralNetwork:
  """Fully connected feed-forward network with linear (identity) activations.

  The network is trained with full-batch gradient descent on the mean squared
  error. No non-linearity is applied between layers, so the model as a whole
  computes an affine function of its inputs.

  Attributes:
    layers_dims: Layer sizes, input first and output last, e.g. [2, 4, 1].
    weights: One matrix per layer transition, shape (layers_dims[i+1], layers_dims[i]).
    biases: One row vector per layer transition, shape (1, layers_dims[i+1]).
    loss_history: Loss of every epoch of the most recent fit() call.
  """

  def __init__(self,layers_dims):
    """Create the weight and bias matrices for the given layer sizes.

    For a (2, 2, 1) structure there are two weight matrices of shapes
    (2, 2) and (1, 2) and two bias matrices of shapes (1, 2) and (1, 1).
    """
    self.layers_dims = layers_dims

    self.weights = []
    self.biases = []
    self.loss_history = []

    # NOTE(review): every weight starts at the same constant, so all units of a
    # layer receive identical gradients and stay identical during training.
    # Kept as-is because existing results depend on this deterministic start.
    for n_in, n_out in zip(layers_dims[:-1], layers_dims[1:]):
      self.weights.append(np.full((n_out, n_in), 0.01))
      self.biases.append(np.zeros((1, n_out)))

  @staticmethod
  def _align_targets(y_true, y_pred):
    """Return both inputs as arrays, with y_true reshaped to y_pred's shape.

    The reshape only happens when the two hold the same number of elements.
    It prevents a flat (m,) target from broadcasting against an (m, 1)
    prediction into an (m, m) matrix. Any other combination is left to
    NumPy's normal broadcasting rules.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
      y_true = y_true.reshape(y_pred.shape)
    return y_true, y_pred

  def forward_prop(self,X):
    """Run a forward pass and return the output of every layer.

    Returns:
      A list whose first entry is the input and whose following entries are
      the outputs Z = A_prev . W^T + b of each layer, the last one being the
      network output.
    """
    activations = [np.asarray(X, dtype=float)]

    for W,b in zip(self.weights,self.biases):
      Z = np.dot(activations[-1],W.T) + b       # identity activation: A = Z
      activations.append(Z)

    return activations


  def loss(self,y_true,y_pred):
    """Mean squared error between targets and predictions."""
    y_true, y_pred = self._align_targets(y_true, y_pred)
    return np.mean((y_true-y_pred)**2)


  def loss_derivative(self,y_true,y_pred):
    """Derivative of the loss w.r.t. the predictions: -2 * (y - y_hat) / m.

    m is the length of the first axis of y_true as it was passed in.
    """
    m = np.asarray(y_true).shape[0]
    y_true, y_pred = self._align_targets(y_true, y_pred)
    return -2*(y_true-y_pred)/m


  def backward_prop(self,y_true,activations):
    """Back-propagate the loss and return per-layer weight and bias updates.

    Args:
      y_true: Targets, one row per sample.
      activations: The list returned by forward_prop for the same batch.

    Returns:
      (dWs, dbs): two lists ordered like self.weights / self.biases.

    NOTE(review): loss_derivative already divides by m and the values below are
    divided by m a second time, so what is returned is the MSE gradient scaled
    by 1/m. The scale is kept on purpose: learning rates used with this class
    are tuned to it, and removing one division would multiply the effective
    step size by m.
    """
    m = np.asarray(y_true).shape[0]
    dA = self.loss_derivative(y_true,activations[-1])
    n_layers = len(self.weights)
    dWs = [None] * n_layers
    dbs = [None] * n_layers

    for i in reversed(range(n_layers)):         # output layer first, input layer last
      dZ = dA                                   # dA/dZ = 1 for the identity activation

      dWs[i] = np.dot(dZ.T,activations[i]) / m
      dbs[i] = np.sum(dZ,axis=0,keepdims=True) / m

      dA = np.dot(dZ,self.weights[i])           # propagate to the previous layer

    return dWs,dbs


  def update_parameters(self,lr,dws,dbs):
    """Apply one gradient-descent step in place.

    W_new = W_old - lr * dW and b_new = b_old - lr * db for every layer.
    """
    for i in range(len(self.weights)):
      self.weights[i] = self.weights[i] - lr*dws[i]
      self.biases[i] = self.biases[i] - lr*dbs[i]


  def fit(self,X,y,epochs,learning_rate,log_every=1000):
    """Train with full-batch gradient descent for a fixed number of epochs.

    Args:
      X: Feature matrix, one row per sample.
      y: Targets; a flat vector or a single-column matrix are both accepted.
      epochs: Number of gradient-descent steps.
      learning_rate: Step size passed to update_parameters.
      log_every: Print the loss every this many epochs; 0 or None disables
        printing.

    The loss measured before each update is stored in self.loss_history.
    Nothing is returned.
    """
    X = np.asarray(X, dtype=float)              # convert once, not on every epoch
    self.loss_history = []

    for i in range(epochs):
      activations = self.forward_prop(X)

      loss = self.loss(y,activations[-1])
      self.loss_history.append(float(loss))

      dws,dbs = self.backward_prop(y,activations)

      self.update_parameters(learning_rate,dws,dbs)

      if log_every and i%log_every == 0:
        print(f"epoch : {i} ->  loss : {loss} ")

  def predict(self,X):
    """Return the network output (last activation) for the given inputs."""
    activations = self.forward_prop(X)
    return activations[-1]


In [None]:
# 2 input features -> 4 hidden units -> 1 output.
nn = NeuralNetwork(layers_dims=[2, 4, 1])

In [None]:
# Full-batch gradient descent on the training split.
nn.fit(X_tr, y_tr, epochs=10000, learning_rate=0.5)

epoch : 0 ->  loss : 0.9842888691352663 
epoch : 1000 ->  loss : 0.976644117618508 
epoch : 2000 ->  loss : 0.9012895521138159 
epoch : 3000 ->  loss : 0.46116749539335017 
epoch : 4000 ->  loss : 0.08401547567700629 
epoch : 5000 ->  loss : 0.06269418936211132 
epoch : 6000 ->  loss : 0.06240148420961846 
epoch : 7000 ->  loss : 0.06239510990343265 
epoch : 8000 ->  loss : 0.062394731730983685 
epoch : 9000 ->  loss : 0.062394700706619996 


In [None]:
# Pass the test matrix directly: wrapping it in a list adds a leading axis that
# then has to be undone with a reshape hard-coded to the test-set size.
y_pred = nn.predict(X_te)
y_pred = y_pred.reshape(-1, 1)  # single column, one row per test sample

In [None]:
from sklearn.metrics import r2_score

# r2_score takes (y_true, y_pred) in that order; the score is not symmetric,
# so the ground truth must come first.
print(r2_score(y_te, y_pred))

0.9418770442701253
