In [139]:
import numpy as np

In [140]:
np.random.seed(42) # To generate the random number(Ensures Reproducibility)
X_ROW, X_COLUMN =[2,2000] # 2 features(rows) and 2000 samples (columns) 

#### 1. Generate Raw Data

##### i. Creating Input Dataset

In [141]:
X_RAW =np.random.rand(X_ROW, X_COLUMN)*100  # 2*2000 matrix that has values between 0-100
X_RAW

array([[37.45401188, 95.07143064, 73.19939418, ...,  6.89580164,
         5.70547212, 28.21870747],
       [26.17056837, 24.69787991, 90.62545805, ..., 39.45723153,
        52.99405869, 16.13673584]], shape=(2, 2000))

##### ii. Creating Output Data(Y_Raw)

In [142]:
Y_Raw =np.concatenate(([(X_RAW[0,:])+(X_RAW[1,:])],[(X_RAW[0,:])-(X_RAW[1,:])],np.abs([(X_RAW[0,:])-(X_RAW[1,:])])))
Y_Raw

array([[ 63.62458026, 119.76931055, 163.82485223, ...,  46.35303317,
         58.69953081,  44.35544331],
       [ 11.28344351,  70.37355073, -17.42606387, ..., -32.5614299 ,
        -47.28858658,  12.08197163],
       [ 11.28344351,  70.37355073,  17.42606387, ...,  32.5614299 ,
         47.28858658,  12.08197163]], shape=(3, 2000))

In [143]:
Y_Row, Y_Col = Y_Raw.shape
Y_Raw.shape

(3, 2000)

#### 2. Train_Test_Split

In [144]:
# Training 70% - Testing 30%
Train_rato= 0.7

In [145]:
Num_data_train = int(Train_rato*X_COLUMN)
X_RAW_Train = X_RAW[:,0:Num_data_train]
X_RAW_Test = X_RAW[:,Num_data_train:]
Y_RAW_Train = Y_Raw[:,0:Num_data_train]
Y_RAW_Test = Y_Raw[:,Num_data_train:]

#### 3. Standardization(Scaling Data)

In [146]:
class scalar: #stores the mean and std of each feature(rows)
  def __init__(self,mean,std):
    self.mean = mean
    self.std = std if std != 0 else 1 #Avoid division by zero.

def get_scalar(row): #Calculates mean and std.
  mean = np.mean(row)
  std =np.std(row)
  return scalar(mean,std) # returns instance of scalar class

def standarize(data,scalar):
  return(data-(scalar.mean)/scalar.std)
def unstandarize(data,scalar):
  return(data*scalar.std) + scalar.mean



#### 4. Applying Standardization on train test

In [147]:
X_scalers =[get_scalar(X_RAW_Train[row,:]) for row in range(X_ROW)]
X_Train = np.array([standarize(X_RAW_Train[row,:],X_scalers[row])for row in range(X_ROW)])


In [148]:
Y_scalers =[get_scalar(Y_RAW_Train[row,:]) for row in range(Y_Row)]
Y_Train = np.array([standarize(Y_RAW_Train[row,:],Y_scalers[row])for row in range(Y_Row)])


In [149]:
X_test = np.array([standarize(X_RAW_Test[row,:],X_scalers[row])for row in range(X_ROW)])
Y_test = np.array([standarize(Y_RAW_Test[row,:],Y_scalers[row])for row in range(Y_Row)])


In [150]:
print([X_Train[row,:].mean() for row in range(X_ROW)]) #Verifying 

[np.float64(48.11746678827335), np.float64(48.07886978975062)]


In [151]:
print([X_Train[row,:].std() for row in range(X_ROW)]) #Verifying

[np.float64(29.387072624266093), np.float64(28.779683508663084)]


In [152]:
print([Y_Train[row,:].mean() for row in range(Y_Row)])#Verifying
print([Y_Train[row,:].std() for row in range(Y_Row)])#Verifying

[np.float64(97.20783555165191), np.float64(0.0028536514424484746), np.float64(32.078745112385825)]
[np.float64(41.263859795310545), np.float64(41.00041847784436), np.float64(23.645246722265313)]


#### 4.Neural Network Construction
##### It consists of Layer using W = Weight Matrix, b = bais vector, Neural net

##### i. Defining the Neural Network Layer

In [153]:
#This layer class is used to define a single layer in a neural network.
#layer_index - Identifies which layer this is (e.g., input layer = 0, hidden layer = 1, etc.).
# is_output - Boolean; True if this is the output layer.
# input_dim - Number of neurons in the previous layer.
# output_dim - Number of neurons in this layer.
# activation - Activation function to be applied (e.g., ReLU, Sigmoid).
class layer:
  def __init__(self,layer_index,is_output,input_dim,output_dim,activation):
    self.layer_index =layer_index
    self.is_output = is_output
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.activation = activation
    self.W = np.random.randn(output_dim, input_dim) * 0.01  # Weight initialization
    self.b = np.zeros((output_dim, 1))  # Bias initialization
    self.dW = np.zeros_like(self.W)  # Gradient for W (initialized to 0)
    self.db = np.zeros_like(self.b)  # Gradient for b (initialized to 0)

#Initializing Weights and Biases for layers except input layer.
# formula W^l = randn(o/p_dim,ip_dim)/sqrt(2/ip_dim)
    if layer_index != 0:
      self.W = np.random.randn(output_dim, input_dim) * np.sqrt(2/input_dim)
      self.b = np.random.randn(output_dim, 1)* np.sqrt(2/input_dim) #(np.sqrt(2/i/p dim) is he initialization, good for RelU-based networks)


##### ii. Building the Neural Network

In [154]:
layers_dim = (X_ROW, 4, 4, Y_Row)  # Define the structure of the neural network(ip-hidden-hidden-output)
Neural_Net = []

for layer_index in range(len(layers_dim)):
    if layer_index == 0:  # Input layer (No weights needed)
        Neural_Net.append(layer(layer_index, False, 0, layers_dim[layer_index], 'irrelevant'))
    elif layer_index + 1 == len(layers_dim):  # Output layer
        Neural_Net.append(layer(layer_index, True, layers_dim[layer_index - 1], layers_dim[layer_index], activation='linear'))
    else:  # Hidden layers
        Neural_Net.append(layer(layer_index, False, layers_dim[layer_index - 1], layers_dim[layer_index], activation='relu'))

# Predicted number of trainable parameters (weights + biases)
pred_n_param = sum([(layers_dim[layer_index] + 1) * layers_dim[layer_index + 1] for layer_index in range(len(layers_dim) - 1)])

# Actual number of parameters from the Neural_Net
act_n_param = sum([Neural_Net[layer_index].W.size + Neural_Net[layer_index].b.size for layer_index in range(1, len(layers_dim))])

print(f'Predicted number of hyperparameters: {pred_n_param}')
print(f'Actual Number of hyperparameters: {act_n_param}')
print(f'Number of data: {X_COLUMN}')

if act_n_param >= X_COLUMN:
    raise Exception("It will overfit.")



Predicted number of hyperparameters: 47
Actual Number of hyperparameters: 47
Number of data: 2000


##### 5.Forward Propagation

In [155]:
#Forward Propagation:
#Formula: a^l = (sigma)^l(z^l)= (W^l)^Ta^(l-1) + b^l i.e. Z = W * A_prev +b
def activation(input_,act_function):
    if act_function == 'relu':
        return np.maximum(input_,np.zeros(input_.shape))
    elif act_function == 'linear':
        return input_
    else:
        raise Exception('Activation function is not defined')

def forward_prop(input_vec, layers_dim = layers_dim, Neural_Net = Neural_Net):
    Neural_Net[0].A = input_vec
    for layer_index in range(1,len(layers_dim)):
        Neural_Net[layer_index].Z = np.add(np.dot(Neural_Net[layer_index].W, Neural_Net[layer_index-1].A), Neural_Net[layer_index].b)
        Neural_Net[layer_index].A = activation(Neural_Net[layer_index].Z, Neural_Net[layer_index].activation)
    return Neural_Net[layer_index].A

##### 6.Backward Propagation

In [156]:
#Formula: az^l = {a^l-y if l= L and e is MSE
#     (W^(l+1))^T(delta)z^(l+1) . sigma'(z^l) if l<L}
#      NOW use deltaW^l= delta z^l(a^(l-1)^T) and deltab^l=deltaz^l

def get_loss(Y, Y_Hat, metric='mse'):
    if metric == 'mse':
        individual_loss = 0.5 * (Y_Hat - Y) ** 2
        loss = np.mean([np.linalg.norm(individual_loss[:, col], 2) for col in range(individual_loss.shape[1])])  
        return loss  #  Ensure the function returns a value
    else:
        raise Exception('Loss metric is not defined')

def get_dZ_from_loss(Y,Y_HAT,metric):
    if metric == 'mse':
        return Y_HAT - Y
    else:
        raise Exception('Loss metric is not defined')

def get_deactivation(A, act_function):
    if act_function == 'relu':
        return np.maximum(np.sign(A),np.zeros(A.shape))
    elif act_function == 'linear':
        return np.ones(A.shape)
    else:
        raise Exception('Activation function is not defined')

def backward_prop(Y,Y_Hat, metric = 'mse', layers_dim = layers_dim, Neural_Net = Neural_Net, Num_data_train = Num_data_train ):
    for layer_index in range(len(layers_dim)-1,0,1):
        if layer_index +1 == len(layers_dim):
            dZ = get_dZ_from_loss(Y,Y_Hat,metric)
        else:
            dZ = np.multiply(np.dot(Neural_Net[layer_index + 1].W.T, dZ),get_deactivation(Neural_Net[layer_index].A, Neural_Net[layer_index].activation))

        dW = np.dot(dZ, Neural_Net[layer_index -1].A.T) / Num_data_train 
        db = np.sum(dZ,axis = 1, keepdims= True)/ Num_data_train 

        Neural_Net[layer_index].dW = dW
        Neural_Net[layer_index].db = db

##### 5.Update Rules
###### W = W - learning_rate * dW
###### b = b - learning_rate * db


#### 6.Epoch

In [157]:
#Epoch
Learning_Rate = 0.001
max_epoch = 1000000
for epoch in range(1,max_epoch + 1):
    Y_Hat_Train = forward_prop(X_Train)
    backward_prop(Y_Train,Y_Hat_Train)
    
    for layer_index in range(1,len(layers_dim)):
        Neural_Net[layer_index].W = Neural_Net[layer_index].W - Learning_Rate * Neural_Net[layer_index].dW
        Neural_Net[layer_index].b = Neural_Net[layer_index].b - Learning_Rate * Neural_Net[layer_index].db
    if epoch % 1000000 == 0:
        print(f'{get_loss(Y_Train, Y_Hat_Train): .4f}')

 6111.9770


#### 7.Test Model

In [158]:
print(get_loss(Y_test, forward_prop(X_test)))

6081.700199535426


#### Prediction

In [None]:
# def predict(X_Raw_Any):
#     X_Any = np.array([standarize(X_Raw_Any[row,:],X_scalers[row]) for row in range(X_ROW)])
#     Y_Hat = forward_prop(X_Any)
#     Y_Hat_Any = np.array([standarize(X_Raw_Any[row,:],Y_scalers[row]) for row in range(Y_Row)])
#     return Y_Hat_Any

# predict(np.array([[30,70],[70,30],[3,5],[888,122]]).T)