In [5]:
import numpy as np

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is built from ``exp(-|z|)``, which never exceeds 1, so
    large-magnitude negative inputs do not overflow ``np.exp`` (the plain
    formula triggers an overflow RuntimeWarning there).

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``z``; every value lies in [0, 1].
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 the algebraically
    # equal form exp(z) / (1 + exp(z)) is used instead.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result into a NumPy scalar and leaves
    # arrays of higher rank as arrays.
    return result[()]

def sigmoid_derivative(z):
    """Return ``z * (1 - z)`` element-wise.

    This equals the slope of the sigmoid only when ``z`` is already a sigmoid
    *output* (an activation), not the pre-activation value.

    Args:
        z: Scalar or array of sigmoid activations.

    Returns:
        The product ``z * (1 - z)``, same shape as ``z``.
    """
    complement = 1 - z
    return z * complement

def mse(y_true, y_pred):
    """Return the mean over all elements of ``0.5 * (y_true - y_pred) ** 2``.

    Args:
        y_true: Target values.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        A scalar: the halved squared error averaged over every element.
    """
    residual = y_true - y_pred
    half_squared = 0.5 * residual ** 2
    return np.mean(half_squared)

def mse_derivative(y_true, y_pred):
    """Return the element-wise error ``y_pred - y_true``.

    This is the gradient of ``0.5 * (y_true - y_pred) ** 2`` with respect to
    ``y_pred`` for each element; no division by the number of elements is
    applied.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        ``y_pred - y_true``.
    """
    error = y_pred - y_true
    return error


# Seed NumPy's global RNG so the random draws below are reproducible.
np.random.seed(42)

# Layer widths: input -> hidden 1 -> hidden 2 -> output.
input_features, first_hidden_layer, second_hidden_layer, output_units = 4, 8, 6, 2

# Weights are standard-normal draws scaled by 0.1; biases start at zero with a
# leading axis of size 1. The three weight matrices are drawn in this order
# (before the data), which fixes their values under the seed above.
weights_input_to_hidden1 = 0.1 * np.random.randn(input_features, first_hidden_layer)
weights_hidden1_to_hidden2 = 0.1 * np.random.randn(first_hidden_layer, second_hidden_layer)
weights_hidden2_to_output = 0.1 * np.random.randn(second_hidden_layer, output_units)

bias_hidden1 = np.zeros(shape=(1, first_hidden_layer))
bias_hidden2 = np.zeros(shape=(1, second_hidden_layer))
bias_output = np.zeros(shape=(1, output_units))

# Step size applied to every gradient update.
learning_rate = 0.01

# Five random samples and five random targets (standard normal).
input_data = np.random.randn(5, input_features)
target_output = np.random.randn(5, output_units)

# Two passes over the data, updating the parameters after every single sample.
for epoch in range(2):
    # Running sum of the per-sample losses seen in this epoch.
    total_loss = 0

    for x_row, y_row in zip(input_data, target_output):
        # Restore a leading axis of size 1 so every product below is 2-D.
        x_sample = x_row.reshape(1, -1)
        y_sample = y_row.reshape(1, -1)

        # ---- Forward pass ----
        z1 = x_sample.dot(weights_input_to_hidden1) + bias_hidden1
        a1 = sigmoid(z1)
        z2 = a1.dot(weights_hidden1_to_hidden2) + bias_hidden2
        a2 = sigmoid(z2)
        z3 = a2.dot(weights_hidden2_to_output) + bias_output
        # The output layer is linear: no activation is applied to z3.
        y_pred = z3

        loss = mse(y_sample, y_pred)
        total_loss += loss

        # ---- Backward pass ----
        # All gradients are computed before any parameter is changed.
        dL_dy = mse_derivative(y_sample, y_pred)
        dL_dW_output = a2.T.dot(dL_dy)
        dL_db_output = dL_dy.sum(axis=0, keepdims=True)

        # sigmoid_derivative receives the activations (a2, a1), not z2/z1.
        # Despite its name, dL_da2 already includes the sigmoid slope, so it
        # is the gradient with respect to z2 (likewise dL_da1 and z1).
        dL_da2 = dL_dy.dot(weights_hidden2_to_output.T) * sigmoid_derivative(a2)
        dL_dW_hidden2 = a1.T.dot(dL_da2)
        dL_db_hidden2 = dL_da2.sum(axis=0, keepdims=True)

        dL_da1 = dL_da2.dot(weights_hidden1_to_hidden2.T) * sigmoid_derivative(a1)
        dL_dW_hidden1 = x_sample.T.dot(dL_da1)
        dL_db_hidden1 = dL_da1.sum(axis=0, keepdims=True)

        # ---- SGD step (in place) ----
        weights_hidden2_to_output -= learning_rate * dL_dW_output
        bias_output -= learning_rate * dL_db_output
        weights_hidden1_to_hidden2 -= learning_rate * dL_dW_hidden2
        bias_hidden2 -= learning_rate * dL_db_hidden2
        weights_input_to_hidden1 -= learning_rate * dL_dW_hidden1
        bias_hidden1 -= learning_rate * dL_db_hidden1

    # Report the average per-sample loss for the epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(input_data):.4f}")

# Dump every trained parameter, layer by layer, with a blank line between layers.
print("Updated Weights and Biases :")
for layer_index, layer_params in enumerate((
    (
        ("weights_input_to_hidden1 : ", weights_input_to_hidden1),
        ("bias_hidden1 : ", bias_hidden1),
    ),
    (
        ("weights_hidden1_to_hidden2 : ", weights_hidden1_to_hidden2),
        ("bias_hidden2 : ", bias_hidden2),
    ),
    (
        ("weights_hidden2_to_output : ", weights_hidden2_to_output),
        ("bias_output : ", bias_output),
    ),
)):
    for label, value in layer_params:
        print(label, value)


Epoch 1, Loss: 0.5386
Epoch 2, Loss: 0.5312
Updated Weights and Biases :
weights_input_to_hidden1 :  [[ 0.04966433 -0.01382461  0.06476445  0.15230337 -0.02341451 -0.02341329
   0.15792565  0.07674131]
 [-0.04697119  0.05425411 -0.04634599 -0.0465666   0.02419785 -0.19132775
  -0.1724822  -0.05623418]
 [-0.10139598  0.03138365 -0.0908468  -0.14114821  0.14655595 -0.0225393
   0.00684489 -0.14258857]
 [-0.05439493  0.01112887 -0.11506912  0.03750452 -0.0600498  -0.02920624
  -0.06023315  0.18532573]]
bias_hidden1 :  [[-8.32895596e-05 -1.05155253e-05 -4.43807468e-06  2.36565202e-05
   5.77345153e-06 -3.12397181e-06  2.53828286e-05 -1.13595790e-05]]
weights_hidden1_to_hidden2 :  [[-0.00137982 -0.10539524  0.08183304 -0.1217385   0.02077232 -0.19576489]
 [-0.13283805  0.02007975  0.07340227  0.01751509 -0.01166807 -0.02987361]
 [-0.14787457 -0.07162108 -0.04649469  0.10606398  0.03425941 -0.17608154]
 [ 0.03236517 -0.03810741 -0.0680907   0.06150281  0.10296599  0.09329129]
 [-0.0839193  -