In [None]:
#Neural network with 1 input layer, 2 hidden layer and 1 output layer

In [None]:
#importing required libraries
import numpy as np
import matplotlib.pyplot as plt

In [None]:
#Generating random X values and y values(with noise)

np.random.seed(42)

X =np.random.rand(100, 2)
matrix= np.array([[2], [3]])
y = X @ matrix + np.random.randn(100, 1) * 0.1

In [None]:
#neurons_in_each_layer
input_neuron=2
hl1_neuron=5
hl2_neuron=5
output_neuron=1

In [None]:
#weights and biases initialization

#i/p to hidden layer1
w1 = np.random.randn(input_neuron,hl1_neuron) *0.01
b1 = np.zeros((1,hl1_neuron))

#hidden layer 1 to hidden layer 2
w2=np.random.randn(hl1_neuron,hl2_neuron) *0.01
b2= np.zeros((1,hl2_neuron))

#hidden layer2 to output layer
w3=np.random.randn(hl2_neuron,output_neuron) *0.01
b3 = np.zeros((1,output_neuron))


In [None]:
#hyperparameter_initialization
learning_rate=0.001


#relu_activation
def relu(x):
    return np.maximum(0, x)


In [None]:
#forward_propagation_with_relu_activation_function
def forward_prop(X, w1, b1, w2, b2, w3, b3):
    Z1 = np.dot(X, w1) + b1
    A1 = relu(Z1)

    Z2 = np.dot(A1, w2) + b2
    A2 = relu(Z2)

    y_pred = np.dot(A2, w3) + b3
    return y_pred, A1, A2

In [None]:
#cost_computation_MSE
def cost_func(y,y_pred):
  return np.mean((y-y_pred)**2)

In [None]:
#backpropagation_to_compute_gradients
def back_prop(X,y,y_pred,A1,A2,w1,w2,w3):

    # Output layer gradients
    dy_pred = (-2 / len(y)) * (y - y_pred)  # Gradient of loss w.r.t. y_pred
    dw3 = np.dot(A2.T, dy_pred)  # Gradient of loss w.r.t. w3
    db3 = np.sum(dy_pred, axis=0, keepdims=True)  # Gradient of loss w.r.t. b3

    # Hidden layer 2 gradients
    dA2 = np.dot(dy_pred, w3.T)
    dZ2 = dA2 * (A2 > 0)  # Derivative of ReLU
    dw2 = np.dot(A1.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    # Hidden layer 1 gradients
    dA1 = np.dot(dZ2, w2.T)
    dZ1 = dA1 * (A1 > 0)  # Derivative of ReLU
    dw1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    return dw1,db1,dw2,db2,dw3,db3

In [None]:
def gradient_descent(X, y, w1, b1, w2, b2, w3, b3, learning_rate, iteration_list):
    final_cost = None

    for iteration in iteration_list:
        print(f"\nFor {iteration} iterations:")
        for i in range(iteration):
            y_pred, A1, A2 = forward_prop(X, w1, b1, w2, b2, w3, b3)

            # Computing cost
            cost = cost_func(y, y_pred)

            # Backward pass
            dw1, db1, dw2, db2, dw3, db3 = back_prop(X, y, y_pred, A1, A2, w1, w2, w3)

            # Updating weights and biases
            w1 -= learning_rate * dw1
            b1 -= learning_rate * db1
            w2 -= learning_rate * dw2
            b2 -= learning_rate * db2
            w3 -= learning_rate * dw3
            b3 -= learning_rate * db3

            # Print cost for every 100 iterations
            if i % 100 == 0:
                print(f"  Iteration {i}: Cost = {cost:.4f}")

        # cost for current iteration
        final_cost = cost


    return w1, b1, w2, b2, w3, b3, final_cost

#iteration list
iterations_list = [200, 400, 800,2000,5000,6000,7000,8000,9000]

#costs for different iterations
w1, b1, w2, b2, w3, b3, last_cost = gradient_descent(X, y, w1, b1, w2, b2, w3, b3, learning_rate, iterations_list)



For 200 iterations:
  Iteration 0: Cost = 7.0182
  Iteration 100: Cost = 5.0637

For 400 iterations:
  Iteration 0: Cost = 3.7541
  Iteration 100: Cost = 2.8766
  Iteration 200: Cost = 2.2887
  Iteration 300: Cost = 1.8947

For 800 iterations:
  Iteration 0: Cost = 1.6307
  Iteration 100: Cost = 1.4538
  Iteration 200: Cost = 1.3353
  Iteration 300: Cost = 1.2559
  Iteration 400: Cost = 1.2027
  Iteration 500: Cost = 1.1671
  Iteration 600: Cost = 1.1432
  Iteration 700: Cost = 1.1272

For 2000 iterations:
  Iteration 0: Cost = 1.1165
  Iteration 100: Cost = 1.1093
  Iteration 200: Cost = 1.1045
  Iteration 300: Cost = 1.1013
  Iteration 400: Cost = 1.0992
  Iteration 500: Cost = 1.0977
  Iteration 600: Cost = 1.0968
  Iteration 700: Cost = 1.0961
  Iteration 800: Cost = 1.0957
  Iteration 900: Cost = 1.0954
  Iteration 1000: Cost = 1.0952
  Iteration 1100: Cost = 1.0951
  Iteration 1200: Cost = 1.0950
  Iteration 1300: Cost = 1.0949
  Iteration 1400: Cost = 1.0949
  Iteration 1500: C

In [None]:

"""
 200 Iterations
-Cost Behavior : The cost starts at 7.0182 and drops to 5.0637 after 100 iterations.
-Interpretation : Initial reduction shows progress, but the cost remains high, indicating more optimization is needed.

400 Iterations
- Cost Behavior : The cost decreases from 3.7541 at iteration 0 to 1.8947 at iteration 300.
- Interpretation : Faster improvement, but the rate slows after 100 iterations which shows the model is still optimizing.

800 Iterations
- Cost Behavior: The cost drops from 1.6307 to 1.1272 at iteration 700.
- Interpretation: Slower cost reduction indicates nearing convergence, with fewer improvements per iteration.

2000 Iterations
-  Cost Behavior : The cost stabilizes at 1.0952 after 1000 iterations.
-  Interpretation : The model reaches a plateau, showing minimal improvement with further iterations.

5000 Iterations
-  Cost Behavior : The cost stays at 1.0948.
-  Interpretation : The model has converged, with no significant improvement after many iterations.

Conclusion
- The model shows rapid improvement in the first few hundred iterations, then stabilizes as it converges.
Further training beyond 1000 iterations gives small gains, suggesting that additional iterations aren't much needed.
"""

"\n 200 Iterations \n-Cost Behavior : The cost starts at 7.0182 and drops to 5.0637 after 100 iterations.\n-Interpretation : Initial reduction shows progress, but the cost remains high, indicating more optimization is needed.\n\n400 Iterations\n- Cost Behavior : The cost decreases from 3.7541 at iteration 0 to 1.8947 at iteration 300.\n- Interpretation : Faster improvement, but the rate slows after 100 iterations, suggesting the model is still optimizing.\n\n800 Iterations\n- Cost Behavior: The cost drops from 1.6307 to 1.1272 at iteration 700.\n- Interpretation: Slower cost reduction indicates nearing convergence, with fewer improvements per iteration.\n\n2000 Iterations \n-  Cost Behavior : The cost stabilizes at 1.0952 after 1000 iterations.\n-  Interpretation : The model reaches a plateau, showing minimal improvement with further iterations.\n\n5000 Iterations \n-  Cost Behavior : The cost stays at 1.0948.\n-  Interpretation : The model has converged, with no significant improvemen

In [None]:
#final weights and biases
print(f"Final weight1:{w1}")
print(f"Final bias1:\n{b1}")
print(f"Final weight2:\n{w2}")
print(f"Final bias2:\n{b2}")
print(f"Final weight3:\n{w3}")
print(f"Final bias3:\n{b3}")

Final weight1:[[ 0.0141256   0.00551461 -0.0150777  -0.00484234  0.02032821]
 [-0.01638974 -0.00191787  0.00827191 -0.0092693   0.0095872 ]]
Final bias1:
[[ 0.00018753 -0.00134882 -0.00079552  0.          0.00041813]]
Final weight2:
[[-0.03191215 -0.01024388 -0.00252987 -0.01247784  0.01632187]
 [-0.01068308 -0.00440044  0.00130211  0.01441228 -0.01436002]
 [ 0.01186759  0.00010233 -0.0098151   0.00462051  0.0019906 ]
 [-0.00600217  0.00069802 -0.00385314  0.00113517  0.00662131]
 [ 0.02442133 -0.01237815  0.02132093 -0.01952094 -0.00151933]]
Final bias2:
[[ 2.73300774e-02  0.00000000e+00 -6.01991999e-04 -9.78473158e-05
  -1.42437482e-04]]
Final weight3:
[[ 0.03176745]
 [ 0.00280992]
 [-0.00622416]
 [-0.0020798 ]
 [-0.00492908]]
Final bias3:
[[2.43292377]]
