<a href="https://colab.research.google.com/github/Soumya080/numpy-ml-foundations/blob/main/Neural_Network/IMPLEMENTING%20THE%20NEURAL%20NETWORK%20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
# Training inputs: all four combinations of two binary features, one per row.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Layer sizes: 2 input features -> 3 hidden units -> 1 output unit.
input_dim, hidden_dim, output_dim = 2, 3, 1

# Parameters. Weights are small random normal draws (scaled by 0.1);
# biases start at zero and are kept as row vectors so they broadcast
# across the sample axis.
W1 = 0.1 * np.random.randn(input_dim, hidden_dim)
b1 = np.zeros((1, hidden_dim))
W2 = 0.1 * np.random.randn(hidden_dim, output_dim)
b2 = np.zeros((1, output_dim))



## PRE-ACTIVATION (OBTAINING Z1)
def pre_activation(X, W1, b1):
    """Return the hidden layer's pre-activation ``X . W1 + b1``.

    Args:
        X: input matrix, one sample per row.
        W1: input-to-hidden weight matrix.
        b1: hidden bias, broadcast over the rows of the product.

    Returns:
        The affine transform of ``X`` before any non-linearity is applied.
    """
    weighted_sum = np.dot(X, W1)
    return weighted_sum + b1

## ACTIVATION

def sigmoid(z1):
    """Element-wise logistic function ``1 / (1 + exp(-z))``, overflow-free.

    The direct formula evaluates ``exp(-z)``, which overflows to ``inf``
    (and emits a RuntimeWarning) once ``z`` is below roughly -709. This
    version only ever exponentiates a non-positive number, so it stays
    finite for any finite input while returning the same values.

    Args:
        z1: scalar or array of pre-activations.

    Returns:
        Values in [0, 1] with the same shape as ``z1``.
    """
    z = np.asarray(z1)
    # exp(-|z|) lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z).  z < 0: e^z / (1 + e^z) -- the same quantity
    # rewritten so the exponent is never positive.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar, so a
    # scalar input still yields a scalar; n-d arrays are returned unchanged.
    return result[()]



## OUTPUT-LAYER PRE-ACTIVATION (OBTAINING Z2)

def re_activation(z1, W2, b2):
    """Return the output layer's pre-activation ``z1 . W2 + b2``.

    Note: although the first parameter is named ``z1``, this notebook
    calls the function with the hidden *activations* ``a1``.

    Args:
        z1: hidden-layer values, one sample per row.
        W2: hidden-to-output weight matrix.
        b2: output bias, broadcast over the rows of the product.

    Returns:
        The affine transform feeding the final activation.
    """
    projected = np.dot(z1, W2)
    return projected + b2

##  FINAL ACTIVATION
def final_activation(z2):
    """Apply the sigmoid to the output pre-activation ``z2``.

    Returns:
        The network output: ``sigmoid(z2)``, element-wise.
    """
    return sigmoid(z2)



# print("a1 shape:", a1.shape)


# ---- FORWARD PASS EXECUTION ----

# Hidden layer: affine transform of the inputs, then the sigmoid.
z1 = pre_activation(X, W1, b1)
a1 = sigmoid(z1)

# Output layer: affine transform of the hidden activations, then the sigmoid.
z2 = re_activation(a1, W2, b2)
output = final_activation(z2)

# print("output:\n", output)



In [2]:
### STAGE 2 BINARY CROSS ENTROPY
# Ground-truth labels for the four samples, stored as a (4, 1) column.
y = np.array([0, 1, 1, 0]).reshape(-1, 1)
def BCE(y, output):
    """Mean binary cross-entropy between labels ``y`` and predictions ``output``.

    Predictions are clipped to ``[1e-9, 1 - 1e-9]`` first so that neither
    logarithm is ever evaluated at zero.

    Args:
        y: true labels (0 or 1).
        output: predicted probabilities, broadcast-compatible with ``y``.

    Returns:
        The loss averaged over every element.
    """
    tiny = 1e-9
    probs = np.clip(output, tiny, 1 - tiny)
    per_element = -y * np.log(probs) - (1 - y) * np.log(1 - probs)
    return np.mean(per_element)

In [3]:
## STAGE 3 OUTPUT GRADIENTS
def output_gradients(y, y_pred=None, hidden=None):
    """Gradients of the mean BCE loss with respect to the output layer.

    For a sigmoid output trained with binary cross-entropy the error at
    the output pre-activation simplifies to ``prediction - label``.

    Both parameter gradients are averaged over the sample axis so they
    match the mean taken in ``BCE``. Previously ``dw2`` was a sum over
    samples while ``db2`` was a mean, which put the two on different
    scales.

    Args:
        y: true labels, one row per sample.
        y_pred: network predictions. Defaults to the notebook-level
            ``output`` so existing ``output_gradients(y)`` calls still work.
        hidden: hidden activations that produced ``y_pred``. Defaults to
            the notebook-level ``a1``.

    Returns:
        ``(dz2, dw2, db2)``: the per-sample output error, the weight
        gradient (same shape as ``W2``) and the bias gradient as a row
        vector (same shape as ``b2``).
    """
    # Fall back to the results of the most recent forward pass.
    if y_pred is None:
        y_pred = output
    if hidden is None:
        hidden = a1

    dz2 = y_pred - y
    n_samples = dz2.shape[0]
    dw2 = np.dot(hidden.T, dz2) / n_samples
    # Average over samples only; keepdims keeps the (1, output_dim) shape.
    db2 = np.mean(dz2, axis=0, keepdims=True)
    return dz2, dw2, db2
# dz2 = output - y
##Gradient w.r.t. W2

In [4]:
# HIDDEN LAYER GRADIENTS
def hidden_layer_gradients(dz2, W2, a1, inputs=None):
    """Backpropagate the output error through the sigmoid hidden layer.

    The bias gradient is averaged over the sample axis only, giving one
    value per hidden unit. Previously ``np.mean(dz1)`` also averaged
    across hidden units, so every hidden bias received the same scalar
    gradient. The weight gradient is likewise averaged over samples so
    both gradients share the scale of a mean loss.

    Args:
        dz2: error at the output pre-activation, one row per sample.
        W2: hidden-to-output weight matrix.
        a1: hidden activations (sigmoid outputs) from the forward pass.
        inputs: the inputs that produced ``a1``. Defaults to the
            notebook-level ``X`` so existing three-argument calls still work.

    Returns:
        ``(dw1, db1, dz1, da1)``: the weight gradient (same shape as
        ``W1``), the bias gradient as a row vector (same shape as ``b1``),
        the error at the hidden pre-activation, and the error at the
        hidden activation.
    """
    if inputs is None:
        inputs = X

    da1 = np.dot(dz2, W2.T)
    # The sigmoid derivative expressed through its own output: a * (1 - a).
    dz1 = da1 * a1 * (1 - a1)
    n_samples = dz1.shape[0]
    dw1 = np.dot(inputs.T, dz1) / n_samples
    # Average over samples only; keepdims keeps the (1, hidden_dim) shape.
    db1 = np.mean(dz1, axis=0, keepdims=True)

    return dw1, db1, dz1, da1



In [5]:
def update_parameters(W1, b1, W2, b2, dw1, db1, dw2, db2, learning_rate=0.01):
    """Take one gradient-descent step on every parameter.

    Each parameter ``p`` with gradient ``g`` becomes
    ``p - learning_rate * g``. The inputs are not modified in place.

    Returns:
        The updated ``(W1, b1, W2, b2)``.
    """
    params = (W1, b1, W2, b2)
    grads = (dw1, db1, dw2, db2)
    return tuple(p - learning_rate * g for p, g in zip(params, grads))


In [6]:
num_epochs = 1000
learning_rate = 0.1

# Full-batch gradient descent: every epoch uses all rows of X.
# NOTE(review): the recorded run shows the loss flat at ~0.6931 (= ln 2),
# i.e. predictions stay near 0.5. Check the gradient helpers, the
# initialisation scale and the step size before trusting this run.
for i in range(num_epochs):
    # Forward pass. The results are bound to module-level names because
    # output_gradients() reads `output` and `a1` from there.
    z1 = pre_activation(X, W1, b1)
    a1 = sigmoid(z1)
    z2 = re_activation(a1, W2, b2)
    output = final_activation(z2)

    # Loss is measured before this epoch's update is applied.
    loss = BCE(y, output)

    # Backward pass: output layer first, then the hidden layer.
    dz2, dw2, db2 = output_gradients(y)
    dw1, db1, dz1, da1 = hidden_layer_gradients(dz2, W2, a1)

    # Gradient-descent step on all four parameters.
    W1, b1, W2, b2 = update_parameters(
        W1, b1, W2, b2, dw1, db1, dw2, db2, learning_rate
    )

    # Report progress every 100 epochs, starting with epoch 0.
    if not i % 100:
        print(f"Iteration {i} | Loss: {loss:.4f}")

Iteration 0 | Loss: 0.6938
Iteration 100 | Loss: 0.6931
Iteration 200 | Loss: 0.6931
Iteration 300 | Loss: 0.6931
Iteration 400 | Loss: 0.6931
Iteration 500 | Loss: 0.6931
Iteration 600 | Loss: 0.6931
Iteration 700 | Loss: 0.6931
Iteration 800 | Loss: 0.6931
Iteration 900 | Loss: 0.6931
