In [10]:
import numpy as np

# =========================
# Dense Layer
# =========================
class Layer_Dense:
    """Fully connected layer that traces every step and updates itself.

    The layer is built from explicit weight and bias values rather than
    random initialisation. ``forward`` computes ``inputs @ weights + biases``
    and ``backward`` computes the gradients, prints them, and immediately
    applies a plain gradient-descent step to the parameters.
    """

    def __init__(self, n_inputs, n_neurons, weights, biases):
        """Store the supplied parameters as float arrays.

        Args:
            n_inputs: Declared number of input features. Not used by the
                layer; the actual shape comes from ``weights``.
            n_neurons: Declared number of neurons. Not used by the layer.
            weights: Array-like used as the right operand of
                ``np.dot(inputs, weights)``.
            biases: Array-like added to the dot product. Both a row
                ``[[b0, b1]]`` and a flat ``[b0, b1]`` are accepted.
        """
        self.weights = np.array(weights, dtype=float)
        self.biases = np.array(biases, dtype=float)

    def forward(self, inputs):
        """Compute ``self.output`` for ``inputs`` and print the trace.

        Args:
            inputs: Array-like batch; converted to a float array and kept
                on ``self.inputs`` for the backward pass.
        """
        self.inputs = np.array(inputs, dtype=float)
        self.output = np.dot(self.inputs, self.weights) + self.biases
        print("\n--- Forward Dense ---")
        print(f"Inputs: {self.inputs}")
        print(f"Weights:\n{self.weights}")
        print(f"Biases:\n{self.biases}")
        print(f"Output (Z): {self.output}")

    def backward(self, dvalues, learning_rate=0.1):
        """Compute gradients, print them, and update the parameters.

        Sets ``self.dweights``, ``self.dbiases`` and ``self.dinputs``, then
        subtracts ``learning_rate`` times the gradients from the weights
        and biases in place.

        Args:
            dvalues: Gradient of the loss with respect to this layer's
                output, as produced by the following layer or the loss.
            learning_rate: Step size for the in-place parameter update.
        """
        print("\n--- Backward Dense ---")
        print(f"dvalues from next layer: {dvalues}")
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # Must be computed before the update so it uses the weights that
        # produced the forward output.
        self.dinputs = np.dot(dvalues, self.weights.T)
        print(f"dWeights:\n{self.dweights}")
        print(f"dBiases:\n{self.dbiases}")
        print(f"dInputs (to pass back): {self.dinputs}")

        # Update
        self.weights -= learning_rate * self.dweights
        # dbiases is always 2-D (keepdims=True). Reshape it to the stored
        # bias shape so that flat (1-D) biases can be updated in place
        # instead of failing with a broadcast error.
        self.biases -= learning_rate * self.dbiases.reshape(self.biases.shape)
        print(f"Updated Weights:\n{self.weights}")
        print(f"Updated Biases:\n{self.biases}")


# =========================
# ReLU Activation
# =========================
class Activation_ReLU:
    """Element-wise ReLU activation with printed forward/backward traces."""

    def forward(self, inputs):
        """Apply ``max(0, x)`` element-wise and store the result.

        Args:
            inputs: Array-like pre-activation values. Stored as an array on
                ``self.inputs`` so the backward pass can build its mask.
        """
        # Coerce once so backward() can evaluate `self.inputs <= 0` even
        # when the caller passed a plain list. An ndarray is stored as-is.
        self.inputs = np.asarray(inputs)
        self.output = np.maximum(0, self.inputs)
        print("\n--- Forward ReLU ---")
        # Print the caller's object so the trace format does not change.
        print(f"Input: {inputs}")
        print(f"Output (after ReLU): {self.output}")

    def backward(self, dvalues):
        """Pass the gradient through where the forward input was positive.

        Sets ``self.dinputs`` to a copy of ``dvalues`` with entries zeroed
        wherever the stored forward input was less than or equal to zero.

        Args:
            dvalues: Array-like gradient with respect to this activation's
                output.
        """
        print("\n--- Backward ReLU ---")
        print(f"dvalues (from next layer): {dvalues}")
        # np.array copies, so the caller's gradient is never modified, and
        # it accepts lists as well as arrays.
        self.dinputs = np.array(dvalues)
        self.dinputs[self.inputs <= 0] = 0
        print(f"dInputs (after ReLU mask): {self.dinputs}")


# =========================
# Mean Squared Error Loss
# =========================
class Loss_MSE:
    """Mean squared error loss with printed forward/backward traces."""

    def forward(self, y_pred, y_true):
        """Return the mean of the squared errors over every element.

        Args:
            y_pred: Array-like predictions.
            y_true: Array-like targets, broadcast-compatible with
                ``y_pred``.

        Returns:
            The scalar mean of ``(y_true - y_pred) ** 2``.
        """
        # Convert to arrays so plain lists work for both arguments.
        errors = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
        loss = np.mean(errors ** 2)
        print("\n--- Loss ---")
        # Print the caller's objects so the trace format does not change.
        print(f"y_true: {y_true}")
        print(f"y_pred: {y_pred}")
        print(f"Loss: {loss:.4f}")
        return loss

    def backward(self, y_pred, y_true):
        """Store the gradient of the loss with respect to ``y_pred``.

        Sets ``self.dinputs`` to ``-2 * (y_true - y_pred) / N``, where
        ``N`` is the total number of elements in ``y_pred``.

        Args:
            y_pred: Array-like predictions.
            y_true: Array-like targets, broadcast-compatible with
                ``y_pred``.
        """
        predictions = np.asarray(y_pred, dtype=float)
        targets = np.asarray(y_true, dtype=float)
        # forward() averages over every element, so the gradient must be
        # divided by the total element count. Dividing by the sample count
        # alone over-scales it whenever there is more than one output.
        count = predictions.size
        self.dinputs = -2 * (targets - predictions) / count
        print("\n--- Backward Loss ---")
        print(f"dInputs (gradient wrt predictions): {self.dinputs}")


# =========================
# MAIN (Training Loop)
# =========================
# Print arrays with three decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

# One training example: three input features and one scalar target.
X = [[1, 2, 3]]
y_true = [[5]]

# Hidden layer (3 -> 2), built from fixed starting parameters.
layer1 = Layer_Dense(
    3,
    2,
    [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
    [[0.1, 0.1]],
)

# Output layer (2 -> 1).
layer2 = Layer_Dense(2, 1, [[0.7], [0.8]], [[0.1]])

activation1 = Activation_ReLU()
loss_fn = Loss_MSE()

# Run a fixed number of full forward/backward passes over the one sample.
epochs = 5
step = 0
while step < epochs:
    step += 1
    print(f"\n================= EPOCH {step} =================")

    # Forward: dense -> ReLU -> dense.
    layer1.forward(X)
    activation1.forward(layer1.output)
    layer2.forward(activation1.output)
    y_pred = layer2.output

    # Loss for this pass.
    loss = loss_fn.forward(y_pred, y_true)

    # Backward: walk the same chain in reverse. Each dense layer applies
    # its own parameter update inside backward().
    loss_fn.backward(y_pred, y_true)
    layer2.backward(loss_fn.dinputs, 0.01)
    activation1.backward(layer2.dinputs)
    layer1.backward(activation1.dinputs, 0.01)




--- Forward Dense ---
Inputs: [[1. 2. 3.]]
Weights:
[[0.1 0.2]
 [0.3 0.4]
 [0.5 0.6]]
Biases:
[[0.1 0.1]]
Output (Z): [[2.3 2.9]]

--- Forward ReLU ---
Input: [[2.3 2.9]]
Output (after ReLU): [[2.3 2.9]]

--- Forward Dense ---
Inputs: [[2.3 2.9]]
Weights:
[[0.7]
 [0.8]]
Biases:
[[0.1]]
Output (Z): [[4.03]]

--- Loss ---
y_true: [[5]]
y_pred: [[4.03]]
Loss: 0.9409

--- Backward Loss ---
dInputs (gradient wrt predictions): [[-1.94]]

--- Backward Dense ---
dvalues from next layer: [[-1.94]]
dWeights:
[[-4.462]
 [-5.626]]
dBiases:
[[-1.94]]
dInputs (to pass back): [[-1.358 -1.552]]
Updated Weights:
[[0.745]
 [0.856]]
Updated Biases:
[[0.119]]

--- Backward ReLU ---
dvalues (from next layer): [[-1.358 -1.552]]
dInputs (after ReLU mask): [[-1.358 -1.552]]

--- Backward Dense ---
dvalues from next layer: [[-1.358 -1.552]]
dWeights:
[[-1.358 -1.552]
 [-2.716 -3.104]
 [-4.074 -4.656]]
dBiases:
[[-1.358 -1.552]]
dInputs (to pass back): [[-0.446 -1.028 -1.61 ]]
Updated Weights:
[[0.114 0.216]
