Import and Activation Functions

In [2]:
import numpy as np

# Define activation functions and their derivatives
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), evaluated in a numerically stable way.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    NumPy float scalar or numpy.ndarray
        Element-wise sigmoid of ``x``, in the range [0, 1].
    """
    # sigmoid(x) = exp(-log(1 + exp(-x))). np.logaddexp(0, -x) evaluates
    # log(exp(0) + exp(-x)) without ever forming exp(-x) on its own, so large
    # negative inputs no longer overflow (the naive form emits a
    # RuntimeWarning there).
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Derivative of the sigmoid with respect to its input, evaluated at ``x``.

    Uses the identity s'(x) = s(x) * (1 - s(x)), where s is ``sigmoid``.
    """
    s = sigmoid(x)
    return s * (1 - s)

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Derivative of ReLU at ``x``: 1 where ``x > 0``, otherwise 0.

    The value at exactly ``x == 0`` is 0, because the comparison is strict.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

print("✅ Activation functions defined (Sigmoid & ReLU)")

✅ Activation functions defined (Sigmoid & ReLU)


Initialize Weights and Input

In [3]:
# Network parameters: all three layers start with the same weight and a zero bias
w1 = w2 = w3 = 0.5
b1 = b2 = b3 = 0

# Single training example: input x and its target output y
x, y = 1, 1

print(f"✅ Weights initialized: w1={w1}, w2={w2}, w3={w3}")
print(f"✅ Input: x={x}, Target: y={y}")

✅ Weights initialized: w1=0.5, w2=0.5, w3=0.5
✅ Input: x=1, Target: y=1


Sigmoid Network Forward Pass

In [4]:
# Sigmoid Network - Forward Pass
print("=== Sigmoid Network ===")

# Layer 1: affine transform of the input, then sigmoid
z1_s = w1 * x + b1
a1_s = sigmoid(z1_s)

# Layer 2: takes layer 1's activation as its input
z2_s = w2 * a1_s + b2
a2_s = sigmoid(z2_s)

# Output layer: its activation is the network's prediction
z3_s = w3 * a2_s + b3
y_pred_s = sigmoid(z3_s)

# Half squared error between the target and the prediction
loss_s = 0.5 * (y - y_pred_s) ** 2

# One print call with a newline separator; output matches line-by-line printing
print(
    "\nForward Pass Results:",
    f"Layer 1 Output (a1): {a1_s:.4f}",
    f"Layer 2 Output (a2): {a2_s:.4f}",
    f"Predicted Output (y_pred): {y_pred_s:.4f}",
    f"Loss: {loss_s:.4f}",
    sep="\n",
)

=== Sigmoid Network ===

Forward Pass Results:
Layer 1 Output (a1): 0.6225
Layer 2 Output (a2): 0.5772
Predicted Output (y_pred): 0.5717
Loss: 0.0917


Sigmoid Network Backward Pass (Gradients)

In [5]:
# Sigmoid Network - Backward Pass

# Local activation slopes at each layer's pre-activation value
dy_pred_dz3_s = sigmoid_derivative(z3_s)
da2_dz2_s = sigmoid_derivative(z2_s)
da1_dz1_s = sigmoid_derivative(z1_s)

# Derivative of the loss 0.5 * (y - y_pred)^2 with respect to the prediction
dL_dy_pred_s = -(y - y_pred_s)

# Chain rule: push the loss gradient back through layer 3, then layer 2
dL_da2_s = dL_dy_pred_s * dy_pred_dz3_s * w3
dL_da1_s = dL_da2_s * da2_dz2_s * w2

# Weight gradients: upstream gradient * local slope * that layer's input
dL_dw3_s = dL_dy_pred_s * dy_pred_dz3_s * a2_s
dL_dw2_s = dL_da2_s * da2_dz2_s * a1_s
dL_dw1_s = dL_da1_s * da1_dz1_s * x

print(
    "\nGradients (Sigmoid):",
    f"∂L/∂w3: {dL_dw3_s:.4f}",
    f"∂L/∂w2: {dL_dw2_s:.4f}",
    f"∂L/∂w1: {dL_dw1_s:.4f}",
    sep="\n",
)


Gradients (Sigmoid):
∂L/∂w3: -0.0605
∂L/∂w2: -0.0080
∂L/∂w1: -0.0015


ReLU Network Forward Pass

In [6]:
# ReLU Network - Forward Pass
print("\n=== ReLU Network ===")

# Layer 1: affine transform of the input, then ReLU
z1_r = w1 * x + b1
a1_r = relu(z1_r)

# Layer 2: takes layer 1's activation as its input
z2_r = w2 * a1_r + b2
a2_r = relu(z2_r)

# Output layer: its activation is the network's prediction
z3_r = w3 * a2_r + b3
y_pred_r = relu(z3_r)

# Half squared error between the target and the prediction
loss_r = 0.5 * (y - y_pred_r) ** 2

# One print call with a newline separator; output matches line-by-line printing
print(
    "\nForward Pass Results:",
    f"Layer 1 Output (a1): {a1_r:.4f}",
    f"Layer 2 Output (a2): {a2_r:.4f}",
    f"Predicted Output (y_pred): {y_pred_r:.4f}",
    f"Loss: {loss_r:.4f}",
    sep="\n",
)


=== ReLU Network ===

Forward Pass Results:
Layer 1 Output (a1): 0.5000
Layer 2 Output (a2): 0.2500
Predicted Output (y_pred): 0.1250
Loss: 0.3828


ReLU Network Backward Pass & Comparison

In [7]:
# ReLU Network - Backward Pass

# Local activation slopes at each layer's pre-activation value
dy_pred_dz3_r = relu_derivative(z3_r)
da2_dz2_r = relu_derivative(z2_r)
da1_dz1_r = relu_derivative(z1_r)

# Derivative of the loss 0.5 * (y - y_pred)^2 with respect to the prediction
dL_dy_pred_r = -(y - y_pred_r)

# Chain rule: push the loss gradient back through layer 3, then layer 2
dL_da2_r = dL_dy_pred_r * dy_pred_dz3_r * w3
dL_da1_r = dL_da2_r * da2_dz2_r * w2

# Weight gradients: upstream gradient * local slope * that layer's input
dL_dw3_r = dL_dy_pred_r * dy_pred_dz3_r * a2_r
dL_dw2_r = dL_da2_r * da2_dz2_r * a1_r
dL_dw1_r = dL_da1_r * da1_dz1_r * x

print(
    "\nGradients (ReLU):",
    f"∂L/∂w3: {dL_dw3_r:.4f}",
    f"∂L/∂w2: {dL_dw2_r:.4f}",
    f"∂L/∂w1: {dL_dw1_r:.4f}",
    sep="\n",
)

# Final Comparison: the two networks' weight gradients, layer by layer
print(
    "\n=== Comparison ===",
    "Sigmoid Gradients vs ReLU Gradients:",
    f"∂L/∂w3: {dL_dw3_s:.4f} (Sigmoid) | {dL_dw3_r:.4f} (ReLU)",
    f"∂L/∂w2: {dL_dw2_s:.4f} (Sigmoid) | {dL_dw2_r:.4f} (ReLU)",
    f"∂L/∂w1: {dL_dw1_s:.4f} (Sigmoid) | {dL_dw1_r:.4f} (ReLU)",
    sep="\n",
)


Gradients (ReLU):
∂L/∂w3: -0.2188
∂L/∂w2: -0.2188
∂L/∂w1: -0.2188

=== Comparison ===
Sigmoid Gradients vs ReLU Gradients:
∂L/∂w3: -0.0605 (Sigmoid) | -0.2188 (ReLU)
∂L/∂w2: -0.0080 (Sigmoid) | -0.2188 (ReLU)
∂L/∂w1: -0.0015 (Sigmoid) | -0.2188 (ReLU)
