![image.png](attachment:image.png)

In [1]:
import numpy as np

def relu(x):
    """Rectified linear unit: clamp negative entries to zero.

    Args:
        x: Scalar or array-like of pre-activation values.

    Returns:
        The element-wise maximum of 0 and ``x``.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x):
    """Element-wise derivative of ReLU with respect to its input.

    Args:
        x: Pre-activation value(s): a scalar, a sequence, or a NumPy array.

    Returns:
        Float value(s) equal to 1.0 where ``x > 0`` and 0.0 elsewhere.
        The derivative at exactly 0 is taken to be 0.
    """
    # Coerce first: a plain list cannot be compared with ``> 0`` and a plain
    # Python scalar comparison yields a bool, which has no ``.astype``.
    positive_mask = np.asarray(x) > 0
    return positive_mask.astype(float)

def squared_loss(y, t):
    """Sum of squared differences between outputs and targets.

    Args:
        y: Output value(s).
        t: Target value(s), broadcastable against ``y``.

    Returns:
        The sum over all elements of ``(y - t) ** 2`` (no 1/2 factor and
        no averaging).
    """
    residual = y - t
    return np.sum(np.square(residual))

def squared_loss_derivative(y, t):
    """Gradient of the summed squared error with respect to the outputs.

    Args:
        y: Output value(s).
        t: Target value(s), broadcastable against ``y``.

    Returns:
        ``2 * (y - t)``, element-wise.
    """
    residual = y - t
    return residual * 2

# Problem inputs: the layer's raw outputs (before ReLU) and their targets.
y_tilde = np.array(
    [0.1, -0.1, 0.4, 0.7, -0.2]  # pre-activation values ỹ
)
t = np.array(
    [0, 0, 1, 0, 0]  # target values
)

# Echo the inputs under a header so the later output can be checked by eye.
print(
    "=== Gradient Calculation for ReLU + Squared Loss ===",
    f"Pre-activation values: {y_tilde}",
    f"Target values: {t}",
    sep="\n",
)

# Steps 1 and 2 (forward pass): rectify the pre-activations, then score the
# activations against the targets with the squared-error loss.
y = relu(y_tilde)
loss = squared_loss(y, t)

print(f"\nAfter ReLU activation: {y}", f"Loss: {loss}", sep="\n")

# Step 3 (backward pass): apply the chain rule element by element,
#   ∂L/∂ỹᵢ = ∂L/∂yᵢ × ∂yᵢ/∂ỹᵢ
dL_dy = squared_loss_derivative(y, t)      # loss gradient w.r.t. the activations
dy_dy_tilde = relu_derivative(y_tilde)     # ReLU slope at each pre-activation
delta = np.multiply(dL_dy, dy_dy_tilde)    # element-wise product of the two

# Report each factor, then the combined gradient.
print(f"\n∂L/∂y: {dL_dy}")
print(f"∂y/∂ỹ (ReLU derivative): {dy_dy_tilde}")
print(f"\nFinal gradients ∂L/∂ỹ: {delta}")

# Show the chain-rule arithmetic for each element, numbered from 1.
print("\n=== Step-by-step breakdown ===")
per_element = zip(y_tilde, y, t, dL_dy, dy_dy_tilde, delta)
for position, (pre_act, act, target, grad_out, relu_slope, grad_in) in enumerate(
    per_element, start=1
):
    print(f"i={position}: ỹ={pre_act}, y={act}, t={target}")
    print(f"    ∂L/∂y = 2(y-t) = 2({act}-{target}) = {grad_out}")
    print(f"    ∂y/∂ỹ = {'1' if pre_act > 0 else '0'} (ReLU derivative)")
    print(f"    δ = {grad_out} × {relu_slope} = {grad_in}")
    print()

# Compare the computed gradient against each multiple-choice answer.
print("=== Answer Options Check ===")
options = {
    'a': [-0.2, 0, -0.8, -1.4, 0],
    'b': [-0.2, 0, 1.2, -1.4, 0],
    'c': [-0.2, 0.2, 1.2, -1.4, 0.4],
    'd': [-0.1, 0, 0.6, -0.7, 0],
    'e': [-0.1, 0.1, 0.6, -0.7, 0.2],
    'f': [0.1, -0.1, -0.6, 0.7, -0.2],
    'g': [0.1, 0, -0.6, 0.7, 0],
    'h': [0.2, -0.2, -1.2, 1.4, -0.4],
    'i': [0.2, 0, -1.2, 1.4, 0],
    'j': [0.2, 0, 0.8, 1.4, 0]
}

print(f"Our calculated result: {delta.tolist()}")
for label, candidate in options.items():
    # Every option is listed; only a numerically matching one gets the tick.
    if not np.allclose(delta, candidate, atol=1e-10):
        print(f"  Option ({label}): {candidate}")
        continue
    print(f"✓ Matches option ({label}): {candidate}")

=== Gradient Calculation for ReLU + Squared Loss ===
Pre-activation values: [ 0.1 -0.1  0.4  0.7 -0.2]
Target values: [0 0 1 0 0]

After ReLU activation: [0.1 0.  0.4 0.7 0. ]
Loss: 0.8599999999999999

∂L/∂y: [ 0.2  0.  -1.2  1.4  0. ]
∂y/∂ỹ (ReLU derivative): [1. 0. 1. 1. 0.]

Final gradients ∂L/∂ỹ: [ 0.2  0.  -1.2  1.4  0. ]

=== Step-by-step breakdown ===
i=1: ỹ=0.1, y=0.1, t=0
    ∂L/∂y = 2(y-t) = 2(0.1-0) = 0.2
    ∂y/∂ỹ = 1 (ReLU derivative)
    δ = 0.2 × 1.0 = 0.2

i=2: ỹ=-0.1, y=0.0, t=0
    ∂L/∂y = 2(y-t) = 2(0.0-0) = 0.0
    ∂y/∂ỹ = 0 (ReLU derivative)
    δ = 0.0 × 0.0 = 0.0

i=3: ỹ=0.4, y=0.4, t=1
    ∂L/∂y = 2(y-t) = 2(0.4-1) = -1.2
    ∂y/∂ỹ = 1 (ReLU derivative)
    δ = -1.2 × 1.0 = -1.2

i=4: ỹ=0.7, y=0.7, t=0
    ∂L/∂y = 2(y-t) = 2(0.7-0) = 1.4
    ∂y/∂ỹ = 1 (ReLU derivative)
    δ = 1.4 × 1.0 = 1.4

i=5: ỹ=-0.2, y=0.0, t=0
    ∂L/∂y = 2(y-t) = 2(0.0-0) = 0.0
    ∂y/∂ỹ = 0 (ReLU derivative)
    δ = 0.0 × 0.0 = 0.0

=== Answer Options Check ===
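Our calculated result: [0.2, 0.0, -1.2, 1.4, 0.0]
  Option (a): [-0.2, 0, -0.8, -1.4, 0]
  Option (b): [-0.2, 0, 1.2, -1.4, 0]
  Option (c): [-0.2, 0.2, 1.2, -1.4, 0.4]
  Option (d): [-0.1, 0, 0.6, -0.7, 0]
  Option (e): [-0.1, 0.1, 0.6, -0.7, 0.2]
  Option (f): [0.1, -0.1, -0.6, 0.7, -0.2]
  Option (g): [0.1, 0, -0.6, 0.7, 0]
  Option (h): [0.2, -0.2, -1.2, 1.4, -0.4]
✓ Matches option (i): [0.2, 0, -1.2, 1.4, 0]
  Option (j): [0.2, 0, 0.8, 1.4, 0]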

![image.png](attachment:image.png)

![image-2.png](attachment:image-2.png)

![image-3.png](attachment:image-3.png)