## 5. Implementation Code

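This snippet shows how to implement L2 regularization in the cost function and in the backward-propagation step. Note that `lambda` is a reserved keyword in Python, so the regularization strength is named `lambd`. The cost function relies on a `compute_cost(A3, Y)` helper (the standard, unregularized cost) that must already be defined.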

In [4]:
import numpy as np

def compute_cost_with_regularization(A3, Y, parameters, lambd):
    """
    Return the standard cost plus an L2 penalty on the weights.

    The penalty equals (lambd / (2 * m)) multiplied by the sum of the
    squared entries of W1, W2 and W3, where m is Y.shape[1].

    Arguments:
    A3 -- passed through unchanged to compute_cost
    Y -- passed through to compute_cost; its second dimension supplies m
    parameters -- mapping holding the weight arrays "W1", "W2" and "W3"
    lambd -- regularization strength ("lambda" is reserved in Python)

    Returns:
    cost -- unregularized cost plus the L2 penalty
    """
    n_examples = Y.shape[1]
    weights = [parameters[name] for name in ("W1", "W2", "W3")]

    # compute_cost is not defined in this block; it must already be in scope.
    base_cost = compute_cost(A3, Y)

    # Squared Frobenius norm of each weight matrix, added in layer order.
    sq1, sq2, sq3 = (np.square(w).sum() for w in weights)
    scale = lambd / (2 * n_examples)

    return base_cost + scale * (sq1 + sq2 + sq3)

def backward_propagation_with_regularization(X, Y, cache, lambd):
    """
    Compute the gradients of a three-layer network with L2 weight decay.

    Every weight gradient receives the extra term (lambd / m) * W, which is
    the derivative of the L2 penalty; bias gradients are not regularized.

    Arguments:
    X -- input array; X.shape[1] is used as the number of examples m
    Y -- label array, subtracted element-wise from A3
    cache -- 12-tuple (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3),
             presumably saved by the forward pass -- confirm against caller
    lambd -- regularization strength ("lambda" is reserved in Python)

    Returns:
    gradients -- dict with keys "dZ3", "dW3", "db3", "dA2", "dZ2", "dW2",
                 "db2", "dA1", "dZ1", "dW1", "db1"
    """
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    # Output layer. NOTE(review): dZ3 = A3 - Y is the usual gradient for a
    # sigmoid output with cross-entropy loss -- confirm the forward pass.
    dZ3 = A3 - Y
    # Add the regularization term (lambd/m * W) to the gradient
    dW3 = 1. / m * np.dot(dZ3, A2.T) + (lambd / m) * W3
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    # Layer 2
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))  # ReLU derivative
    dW2 = 1. / m * np.dot(dZ2, A1.T) + (lambd / m) * W2
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    # Layer 1: same pattern as layer 2, with X as the incoming activation.
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))  # ReLU derivative
    dW1 = 1. / m * np.dot(dZ1, X.T) + (lambd / m) * W1
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {
        "dZ3": dZ3, "dW3": dW3, "db3": db3,
        "dA2": dA2, "dZ2": dZ2, "dW2": dW2, "db2": db2,
        "dA1": dA1, "dZ1": dZ1, "dW1": dW1, "db1": db1,
    }

    return gradients