# In [1]:
import numpy as np

def gradient_check(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Check the gradient from backward propagation against a numerical estimate.

    Every parameter is shifted by +epsilon and -epsilon in turn, the cost is
    re-evaluated with forward_propagation_n, and the resulting two-sided
    difference is compared with the supplied analytical gradient. A coloured
    verdict is printed and the relative difference is returned.

    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", etc.
    gradients -- python dictionary containing your gradients "dW1", "db1", "dW2", "db2", etc.
    X -- input data of shape (input_size, m)
    Y -- true labels
    epsilon -- tiny non-zero shift applied to each parameter to approximate the gradient

    Returns:
    difference -- ||grad - gradapprox|| / (||grad|| + ||gradapprox||); 0.0 when
                  both gradients are exactly zero, NaN/inf if a cost evaluation
                  was not finite

    Raises:
    ValueError -- if epsilon is zero, or if the flattened gradient does not have
                  the same number of entries as the flattened parameters

    Relies on helpers that are not defined in this block: dictionary_to_vector,
    gradients_to_vector, vector_to_dictionary and forward_propagation_n.
    NOTE(review): the two *_to_vector helpers are assumed to flatten their
    dictionaries in the same order -- confirm against their definitions.
    """
    if epsilon == 0:
        # A zero step would turn every estimate into 0/0.
        raise ValueError("epsilon must be non-zero")

    # 1. Flatten the parameters into one column vector (theta).
    # Work on a private float64 copy: the caller's data is never touched, and
    # a shift of 1e-7 is neither truncated (integer input) nor rounded away
    # (float32 input).
    parameters_values, _ = dictionary_to_vector(parameters)
    theta = np.array(parameters_values, dtype=np.float64).reshape(-1, 1)
    num_parameters = theta.shape[0]

    # 2. Flatten the analytical gradients into a column vector (grad).
    # Forcing a column shape prevents a 1-D gradient from silently broadcasting
    # against the (n, 1) gradapprox into an (n, n) matrix in the comparison.
    grad = np.asarray(gradients_to_vector(gradients), dtype=np.float64).reshape(-1, 1)
    if grad.shape[0] != num_parameters:
        raise ValueError(
            f"gradient vector has {grad.shape[0]} entries but there are "
            f"{num_parameters} parameters"
        )

    gradapprox = np.zeros((num_parameters, 1))

    # 3. Loop over every parameter to compute its numerical gradient.
    # theta is perturbed in place and restored, instead of copying the whole
    # vector twice per parameter.
    # NOTE(review): assumes the helpers neither mutate nor keep a reference to
    # the vector they are given -- confirm.
    for i in range(num_parameters):
        original_value = theta[i, 0]

        # J(theta + epsilon)
        theta[i, 0] = original_value + epsilon
        cost_plus, _ = forward_propagation_n(X, Y, vector_to_dictionary(theta))

        # J(theta - epsilon)
        theta[i, 0] = original_value - epsilon
        cost_minus, _ = forward_propagation_n(X, Y, vector_to_dictionary(theta))

        # Restore the parameter before moving on to the next one.
        theta[i, 0] = original_value

        # Two-sided difference
        gradapprox[i] = (cost_plus - cost_minus) / (2 * epsilon)

    # 4. Compare gradapprox (numerical) vs grad (analytical/backprop) using the
    # Euclidean distance normalized by the lengths of the two vectors.
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    if denominator == 0:
        # Both gradients are exactly zero, so they agree; avoid 0/0 -> NaN.
        difference = 0.0
    else:
        difference = numerator / denominator

    # A NaN/inf difference must be reported as a failure: `nan > 2e-7` is
    # False and would otherwise be announced as a success.
    if not np.isfinite(difference) or difference > 2e-7:
        print(f"\033[91mMistake likely in backward propagation! difference = {difference}\033[0m")
    else:
        print(f"\033[92mYour backward propagation works perfectly. difference = {difference}\033[0m")

    return difference