In [1]:
import numpy as np
from convolution import Conv2D

def test_initialization():
    conv = Conv2D(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1)
    
    assert conv.weights.shape == (8, 3, 3, 3), f"Expected weight shape (8, 3, 3, 3), but got {conv.weights.shape}"
    assert conv.biases.shape == (8, 1), f"Expected bias shape (8, 1), but got {conv.biases.shape}"

    print("✓ Initialization test passed")

def test_forward_pass():
    conv = Conv2D(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1)
    
    x = np.random.randn(2, 3, 32, 32)  # Batch size of 2, 3 channels, 32x32 image
    output = conv.forward(x)

    expected_shape = (2, 8, 32, 32)  # Output should maintain the same spatial size due to padding=1
    assert output.shape == expected_shape, f"Expected output shape {expected_shape}, but got {output.shape}"

    print("✓ Forward pass test passed")

def test_backward_pass():
    conv = Conv2D(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1)
    
    x = np.random.randn(2, 3, 32, 32)
    output = conv.forward(x)
    
    d_out = np.random.randn(*output.shape)  # Simulating gradient from the next layer
    d_input = conv.backward(d_out)

    assert d_input[0].shape == x.shape, f"Expected gradient shape {x.shape}, but got {d_input.shape}"

    print("✓ Backward pass test passed")

# Run the three Conv2D smoke tests in order: construction, forward, backward.
# The guard is true when this cell is executed in the notebook (the captured
# output below the cell shows all three tests ran) or when the code is run as
# a script; it is false when the code is imported as a module.
if __name__ == "__main__":
    test_initialization()
    test_forward_pass()
    test_backward_pass()


✓ Initialization test passed
✓ Forward pass test passed
✓ Backward pass test passed


In [2]:
import numpy as np
from convolution import Conv2D

def _central_difference(conv_layer, param, index, x, d_out, epsilon):
    """Central-difference derivative of sum(forward(x) * d_out) w.r.t. param[index].

    ``param`` is perturbed in place by +/- ``epsilon`` at ``index`` and is
    always restored to its original value, even if ``forward`` raises.
    """
    original_value = param[index]
    try:
        # Perturb positively
        param[index] = original_value + epsilon
        loss_plus = np.sum(conv_layer.forward(x) * d_out)

        # Perturb negatively
        param[index] = original_value - epsilon
        loss_minus = np.sum(conv_layer.forward(x) * d_out)
    finally:
        # Never leave the layer with a perturbed parameter behind.
        param[index] = original_value

    return (loss_plus - loss_minus) / (2 * epsilon)


def numerical_gradient(conv_layer, x, d_out, epsilon=1e-5):
    """Estimate parameter gradients of a layer by central finite differences.

    The scalar being differentiated is ``np.sum(conv_layer.forward(x) * d_out)``,
    whose gradient with respect to the forward output is exactly ``d_out``.
    The estimates are therefore directly comparable to the analytical
    gradients a backward pass produces for upstream gradient ``d_out``.

    Args:
        conv_layer: object exposing NumPy arrays ``weights`` and ``biases``
            and a ``forward(x)`` method. Both arrays are modified in place
            during the computation and restored afterwards.
        x: input passed unchanged to ``conv_layer.forward``.
        d_out: upstream gradient; must be broadcast-compatible with the
            forward output.
        epsilon: half-width of the finite-difference step.

    Returns:
        Tuple ``(numerical_dW, numerical_db)`` with the same shapes (and
        dtypes) as ``conv_layer.weights`` and ``conv_layer.biases``.

    Note:
        Costs two forward passes per parameter element, so this is only
        practical for small layers and inputs.
    """
    numerical_dW = np.zeros_like(conv_layer.weights)
    numerical_db = np.zeros_like(conv_layer.biases)

    # np.ndindex visits every element in C order regardless of the array's
    # rank, replacing the hard-coded 4-deep loop nest.
    for index in np.ndindex(*conv_layer.weights.shape):
        numerical_dW[index] = _central_difference(
            conv_layer, conv_layer.weights, index, x, d_out, epsilon
        )

    for index in np.ndindex(*conv_layer.biases.shape):
        numerical_db[index] = _central_difference(
            conv_layer, conv_layer.biases, index, x, d_out, epsilon
        )

    return numerical_dW, numerical_db

def test_gradient(conv_layer):
    np.random.seed(42)
    
    # Use consistent attribute names
    x = np.random.randn(1, conv_layer.in_channels, 5, 5)  # Batch size = 1, Input shape = (5,5)
    d_out = np.random.randn(1, conv_layer.out_channels, 3, 3)  # Output shape (assuming kernel=3, stride=1, padding=0)

    # Compute numerical gradients
    numerical_dW, numerical_db = numerical_gradient(conv_layer, x, d_out)

    # Compute analytical gradients without updating weights
    conv_layer.forward(x)
    _, analytical_dW, analytical_db = conv_layer.backward(d_out, update_weights=False)

    # Compute relative error for gradients
    weight_error = np.linalg.norm(numerical_dW - analytical_dW) / (np.linalg.norm(numerical_dW) + np.linalg.norm(analytical_dW))
    bias_error = np.linalg.norm(numerical_db - analytical_db) / (np.linalg.norm(numerical_db) + np.linalg.norm(analytical_db))

    print(f"Gradient Check - Weights Relative Error: {weight_error}")
    print(f"Gradient Check - Biases Relative Error: {bias_error}")

    assert weight_error < 1e-4, "Weight gradients are incorrect!"
    assert bias_error < 1e-4, "Bias gradients are incorrect!"
    print("Gradient check passed! ✅")

# Example usage: gradient-check a minimal single-channel layer.
# kernel_size=3, stride=1, padding=0 maps the 5x5 input that test_gradient
# generates to a 3x3 output.
conv = Conv2D(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=0)
# NOTE(review): test_gradient calls backward(..., update_weights=False), so the
# learning rate is presumably not used by this check — confirm against Conv2D.
conv.learning_rate = 0.01  # Make sure this is set if needed
test_gradient(conv)


Gradient Check - Weights Relative Error: 1.1114650280120296e-12
Gradient Check - Biases Relative Error: 2.7203882433146153e-09
Gradient check passed! ✅
