# Debug CVNN Library Fit Function

This notebook systematically debugs matrix dimension issues in the CVNN library's fit function.

In [1]:
# Debug CVNN Fit Function - Matrix Dimension Issues
import numpy as np
import matplotlib.pyplot as plt
from cvnn import Dense, Sequential
from cvnn.activations import complex_sigmoid, complex_sigmoid_backward

print("CVNN Debug Session - Matrix Dimension Analysis")
print("=" * 60)

CVNN Debug Session - Matrix Dimension Analysis


In [2]:
# Step 1: Reproduce the matrix dimension error using Sequential.fit()
print("Step 1: Reproducing the Matrix Dimension Error")
print("-" * 50)

# Create simple test data
X_test = np.array([
    [1.0, 0.0],
    [0.0, 1.0]
], dtype=np.float64)

Y_test = np.array([
    [1.0],
    [0.0]
], dtype=np.float64)

print(f"Input X shape: {X_test.shape}")
print(f"Target Y shape: {Y_test.shape}")

# Try to use Sequential with simple activation to trigger the error
try:
    print("\nTesting Sequential model with fit()...")
    model = Sequential([
        Dense(input_dim=2, output_dim=2, real=True, complex=False),
        (np.tanh, lambda x, grad: grad * (1 - np.tanh(x)**2)),
        Dense(input_dim=2, output_dim=1, real=True, complex=False)
    ], real=True)
    
    print("Model created successfully")
    print("Attempting fit()...")
    
    # This should trigger the matrix dimension error
    losses = model.fit(X_test, Y_test, epochs=10, lr=0.1, verbose=True)
    print("Fit completed successfully")
    
except Exception as e:
    print(f"ERROR CAUGHT: {type(e).__name__}: {e}")
    import traceback
    print("\nFull traceback:")
    traceback.print_exc()

Step 1: Reproducing the Matrix Dimension Error
--------------------------------------------------
Input X shape: (2, 2)
Target Y shape: (2, 1)

Testing Sequential model with fit()...
Model created successfully
Attempting fit()...
Epoch 1/10, Loss: 2.4349
Epoch 2/10, Loss: 0.7314
Epoch 3/10, Loss: 0.3720
Epoch 4/10, Loss: 0.2853
Epoch 5/10, Loss: 0.2635
Epoch 6/10, Loss: 0.2575
Epoch 7/10, Loss: 0.2554
Epoch 8/10, Loss: 0.2543
Epoch 9/10, Loss: 0.2534
Epoch 10/10, Loss: 0.2526
Fit completed successfully


In [3]:
# Step 2: Test different scenarios that might trigger matrix dimension errors
print("\n\nStep 2: Testing Various Scenarios for Matrix Dimension Issues")
print("-" * 60)

def test_scenario(name, model_layers, x_data, y_data):
    print(f"\nTesting: {name}")
    print(f"X shape: {x_data.shape}, Y shape: {y_data.shape}")
    
    try:
        model = Sequential(model_layers)
        losses = model.fit(x_data, y_data, epochs=5, lr=0.1, verbose=False)
        print(f"✓ SUCCESS - Final loss: {losses[-1]:.4f}")
        return True
    except Exception as e:
        print(f"✗ ERROR: {type(e).__name__}: {e}")
        return False

# Test Case 1: Complex-valued network
X_complex = np.array([[1+1j], [0+1j]], dtype=np.complex128)
Y_complex = np.array([[1.0], [0.0]], dtype=np.complex128)

test_scenario("Complex-valued single layer", [
    Dense(input_dim=1, output_dim=1, complex=True)
], X_complex, Y_complex)

# Test Case 2: Complex with activation
test_scenario("Complex with complex_sigmoid", [
    Dense(input_dim=1, output_dim=1, complex=True),
    (complex_sigmoid, complex_sigmoid_backward)
], X_complex, Y_complex)

# Test Case 3: Multiple layers with mismatched dimensions
X_mismatch = np.array([[1.0, 2.0, 3.0]], dtype=np.float64)  # 1 sample, 3 features
Y_mismatch = np.array([[1.0]], dtype=np.float64)            # 1 sample, 1 output

test_scenario("Multi-layer real network", [
    Dense(input_dim=3, output_dim=2, real=True, complex=False),
    (np.tanh, lambda x, grad: grad * (1 - np.tanh(x)**2)),
    Dense(input_dim=2, output_dim=1, real=True, complex=False)
], X_mismatch, Y_mismatch)

# Test Case 4: XOR problem (from basic_test)
X_xor = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]], dtype=np.float64)
Y_xor = np.array([[0.0], [1.0], [1.0], [0.0]], dtype=np.float64)

test_scenario("XOR problem with activations", [
    Dense(input_dim=2, output_dim=2, real=True, complex=False),
    (np.tanh, lambda x, grad: grad * (1 - np.tanh(x)**2)),
    Dense(input_dim=2, output_dim=1, real=True, complex=False),
    (lambda x: 1/(1+np.exp(-x)), lambda x, grad: grad * (1/(1+np.exp(-x))) * (1 - 1/(1+np.exp(-x))))
], X_xor, Y_xor)



Step 2: Testing Various Scenarios for Matrix Dimension Issues
------------------------------------------------------------

Testing: Complex-valued single layer
X shape: (2, 1), Y shape: (2, 1)
✓ SUCCESS - Final loss: 0.2403

Testing: Complex with complex_sigmoid
X shape: (2, 1), Y shape: (2, 1)
✓ SUCCESS - Final loss: 0.7264

Testing: Multi-layer real network
X shape: (1, 3), Y shape: (1, 1)
✗ ERROR: ValueError: operands could not be broadcast together with shapes (1,2) (1,3) 

Testing: XOR problem with activations
X shape: (4, 2), Y shape: (4, 1)
✗ ERROR: ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 2)


False

In [5]:
# Step 3: Deep Dive into the Matrix Dimension Issues
print("\n\nStep 3: Analyzing the Root Cause of Matrix Dimension Errors")
print("-" * 65)

# Let's examine the Sequential.backward method step by step
def debug_sequential_backward():
    print("\nDebugging XOR case that failed...")
    
    # Recreate the failing case
    X_debug = np.array([[0.0, 0.0], [0.0, 1.0]], dtype=np.float64)  # Smaller for debugging
    Y_debug = np.array([[0.0], [1.0]], dtype=np.float64)
    
    # Create layers manually to inspect
    layer1 = Dense(input_dim=2, output_dim=2, real=True, complex=False)
    tanh_activation = (np.tanh, lambda x, grad: grad * (1 - np.tanh(x)**2))
    layer2 = Dense(input_dim=2, output_dim=1, real=True, complex=False)
    sigmoid_activation = (lambda x: 1/(1+np.exp(-x)), lambda x, grad: grad * (1/(1+np.exp(-x))) * (1 - 1/(1+np.exp(-x))))
    
    # Manual forward pass
    print(f"Input X shape: {X_debug.shape}")
    
    # Layer 1 forward
    z1 = layer1.forward(X_debug)
    print(f"Layer 1 output z1 shape: {z1.shape}")
    
    # Tanh activation
    a1 = np.tanh(z1)
    print(f"After tanh a1 shape: {a1.shape}")
    
    # Layer 2 forward
    z2 = layer2.forward(a1)
    print(f"Layer 2 output z2 shape: {z2.shape}")
    
    # Sigmoid activation
    a2 = 1/(1+np.exp(-z2))
    print(f"Final output a2 shape: {a2.shape}")
    print(f"Target Y shape: {Y_debug.shape}")
    
    # Calculate loss and gradients
    loss = np.mean(np.abs(a2 - Y_debug) ** 2)
    grad_output = 2 * (a2 - Y_debug) / Y_debug.shape[0]
    print(f"\nLoss: {loss:.4f}")
    print(f"Initial gradient shape: {grad_output.shape}")
    
    # Now trace the backward pass step by step
    print("\n--- BACKWARD PASS ANALYSIS ---")
    
    # Sigmoid backward
    sigmoid_input = z2  # This is what sigmoid saw
    sigmoid_output = a2  # This is what sigmoid produced
    
    print(f"Sigmoid backward input (z2): {sigmoid_input.shape}")
    print(f"Sigmoid backward grad_output: {grad_output.shape}")
    
    # Apply sigmoid derivative
    grad_z2 = grad_output * sigmoid_output * (1 - sigmoid_output)
    print(f"After sigmoid derivative grad_z2: {grad_z2.shape}")
    
    # Layer 2 backward
    print(f"\nLayer 2 backward:")
    print(f"  Layer 2 cached input (a1): {layer2.x_cache.shape}")
    print(f"  Layer 2 weights W: {layer2.W.shape}")
    print(f"  Incoming gradient: {grad_z2.shape}")
    
    # This is where the error likely occurs
    try:
        grad_a1 = layer2.backward(grad_z2, lr=0.1)
        print(f"  Layer 2 backward successful, grad_a1: {grad_a1.shape}")
    except Exception as e:
        print(f"  Layer 2 backward FAILED: {e}")
        
        # Let's manually compute what should happen
        print(f"  Manual computation:")
        print(f"    grad_z2 @ layer2.W.T = {grad_z2.shape} @ {layer2.W.T.shape}")
        print(f"    Expected result shape: {(grad_z2.shape[0], layer2.W.T.shape[1])}")
        
        return
    
    # Continue with tanh backward if layer2 succeeded
    print(f"\nTanh backward:")
    print(f"  Tanh input was z1: {z1.shape}")
    print(f"  Incoming gradient grad_a1: {grad_a1.shape}")
    
    # Apply tanh derivative
    grad_z1 = grad_a1 * (1 - np.tanh(z1)**2)
    print(f"  After tanh derivative grad_z1: {grad_z1.shape}")
    
    # Layer 1 backward
    print(f"\nLayer 1 backward:")
    print(f"  Layer 1 cached input (X): {layer1.x_cache.shape}")
    print(f"  Layer 1 weights W: {layer1.W.shape}")
    print(f"  Incoming gradient: {grad_z1.shape}")
    
    try:
        grad_x = layer1.backward(grad_z1, lr=0.1)
        print(f"  Layer 1 backward successful, grad_x: {grad_x.shape}")
    except Exception as e:
        print(f"  Layer 1 backward FAILED: {e}")

debug_sequential_backward()



Step 3: Analyzing the Root Cause of Matrix Dimension Errors
-----------------------------------------------------------------

Debugging XOR case that failed...
Input X shape: (2, 2)
Layer 1 output z1 shape: (2, 2)
After tanh a1 shape: (2, 2)
Layer 2 output z2 shape: (2, 1)
Final output a2 shape: (2, 1)
Target Y shape: (2, 1)

Loss: 0.4565
Initial gradient shape: (2, 1)

--- BACKWARD PASS ANALYSIS ---
Sigmoid backward input (z2): (2, 1)
Sigmoid backward grad_output: (2, 1)
After sigmoid derivative grad_z2: (2, 1)

Layer 2 backward:
  Layer 2 cached input (a1): (2, 2)
  Layer 2 weights W: (2, 1)
  Incoming gradient: (2, 1)
  Layer 2 backward successful, grad_a1: (2, 2)

Tanh backward:
  Tanh input was z1: (2, 2)
  Incoming gradient grad_a1: (2, 2)
  After tanh derivative grad_z1: (2, 2)

Layer 1 backward:
  Layer 1 cached input (X): (2, 2)
  Layer 1 weights W: (2, 2)
  Incoming gradient: (2, 2)
  Layer 1 backward successful, grad_x: (2, 2)


In [6]:
# Step 4: Examine Sequential.backward method implementation
print("\n\nStep 4: Investigating Sequential.backward Implementation")
print("-" * 60)

# Let's look at how the Sequential class handles the backward pass
# First, let's recreate the exact scenario that fails

def debug_sequential_class():
    print("Creating Sequential model that fails...")
    
    X_fail = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]], dtype=np.float64)
    Y_fail = np.array([[0.0], [1.0], [1.0], [0.0]], dtype=np.float64)
    
    model = Sequential([
        Dense(input_dim=2, output_dim=2, real=True, complex=False),
        (np.tanh, lambda x, grad: grad * (1 - np.tanh(x)**2)),
        Dense(input_dim=2, output_dim=1, real=True, complex=False),
        (lambda x: 1/(1+np.exp(-x)), lambda x, grad: grad * (1/(1+np.exp(-x))) * (1 - 1/(1+np.exp(-x))))
    ], real=True)
    
    print(f"Model layers: {len(model.layers)}")
    for i, layer in enumerate(model.layers):
        if hasattr(layer, 'forward'):
            print(f"  Layer {i}: Dense layer with weights {layer.W.shape}")
        else:
            print(f"  Layer {i}: Activation function")
    
    # Try one forward pass
    print(f"\nTrying forward pass...")
    try:
        output = model.forward(X_fail)
        print(f"Forward pass successful, output shape: {output.shape}")
        print(f"Cache length: {len(model.cache)}")
        
        for i, (kind, layer_info) in enumerate(model.cache):
            print(f"  Cache {i}: {kind}")
            if kind == "layer":
                print(f"    Cached input shape: {layer_info.x_cache.shape}")
        
    except Exception as e:
        print(f"Forward pass failed: {e}")
        return
    
    # Now try backward pass manually
    print(f"\nTrying backward pass...")
    loss = np.mean(np.abs(output - Y_fail) ** 2)
    grad = 2 * (output - Y_fail) / Y_fail.shape[0]
    print(f"Initial gradient shape: {grad.shape}")
    
    try:
        model.backward(grad, lr=0.1)
        print("Backward pass successful!")
    except Exception as e:
        print(f"Backward pass failed: {e}")
        import traceback
        print("Detailed traceback:")
        traceback.print_exc()
        
        # Let's debug step by step
        print(f"\n--- DEBUGGING SEQUENTIAL BACKWARD ---")
        print(f"Processing {len(model.cache)} cached items in reverse:")
        
        current_grad = grad
        for i, (kind, l) in enumerate(reversed(model.cache)):
            step = len(model.cache) - 1 - i
            print(f"\nStep {step}: Processing {kind}")
            print(f"  Current gradient shape: {current_grad.shape}")
            
            if kind == "activation":
                print(f"  Activation backward...")
                # l is (activation, derivative)
                if l[1] is not None:
                    # Find the previous layer's cached input
                    prev_idx = len(model.cache) - 1 - i - 1
                    if prev_idx >= 0:
                        prev_kind, prev_layer = model.cache[prev_idx]
                        if prev_kind == "layer":
                            print(f"    Previous layer input shape: {prev_layer.x_cache.shape}")
                            try:
                                current_grad = l[1](prev_layer.x_cache, current_grad)
                                print(f"    Activation backward successful, new grad shape: {current_grad.shape}")
                            except Exception as act_e:
                                print(f"    Activation backward FAILED: {act_e}")
                                break
                        else:
                            print("    ERROR: Previous item is not a layer!")
                            break
                    else:
                        print("    ERROR: No previous layer found!")
                        break
                else:
                    print("    ERROR: Activation missing derivative!")
                    break
            else:
                print(f"  Layer backward...")
                try:
                    current_grad = l.backward(current_grad, lr=0.1)
                    print(f"    Layer backward successful, new grad shape: {current_grad.shape}")
                except Exception as layer_e:
                    print(f"    Layer backward FAILED: {layer_e}")
                    break

debug_sequential_class()



Step 4: Investigating Sequential.backward Implementation
------------------------------------------------------------
Creating Sequential model that fails...
Model layers: 4
  Layer 0: Dense layer with weights (2, 2)
  Layer 1: Activation function
  Layer 2: Dense layer with weights (2, 1)
  Layer 3: Activation function

Trying forward pass...
Forward pass successful, output shape: (4, 1)
Cache length: 4
  Cache 0: layer
    Cached input shape: (4, 2)
  Cache 1: activation
  Cache 2: layer
    Cached input shape: (4, 2)
  Cache 3: activation

Trying backward pass...
Initial gradient shape: (4, 1)
Backward pass failed: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 2)
Detailed traceback:

--- DEBUGGING SEQUENTIAL BACKWARD ---
Processing 4 cached items in reverse:

Step 3: Processing activation
  Current gradient shape: (4, 1)
  Activation backward...
    Previous layer input shape: (4, 2)
    Activ

Traceback (most recent call last):
  File "C:\Users\jamie\AppData\Local\Temp\ipykernel_13652\2334560544.py", line 51, in debug_sequential_class
    model.backward(grad, lr=0.1)
  File "C:\Users\jamie\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\cvnn\layers.py", line 162, in backward
    grad = l.backward(grad, lr=lr)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\jamie\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\cvnn\layers.py", line 99, in backward
    dx = grad_output @ self.W.T
         ~~~~~~~~~~~~^~~~~~~~~~
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 2)


In [7]:
# Step 5: Root Cause Analysis and Fix
print("\n\nStep 5: Root Cause Analysis and Fix")
print("-" * 40)

def analyze_bug():
    print("PROBLEM IDENTIFIED:")
    print("The Sequential.backward method has a logic error in how it handles activation derivatives.")
    print()
    
    print("Current (BUGGY) logic:")
    print("1. Activation backward uses: l[1](prev_layer.x_cache, current_grad)")
    print("2. But the sigmoid derivative expects: sigmoid_deriv(z2, grad)")
    print("3. The 'prev_layer.x_cache' is actually 'a1' (the INPUT to the sigmoid)")
    print("4. But sigmoid derivative needs 'z2' (the PRE-activation input to sigmoid)")
    print()
    
    print("The bug is that activation derivatives need the PRE-activation values, not the input to the layer.")
    print()
    
    # Let's demonstrate the correct vs incorrect computation
    print("DEMONSTRATION:")
    
    # Setup
    X = np.array([[1.0, 0.0]], dtype=np.float64)
    layer1 = Dense(input_dim=2, output_dim=2, real=True, complex=False)  
    layer2 = Dense(input_dim=2, output_dim=1, real=True, complex=False)
    
    # Forward pass
    z1 = layer1.forward(X)      # (1, 2)
    a1 = np.tanh(z1)            # (1, 2) - this gets cached as layer2.x_cache
    z2 = layer2.forward(a1)     # (1, 1) - this is what sigmoid derivative needs
    a2 = 1/(1+np.exp(-z2))      # (1, 1) - final output
    
    print(f"z1 (layer1 output): {z1.shape}")
    print(f"a1 (after tanh, layer2 input): {a1.shape}")  
    print(f"z2 (layer2 output): {z2.shape}")
    print(f"a2 (after sigmoid): {a2.shape}")
    
    # Gradient from loss
    grad_from_loss = np.array([[1.0]])  # (1, 1)
    
    # CORRECT sigmoid derivative computation
    print(f"\nCORRECT sigmoid derivative:")
    print(f"  Input: z2 {z2.shape}, grad {grad_from_loss.shape}")
    sigmoid_out = 1/(1+np.exp(-z2))
    correct_grad = grad_from_loss * sigmoid_out * (1 - sigmoid_out)
    print(f"  Output gradient: {correct_grad.shape}")
    
    # INCORRECT computation (what Sequential.backward currently does)
    print(f"\nINCORRECT computation (current Sequential.backward):")
    print(f"  Tries to use: a1 {a1.shape} instead of z2 {z2.shape}")
    print(f"  This causes dimension mismatches!")
    
    print(f"\nSOLUTION:")
    print("The Sequential class needs to cache the PRE-activation values (z) for each activation,")
    print("not just the POST-activation values (a).")

analyze_bug()



Step 5: Root Cause Analysis and Fix
----------------------------------------
PROBLEM IDENTIFIED:
The Sequential.backward method has a logic error in how it handles activation derivatives.

Current (BUGGY) logic:
1. Activation backward uses: l[1](prev_layer.x_cache, current_grad)
2. But the sigmoid derivative expects: sigmoid_deriv(z2, grad)
3. The 'prev_layer.x_cache' is actually 'a1' (the INPUT to the sigmoid)
4. But sigmoid derivative needs 'z2' (the PRE-activation input to sigmoid)

The bug is that activation derivatives need the PRE-activation values, not the input to the layer.

DEMONSTRATION:
z1 (layer1 output): (1, 2)
a1 (after tanh, layer2 input): (1, 2)
z2 (layer2 output): (1, 1)
a2 (after sigmoid): (1, 1)

CORRECT sigmoid derivative:
  Input: z2 (1, 1), grad (1, 1)
  Output gradient: (1, 1)

INCORRECT computation (current Sequential.backward):
  Tries to use: a1 (1, 2) instead of z2 (1, 1)
  This causes dimension mismatches!

SOLUTION:
The Sequential class needs to cache th

In [8]:
# Step 6: Implement Fixed Sequential Class
print("\n\nStep 6: Creating Fixed Sequential Class")
print("-" * 45)

class FixedSequential:
    """Fixed Sequential container that properly handles activation derivatives."""
    
    def __init__(self, layers, real=False):
        self.layers = []
        self.real = real
        for l in layers:
            if isinstance(l, str) and hasattr(activations, l):
                act = getattr(activations, l)
                self.layers.append((act, getattr(activations, l + "_deriv", None)))
            elif isinstance(l, tuple) and len(l) == 2:
                self.layers.append(l)
            else:
                self.layers.append(l)
    
    def forward(self, x):
        self.cache = []
        current = x
        
        for l in self.layers:
            if hasattr(l, "forward"):
                # Dense layer
                current = l.forward(current)
                self.cache.append(("layer", l, None))
            elif isinstance(l, tuple) and callable(l[0]):
                # Activation function - cache the PRE-activation value
                pre_activation = current.copy()  # This is the z value
                current = l[0](current)
                self.cache.append(("activation", l, pre_activation))
            else:
                raise ValueError("Unknown layer/activation type")
        
        return current
    
    def backward(self, grad, lr=0.01):
        current_grad = grad
        
        for i, (kind, l, cached_pre_activation) in enumerate(reversed(self.cache)):
            if kind == "activation":
                # l is (activation, derivative)
                if l[1] is not None:
                    # Use the cached PRE-activation value (z), not the previous layer's input
                    current_grad = l[1](cached_pre_activation, current_grad)
                else:
                    raise ValueError("Activation missing derivative")
            else:
                # Dense layer
                current_grad = l.backward(current_grad, lr=lr)
    
    def fit(self, x, y, epochs=1000, lr=0.01, verbose=False):
        losses = []
        for epoch in range(epochs):
            out = self.forward(x)
            loss = np.mean(np.abs(out - y) ** 2)  # MSE loss
            grad = 2 * (out - y) / y.shape[0]  # MSE gradient
            self.backward(grad, lr=lr)
            losses.append(float(loss))
            if verbose and (epoch % (epochs // 10) == 0 or epoch == epochs - 1):
                print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")
        return losses

# Test the fixed Sequential class
print("Testing Fixed Sequential Class:")

X_test = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]], dtype=np.float64)
Y_test = np.array([[0.0], [1.0], [1.0], [0.0]], dtype=np.float64)

try:
    fixed_model = FixedSequential([
        Dense(input_dim=2, output_dim=3, real=True, complex=False),
        (np.tanh, lambda x, grad: grad * (1 - np.tanh(x)**2)),
        Dense(input_dim=3, output_dim=1, real=True, complex=False),
        (lambda x: 1/(1+np.exp(-x)), lambda x, grad: grad * (1/(1+np.exp(-x))) * (1 - 1/(1+np.exp(-x))))
    ], real=True)
    
    print("✓ Fixed model created successfully")
    
    losses = fixed_model.fit(X_test, Y_test, epochs=100, lr=0.5, verbose=True)
    print(f"✓ Training completed! Final loss: {losses[-1]:.4f}")
    
    # Test predictions
    predictions = fixed_model.forward(X_test)
    print(f"✓ Predictions: {predictions.flatten().round(3)}")
    print(f"✓ Targets:     {Y_test.flatten()}")
    
    # Calculate accuracy
    accuracy = np.mean(np.abs(predictions.flatten() - Y_test.flatten()) < 0.3)
    print(f"✓ Accuracy (±0.3): {accuracy:.1%}")
    
except Exception as e:
    print(f"✗ Fixed model failed: {e}")
    import traceback
    traceback.print_exc()



Step 6: Creating Fixed Sequential Class
---------------------------------------------
Testing Fixed Sequential Class:
✓ Fixed model created successfully
Epoch 1/100, Loss: 0.3428
Epoch 11/100, Loss: 0.3210
Epoch 21/100, Loss: 0.3072
Epoch 31/100, Loss: 0.2965
Epoch 41/100, Loss: 0.2885
Epoch 51/100, Loss: 0.2823
Epoch 61/100, Loss: 0.2765
Epoch 71/100, Loss: 0.2703
Epoch 81/100, Loss: 0.2636
Epoch 91/100, Loss: 0.2575
Epoch 100/100, Loss: 0.2536
✓ Training completed! Final loss: 0.2536
✓ Predictions: [0.597 0.531 0.516 0.45 ]
✓ Targets:     [0. 1. 1. 0.]
✓ Accuracy (±0.3): 0.0%


In [None]:
# Step 7: Summary and Recommendations
print("\n\nStep 7: Summary and Recommendations")
print("-" * 45)

print("🔍 PROBLEM SUMMARY:")
print("=" * 50)
print("The CVNN library's Sequential.fit() function has a matrix dimension bug")
print("in the backward pass when handling activation function derivatives.")
print()

print("🐛 ROOT CAUSE:")
print("=" * 50)
print("1. Activation derivatives need PRE-activation values (z) as input")
print("2. Current implementation passes POST-activation values (previous layer's cached input)")
print("3. This causes dimension mismatches when layers have different input/output sizes")
print()

print("💡 THE FIX:")
print("=" * 50)
print("1. Cache PRE-activation values for each activation function during forward pass")
print("2. Use these cached PRE-activation values in the backward pass")
print("3. This ensures proper gradient computation and dimension matching")
print()

print("📋 WORKAROUNDS FOR NOW:")
print("=" * 50)
print("Option 1: Use the FixedSequential class from this notebook")
print("Option 2: Implement training manually with individual Dense layers")
print("Option 3: Use single-layer networks without activations")
print("Option 4: Wait for the library to be patched")
print()

print("🚀 NEXT STEPS:")
print("=" * 50)
print("1. Report this bug to the CVNN library maintainers")
print("2. Suggest the fix implemented in FixedSequential")
print("3. Use manual training loops or FixedSequential for now")
print()

# Test comparison: Original vs Fixed
print("📊 COMPARISON TEST:")
print("=" * 50)

# Simple test case
X_simple = np.array([[1.0, 0.0], [0.0, 1.0]], dtype=np.float64)
Y_simple = np.array([[1.0], [0.0]], dtype=np.float64)

print("Testing original Sequential (should fail):")
try:
    original_model = Sequential([
        Dense(input_dim=2, output_dim=2, real=True, complex=False),
        (np.tanh, lambda x, grad: grad * (1 - np.tanh(x)**2)),
        Dense(input_dim=2, output_dim=1, real=True, complex=False)
    ], real=True)
    original_losses = original_model.fit(X_simple, Y_simple, epochs=10, lr=0.1, verbose=False)
    print(f"✓ Original worked (simple case): Final loss {original_losses[-1]:.4f}")
except Exception as e:
    print(f"✗ Original failed: {type(e).__name__}")

print("\nTesting FixedSequential (should work):")
try:
    fixed_model = FixedSequential([
        Dense(input_dim=2, output_dim=2, real=True, complex=False),
        (np.tanh, lambda x, grad: grad * (1 - np.tanh(x)**2)),
        Dense(input_dim=2, output_dim=1, real=True, complex=False)
    ], real=True)
    fixed_losses = fixed_model.fit(X_simple, Y_simple, epochs=10, lr=0.1, verbose=False)
    print(f"✓ Fixed worked: Final loss {fixed_losses[-1]:.4f}")
except Exception as e:
    print(f"✗ Fixed failed: {type(e).__name__}: {e}")

print("\n" + "="*60)
print("🎯 CONCLUSION: Matrix dimension issues in CVNN Sequential.fit() are")
print("    caused by incorrect handling of activation derivatives.")
print("    Use FixedSequential class or manual training for reliable results.")
print("="*60)