# Floating-Point Non-Associativity (Advanced)

## Objective

Demonstrate that floating-point arithmetic is not associative:
- Order-dependent summation
- Reproducibility challenges
- Parallel reduction issues

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Fix the global NumPy RNG seed so every run draws the same random arrays.
np.random.seed(42)
# Default (width, height) for figures created later in the notebook.
plt.rcParams.update({"figure.figsize": (12, 6)})

## Non-Associativity Example

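In exact arithmetic, addition is associative: (a + b) + c = a + (b + c)

In floating-point arithmetic, every intermediate result is rounded to a representable value, so the two groupings can round differently and this identity can fail!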

In [None]:
# Demonstrate non-associativity.
# With IEEE 754 doubles, b is smaller than half the spacing between 1.0 and
# the next representable value (~1.11e-16), so adding it to 1.0 rounds it
# away, while adding it to -1.0 first lets part of it survive.
a = 1.0
b = 1e-16
c = -1.0

result1 = (a + b) + c  # a + b rounds back to 1.0, so b is lost entirely
result2 = a + (b + c)  # b + c rounds to the double just above -1.0

print(f"(a + b) + c = {result1}")
print(f"a + (b + c) = {result2}")
print(f"Difference: {result1 - result2}")
# Only claim a violation when the two groupings actually disagree.
if result1 != result2:
    print("\nAssociativity violated!")
else:
    print("\nNo difference observed for these values.")

## Order-Dependent Summation

In [None]:
# Sum the same values in four different orders and compare the results.
n = 10000
arr = np.random.randn(n)
arr_sorted = np.sort(arr)  # sort once; the reversed view gives descending order

sum_forward = np.sum(arr)
sum_backward = np.sum(arr[::-1])
sum_sorted_asc = np.sum(arr_sorted)
sum_sorted_desc = np.sum(arr_sorted[::-1])

# The largest gap between ANY two orderings is max - min over all four sums;
# comparing only forward/backward and asc/desc can miss the widest pair.
order_sums = [sum_forward, sum_backward, sum_sorted_asc, sum_sorted_desc]
max_difference = max(order_sums) - min(order_sums)

print("Summation Order Dependence")
print("=" * 50)
print(f"Forward:        {sum_forward:.16e}")
print(f"Backward:       {sum_backward:.16e}")
print(f"Sorted (asc):   {sum_sorted_asc:.16e}")
print(f"Sorted (desc):  {sum_sorted_desc:.16e}")
print(f"\nMax difference: {max_difference:.2e}")

## Reproducibility Challenge

Different execution orders (e.g., a parallel reduction that splits the data into a different number of chunks) can give different results!

In [None]:
# Simulate a parallel reduction by summing per-"thread" chunks separately
def parallel_sum_simulation(arr, n_threads):
    """Sum ``arr`` as a chunked reduction with ``n_threads`` partial sums.

    The array is split into ``n_threads`` contiguous chunks of
    ``len(arr) // n_threads`` elements each; the last chunk also takes any
    remainder. Each chunk is summed on its own and the partial sums are then
    summed, so changing ``n_threads`` changes how the additions are grouped.

    Args:
        arr: One-dimensional sequence or array of numbers to sum.
        n_threads: Number of chunks (simulated threads); must be >= 1.

    Returns:
        The sum of the per-chunk partial sums.

    Raises:
        ValueError: If ``n_threads`` is less than 1.
    """
    # Zero would divide by zero below; a negative count would skip every
    # chunk and silently return 0.0.
    if n_threads < 1:
        raise ValueError(f"n_threads must be at least 1, got {n_threads!r}")

    chunk_size = len(arr) // n_threads
    # Chunk i spans boundaries[i]:boundaries[i + 1]; the final boundary is
    # len(arr) so the last chunk absorbs the remainder.
    boundaries = [i * chunk_size for i in range(n_threads)] + [len(arr)]
    partial_sums = [
        np.sum(arr[start:end])
        for start, end in zip(boundaries, boundaries[1:])
    ]
    return np.sum(partial_sums)

# Sum one fixed random array with several simulated thread counts.
arr = np.random.randn(100000)

thread_counts = (1, 2, 4, 8, 16)
results = [parallel_sum_simulation(arr, count) for count in thread_counts]

for n_threads, result in zip(thread_counts, results):
    print(f"{n_threads:2d} threads: {result:.16e}")

# Spread between the largest and smallest of the computed sums.
print(f"\nRange of results: {max(results) - min(results):.2e}")

## Key Takeaways

1. **Floating-point is not associative**: (a+b)+c is not guaranteed to equal a+(b+c)
2. **Order matters**: Different orderings give different results
3. **Reproducibility**: Parallel execution can be non-deterministic
4. **Solution**: Use deterministic ordering or compensated summation (e.g., Kahan summation, or Python's `math.fsum` for an accurate sum)