In [None]:
import numpy as np

# 1. Problem sizes
n = 5000  # width: number of coordinates per vector (rows of X, W and y)
d = 500   # number of input vectors (columns of X)

# 2. Program vectors
# X stacks the d input vectors as columns; every entry is an i.i.d. N(0, 1) draw.
# In the Master Theorem's notation these are the vectors h^1 ... h^d.
X = np.random.standard_normal((n, d))

# W is the shared n x n weight matrix; entries are i.i.d. with std sqrt(1/n).
W_scaled_std = np.sqrt(1 / n)
W = np.random.normal(loc=0, scale=W_scaled_std, size=(n, n))

# y holds all outputs at once (n x d): column j is W applied to column j of X.
y = W @ X

# 3. Test functions psi(x, y) used to check Theorem 2.10.
# Each one reads only column 0 of its two arguments (the first input vector
# and the first output vector) and returns one value per row.
def _sin_cos_product(x_vec, y_vec):
    """Return sin(x) * cos(y) computed from column 0 of each argument."""
    return np.sin(x_vec[:, 0]) * np.cos(y_vec[:, 0])


def _relu_times_square(x_vec, y_vec):
    """Return max(0, x) * y**2 computed from column 0 of each argument."""
    return np.maximum(0, x_vec[:, 0]) * y_vec[:, 0] ** 2


functions = {
    "Sin/Cos Product": _sin_cos_product,
    "Non-linear Interaction": _relu_times_square,
}

print(f"--- NETSOR Master Theorem Verification (n={n}, d={d}) ---")

# 4. Monte Carlo estimate of the theoretical RHS
# Two independent standard-normal samples play the role of the Gaussian
# variables that the expectation on the right-hand side is taken over.
z_x = np.random.normal(0, 1, 1_000_000)
z_y = np.random.normal(0, 1, 1_000_000)

for name, psi in functions.items():
    # LHS: psi evaluated on the simulated matrices, averaged over the n rows.
    lhs = psi(X, y).mean()

    # RHS: the same psi averaged over the Gaussian samples. They are passed as
    # single-column matrices because psi indexes column 0 of its arguments.
    rhs = psi(z_x[:, None], z_y[:, None]).mean()

    # One print call; the trailing "\n" keeps the blank line between entries.
    report = (
        f"{name}:",
        f"  Empirical (LHS): {lhs:.5f}",
        f"  Theoretical (RHS): {rhs:.5f}",
        f"  Error: {abs(lhs - rhs):.5f}\n",
    )
    print("\n".join(report))

# --- APPLY RELU ---

# 5. Element-wise ReLU over the whole output matrix
# Per Remark 2.11, Z^h is uniquely defined as ReLU(Z^y).
h = np.maximum(0, y)

# Report what was activated and confirm the shape is unchanged.
for message in (
    f"ReLU applied to all {n}x{d} entries in the output matrix.",
    f"Shape of activated matrix h: {h.shape}",
):
    print(message)

--- NETSOR Master Theorem Verification (n=5000, d=500) ---
Sin/Cos Product:
  Empirical (LHS): 0.00462
  Theoretical (RHS): 0.00021
  Error: 0.00441

Non-linear Interaction:
  Empirical (LHS): 0.40469
  Theoretical (RHS): 0.39887
  Error: 0.00582

ReLU applied to all 5000x500 entries in the output matrix.
Shape of activated matrix h: (5000, 500)
