<a href="https://colab.research.google.com/github/LordRelentless/UniversalISA/blob/main/UniversalISARustRefactor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
THETA_PHIPI = 0.001  # phi-pi tolerance; default `theta` of the uniqueness checks
TAU_HI = 1.0         # upper threshold used by collapse detection
TAU_LOW = -TAU_HI    # lower threshold used by collapse detection (mirror of TAU_HI)
EPS = 1e-6           # small buffer around zero

# Thresholds for the advanced error-correction metrics: real stability (R),
# unreal stability (U) and real/unreal divergence (D). All three currently
# share the same (stricter, adjusted) value.
TAU_R_METRIC = TAU_U_METRIC = TAU_D_METRIC = 0.85

# Mask over indices 0..29: 1 at every prime index
# (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) and 0 everywhere else.
PRIME_MASK = tf.constant(
    [int(i in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)) for i in range(30)],
    dtype=tf.int32
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Add two phase-dual values component by component.

    The operands are combined with the plain ``+`` operator, so when the last
    axis holds (real, unreal) each component is summed independently:
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|
    """
    total = a + b
    return total

def mul_phase_dual_component_wise(a, b):
    """
    Multiply two phase-dual values component by component.

    The operands are combined with the plain ``*`` operator (element-wise, not
    a complex-style product), so when the last axis holds (real, unreal):
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|
    """
    product = a * b
    return product

def neg_phase_dual(a):
    """
    Negate a phase-dual value component by component.

    Unary minus is applied to the whole operand, so when the last axis holds
    (real, unreal) both components change sign.
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Return the conceptual Nth identity n^k as a [1, 2] float32 tensor.

    Args:
        order (int or str): Order of the identity. Only ``1`` has dedicated
            behaviour; every other value (0, 2, 'p', higher orders) currently
            yields the same placeholder identity [[1, 0]].
        selector_primary (tf.Tensor, optional): A 1x2 promoted primary (x, xi)
            from which n^1 is derived. Only consulted when ``order == 1``.
            Defaults to None.

    Returns:
        tf.Tensor: A 1x2 tensor representing the conceptual Nth identity.
    """
    if order != 1:
        # n^0 (base identity) and the placeholders for n^2 / n^k share one value.
        return tf.constant([[1.0, 0.0]], dtype=tf.float32)

    if selector_primary is None:
        # Default first-order selector: equal real/unreal weight, unit length.
        return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)

    # Scale the supplied primary towards unit length; EPS in the denominator
    # guards against division by zero for an all-zero selector.
    length = tf.norm(selector_primary, axis=-1, keepdims=True)
    unit_selector = selector_primary / (length + EPS)
    return tf.reshape(unit_selector, [1, 2])

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Build the 30-entry phase-dual pair register from the 6 primaries.

    The register starts with the 6 primaries (x, xi, y, yi, z, zi) and is
    followed by 24 combinatorials: for each of 12 cross-axis operand pairs the
    component-wise sum and then the component-wise product.

    Args:
        prim (tf.Tensor): Input primaries of shape [Q, 6, 2] and dtype tf.float32.
                          The last dimension holds [real, unreal] components.

    Returns:
        tf.Tensor: The pair register of shape [Q, 30, 2] and dtype tf.float32.
    """
    assert (
        prim.shape.rank == 3
        and (tf.shape(prim)[-2] == 6).numpy().item()
        and (tf.shape(prim)[-1] == 2).numpy().item()
        and (prim.dtype == tf.float32)
    ), f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Each primary is a [Q, 2] tensor.
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)

    # Canonical operand order; the position of every register entry depends on it.
    operand_pairs = [
        (x, y), (x, yi), (xi, y), (xi, yi),
        (x, z), (x, zi), (xi, z), (xi, zi),
        (y, z), (y, zi), (yi, z), (yi, zi),
    ]

    entries = [x, xi, y, yi, z, zi]
    for left, right in operand_pairs:
        entries.append(add_phase_dual(left, right))
        entries.append(mul_phase_dual_component_wise(left, right))

    # Stack the 30 [Q, 2] entries along a new second-to-last axis.
    return tf.stack(entries, axis=-2)

def group_triplets(pairs):
    """
    Group the 30-entry pair register into 10 consecutive triplets.

    Triplet k holds register indices 3k, 3k+1 and 3k+2. These are the
    'Nth Lines' in the context of the ISA.

    Args:
        pairs (tf.Tensor): The pair register of shape [Q, 30, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table [[0,1,2],[3,4,5],...,[27,28,29]], shape [10, 3].
    triplet_indices = tf.reshape(tf.range(30, dtype=tf.int32), [10, 3])

    # Gathering along axis 1 replaces the 30-axis with [10, 3]; the trailing
    # (real, unreal) axis is carried through unchanged.
    return tf.gather(pairs, triplet_indices, axis=1)

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW):
    """
    Detects collapse across the 10 triplets of the phase-dual pair register.
    COLL(x, χ) operation.

    Triplet k covers register indices 3k..3k+2. A triplet collapses when, for
    the real component OR for the unreal component, at least one of its three
    values is >= tau_hi and at least one is <= tau_low (high and low values
    coexist). When a triplet collapses, all 3 of its indices are marked.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold, applied to both the real and the unreal component.
        tau_low (float): Low threshold, applied to both components (should be negative).

    Returns:
        tf.Tensor: A binary collapse mask of shape [Q, 30] and dtype tf.int32.
                   (collapse is a per-unit binary flag, not phase-dual itself).
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    num_qubits = tf.shape(pairs)[0]

    # The triplets are contiguous runs of three indices, so grouping them is a
    # plain row-major reshape of the 30-axis into (10, 3).
    blocks = tf.reshape(pairs, [num_qubits, 10, 3, 2])  # [Q, 10, 3, 2]

    # Per triplet and per component: does any of the 3 values cross a threshold?
    has_high = tf.reduce_any(blocks >= tau_hi, axis=2)  # [Q, 10, 2]
    has_low = tf.reduce_any(blocks <= tau_low, axis=2)  # [Q, 10, 2]

    # High/low coexistence is required within a single component; a triplet
    # collapses if that holds for the real OR the unreal component.
    triplet_collapsed = tf.reduce_any(tf.logical_and(has_high, has_low), axis=-1)  # [Q, 10]
    triplet_flags = tf.cast(triplet_collapsed, tf.int32)

    # Expand each triplet flag back onto its three register indices.
    collapse_mask = tf.repeat(triplet_flags, repeats=3, axis=1)  # [Q, 30]
    return tf.reshape(collapse_mask, [num_qubits, 30])

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    Flip the sign of every register entry that sits at a prime index or inside
    a collapsed block. PAR(x, π) operation.

    The flip is a half-rotation applied to both the real and the unreal
    component of the affected entries.

    Args:
        pairs (tf.Tensor): The pair register of shape [Q, 30, 2] and dtype tf.float32.
        collapse_mask (tf.Tensor): The collapse mask of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): 0/1 mask of prime indices, shape [30] and dtype tf.int32.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): The rotated pair register, shape [Q, 30, 2], dtype tf.float32.
            - affected (tf.Tensor): 0/1 mask of flipped indices, shape [Q, 30], dtype tf.int32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # Lift the shared [30] prime mask to the per-qubit [Q, 30] layout.
    at_prime_index = tf.broadcast_to(prime_mask, tf.shape(collapse_mask)) > 0
    in_collapsed_block = collapse_mask > 0

    flip = tf.logical_or(at_prime_index, in_collapsed_block)  # [Q, 30] bool
    affected = tf.cast(flip, tf.int32)

    # -1 where flipped, +1 elsewhere; the extra trailing axis lets the factor
    # broadcast over the (real, unreal) components.
    sign = tf.where(flip, tf.constant(-1.0, dtype=tf.float32), tf.constant(1.0, dtype=tf.float32))
    rotated = pairs * sign[..., tf.newaxis]
    return rotated, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Reduce the phase-dual pair register to one bit per entry.

    Only the real (leading) component is inspected: the bit is 1 when it is
    strictly greater than ``eps`` and 0 otherwise, so negative and near-zero
    values map to 0. The unreal component does not influence the result.

    Args:
        rotated_pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        eps (float): Near-zero buffer for tie-breaking.

    Returns:
        tf.Tensor: A binary bitmap of shape [Q, 30] and dtype tf.int32.
    """
    assert (
        rotated_pairs.shape.rank == 3
        and (tf.shape(rotated_pairs)[-2] == 30).numpy().item()
        and (tf.shape(rotated_pairs)[-1] == 2).numpy().item()
        and (rotated_pairs.dtype == tf.float32)
    ), f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    leading = rotated_pairs[:, :, 0]  # real component, [Q, 30]
    return tf.cast(tf.greater(leading, eps), tf.int32)

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flag which phase-dual values are farther than ``theta`` from every observed
    value on an axis.

    Distance is the Euclidean norm of the (real, unreal) difference. A value
    counts as unique only if its distance to ALL K observed values exceeds
    ``theta``.

    Args:
        vals (tf.Tensor): Values to test, shape [Q, 2] (one per qubit) or
            [Q, 10, 2] (ten candidates per qubit).
        axis_vals (tf.Tensor): Observed values along the axis, shape [Q, K, 2].
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: int32 0/1 flags of shape [Q] or [Q, 10] respectively.

    Raises:
        ValueError: If ``vals`` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item(), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (tf.shape(vals)[0] == tf.shape(axis_vals)[0]).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    vals_rank = vals.shape.rank
    if vals_rank not in (2, 3):
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    if vals_rank == 2:
        # [Q, 1, 2] against [Q, K, 2] -> [Q, K, 2]
        deltas = tf.abs(vals[:, tf.newaxis, :] - axis_vals)
    else:
        # [Q, 10, 1, 2] against [Q, 1, K, 2] -> [Q, 10, K, 2]
        deltas = tf.abs(vals[:, :, tf.newaxis, :] - axis_vals[:, tf.newaxis, :, :])

    # Collapse the (real, unreal) axis into a distance: [Q, K] or [Q, 10, K].
    distances = tf.norm(deltas, axis=-1)

    # Every one of the K distances has to clear the tolerance.
    is_unique = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(is_unique, tf.int32)

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Pick, per qubit, the first candidate whose uniqueness flag is set.

    Args:
        cand_bool (tf.Tensor): int32 0/1 uniqueness flags of shape [Q, 10].
        vals (tf.Tensor): Candidate phase-dual values, shape [Q, 10, 2].

    Returns:
        tf.Tensor: The selected phase-dual value per qubit, shape [Q, 2].
        If a row has no flag set, candidate 0 is returned for that row.
    """
    assert (
        cand_bool.shape.rank == 2
        and (tf.shape(cand_bool)[-1] == 10).numpy().item()
        and (cand_bool.dtype == tf.int32)
    ), f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert (
        vals.shape.rank == 3
        and (tf.shape(vals)[-2] == 10).numpy().item()
        and (tf.shape(vals)[-1] == 2).numpy().item()
        and (vals.dtype == tf.float32)
    ), f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    # For 0/1 flags argmax gives the position of the first 1 (ties resolve to
    # the smallest index), and 0 when the whole row is 0.
    first_flagged = tf.argmax(cand_bool, axis=1)  # [Q]

    # Treat axis 0 as a batch axis: row q contributes vals[q, first_flagged[q], :].
    return tf.gather(vals, first_flagged, batch_dims=1)  # [Q, 2]

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Promote a new set of primaries from the triplets. Implements ASSOC(A, B, α) logic.

    Two candidate primary sets are built per qubit:
      * triplet-first: the three components of the LAST triplet;
      * axis-fallback: for each axis, the first of the 10 candidates that is
        unique against that axis' observed values.
    The triplet-first set is used when all three components of the last
    triplet are unique on their respective axes; otherwise the fallback is used.
    Each chosen value is paired with its negation, giving 6 primaries.

    Args:
        triplets (tf.Tensor): 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
        axis_maps (dict): Keys 'x', 'y', 'z'; each value is a tf.Tensor of observed
                          values for that axis, shape [Q, K, 2] and dtype tf.float32.
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
    """
    assert (
        triplets.shape.rank == 4
        and (tf.shape(triplets)[-3] == 10).numpy().item()
        and (tf.shape(triplets)[-2] == 3).numpy().item()
        and (tf.shape(triplets)[-1] == 2).numpy().item()
    ), f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert triplets.dtype == tf.float32, \
        f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for k, v in axis_maps.items():
        assert isinstance(v, tf.Tensor) and v.dtype == tf.float32 and v.shape.rank == 3 and (tf.shape(v)[-1] == 2).numpy().item(), \
            f"axis_maps['{k}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {v.shape} and dtype {v.dtype}"
    assert (tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')

    def _with_conjugates(px, py, pz):
        # Interleave each value with its negation: [Q, 6, 2].
        return tf.stack(
            [px, neg_phase_dual(px), py, neg_phase_dual(py), pz, neg_phase_dual(pz)],
            axis=1,
        )

    # --- Triplet-first: components of the last triplet, each [Q, 2] ---
    last_triplet = triplets[:, -1, :, :]
    final_components = [last_triplet[:, pos, :] for pos in range(3)]
    final_flags = [
        _value_unique_axis_phase_dual(component, axis_maps[name], theta)
        for component, name in zip(final_components, axis_names)
    ]
    # The triplet qualifies only if all three components are unique.
    all_unique = tf.reduce_all(tf.stack(final_flags, axis=0) > 0, axis=0)  # [Q]
    triplet_unique = tf.cast(all_unique, tf.int32)
    prim_trip = _with_conjugates(*final_components)

    # --- Axis-fallback: first unique candidate per axis ---
    fallback_components = []
    for pos, name in enumerate(axis_names):
        candidates = triplets[:, :, pos, :]  # [Q, 10, 2]
        flags = _value_unique_axis_phase_dual(candidates, axis_maps[name], theta)  # [Q, 10]
        fallback_components.append(_first_unique_selection_phase_dual(flags, candidates))
    prim_axis = _with_conjugates(*fallback_components)

    # Per-qubit choice, reshaped to [Q, 1, 1] so it broadcasts over [Q, 6, 2].
    use_triplet = triplet_unique[:, tf.newaxis, tf.newaxis] > 0
    return tf.where(use_triplet, prim_trip, prim_axis)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Produce one SHA256 resonance key per batch sample.

    For every sample a text manifest is assembled from its bitmap row, the
    (shared) prime mask, its collapse-mask row and its parity-mask row; a
    non-empty lineage string, when supplied, is appended as a ``|path:`` field.
    The manifest is UTF-8 encoded and hashed in plain Python/NumPy, so the
    tensors are materialized with ``.numpy()`` first.

    Args:
        bits (tf.Tensor): Bitmap of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask of shape [30] and dtype tf.int32 (global constant).
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask of shape [Q, 30] and dtype tf.int32.
        lineage_list (list[str], optional): One lineage string per batch sample. Defaults to None.

    Returns:
        list[str]: SHA256 hex digests, one per batch sample, in batch order.
    """
    assert (
        bits.shape.rank == 2
        and (tf.shape(bits)[-1] == 30).numpy().item()
        and (bits.dtype == tf.int32)
    ), f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        parity_mask.shape.rank == 2
        and (tf.shape(parity_mask)[-1] == 30).numpy().item()
        and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (parity_mask.dtype == tf.int32)
    ), f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (
        (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item())
        and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item())
    ), f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert isinstance(lineage_list, list) and len(lineage_list) == tf.shape(bits)[0].numpy().item(), \
            f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    Q = tf.shape(bits)[0].numpy().item()  # batch size (number of qubits)

    # Materialize everything once; hashing below is pure Python/NumPy.
    bits_np = bits.numpy()
    prime_mask_np = prime_mask.numpy()
    collapse_np = collapse_mask.numpy()
    parity_np = parity_mask.numpy()

    # Give the shared prime mask a per-sample row so it indexes like the others.
    prime_mask_broadcasted = np.broadcast_to(prime_mask_np, (Q, 30))

    def _key_for(q_idx):
        # Manifest layout is part of the key: changing it changes every hash.
        lineage_manifest = f"bits:{bits_np[q_idx].tolist()}|prime:{prime_mask_broadcasted[q_idx].tolist()}|collapse:{collapse_np[q_idx].tolist()}|parity:{parity_np[q_idx].tolist()}"
        if lineage_list and lineage_list[q_idx]:
            lineage_manifest += f"|path:{lineage_list[q_idx]}"
        return hashlib.sha256(lineage_manifest.encode("utf-8")).hexdigest()

    return [_key_for(q_idx) for q_idx in range(Q)]

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    NGFT-inspired Info-energy per qubit: E_info = (k+1) · a_U · I.

    ``I`` is the sum, over the 6 primaries of a qubit, of the Euclidean norm of
    each (real, unreal) pair.

    Args:
        primaries_out (tf.Tensor): Promoted primaries of shape [Q, 6, 2] (phase-dual) and dtype tf.float32.
        k_values (tf.Tensor): Batch-wise 'k' components, shape [Q, 1] or [Q], dtype tf.float32.
        a_U_constant (tf.Tensor): A universal constant, scalar tf.float32.

    Returns:
        tf.Tensor: Computed Info-energy for each qubit, shape [Q] and dtype tf.float32.
    """
    assert primaries_out.shape.rank == 3 and (tf.shape(primaries_out)[-1] == 2).numpy().item(), \
        f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert (primaries_out.dtype == tf.float32), f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert (tf.shape(primaries_out)[-2] == 6).numpy().item(), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert (k_values.dtype == tf.float32), f"k_values must have dtype tf.float32, but got {k_values.dtype}"
    assert ( (tf.rank(k_values) == 2).numpy().item() and (tf.shape(k_values)[-1] == 1).numpy().item() ) or \
           ( (tf.rank(k_values) == 1).numpy().item() and (tf.shape(k_values)[0] == tf.shape(primaries_out)[0]).numpy().item() ), \
           f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"
    assert (a_U_constant.dtype == tf.float32), f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert (tf.rank(a_U_constant) == 0).numpy().item(), f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # Bring k into column form [Q, 1] regardless of which accepted shape came in.
    k_is_flat = (tf.rank(k_values) == 1).numpy().item()
    k_column = tf.expand_dims(k_values, axis=-1) if k_is_flat else k_values

    # I: per-primary magnitude [Q, 6], summed over the 6 primaries and kept as [Q, 1].
    primary_norms = tf.norm(primaries_out, axis=-1)
    info_column = tf.reduce_sum(primary_norms, axis=1, keepdims=True)

    energy_column = (k_column + 1.0) * info_column * a_U_constant  # [Q, 1]

    # Drop the helper column axis so callers get a flat [Q] vector.
    return tf.squeeze(energy_column, axis=1)

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL function: curvilinear transformation.
    X ← X / (1 + |kappa|·|X|)

    |X| is the Euclidean norm over the last (real, unreal) axis, so both
    components of a unit are divided by the same factor.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): Scalar or broadcastable tensor for kappa; cast
            to the dtype of ``primaries``.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    kappa_abs = tf.abs(tf.cast(params_kappa, primaries.dtype))
    # keepdims retains a trailing axis of size 1 so the norm broadcasts over it.
    unit_norms = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    denominator = 1.0 + kappa_abs * unit_norms
    return primaries / denominator

def GEOD(primaries, params_t):
    """
    NECL function: geodesic transformation.
    X ← X + t·sign(X)

    Every element is shifted by ``t`` in the direction of its own sign;
    elements that are exactly zero stay put because sign(0) is 0.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): Scalar or broadcastable tensor for 't'; cast to
            the dtype of ``primaries``.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    step = tf.cast(params_t, primaries.dtype) * tf.sign(primaries)
    return primaries + step

def TWIST(primaries, params_theta):
    """
    NECL function: twist of the unreal component.
    X[...,1] ← X[...,1]·cos(theta)

    The real component (index 0 of the last axis) passes through unchanged.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): 'theta' angle; cast to the dtype of
            ``primaries``. It multiplies the unreal slice, so it must be a
            scalar or broadcast against that slice (all axes but the last).
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    real_part = primaries[..., 0]
    unreal_part = primaries[..., 1]
    damping = tf.cos(tf.cast(params_theta, primaries.dtype))
    # Re-assemble the (real, unreal) pair on a new trailing axis.
    return tf.stack([real_part, unreal_part * damping], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL op LIFT: stands in for a projection to higher coordinates.

    This emulation only applies a uniform gain of (1 + 0.1·d) to every component;
    no real change of dimension takes place.

    Args:
        primaries (tf.Tensor): Input primaries; the pipeline uses shape [Q, 6, 2].
        params_d: Lift parameter 'd', cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Scaled primaries, same shape as the input.
    """
    d_value = tf.cast(params_d, primaries.dtype)
    gain = 1.0 + d_value * 0.1
    return primaries * gain

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL op GLUE: blends each unit with its cyclic predecessor on axis 1.
    X ← X + sigma·roll(X, +1, axis=1)

    Args:
        primaries (tf.Tensor): Input primaries; the pipeline uses shape [Q, 6, 2].
        params_sigma: Gluing strength, cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Glued primaries, same shape as the input.
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    # shift=1 on axis 1 moves unit k-1 into slot k (the last unit wraps to slot 0).
    neighbour = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * neighbour

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL op SPLIT: divides every unit into two weighted copies.
    X ← concat(X·(1−tau), X·tau) along axis 1

    Args:
        primaries (tf.Tensor): Input primaries; the pipeline uses shape [Q, 6, 2].
        params_tau: Split ratio, cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Tensor whose axis-1 length is doubled ([Q, 12, 2] for a [Q, 6, 2] input).
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    retained = primaries * (1.0 - ratio)
    split_off = primaries * ratio
    # The retained share comes first, followed by the split-off share.
    return tf.concat([retained, split_off], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically maps one qubit's lineage hash to a conceptual spin vector and I_vec.

    The hash string and the qubit index are hashed together into a seed for a
    *private* pseudo-random generator; all values are drawn from that generator.
    The process-wide NumPy generator is left untouched.

    Args:
        hex_hash_str (str): 64-character hex string (e.g. a SHA-256 digest) for one qubit.
        q_idx (int): Index of the qubit; mixed into the seed.
        D (int): Length of i_vec; must be at least 16.
        num_qubits (int): Currently unused by this implementation.
        invariants (dict): Currently unused by this implementation.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec: float32 tensor of shape [1, 2, 3]; row 0 is the real spin,
              row 1 the unreal spin.
            - i_vec: float32 tensor of shape [1, D], scaled to unit Euclidean norm
              unless its norm is not above EPS.

    Raises:
        AssertionError: If the hash is not a 64-character string or D < 16.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Seed from hash + qubit index so every qubit gets its own reproducible stream.
    seed_value = int(hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()[:16], 16)
    # A local RandomState yields the same sequence as np.random.seed(...) followed by
    # np.random.rand(...), but does not reseed the global generator that other code
    # in the notebook draws from.
    rng = np.random.RandomState(seed_value % (2**32 - 1))

    # θ = 2π·r0, φ = 2π·r1, twist = 2π·r2
    r_vals = rng.rand(3)
    theta = 2 * math.pi * r_vals[0]
    phi = 2 * math.pi * r_vals[1]
    twist_angle = 2 * math.pi * r_vals[2]

    # Real spin: (x, y, z) = (sinθ cosφ, sinθ sinφ, cosθ)
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: rotate the real (x, y) about the z axis by the twist angle;
    # z is unaffected by a rotation about z.
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # I_vec: D further draws from the same stream, normalised to unit length.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:  # leave a (near-)zero vector as is to avoid dividing by ~0
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): rescales each qubit so its unit magnitudes sum to roughly `units`.

    For every qubit the Euclidean norms of its primary units are summed; all
    components are divided by that sum (plus EPS) and multiplied by
    invariants['units'] (default 1.0). Qubits whose summed magnitude is not
    above EPS are mapped to zeros.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        invariants (dict): Invariant constants; only 'units' is read here.
    Returns:
        tf.Tensor: Normalized primaries of shape [Q, 6, 2].
    """
    unit_norms = tf.norm(primaries, axis=-1, keepdims=True)            # [Q, 6, 1]
    qubit_totals = tf.reduce_sum(unit_norms, axis=1, keepdims=True)    # [Q, 1, 1]

    unit_scale = invariants.get('units', 1.0)
    # Per-qubit gain: the unit scale for live qubits, 0 for (near-)empty ones.
    gain = tf.where(qubit_totals > EPS, tf.cast(unit_scale, primaries.dtype), 0.0)

    # EPS in the divisor keeps the division finite even for all-zero qubits.
    return primaries / (qubit_totals + EPS) * gain

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π): multi-qubit wrapper around apply_parity_rotation.

    Builds the pair table from the primaries, detects collapse on it, applies the
    parity rotation, and returns the first six rows of the rotated table.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask [30].
    Returns:
        tf.Tensor: Primaries after parity rotation, shape [Q, 6, 2].
    """
    pair_table = compute_pairs(primaries)
    collapsed = detect_collapse(pair_table)
    rotated_table, _parity_mask = apply_parity_rotation(pair_table, collapsed, prime_mask)
    # Assumes compute_pairs stores the six primaries in rows 0..5 of its
    # [Q, 30, 2] result — TODO confirm against compute_pairs.
    return rotated_table[:, 0:6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ): multi-qubit wrapper around detect_collapse.

    Only the primary units flagged by the collapse mask are zeroed (both real and
    unreal component); every other unit passes through unchanged.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
    Returns:
        tf.Tensor: Primaries with collapsed units set to [0.0, 0.0], shape [Q, 6, 2].
    """
    collapse_mask = detect_collapse(compute_pairs(primaries))  # [Q, 30]

    # Keep the first six mask columns (one per primary unit) and add a trailing
    # axis so one flag covers both components of its unit: [Q, 6] -> [Q, 6, 1].
    unit_flags = tf.cast(collapse_mask[:, 0:6][..., tf.newaxis], tf.float32)

    return tf.where(unit_flags > 0, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit entry point that forwards to promote_primaries.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2].
        axis_maps (dict): Axis maps, passed through unchanged.
        theta_phipi (float): Tolerance, passed through unchanged.
    Returns:
        tf.Tensor: Whatever promote_primaries returns (documented as [Q, 6, 2]).
    """
    promoted = promote_primaries(triplets, axis_maps, theta_phipi)
    return promoted

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Applies a sequence of NECL operations to multi-qubit primaries (eager mode).

    Every op name is appended to a textual manifest; ops that take a parameter
    also append "(value)". The SHA-256 of that manifest is returned as checksum.
    Unknown op names are recorded in the manifest, reported on stdout, and
    otherwise ignored.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): NECL operation names, applied in order.
        params_dict (dict | None): Maps op names to their scalar parameter. Values
            may be tensors or plain Python/NumPy numbers; missing entries fall
            back to the built-in defaults. None is treated as an empty dict.
        prime_mask (tf.Tensor): Global prime mask needed for PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Accepted for interface
            compatibility; the simplified GEOD used here does not read it.

    Returns:
        tf.Tensor: Final primaries after applying the NECL program.
        str: Hex SHA-256 checksum of the applied program manifest.
    """
    if params_dict is None:
        params_dict = {}

    def _param(name, default):
        # Normalise to a tensor so that plain Python numbers work as well as
        # tensors; tensors pass through unchanged, which keeps the manifest
        # (and therefore the checksum) identical for tensor parameters.
        return tf.convert_to_tensor(params_dict.get(name, tf.constant(default, dtype=tf.float32)))

    def _tag(op_params):
        # Manifest fragment recording the parameter value an op was run with.
        return f"({op_params.numpy().item()})"

    # Ops of the form X <- op(X, scalar): (function, default parameter).
    single_param_ops = {
        'CURV': (CURV, 0.01),            # kappa
        'GEOD': (GEOD, 0.05),            # t
        'TWIST': (TWIST, math.pi / 4),   # theta in radians
        'LIFT': (LIFT, 0.5),             # d
    }

    current_primaries = primaries
    Q = tf.shape(primaries)[0].numpy().item()

    # Collect manifest fragments and join once at the end.
    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name in single_param_ops:
            op_fn, default = single_param_ops[op_name]
            op_params = _param(op_name, default)
            current_primaries = op_fn(current_primaries, op_params)
            manifest_parts.append(_tag(op_params))
        elif op_name == 'GLUE':
            op_params = _param('GLUE', 0.1)  # sigma
            if Q % 2 != 0:
                print(f"Warning: GLUE operation skipped for odd Q ({Q})")
            else:
                # NOTE(review): GLUE documents a scalar sigma, but here it receives
                # the qubit-rolled primaries scaled by sigma, i.e. the result is
                # X + (sigma·roll_Q(X))·roll_k(X). Kept as is — confirm intent.
                current_primaries = GLUE(current_primaries, tf.roll(current_primaries, shift=1, axis=0) * op_params)
            manifest_parts.append(_tag(op_params))
        elif op_name == 'SPLIT':
            # SPLIT would double axis 1; the main pipeline keeps K constant, so the
            # op is only recorded in the manifest and the primaries are untouched.
            op_params = _param('SPLIT', 0.5)  # tau
            manifest_parts.append(_tag(op_params))
        elif op_name == 'PARITY_Q':
            current_primaries = PARITY_Q(current_primaries, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            current_primaries = COLLAPSE_Q(current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    necl_checksum = hashlib.sha256("".join(manifest_parts).encode('utf-8')).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Real stability score: 1 - variance(real_parts) / (max - min + EPS).

    Higher means more stable. When all values are effectively equal
    (max - min < EPS) the score is exactly 1.

    Args:
        real_parts (tf.Tensor): Real components to score (eager tensor).
    Returns:
        tf.Tensor: Scalar score. A tensor is returned on every path so callers
        can uniformly call `.numpy()` on the result.
    """
    max_val = tf.reduce_max(real_parts)
    min_val = tf.reduce_min(real_parts)
    value_range = max_val - min_val
    if value_range < EPS:  # all values effectively identical -> maximal stability
        # Previously a bare Python 1.0 was returned here, which broke callers
        # that call `.numpy()` on the score.
        return tf.ones_like(value_range)

    return 1.0 - (tf.math.reduce_variance(real_parts) / (value_range + EPS))

def u_metric(unreal_parts):
    """
    Unreal stability score: 1 - variance(unreal_parts) / (max - min + EPS).

    Higher means more stable. When all values are effectively equal
    (max - min < EPS) the score is exactly 1.

    Args:
        unreal_parts (tf.Tensor): Unreal components to score (eager tensor).
    Returns:
        tf.Tensor: Scalar score. A tensor is returned on every path so callers
        can uniformly call `.numpy()` on the result.
    """
    max_val = tf.reduce_max(unreal_parts)
    min_val = tf.reduce_min(unreal_parts)
    value_range = max_val - min_val
    if value_range < EPS:  # all values effectively identical -> maximal stability
        # Previously a bare Python 1.0 was returned here, which broke callers
        # that call `.numpy()` on the score.
        return tf.ones_like(value_range)

    return 1.0 - (tf.math.reduce_variance(unreal_parts) / (value_range + EPS))

def dv_metric(pairs_q):
    """
    Real/unreal consistency score: 1 - mean(|real - unreal| / |pair|).

    The per-pair divergence is the absolute difference between index 0 and
    index 1 of the last axis, relative to the pair's Euclidean norm. Pairs whose
    norm is not above EPS contribute a divergence of 0.
    Higher score means lower divergence.

    Args:
        pairs_q (tf.Tensor): Pairs with (real, unreal) on the last axis.
    Returns:
        tf.Tensor: Scalar score.
    """
    pair_norm = tf.norm(pairs_q, axis=-1)
    gap = tf.abs(pairs_q[..., 0] - pairs_q[..., 1])

    # EPS in the divisor keeps the unselected branch finite for tiny norms.
    relative_gap = tf.where(pair_norm > EPS, gap / (pair_norm + EPS), tf.zeros_like(pair_norm))
    return 1.0 - tf.reduce_mean(relative_gap)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant: the first six pair magnitudes must sum to `units`.

    Args:
        pairs_q (tf.Tensor): Pairs for one qubit; only rows 0..5 are inspected.
        triplets_q: Not used by this check.
        invariants (dict): Reads 'units' (default 1.0) and 'tol' (default EPS).
    Returns:
        tf.Tensor: Scalar bool, True when |sum - units| < tol.
    """
    magnitude_sum = tf.reduce_sum(tf.norm(pairs_q[:6, :], axis=-1))
    target = invariants.get('units', 1.0)
    deviation = tf.abs(magnitude_sum - target)
    return deviation < invariants.get('tol', EPS)

def degenerate_check(primaries_q):
    """
    Reports whether every primary unit is (near) zero.

    Args:
        primaries_q (tf.Tensor): Primaries with (real, unreal) on the last axis.
    Returns:
        tf.Tensor: Scalar bool, True only if each unit's Euclidean norm is below EPS.
    """
    unit_norms = tf.norm(primaries_q, axis=-1)
    is_tiny = unit_norms < EPS
    return tf.reduce_all(is_tiny)

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derives a corrected 30-bit pattern for one qubit from its pairs (eager mode).

    Rule per index i:
        b_i = 1 if r_i > TAU_R and u_i > TAU_U and dv_i > TAU_D and trip_mix_i > 0
              and the invariant holds and the primaries are not degenerate, else 0.
    Guard 1 re-derives the bits once with adjusted thresholds when the pattern's
    binary entropy is not above 0.3. Guard 2 replaces an all-ones or all-zeros
    pattern with a fallback (|real| > EPS and trip_mix > 0).

    NOTE(review): if the last axis of pairs_q holds exactly (real, unreal), then
    r_i**2 + u_i**2 == 1, so r_i and u_i cannot both exceed ~0.707; with higher
    thresholds the rule yields no bits and the fallback decides — confirm intent.

    Args:
        pairs_q (tf.Tensor): Pairs for one qubit, [30, 2].
        triplets_q: Only forwarded to invariant_check_conceptual.
        invariants (dict): Forwarded to invariant_check_conceptual.
        initial_TAU_R, initial_TAU_U, initial_TAU_D: Starting thresholds.
    Returns:
        tuple: (bits as int32 tensor [30], TAU_R, TAU_U, TAU_D actually in effect).
    """
    tau_r, tau_u, tau_d = initial_TAU_R, initial_TAU_U, initial_TAU_D

    real = pairs_q[:, 0]     # [30]
    unreal = pairs_q[:, 1]   # [30]
    mag = tf.norm(pairs_q, axis=-1)
    has_magnitude = mag > EPS
    zeros = tf.zeros_like(mag)

    # Per-index ratios relative to the pair magnitude (0 where the pair is ~zero).
    r_i = tf.where(has_magnitude, tf.abs(real) / mag, zeros)
    u_i = tf.where(has_magnitude, tf.abs(unreal) / mag, zeros)
    dv_i = tf.where(has_magnitude, tf.abs(real - unreal) / mag, zeros)

    # Triplet diversity: a block of 3 consecutive indices is "mixed" when any sign
    # of its real parts differs from the block's first sign. The flag is shared by
    # all three indices of the block.
    sign_blocks = tf.reshape(tf.sign(real)[:30], [10, 3])
    block_is_mixed = tf.reduce_any(tf.not_equal(sign_blocks, sign_blocks[:, :1]), axis=1)
    trip_mix = tf.repeat(tf.cast(block_is_mixed, tf.int32), repeats=3)  # [30]

    # Qubit-wide guards, evaluated once.
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))

    def bits_for(thr_r, thr_u, thr_d):
        # Evaluate the per-index rule for one set of thresholds.
        passes = (r_i > thr_r) & (u_i > thr_u) & (dv_i > thr_d) & (trip_mix > 0) & invariant_ok & not_degenerate
        return tf.cast(passes, tf.int32)

    b = bits_for(tau_r, tau_u, tau_d)

    # Guard 1: binary entropy of the bit pattern (EPS keeps log finite at p = 0 or 1).
    p = tf.reduce_mean(tf.cast(b, tf.float32))
    entropy = - (p * tf.math.log(p + EPS) + (1.0 - p) * tf.math.log(1.0 - p + EPS))
    if not (entropy > 0.3):
        tau_r *= 1.2
        tau_u *= 1.2
        tau_d = max(tau_d * 0.9, 0.25)
        b = bits_for(tau_r, tau_u, tau_d)

    # Guard 2: a uniform pattern is replaced by the fallback rule.
    all_set = tf.reduce_all(tf.equal(b, 1))
    all_clear = tf.reduce_all(tf.equal(b, 0))
    if all_set or all_clear:
        b = tf.cast((tf.abs(real) > EPS) & (trip_mix > 0), tf.int32)

    return b, tau_r, tau_u, tau_d

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Error-correction hook: re-derives a qubit's bits when they look inconsistent.

    The bits count as inconsistent when they are all ones, all zeros, contain
    fewer than 5 ones, or contain more than 25 ones. In that case new bits come
    from derive_bits_advanced, the resonance key is re-hashed together with the
    new bits, and one entry describing the correction is appended to TRACE.
    Otherwise bits and key are returned unchanged and TRACE is not touched.

    Args:
        q_idx (int): Qubit index, stored in the trace entry.
        pairs_q (tf.Tensor): Pairs for this qubit, [30, 2].
        triplets_q (tf.Tensor): Triplets for this qubit.
        current_bits_q (tf.Tensor): Current bits for this qubit.
        resonance_key_q (str): Current key; extended into the new key on correction.
        TRACE (list): Lineage log; mutated in place (appended to) on correction.
        invariants (dict): Forwarded to the invariant check / bit derivation.
    Returns:
        tuple: (bits tensor, resonance key string) — corrected or unchanged.
    """
    def _as_python(value):
        # Metric helpers may return an eager tensor or a plain Python scalar;
        # accept both so trace logging cannot fail on the scalar case.
        return value.numpy().item() if hasattr(value, "numpy") else value

    num_ones = tf.reduce_sum(current_bits_q)
    is_all_ones = tf.reduce_all(tf.equal(current_bits_q, 1))
    is_all_zeros = tf.reduce_all(tf.equal(current_bits_q, 0))
    is_sparse = num_ones < 5   # fewer than 5 bits set
    is_dense = num_ones > 25   # more than 25 bits set

    is_inconsistent = _as_python(
        tf.logical_or(tf.logical_or(is_all_ones, is_all_zeros), tf.logical_or(is_sparse, is_dense))
    )

    if not is_inconsistent:
        return current_bits_q, resonance_key_q

    # Re-derive the bits; the thresholds that were actually in effect are logged.
    corrected_bits, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC
    )
    new_bits_q = corrected_bits
    new_bits_list = new_bits_q.numpy().tolist()

    # Chain the new bits into the key so the correction is part of the lineage.
    updated_resonance_key_q = hashlib.sha256((resonance_key_q + "REFactorBits" + str(new_bits_list)).encode("utf-8")).hexdigest()

    TRACE.append({'qubit': q_idx, 'reason': "binary_refactor", 'source': "tuplets",
                  'r_metric': _as_python(r_metric(pairs_q[:, 0])),
                  'u_metric': _as_python(u_metric(pairs_q[:, 1])),
                  'dv_metric': _as_python(dv_metric(pairs_q)),
                  'invariant_pass': _as_python(invariant_check_conceptual(pairs_q, triplets_q, invariants)),
                  'degenerate_check': _as_python(degenerate_check(pairs_q[:6, :])),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': new_bits_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Number of virtual qubits simulated in this example.
Q = 64

# NOTE(review): no RNG seed is set in this section, so the generated values differ
# between runs unless a seed is fixed elsewhere — confirm whether that is intended.

# Base (x, y, z) primaries per qubit; each is a phase-dual [real, unreal] pair
# drawn uniformly from [-1, 1). Shape [Q, 3, 2].
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# initial_primaries = [x, -x, y, -y, z, -z] along axis 1 -> shape [Q, 6, 2].
initial_primaries = tf.stack(
    [
        component
        for axis_idx in range(3)
        for component in (
            base_primaries_xyz[:, axis_idx, :],
            neg_phase_dual(base_primaries_xyz[:, axis_idx, :]),
        )
    ],
    axis=1,
)

# Per-qubit axis maps of random length K, collected separately for x, y and z.
list_of_axis_maps_x, list_of_axis_maps_y, list_of_axis_maps_z = [], [], []

max_k_dynamic = 0   # largest K drawn so far; used as the padded length below
min_k_val = 3       # smallest K that can be drawn
max_k_val = 11      # exclusive upper bound of np.random.randint, so K <= 10

for q_idx in range(Q):
    # Independent K for each of the three axis maps of this qubit.
    k_x, k_y, k_z = (np.random.randint(min_k_val, max_k_val) for _ in range(3))

    list_of_axis_maps_x.append(tf.random.uniform(shape=[k_x, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_y.append(tf.random.uniform(shape=[k_y, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_z.append(tf.random.uniform(shape=[k_z, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))

    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Zero-pad every map at the end of axis 0 up to max_k_dynamic and stack per axis,
# giving one [Q, max_k_dynamic, 2] tensor for each of 'x', 'y', 'z'.
axis_maps = {
    axis_name: tf.stack([
        tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0)
        for t in per_qubit_maps
    ])
    for axis_name, per_qubit_maps in (
        ('x', list_of_axis_maps_x),
        ('y', list_of_axis_maps_y),
        ('z', list_of_axis_maps_z),
    )
}

# One random k in [0, 1) per qubit, shape [Q, 1].
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# a_U constant (from NGFT), scalar.
a_U_constant = tf.constant(10.0, dtype=tf.float32)

# One SHA-256 lineage hash per qubit, built from the qubit index and a random tag.
lineage_hashes = [
    hashlib.sha256(f"Q{qubit}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest()
    for qubit in range(Q)
]

# NECL program (list of operation names) shared by all qubits in this example.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Parameters for the NECL operations, keyed by operation name.
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32),         # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32),         # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),   # theta (radians)
    'LIFT': tf.constant(0.5, dtype=tf.float32),          # d
    'GLUE': tf.constant(0.1, dtype=tf.float32),          # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),         # tau
}

# Invariants ν: {units, tol, ordering} plus an error-correction score threshold.
invariants = {
    'units': 1.0,
    'tol': 1e-5,
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1,
}

# TRACE (lineage manifest): list of dicts, one per logged event.
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν): rescale every qubit's primaries to the 'units' invariant.
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL)
# The shared program runs in list order: TWIST → CURV → PARITY_Q → COLLAPSE_Q → LIFT.
# NOTE(review): necl_program_checksum is not read again anywhere in the visible script.
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q])
# Computed for all Q qubits at once rather than per qubit.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# Collapse and parity are re-evaluated on the post-NECL pairs; the rotated pairs
# produce the initial bits that error correction starts from.
final_collapse_mask = detect_collapse(all_pairs)
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

# Per-qubit results of the error-correction pass, in qubit order.
corrected_bits_list = []
final_resonance_keys = []

# Error correction (if needed) and key generation, one qubit at a time.
for q_idx in range(Q):
    # Slice out this qubit's data.
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Demonstration only: qubit 0's bits are overwritten with a single '1' followed
    # by 29 zeros so that correct_bits always sees an inconsistent (sparse) pattern.
    if q_idx == 0:
        # Example: set Qubit 0's bits to be very sparse (e.g., only one '1')
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # The lineage hash serves as the starting resonance key; correct_bits returns
    # it unchanged when no correction was needed.
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # The updated_key_q already contains the 'REFactorBits' lineage if correction occurred
    final_resonance_keys.append(updated_key_q)

# Stack the per-qubit bit vectors back into one tensor.
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# Promotion works on the full triplets together with the axis maps.
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# The keys were produced by correct_bits in the loop above; final_resonance_keys
# already holds them. No separate make_keys call appears in this section.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Every qubit's original lineage hash is decoded, one call per qubit.
Q_for_decode_example = 1 # NOTE(review): not read anywhere in the visible script
D_for_decode_example = 16 # D ≥ 16 is asserted by decode_lineage_hash

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Each call returns a leading axis of size 1, so concatenating on axis 0 yields
# [Q, 2, 3] for the spins and [Q, D] for the I_vecs.
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
# Inputs and post-NECL state for all qubits.
print("Primaries In:\n", initial_primaries.numpy())
print("\nPrimaries After NECL:\n", primaries_after_necl.numpy())
# Pairs and triplets are intermediate tuplet constructs; only qubit 0 is shown.
print("\nPairs[0]:\n", all_pairs[0].numpy())
print("\nTriplets[0]:\n", all_triplets[0].numpy())
print("\nBits (all qubits):\n", corrected_bits_tensor.numpy()) # bits after error correction
print("\nPrimaries Out (promoted):\n", primaries_out_promoted.numpy())

# Conceptual Nth identities: {n^0, n^1, n^2, n^p} per qubit
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # First promoted primary (index 0 on axis 1) of the current qubit.
    promoted_primary_x = primaries_out_promoted[q_idx, 0, :] # Shape [2]

    # Explicit float32 tensor conversion before handing the value to n_identity.
    promoted_primary_x_tensor = tf.convert_to_tensor(promoted_primary_x, dtype=tf.float32)

    # Only n^1 depends on the qubit (via selector_primary); n^0, n^2 and n^p are
    # called without qubit-specific arguments on every iteration.
    print(f"  Qubit {q_idx}:")
    print(f"    n^0 (base identity): {n_identity(0).numpy()[0]}")
    print(f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}")
    print(f"    n^2 (second-order product): {n_identity(2).numpy()[0]}") # Placeholder
    print(f"    n^p (p-order product): {n_identity('p').numpy()[0]}") # Placeholder

# Remaining per-qubit outputs of the main cycle.
print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# Per-qubit manifest checksum (conceptual): SHA-256 over the string form of the
# TRACE entries that belong to that qubit. A qubit without entries hashes "[]".
necl_manifest_checksums = []
for q_idx in range(Q):
    qubit_trace_entries = list(filter(lambda entry: entry['qubit'] == q_idx, TRACE))
    manifest_str = str(qubit_trace_entries)
    checksum = hashlib.sha256(manifest_str.encode('utf-8')).hexdigest()
    necl_manifest_checksums += [checksum]
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)

Primaries In:
 [[[ 0.2541511   0.8986068 ]
  [-0.2541511  -0.8986068 ]
  [-0.09633875  0.35258794]
  [ 0.09633875 -0.35258794]
  [-0.3163607  -0.04357171]
  [ 0.3163607   0.04357171]]

 [[-0.6968477  -0.17868495]
  [ 0.6968477   0.17868495]
  [-0.20259881  0.7930813 ]
  [ 0.20259881 -0.7930813 ]
  [-0.38047624 -0.13316798]
  [ 0.38047624  0.13316798]]

 [[-0.34390664 -0.58339953]
  [ 0.34390664  0.58339953]
  [ 0.5035124  -0.86647725]
  [-0.5035124   0.86647725]
  [-0.8744559  -0.61148405]
  [ 0.8744559   0.61148405]]

 [[-0.17083311  0.20602894]
  [ 0.17083311 -0.20602894]
  [ 0.5869796   0.47984862]
  [-0.5869796  -0.47984862]
  [ 0.32479215  0.8408687 ]
  [-0.32479215 -0.8408687 ]]

 [[ 0.9069524  -0.5122864 ]
  [-0.9069524   0.5122864 ]
  [ 0.68847966 -0.11749649]
  [-0.68847966  0.11749649]
  [ 0.4480846  -0.01048684]
  [-0.4480846   0.01048684]]

 [[ 0.20893955 -0.87714624]
  [-0.20893955  0.87714624]
  [-0.4830606   0.38656855]
  [ 0.4830606  -0.38656855]
  [ 0.9871509   0.95898

# Task
The previous request has been approved. The plan is to proceed with the specified refactoring and documentation.

Here's the execution plan:

1.  **Refactor `detect_collapse` function**:
    *   Introduce a new constant `R_FOR_RATIO` (set to `2.0` as a default example).
    *   Modify the `detect_collapse` function to implement the new triplet-level predicate: For each index `p` within a triplet `t={i,j,k}`, the collapse condition `[high(real_p) ∧ low(unreal_p)] ∨ [ratio(real_p / unreal_p) > R_FOR_RATIO]` will be evaluated. If this condition is true for *any* index within the triplet, all indices `i,j,k` of that triplet will be marked as collapsed.

2.  **Review `apply_parity_rotation` and `COLLAPSE_Q` functions**: Confirm that these functions operate correctly with the refined `collapse_mask` produced by the updated `detect_collapse`. No code changes are expected for these functions, as their existing logic correctly utilizes the `collapse_mask`.

3.  **Document `correct_bits` function**: Add comments to `correct_bits` to clarify its intended operational strategy regarding local re-evaluation and lineage recording within a unit, without advancing across units unless local exhaustion occurs.

Here's the updated code with the changes:

```python
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
THETA_PHIPI = 0.001  # default `theta` tolerance used by the uniqueness checks during primary promotion
TAU_HI      = 1.0    # default `tau_hi` for detect_collapse: a real component counts as "high" when >= this value
TAU_LOW     = -TAU_HI # default `tau_low` for detect_collapse: an unreal component counts as "low" when <= this value
EPS         = 1e-6   # near-zero buffer (bitmap tie-break, selector normalisation, ratio guard in detect_collapse)

R_FOR_RATIO = 2.0 # default `r_for_ratio` for detect_collapse: an index collapses when real/unreal exceeds this value

# Advanced error correction metrics thresholds
# NOTE(review): these three thresholds are not referenced by any function visible in this part of the file;
# their consumers presumably live further down - confirm before changing them.
TAU_R_METRIC = 0.85  # Adjusted Threshold for real stability metric (higher for stricter stability)
TAU_U_METRIC = 0.85  # Adjusted Threshold for unreal stability metric (higher for stricter stability)
TAU_D_METRIC = 0.85  # Adjusted Threshold for real/unreal divergence metric (higher for stricter consistency)

# Prime index mask for 0..29: entry i is 1 when i is prime (2,3,5,7,11,13,17,19,23,29), else 0.
# Used as the default `prime_mask` of apply_parity_rotation.
PRIME_MASK = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Add two phase-dual values element by element.

    The operands are combined with the `+` operator, so the real and
    unreal components (last dimension) are each summed independently:
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|

    Args:
        a: Left operand (anything supporting `+` with `b`).
        b: Right operand.

    Returns:
        The element-wise sum `a + b`.
    """
    total = a + b
    return total

def mul_phase_dual_component_wise(a, b):
    """
    Multiply two phase-dual values element by element.

    The operands are combined with the `*` operator, so the real and
    unreal components (last dimension) are each multiplied independently:
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|

    Args:
        a: Left operand (anything supporting `*` with `b`).
        b: Right operand.

    Returns:
        The element-wise product `a * b`.
    """
    product = a * b
    return product

def neg_phase_dual(a):
    """
    Negate a phase-dual value element by element.

    Unary minus is applied to the whole operand, so both the real and
    the unreal component (last dimension) change sign.

    Args:
        a: Operand supporting unary `-`.

    Returns:
        The negated value `-a`.
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Build the conceptual Nth identity n^k as a [1, 2] float32 tensor.

    Only order 1 has dedicated behaviour. Every other order (0, 2, 'p',
    or anything else) currently yields the same base identity [[1.0, 0.0]];
    for orders >= 2 this is a placeholder for a product of first-order selectors.

    Args:
        order (int or str): The order of the identity. Can be 0, 1, 2, or 'p' for placeholder.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) holding two values,
            used to derive n^1. Only consulted when `order == 1`. Defaults to None.

    Returns:
        tf.Tensor: A [1, 2] tensor representing the conceptual Nth identity.
    """
    if order == 1:
        if selector_primary is None:
            # Default first-order selector: equal real/unreal weight, unit length.
            return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)  # [1, 2]

        # Derive n^1 from the supplied primary by scaling it to (approximately) unit length.
        # EPS in the denominator keeps a zero-length selector from dividing by zero.
        length = tf.norm(selector_primary, axis=-1, keepdims=True)
        unit_selector = selector_primary / (length + EPS)
        return tf.reshape(unit_selector, [1, 2])  # force the documented [1, 2] shape

    # n^0 = n_|1, ξ| (base identity); orders 2 and above reuse it as a placeholder.
    return tf.constant([[1.0, 0.0]], dtype=tf.float32)  # [1, 2]

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Expand 6 phase-dual primaries into the 30-entry phase-dual pair register.

    The register holds the 6 primaries (x, xi, y, yi, z, zi) followed by 24
    combinatorial entries. For each axis pairing (x-axis with y-axis, x-axis with
    z-axis, y-axis with z-axis) and each of the four (left, right) member combinations,
    the component-wise sum is stored followed by the component-wise product.

    Args:
        prim (tf.Tensor): Input primaries of shape [Q, 6, 2] and dtype tf.float32.
                          The last dimension holds [real, unreal] components.

    Returns:
        tf.Tensor: The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
    """
    assert (
        prim.shape.rank == 3
        and (tf.shape(prim)[-2] == 6).numpy().item()
        and (tf.shape(prim)[-1] == 2).numpy().item()
        and (prim.dtype == tf.float32)
    ), \
        f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Split the 6-dimension; each piece is a [Q, 2] tensor.
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)

    # Indices 0..5: the primaries themselves, in canonical order.
    register = [x, xi, y, yi, z, zi]

    # Indices 6..29: (sum, product) for every cross-axis member combination.
    # The nesting order below reproduces the canonical index order:
    # (a, b), (a, b_conj), (a_conj, b), (a_conj, b_conj) for each axis pairing.
    axis_pairings = (
        ((x, xi), (y, yi)),
        ((x, xi), (z, zi)),
        ((y, yi), (z, zi)),
    )
    for left_members, right_members in axis_pairings:
        for left in left_members:
            for right in right_members:
                register.append(add_phase_dual(left, right))
                register.append(mul_phase_dual_component_wise(left, right))

    return tf.stack(register, axis=-2)  # [Q, 30, 2]

def group_triplets(pairs):
    """
    Regroup the 30-entry phase-dual pair register into 10 triplets ('Nth Lines').

    Triplet t is made of register indices 3t, 3t+1 and 3t+2, i.e. the groups are
    [0,1,2], [3,4,5], ..., [27,28,29]. The phase-dual dimension is preserved.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table of shape [10, 3]: row t is [3t, 3t+1, 3t+2].
    triplet_index_table = tf.reshape(tf.range(30, dtype=tf.int32), [10, 3])

    # Gathering along the 30-axis with a [10, 3] index yields [Q, 10, 3, 2].
    return tf.gather(pairs, triplet_index_table, axis=1)

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW, r_for_ratio=R_FOR_RATIO):
    """
    Detects collapse across the 10 triplets within the phase-dual pair register.
    COLL(x, χ) operation.

    For every register index p the predicate
        [real_p >= tau_hi AND unreal_p <= tau_low] OR [real_p / unreal_p > r_for_ratio]
    is evaluated. The ratio is the signed quotient; where |unreal_p| <= EPS the ratio is
    treated as 0 instead of being computed. If the predicate holds for *any* index of a
    triplet, all three indices of that triplet are marked as collapsed.

    Triplets are the consecutive index groups [0,1,2], [3,4,5], ..., [27,28,29]
    (the same grouping used by group_triplets). The whole register is evaluated in one
    vectorized pass rather than one gather/scatter round-trip per triplet.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold for real component.
        tau_low (float): Low threshold for unreal component (should be negative).
        r_for_ratio (float): Ratio threshold for collapse detection.

    Returns:
        tf.Tensor: A binary collapse mask of shape [Q, 30] and dtype tf.int32.
                   (collapse is a per-unit binary flag, not phase-dual itself).
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    real_parts = pairs[..., 0]    # [Q, 30]
    unreal_parts = pairs[..., 1]  # [Q, 30]

    # Condition 1: high(real_p) AND low(unreal_p)
    high_and_low = tf.logical_and(real_parts >= tau_hi, unreal_parts <= tau_low)  # [Q, 30]

    # Condition 2: real_p / unreal_p > r_for_ratio.
    # Where the denominator is within EPS of zero the ratio is defined as 0. The denominator
    # is swapped for 1 at those positions first so that no inf/NaN is ever produced.
    denominator_usable = tf.abs(unreal_parts) > EPS
    safe_denominator = tf.where(denominator_usable, unreal_parts, tf.ones_like(unreal_parts))
    ratio = tf.where(denominator_usable, real_parts / safe_denominator, tf.zeros_like(real_parts))
    ratio_exceeded = ratio > r_for_ratio  # [Q, 30]

    per_index_hit = tf.logical_or(high_and_low, ratio_exceeded)  # [Q, 30]

    # Triplets are contiguous runs of 3 indices, so a reshape exposes them as the last axis.
    batch = tf.shape(pairs)[0]
    per_triplet_hit = tf.reduce_any(tf.reshape(per_index_hit, [batch, 10, 3]), axis=-1)  # [Q, 10]

    # Spread each triplet flag back over its three member indices: [a, b, ...] -> [a, a, a, b, b, b, ...].
    collapse_mask = tf.repeat(tf.cast(per_triplet_hit, tf.int32), repeats=3, axis=1)  # [Q, 30]
    return collapse_mask

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    PAR(x, π) operation: half-rotate (sign-flip) selected entries of the pair register.

    An entry is flipped when its index is flagged in `prime_mask` or when it is flagged
    in `collapse_mask`. The flip multiplies both the real and the unreal component by -1;
    all other entries are returned unchanged.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        collapse_mask (tf.Tensor): The collapse mask of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Mask of prime indices (non-zero = prime), shape [30] and dtype tf.int32.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): The rotated phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
            - affected (tf.Tensor): A 0/1 mask of flipped indices of shape [Q, 30] and dtype tf.int32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # The prime mask is per-index only; replicate it for every row of the batch.
    is_prime = tf.broadcast_to(prime_mask, tf.shape(collapse_mask)) > 0  # [Q, 30]
    is_collapsed = collapse_mask > 0                                      # [Q, 30]

    # Flip when the index is prime OR belongs to a collapsed block.
    flip = tf.logical_or(is_prime, is_collapsed)  # [Q, 30]
    affected = tf.cast(flip, tf.int32)

    # -1.0 where flipped, +1.0 elsewhere; a trailing axis lets it apply to both components.
    minus_one = tf.constant(-1.0, dtype=tf.float32)
    plus_one = tf.constant(1.0, dtype=tf.float32)
    sign = tf.where(flip, minus_one, plus_one)[..., tf.newaxis]  # [Q, 30, 1]

    return pairs * sign, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Reduce the phase-dual pair register to one bit per index.

    Only the real (leading) component is inspected: the bit is 1 when it is strictly
    greater than `eps`, and 0 otherwise (negative values and near-zero ties map to 0).

    Args:
        rotated_pairs (tf.Tensor): The phase-dual pair register values of shape [Q, 30, 2] and dtype tf.float32.
        eps (float): Near-zero buffer for tie-breaking.

    Returns:
        tf.Tensor: A binary bitmap of shape [Q, 30] and dtype tf.int32.
    """
    assert (
        rotated_pairs.shape.rank == 3
        and (tf.shape(rotated_pairs)[-2] == 30).numpy().item()
        and (tf.shape(rotated_pairs)[-1] == 2).numpy().item()
        and (rotated_pairs.dtype == tf.float32)
    ), \
        f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    leading_component = rotated_pairs[..., 0]  # [Q, 30]
    above_buffer = tf.greater(leading_component, eps)
    return tf.cast(above_buffer, tf.int32)  # [Q, 30]

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Helper function to determine if phase-dual values are unique along an axis within a tolerance.

    A candidate is unique when its Euclidean distance (over the [real, unreal] pair) to
    *every* one of the K observed values is strictly greater than `theta`.
    Handles `vals` of shape `[Q, 2]` (individual primaries) and `[Q, 10, 2]` (candidates).

    Args:
        vals (tf.Tensor): Candidate values for the axis, shape [Q, 2] or [Q, 10, 2], dtype tf.float32.
        axis_vals (tf.Tensor): Observed values along the axis (from other qubits), shape [Q, K, 2],
                               dtype tf.float32. Its batch dimension must match that of `vals`.
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: A 0/1 tensor (dtype tf.int32) of shape [Q] or [Q, 10] indicating uniqueness.

    Raises:
        ValueError: If `vals` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item(), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    # Compare against this function's own `axis_vals` argument; the caller's `axis_maps`
    # dictionary is not visible here.
    assert (tf.shape(vals)[0] == tf.shape(axis_vals)[0]).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    if vals.shape.rank == 2: # vals is [Q, 2] (e.g., fx, fy, fz)
        # [Q, 1, 2] - [Q, K, 2] -> [Q, K, 2]
        diffs = tf.expand_dims(vals, axis=1) - axis_vals
    elif vals.shape.rank == 3: # vals is [Q, 10, 2] (e.g., x_candidates)
        # [Q, 10, 1, 2] - [Q, 1, K, 2] -> [Q, 10, K, 2]
        diffs = tf.expand_dims(vals, axis=2) - tf.expand_dims(axis_vals, axis=1)
    else:
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    # Distance between phase-dual units. The norm squares each component, so taking
    # the absolute value of the differences first is unnecessary.
    distances = tf.norm(diffs, axis=-1) # [Q, K] or [Q, 10, K]

    # Unique only if ALL distances across the K observed values exceed theta
    unique = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(unique, tf.int32) # [Q] or [Q, 10]

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Pick, per batch row, the first candidate whose uniqueness flag is set.

    The position is found with `tf.argmax` over the 0/1 flags, which gives the first
    flagged candidate; when no candidate is flagged the result falls back to candidate 0.

    Args:
        cand_bool (tf.Tensor): 0/1 flags (int32) of shape [Q, 10] indicating uniqueness.
        vals (tf.Tensor): Phase-dual values from which to select, shape [Q, 10, 2].

    Returns:
        tf.Tensor: Selected phase-dual values of shape [Q, 2].
    """
    assert (
        cand_bool.shape.rank == 2
        and (tf.shape(cand_bool)[-1] == 10).numpy().item()
        and (cand_bool.dtype == tf.int32)
    ), \
        f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert (
        vals.shape.rank == 3
        and (tf.shape(vals)[-2] == 10).numpy().item()
        and (tf.shape(vals)[-1] == 2).numpy().item()
        and (vals.dtype == tf.float32)
    ), \
        f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    # Position of the first set flag in each row (0 when the row has none).
    first_hit = tf.argmax(cand_bool, axis=1)  # [Q]

    # Batched gather: row q of `vals` contributes its candidate number first_hit[q].
    return tf.gather(vals, first_hit, axis=1, batch_dims=1)  # [Q, 2]

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Promote 6 phase-dual primaries from the triplets, implementing ASSOC(A, B, α) logic.

    Two candidate sets are built for every batch row:
      * triplet-first: the three members of the final (last) triplet;
      * axis-fallback: for each axis, the first of the 10 triplet members on that
        axis that is unique against the observed values of that axis.
    The triplet-first set is used for a row only when all three members of its final
    triplet are unique; otherwise the axis-fallback set is used. Each chosen value is
    followed by its negation, giving (x, -x, y, -y, z, -z).

    Args:
        triplets (tf.Tensor): 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
        axis_maps (dict): Dictionary with keys 'x', 'y', 'z' and values being tf.Tensor
                          of observed values from other qubits for that axis, shape [Q, K, 2] and dtype tf.float32.
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
    """
    assert (
        triplets.shape.rank == 4
        and (tf.shape(triplets)[-3] == 10).numpy().item()
        and (tf.shape(triplets)[-2] == 3).numpy().item()
        and (tf.shape(triplets)[-1] == 2).numpy().item()
    ), \
        f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert triplets.dtype == tf.float32, \
        f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for k, v in axis_maps.items():
        assert (
            isinstance(v, tf.Tensor)
            and v.dtype == tf.float32
            and v.shape.rank == 3
            and (tf.shape(v)[-1] == 2).numpy().item()
        ), \
            f"axis_maps['{k}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {v.shape} and dtype {v.dtype}"
    assert (tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')

    def _with_conjugates(ax, ay, az):
        # Interleave each value with its negation: [Q, 6, 2].
        return tf.stack(
            [ax, neg_phase_dual(ax), ay, neg_phase_dual(ay), az, neg_phase_dual(az)],
            axis=1,
        )

    # --- Triplet-first: members of the last triplet, each [Q, 2] ---
    last_triplet = triplets[:, -1, :, :]  # [Q, 3, 2]
    final_members = [last_triplet[:, slot, :] for slot in range(3)]
    final_flags = [
        _value_unique_axis_phase_dual(member, axis_maps[name], theta)  # [Q]
        for member, name in zip(final_members, axis_names)
    ]
    # The final triplet qualifies only when all three of its members are unique.
    all_final_unique = tf.logical_and(
        tf.logical_and(final_flags[0] > 0, final_flags[1] > 0),
        final_flags[2] > 0,
    )  # [Q]
    prim_trip = _with_conjugates(*final_members)  # [Q, 6, 2]

    # --- Axis-fallback: first unique candidate among the 10 members of each axis ---
    axis_candidates = [triplets[:, :, slot, :] for slot in range(3)]  # each [Q, 10, 2]
    candidate_flags = [
        _value_unique_axis_phase_dual(candidates, axis_maps[name], theta)  # [Q, 10]
        for candidates, name in zip(axis_candidates, axis_names)
    ]
    axis_picks = [
        _first_unique_selection_phase_dual(flags, candidates)  # [Q, 2]
        for flags, candidates in zip(candidate_flags, axis_candidates)
    ]
    prim_axis = _with_conjugates(*axis_picks)  # [Q, 6, 2]

    # Per-row choice, shaped [Q, 1, 1] so it broadcasts over the [Q, 6, 2] candidate sets.
    use_triplet = all_final_unique[:, tf.newaxis, tf.newaxis]
    return tf.where(use_triplet, prim_trip, prim_axis)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Produce one SHA256 resonance key per batch sample.

    For each sample a text manifest is assembled from its bit row, the prime mask,
    its collapse row and its parity row; when a non-empty lineage string is supplied
    for the sample it is appended as a `path` segment. The key is the SHA256 hex
    digest of the UTF-8 encoded manifest. Tensors are converted to NumPy first, so
    the hashing itself runs in plain Python.

    Args:
        bits (tf.Tensor): Bitmap of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask of shape [30] and dtype tf.int32 (global constant).
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask of shape [Q, 30] and dtype tf.int32.
        lineage_list (list[str], optional): A list of lineage strings for each batch sample. Defaults to None.

    Returns:
        list[str]: A list of SHA256 hex digests, one for each batch sample.
    """
    assert (
        bits.shape.rank == 2
        and (tf.shape(bits)[-1] == 30).numpy().item()
        and (bits.dtype == tf.int32)
    ), \
        f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        parity_mask.shape.rank == 2
        and (tf.shape(parity_mask)[-1] == 30).numpy().item()
        and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (parity_mask.dtype == tf.int32)
    ), \
        f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (
        (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item())
        and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item())
    ), \
        f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert isinstance(lineage_list, list) and len(lineage_list) == tf.shape(bits)[0].numpy().item(), \
            f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    batch_size = tf.shape(bits)[0].numpy().item()

    # Materialise everything as NumPy so the per-sample work below is pure Python.
    bits_np = bits.numpy()
    collapse_np = collapse_mask.numpy()
    parity_np = parity_mask.numpy()
    # The prime mask is shared by all samples; give it one (read-only) row per sample.
    prime_mask_broadcasted = np.broadcast_to(prime_mask.numpy(), (batch_size, 30))

    def _manifest(q_idx):
        # Text manifest for one sample; the optional lineage goes last.
        lineage_manifest = f"bits:{bits_np[q_idx].tolist()}|prime:{prime_mask_broadcasted[q_idx].tolist()}|collapse:{collapse_np[q_idx].tolist()}|parity:{parity_np[q_idx].tolist()}"
        if lineage_list and lineage_list[q_idx]:
            lineage_manifest += f"|path:{lineage_list[q_idx]}"
        return lineage_manifest

    return [
        hashlib.sha256(_manifest(sample).encode("utf-8")).hexdigest()
        for sample in range(batch_size)
    ]

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    NGFT-inspired Info-energy per qubit: E_info = (k+1) · a_U · I.

    I is the sum, over the 6 promoted primaries, of the Euclidean magnitude of each
    [real, unreal] pair. k is supplied per batch row and a_U is a scalar constant.

    Args:
        primaries_out (tf.Tensor): Promoted primaries of shape [Q, 6, 2] (phase-dual) and dtype tf.float32.
        k_values (tf.Tensor): Batch-wise 'k' components, shape [Q, 1] or [Q], dtype tf.float32.
        a_U_constant (tf.Tensor): A universal constant, scalar tf.float32.

    Returns:
        tf.Tensor: Computed Info-energy for each qubit, shape [Q] and dtype tf.float32.
    """
    assert (
        primaries_out.shape.rank == 3
        and (tf.shape(primaries_out)[-1] == 2).numpy().item()
    ), \
        f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert (primaries_out.dtype == tf.float32), f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert (tf.shape(primaries_out)[-2] == 6).numpy().item(), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert (k_values.dtype == tf.float32), f"k_values must have dtype tf.float32, but got {k_values.dtype}"
    assert (
        ((tf.rank(k_values) == 2).numpy().item() and (tf.shape(k_values)[-1] == 1).numpy().item())
        or ((tf.rank(k_values) == 1).numpy().item() and (tf.shape(k_values)[0] == tf.shape(primaries_out)[0]).numpy().item())
    ), \
           f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"
    assert (a_U_constant.dtype == tf.float32), f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert (tf.rank(a_U_constant) == 0).numpy().item(), f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # Bring k into column form [Q, 1] regardless of whether [Q] or [Q, 1] was passed.
    k_is_vector = (tf.rank(k_values) == 1).numpy().item()
    k_column = tf.expand_dims(k_values, axis=-1) if k_is_vector else k_values

    # I: per-primary magnitude [Q, 6], summed over the 6 primaries and kept as a column [Q, 1].
    primary_magnitudes = tf.norm(primaries_out, axis=-1)
    info_column = tf.reduce_sum(primary_magnitudes, axis=1, keepdims=True)

    # (k+1) * I * a_U, still [Q, 1]; drop the singleton column to return [Q].
    energy_column = (k_column + 1.0) * info_column * a_U_constant
    return tf.squeeze(energy_column, axis=1)

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL operation CURV: curvilinear compression of the primaries.
    X ← X / (1 + |kappa|·|X|)

    |X| is the Euclidean magnitude of each [real, unreal] pair, so both components
    of a pair are divided by the same factor.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): Scalar or broadcastable tensor for kappa parameter.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    # kappa is cast to the primaries' dtype; only its absolute value matters.
    kappa_abs = tf.abs(tf.cast(params_kappa, primaries.dtype))
    # keepdims leaves a trailing singleton axis ([Q, 6, 1]) so the divisor broadcasts over both components.
    pair_magnitude = tf.norm(primaries, axis=-1, keepdims=True)
    divisor = 1.0 + kappa_abs * pair_magnitude
    return primaries / divisor

def GEOD(primaries, params_t):
    """
    NECL operation GEOD: shift every component by `t` in the direction of its own sign.
    X ← X + t·sign(X)

    Components that are exactly zero have sign 0 and are therefore left unchanged.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): Scalar or broadcastable tensor for 't' parameter.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    step_size = tf.cast(params_t, primaries.dtype)
    signed_step = step_size * tf.sign(primaries)
    return primaries + signed_step

def TWIST(primaries, params_theta):
    """
    NECL operation TWIST: scale the unreal component by cos(theta); the real component is untouched.
    X[...,1] ← X[...,1]·cos(theta)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): Scalar or broadcastable tensor for 'theta' angle.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    angle = tf.cast(params_theta, primaries.dtype)
    real_part = primaries[..., 0]
    unreal_part = primaries[..., 1]
    # Re-assemble the pair with the scaled unreal component in slot 1.
    return tf.stack([real_part, unreal_part * tf.cos(angle)], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL operation LIFT: stand-in for a projection to higher coordinates.

    In this software emulation the lift is a uniform scaling of every component
    by (1 + 0.1·d).

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_d (tf.Tensor): Scalar parameter for higher dimension 'd'.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    # d is cast to the primaries' dtype so it can take part in the multiplication.
    dimension = tf.cast(params_d, primaries.dtype)
    # Simple placeholder scaling; an invariant-preserving factor (e.g. sqrt(d)) could replace it.
    scale = 1.0 + dimension * 0.1
    return primaries * scale

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL function: couple each primary unit to its neighbour.

    X ← X + sigma·roll(X, +1, axis=1), i.e. every entry along axis 1 receives
    a sigma-weighted copy of the entry that precedes it (cyclically).

    Args:
        primaries (tf.Tensor): Input primaries (callers in this file pass [Q, 6, 2]).
        params_sigma (tf.Tensor): Gluing strength; cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Glued primaries, same shape as the input.
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    # Cyclic shift by one position along axis 1.
    neighbours = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * neighbours

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL function: divide every primary into two weighted shares.

    X ← concat(X·(1−tau), X·tau) along axis 1, so the size of axis 1 doubles
    (e.g. [Q, 6, 2] becomes [Q, 12, 2]).

    Args:
        primaries (tf.Tensor): Input primaries (callers in this file pass [Q, 6, 2]).
        params_tau (tf.Tensor): Split ratio 'tau'.
    Returns:
        tf.Tensor: Both shares stacked along axis 1; the (1−tau) share comes first.
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    retained_share = primaries * (1.0 - ratio)
    split_off_share = primaries * ratio
    # Output shape differs from the input: axis 1 is twice as long.
    return tf.concat([retained_share, split_off_share], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically expand one qubit's lineage hash into a conceptual spin and I_vec.

    The hash and the qubit index are re-hashed into a seed for a private
    pseudo-random generator; all values below are drawn from that generator, so
    the same (hex_hash_str, q_idx, D) always yields the same output. The
    process-wide NumPy RNG state is NOT touched.

    Args:
        hex_hash_str (str): A 64-character SHA256 hex digest for one qubit.
        q_idx (int): The index of the qubit (mixed into the seed).
        D (int): Dimensionality for i_vec; must be at least 16.
        num_qubits (int): Total number of qubits. Accepted for interface
            compatibility; not read by this implementation.
        invariants (dict): Invariant constants. Accepted for interface
            compatibility; not read by this implementation.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec: float32 tensor of shape [1, 2, 3]; row 0 is the real
              spin, row 1 the unreal spin.
            - i_vec: float32 tensor of shape [1, D], scaled to unit Euclidean
              norm whenever its norm exceeds EPS.

    Raises:
        AssertionError: If the hash is not a 64-character string or D < 16.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Derive the seed from the whole hash plus the qubit index for per-qubit determinism.
    seed_value = int(hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()[:16], 16)
    # A private RandomState produces the same stream as np.random.seed(...) followed
    # by np.random.rand(...), but leaves the global generator alone so that decoding
    # a hash never perturbs unrelated random draws elsewhere in the program.
    rng = np.random.RandomState(seed_value % (2**32 - 1))

    # theta = 2π·r0, phi = 2π·r1, twist = 2π·r2
    r_vals = rng.rand(3)
    theta = 2 * math.pi * r_vals[0]
    phi = 2 * math.pi * r_vals[1]
    twist_angle = 2 * math.pi * r_vals[2]

    # Real spin: (x, y, z) = (sinθ cosφ, sinθ sinφ, cosθ)
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: rotate the real (x, y) about the z axis by the twist angle;
    # z is unaffected by a rotation about z.
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # I_vec: D further draws from the same stream, normalised to unit length.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:  # skip the division for a (near-)zero vector
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): rescale each qubit's primaries to the invariant unit scale.

    For every qubit the magnitudes of its primary units (norm over the last
    axis) are summed along axis 1; all components of that qubit are divided by
    this sum (plus EPS) and multiplied by `invariants['units']` (default 1.0).
    Qubits whose summed magnitude does not exceed EPS are mapped to zeros.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        invariants (dict): Invariant constants; only the 'units' key is read here.
    Returns:
        tf.Tensor: Normalized primaries of shape [Q, 6, 2].
    """
    unit_norms = tf.norm(primaries, axis=-1, keepdims=True)         # [Q, 6, 1]
    qubit_totals = tf.reduce_sum(unit_norms, axis=1, keepdims=True)  # [Q, 1, 1]

    target_scale = tf.cast(invariants.get('units', 1.0), primaries.dtype)
    # Gain is the unit scale for non-trivial qubits and 0 for (near-)zero ones.
    gain = tf.where(qubit_totals > EPS, target_scale, 0.0)

    # EPS in the denominator keeps the division finite for all-zero qubits.
    rescaled = primaries / (qubit_totals + EPS)
    return rescaled * gain

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π): multi-qubit wrapper around apply_parity_rotation.

    Builds the pair table and its collapse mask from `primaries`, applies the
    parity rotation, and returns the first six rows of the rotated table.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask [30].
    Returns:
        tf.Tensor: Primaries updated by the parity rotation, shape [Q, 6, 2].
    """
    pair_table = compute_pairs(primaries)
    collapsed = detect_collapse(pair_table)
    rotated_table, _parity_mask = apply_parity_rotation(pair_table, collapsed, prime_mask)
    # The pair table is [Q, 30, 2]; its leading six rows are taken as the primaries.
    return rotated_table[:, :6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ): zero the primary units that detect_collapse flags.

    Only the flagged units are cleared (both their real and unreal
    components); every other unit of the qubit is returned unchanged.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
    Returns:
        tf.Tensor: Primaries with collapsed units zeroed, shape [Q, 6, 2].
    """
    full_mask = detect_collapse(compute_pairs(primaries))  # [Q, 30]

    # Keep the flags of the six primary units and add a trailing axis so the
    # mask broadcasts over the (real, unreal) components: [Q, 6] -> [Q, 6, 1].
    unit_flags = full_mask[:, :6][..., tf.newaxis]
    unit_flags = tf.cast(unit_flags, tf.float32)

    cleared = tf.zeros_like(primaries)
    return tf.where(unit_flags > 0, cleared, primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit entry point that delegates to promote_primaries.

    The three arguments are forwarded positionally and unchanged.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2].
        axis_maps (dict): Axis maps for uniqueness checks.
        theta_phipi (float): Tolerance for uniqueness.
    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2].
    """
    promoted = promote_primaries(triplets, axis_maps, theta_phipi)
    return promoted

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Run a NECL program (a list of op names) over multi-qubit primaries.

    Ops are applied in list order. A manifest string is accumulated alongside:
    every op name is appended (including unknown ones), and parameterised ops
    additionally append "(<value>)". The SHA256 hex digest of that manifest is
    returned as the program checksum.

    Per-op behaviour:
        * CURV, GEOD, TWIST, LIFT: call the function of the same name with the
          parameter from `params_dict` (or a built-in default).
        * GLUE: skipped with a printed warning when Q is odd; otherwise calls
          GLUE with the Q-rolled primaries scaled by the parameter.
          NOTE(review): this hands GLUE a tensor where its docstring describes a
          scalar sigma - confirm this is the intended coupling.
        * SPLIT: leaves the primaries untouched (keeps K constant); only the
          manifest records it.
        * PARITY_Q, COLLAPSE_Q: call the wrappers; no parameter is recorded.
        * Anything else: prints a warning and changes nothing.

    Parameter values are read with `.numpy()`, so they must be eager tensors.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): NECL operation names to apply, in order.
        params_dict (dict): Maps NECL op names to their parameter tensors.
        prime_mask (tf.Tensor): Global prime mask needed for PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Defaults to zeros_like(primaries).
            It is not consumed by any op in this implementation.

    Returns:
        tf.Tensor: Final primaries after applying the NECL program.
        str: Checksum of the applied NECL program.
    """
    state = primaries
    qubit_count = tf.shape(primaries)[0].numpy().item()

    if conceptual_target_state is None:
        conceptual_target_state = tf.zeros_like(primaries)

    # Fallback parameter values used when params_dict has no entry for an op.
    fallback_values = {
        'CURV': 0.01,
        'GEOD': 0.05,
        'TWIST': math.pi/4,  # radians
        'LIFT': 0.5,
        'GLUE': 0.1,
        'SPLIT': 0.5,
    }
    # Ops whose handling is simply "call f(state, param)".
    direct_ops = {'CURV': CURV, 'GEOD': GEOD, 'TWIST': TWIST, 'LIFT': LIFT}

    def _param_for(name):
        """Return the configured parameter for `name`, or its float32 default."""
        return params_dict.get(name, tf.constant(fallback_values[name], dtype=tf.float32))

    def _tag(param):
        """Format a parameter for the manifest."""
        return f"({param.numpy().item()})"

    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name in direct_ops:
            op_params = _param_for(op_name)
            state = direct_ops[op_name](state, op_params)
            manifest_parts.append(_tag(op_params))
        elif op_name == 'GLUE':
            op_params = _param_for('GLUE')
            if qubit_count % 2 != 0:
                print(f"Warning: GLUE operation skipped for odd Q ({qubit_count})")
            else:
                # Neighbouring qubit's primaries (roll along the Q axis), scaled by the parameter.
                rolled_scaled = tf.roll(state, shift=1, axis=0) * op_params
                state = GLUE(state, rolled_scaled)
            manifest_parts.append(_tag(op_params))
        elif op_name == 'SPLIT':
            # Deliberate no-op: a real split would double K, which the main pipeline avoids.
            op_params = _param_for('SPLIT')
            manifest_parts.append(_tag(op_params))
        elif op_name == 'PARITY_Q':
            state = PARITY_Q(state, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            state = COLLAPSE_Q(state)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    program_manifest = "".join(manifest_parts)
    necl_checksum = hashlib.sha256(program_manifest.encode('utf-8')).hexdigest()
    return state, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Quantifies real stability/cohesion from the variance of the real parts of pairs.

    Computes 1 - variance / (range + EPS), where range = max - min over all
    elements. When the range is below EPS (all values effectively identical)
    the metric is exactly 1.0. Higher values mean higher stability.

    The result is ALWAYS a scalar tf.Tensor. Previously the constant-input case
    returned a Python float, which broke callers that invoke `.numpy()` on the
    result.

    Args:
        real_parts (tf.Tensor): Real components to score (any shape).
    Returns:
        tf.Tensor: Scalar stability metric.
    """
    value_range = tf.reduce_max(real_parts) - tf.reduce_min(real_parts)
    # EPS in the denominator keeps this finite even when the range is zero.
    scaled_variance = tf.math.reduce_variance(real_parts) / (value_range + EPS)
    # Branch-free select so both cases yield a tensor of the same dtype.
    return tf.where(value_range < EPS, tf.ones_like(scaled_variance), 1.0 - scaled_variance)

def u_metric(unreal_parts):
    """
    Quantifies unreal stability/cohesion from the variance of the unreal parts of pairs.

    Computes 1 - variance / (range + EPS), where range = max - min over all
    elements. When the range is below EPS (all values effectively identical)
    the metric is exactly 1.0. Higher values mean higher stability.

    The result is ALWAYS a scalar tf.Tensor. Previously the constant-input case
    returned a Python float, which broke callers that invoke `.numpy()` on the
    result.

    Args:
        unreal_parts (tf.Tensor): Unreal components to score (any shape).
    Returns:
        tf.Tensor: Scalar stability metric.
    """
    value_range = tf.reduce_max(unreal_parts) - tf.reduce_min(unreal_parts)
    # EPS in the denominator keeps this finite even when the range is zero.
    scaled_variance = tf.math.reduce_variance(unreal_parts) / (value_range + EPS)
    # Branch-free select so both cases yield a tensor of the same dtype.
    return tf.where(value_range < EPS, tf.ones_like(scaled_variance), 1.0 - scaled_variance)

def dv_metric(pairs_q):
    """
    Scores how closely the real and unreal components of each pair agree.

    For every pair the absolute difference |real - unreal| is divided by the
    pair's magnitude (plus EPS); pairs whose magnitude does not exceed EPS
    contribute zero divergence. The metric is one minus the mean of these
    ratios, so a higher value means lower divergence (higher consistency).
    """
    real_part, unreal_part = pairs_q[..., 0], pairs_q[..., 1]
    pair_norms = tf.norm(pairs_q, axis=-1)
    gap = tf.abs(real_part - unreal_part)

    # Near-zero pairs are treated as non-divergent instead of being divided.
    relative_gap = tf.where(pair_norms > EPS, gap / (pair_norms + EPS), tf.zeros_like(pair_norms))
    return 1.0 - tf.reduce_mean(relative_gap)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant test for one qubit.

    The invariant checked: the magnitudes of the first six pairs (the
    primaries) must sum to `invariants['units']` (default 1.0) within
    `invariants['tol']` (default EPS). `triplets_q` is accepted but not read.

    Returns a boolean scalar tensor: True when the invariant holds.
    """
    tolerance = invariants.get('tol', EPS)
    target_units = invariants.get('units', 1.0)

    # Only the leading six rows of the pair table are the primaries themselves.
    primary_norms = tf.norm(pairs_q[:6, :], axis=-1)
    total_norm = tf.reduce_sum(primary_norms)
    return tf.abs(total_norm - target_units) < tolerance

def degenerate_check(primaries_q):
    """
    Conceptual degeneracy test: are all primaries (near-)zero?

    Returns a boolean scalar tensor that is True only when the norm over the
    last axis of every primary is below EPS.
    """
    unit_norms = tf.norm(primaries_q, axis=-1)
    every_unit_tiny = tf.reduce_all(unit_norms < EPS)
    return every_unit_tiny

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derive corrected bits for one qubit from a per-index rule plus two guards.

    Rule: b_i = 1 iff r_i > TAU_R and u_i > TAU_U and dv_i > TAU_D and the
    index's triplet block has mixed signs and the invariant check passes and
    the primaries are not degenerate; otherwise b_i = 0.

    Guard 1: if the binary entropy of the bit pattern is not above 0.3, the
    thresholds are adjusted (TAU_R and TAU_U ×1.2, TAU_D ×0.9 floored at 0.25)
    and the rule is re-applied once.
    Guard 2: an all-ones or all-zeros result is replaced by the fallback
    "|real_i| > EPS and triplet block has mixed signs".

    NOTE(review): with two-component pairs, r_i² + u_i² = 1, so r_i and u_i
    cannot both exceed ~0.707. For thresholds above that the rule yields all
    zeros and the Guard 2 fallback decides the bits - confirm this is intended.

    Expects a 30-row pair table and eager execution.

    Returns:
        (bits, TAU_R, TAU_U, TAU_D): the int32 bit vector [30] and the
        thresholds in effect after Guard 1.
    """
    current_TAU_R = initial_TAU_R
    current_TAU_U = initial_TAU_U
    current_TAU_D = initial_TAU_D

    real = pairs_q[:, 0]             # [30]
    unreal = pairs_q[:, 1]           # [30]
    mag = tf.norm(pairs_q, axis=-1)  # [30]
    has_magnitude = mag > EPS
    no_signal = tf.zeros_like(mag)

    # Per-index ratios; indices with (near-)zero magnitude score 0 on all three.
    r_i = tf.where(has_magnitude, tf.abs(real) / mag, no_signal)
    u_i = tf.where(has_magnitude, tf.abs(unreal) / mag, no_signal)
    dv_i = tf.where(has_magnitude, tf.abs(real - unreal) / mag, no_signal)

    # Triplet diversity: view the 30 real-part signs as 10 consecutive blocks of
    # 3 and flag a block when any member's sign differs from the block's first.
    sign_blocks = tf.reshape(tf.sign(real), [10, 3])
    block_is_mixed = tf.reduce_any(tf.not_equal(sign_blocks, sign_blocks[:, :1]), axis=1)
    # Every index inherits the flag of its block: [10] -> [30].
    trip_mix = tf.repeat(tf.cast(block_is_mixed, tf.int32), repeats=3)

    # Qubit-wide guards, broadcast across all indices.
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))
    structural_ok = (trip_mix > 0) & invariant_ok & not_degenerate

    def _bits_for(tau_r, tau_u, tau_d):
        """Apply the per-index rule with the given thresholds."""
        passes = (r_i > tau_r) & (u_i > tau_u) & (dv_i > tau_d)
        return tf.cast(passes & structural_ok, tf.int32)

    def _entropy_above_floor(bits):
        """True when the binary entropy of the ones-fraction exceeds 0.3."""
        p = tf.reduce_mean(tf.cast(bits, tf.float32))
        entropy = - (p * tf.math.log(p + EPS) + (1.0 - p) * tf.math.log(1.0 - p + EPS))
        return entropy > 0.3

    b = _bits_for(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 1: low-entropy pattern -> adjust thresholds and re-derive once.
    if not _entropy_above_floor(b):
        current_TAU_R *= 1.2
        current_TAU_U *= 1.2
        current_TAU_D = max(current_TAU_D * 0.9, 0.25)
        b = _bits_for(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 2: a uniform pattern is never returned; use the fallback rule instead.
    if tf.reduce_all(tf.equal(b, 1)) or tf.reduce_all(tf.equal(b, 0)):
        b = tf.cast((tf.abs(real) > EPS) & (trip_mix > 0), tf.int32)

    return b, current_TAU_R, current_TAU_U, current_TAU_D

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Advanced error-correction hook for a single qubit.

    The current bits are considered inconsistent when they are all ones, all
    zeros, contain fewer than 5 ones, or contain more than 25 ones. In that
    case new bits are derived from the tuplets via derive_bits_advanced, the
    resonance key is re-hashed to include them, and one entry describing the
    correction is appended to TRACE. Otherwise the inputs are returned as-is.

    Scope (ErrorCorrectNext): the correction is evaluated locally, per qubit.
    It re-evaluates within the qubit's own tuplets and records lineage; it does
    not advance across distinct units or qubits.

    Requires eager execution (tensor values are converted to Python values).

    Args:
        q_idx (int): Index of the qubit, recorded in the trace entry.
        pairs_q (tf.Tensor): Pair table for this qubit, [30, 2].
        triplets_q (tf.Tensor): Triplets for this qubit, [10, 3, 2].
        current_bits_q (tf.Tensor): Current bit vector, [30].
        resonance_key_q (str): Current resonance key (hex string).
        TRACE (list): Lineage log; mutated in place when a correction occurs.
        invariants (dict): Invariant constants forwarded to the checks.

    Returns:
        tuple: (bits, resonance_key) - corrected values when a correction
        occurred, otherwise the unchanged inputs.
    """
    num_ones = tf.reduce_sum(current_bits_q)
    inconsistency_flags = (
        tf.reduce_all(tf.equal(current_bits_q, 1)),  # all ones
        tf.reduce_all(tf.equal(current_bits_q, 0)),  # all zeros
        num_ones < 5,                                # too sparse
        num_ones > 25,                               # too dense
    )
    # Convert each scalar tensor explicitly instead of chaining tensors with `or`.
    if not any(bool(flag) for flag in inconsistency_flags):
        return current_bits_q, resonance_key_q

    new_bits_q, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC)
    new_bits_list = new_bits_q.numpy().tolist()

    # Extend the lineage: the new key commits to the old key and the corrected bits.
    updated_resonance_key_q = hashlib.sha256(
        (resonance_key_q + "REFactorBits" + str(new_bits_list)).encode("utf-8")).hexdigest()

    # float()/bool() accept both scalar eager tensors and plain Python numbers,
    # so logging cannot fail if a metric returns a Python float (as r_metric and
    # u_metric may for constant input).
    TRACE.append({'qubit': q_idx, 'reason': "binary_refactor", 'source': "tuplets",
                  'r_metric': float(r_metric(pairs_q[:, 0])),
                  'u_metric': float(u_metric(pairs_q[:, 1])),
                  'dv_metric': float(dv_metric(pairs_q)),
                  'invariant_pass': bool(invariant_check_conceptual(pairs_q, triplets_q, invariants)),
                  'degenerate_check': bool(degenerate_check(pairs_q[:6, :])),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': new_bits_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Size of the simulated register: how many virtual qubits this run builds.
Q = 64

# Base primaries: for every qubit draw three phase-dual values (x, y, z), each a
# [real, unreal] pair sampled uniformly from [-1, 1).  Shape [Q, 3, 2].
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# Lay out the six primaries per qubit as [x, -x, y, -y, z, -z] by stacking each
# base value next to its phase-dual negation along a new axis 1.  Shape [Q, 6, 2].
initial_primaries = tf.stack([
    base_primaries_xyz[:, 0, :], neg_phase_dual(base_primaries_xyz[:, 0, :]),  # x, -x
    base_primaries_xyz[:, 1, :], neg_phase_dual(base_primaries_xyz[:, 1, :]),  # y, -y
    base_primaries_xyz[:, 2, :], neg_phase_dual(base_primaries_xyz[:, 2, :]),  # z, -z
], axis=1)

# Axis maps: each qubit gets its own randomly sized map per axis; the maps are
# zero-padded afterwards to a common length so they stack into [Q, K_max, 2].
min_k_val = 3   # smallest K that is generated
max_k_val = 11  # upper bound handed to np.random.randint (exclusive there)
max_k_dynamic = 0

list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []

for q_idx in range(Q):
    # Independent K for the x, y and z map of this qubit.
    k_x = np.random.randint(min_k_val, max_k_val)
    k_y = np.random.randint(min_k_val, max_k_val)
    k_z = np.random.randint(min_k_val, max_k_val)

    list_of_axis_maps_x.append(tf.random.uniform(shape=[k_x, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_y.append(tf.random.uniform(shape=[k_y, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_z.append(tf.random.uniform(shape=[k_z, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))

    # Track the longest map seen so far; it fixes the padded length.
    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Zero-pad every map at the end of axis 0 up to max_k_dynamic, then stack per axis.
axis_maps = {
    axis_name: tf.stack([
        tf.pad(t, paddings=[[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], mode="CONSTANT", constant_values=0.0)
        for t in per_qubit_maps
    ])
    for axis_name, per_qubit_maps in (
        ('x', list_of_axis_maps_x),
        ('y', list_of_axis_maps_y),
        ('z', list_of_axis_maps_z),
    )
}

# One k value per qubit, uniform in [0, 1), shape [Q, 1].
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# a_U constant (from NGFT), a float32 scalar.
a_U_constant = tf.constant(10.0, dtype=tf.float32)

# One SHA256 lineage hash per qubit, built from the qubit index and a random path id.
lineage_hashes = [
    hashlib.sha256(f"Q{q_idx}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest()
    for q_idx in range(Q)
]

# NECL program (ordered list of op names); in this example every qubit shares it.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Parameters for the NECL operations, keyed by op name.
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32),        # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32),        # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),  # theta, in radians
    'LIFT': tf.constant(0.5, dtype=tf.float32),         # d
    'GLUE': tf.constant(0.1, dtype=tf.float32),         # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),        # tau
}

# Invariants ν: {units, tol, ordering, correction_threshold}
invariants = {
    'units': 1.0,
    'tol': 1e-5,                     # tolerance used by the invariant check
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1,     # threshold for scores in error correction
}

# TRACE (lineage manifest): list of dicts, one per logged event.
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν)
# Rescale every qubit's primaries to the unit scale given by `invariants`.
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL)
# Runs the ops listed in necl_program_shared, in list order, and also returns a
# SHA256 checksum of the applied program.
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q])
# Computed for all Q qubits at once rather than per qubit.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# Collapse and parity are re-detected on the post-NECL pairs; the resulting
# rotated pairs produce the initial bits that error correction starts from.
final_collapse_mask = detect_collapse(all_pairs)
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

# Per-qubit results of the error-correction pass below.
corrected_bits_list = []
final_resonance_keys = []

# Error correction is applied qubit by qubit; each call may append to TRACE.
for q_idx in range(Q):
    # Extract per-qubit data
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Demonstration only: force Qubit 0 into an 'inconsistent' state so that the
    # correction path is exercised at least once.
    if q_idx == 0:
        # A single '1' followed by 29 zeros, i.e. fewer than 5 ones (sparse).
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # correct_bits returns the inputs unchanged when the bits look consistent,
    # otherwise the corrected bits and a re-hashed resonance key.
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # The updated_key_q already contains the 'REFactorBits' lineage if correction occurred
    final_resonance_keys.append(updated_key_q)

# Stack the per-qubit bit vectors back into one tensor.
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# Promotion uses the full triplets together with the axis maps.
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# Already done inside the correction loop above: final_resonance_keys holds the
# keys after any error correction.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Every qubit's lineage hash is decoded, one hash per call.
Q_for_decode_example = 1 # Each decode call handles one qubit; not read again in this section
D_for_decode_example = 16 # D ≥ 16 is enforced by decode_lineage_hash

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Each decode returns a leading axis of size 1, so concatenating along axis 0
# yields [Q, 2, 3] for the spins and [Q, D] for the I_vecs.
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
# Dump the pipeline inputs and outputs; tensors are converted with .numpy() for printing.
print("Primaries In:\n", initial_primaries.numpy())
print("\nPrimaries After NECL:\n", primaries_after_necl.numpy())
# Pairs and triplets are intermediate tuplet constructs; only qubit 0 is shown.
print("\nPairs[0]:\n", all_pairs[0].numpy())
print("\nTriplets[0]:\n", all_triplets[0].numpy())
print("\nBits (all qubits):\n", corrected_bits_tensor.numpy()) # Bits after error correction
print("\nPrimaries Out (promoted):\n", primaries_out_promoted.numpy())

# Conceptual Nth identities: {n^0, n^1, n^2, n^p} per qubit
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # First promoted primary of the current qubit, used as the n^1 selector.
    promoted_primary_x = primaries_out_promoted[q_idx, 0, :] # Shape [2]

    # Ensure promoted_primary_x is explicitly converted to a Tensor for n_identity
    promoted_primary_x_tensor = tf.convert_to_tensor(promoted_primary_x, dtype=tf.float32)

    print(f"  Qubit {q_idx}:")
    print(f"    n^0 (base identity): {n_identity(0).numpy()[0]}")
    print(f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}")
    print(f"    n^2 (second-order product): {n_identity(2).numpy()[0]}") # Placeholder
    print(f"    n^p (p-order product): {n_identity('p').numpy()[0]}") # Placeholder

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# Per-qubit manifest checksum (conceptual): SHA256 over the string form of that
# qubit's TRACE entries. Qubits with no entries all hash the string "[]".
necl_manifest_checksums = []
for q_idx in range(Q):
    qubit_trace_entries = [entry for entry in TRACE if entry['qubit'] == q_idx]
    manifest_str = str(qubit_trace_entries)
    checksum = hashlib.sha256(manifest_str.encode('utf-8')).hexdigest()
    necl_manifest_checksums.append(checksum)
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)
```

## Refactor Collapse Detection

### Subtask:
Modify the `detect_collapse` function to implement the new triplet-level predicate and introduce `R_FOR_RATIO`.


## Review Parity Application

### Subtask:
Confirm that the `apply_parity_rotation` function correctly applies half-rotation only to indices marked as collapsed (by the refined `detect_collapse`) and prime indices globally, without block-wide broadcast beyond the coherent unit. No code changes are expected here as the `collapse_mask` will now be sufficiently precise.


## Review Parity Application

### Subtask:
Confirm that the `apply_parity_rotation` function correctly applies half-rotation only to indices marked as collapsed (by the refined `detect_collapse`) and prime indices globally, without block-wide broadcast beyond the coherent unit. No code changes are expected here as the `collapse_mask` will now be sufficiently precise.

#### Instructions
1. Examine the `apply_parity_rotation` function.
2. Verify that it uses the `collapse_mask` (which is now generated by the refactored `detect_collapse` function) to determine which elements are 'affected'.
3. Confirm that the application of the sign flip (`rotated = pairs * sign_expanded`) correctly modifies only the elements identified by the `affected` mask, ensuring that the parity rotation is applied precisely to the intended indices without unintended propagation.
4. Document the confirmation that the function operates as expected given the refined `collapse_mask`.

### Confirmation of `apply_parity_rotation`

Upon reviewing the `apply_parity_rotation` function in the provided code, the following observations confirm its correct operation in conjunction with the refined `detect_collapse` function:

1.  **Use of `collapse_mask`:** The function explicitly takes `collapse_mask` as an argument. This mask is generated by the `detect_collapse` function and now accurately reflects collapsed indices without block-wide broadcast beyond the coherent unit.

2.  **Determination of 'affected' elements:** The `affected` tensor is computed as `tf.cast(tf.logical_or(prime > 0, collapse_mask > 0), tf.int32)`. This logic correctly identifies the elements that should undergo half-rotation: those at global prime indices OR any specific index identified as collapsed by the `collapse_mask`. The `prime` mask is broadcast to the shape of `collapse_mask` first, so the element-wise logical operation is applied consistently across all qubits.

3.  **Precise Application of Sign Flip:** The `sign` tensor is created such that it is `-1.0` for `affected` indices and `1.0` otherwise. This `sign` is then expanded to `[Q, 30, 1]` via `tf.expand_dims(sign, axis=-1)` and multiplied element-wise with the `pairs` tensor: `rotated = pairs * sign_expanded`. This ensures that *only* the specific real and unreal components of the phase-dual units corresponding to affected indices are sign-flipped, while others remain unchanged. There is no unintended propagation or block-wide broadcast beyond the coherent unit for non-collapsed indices.

Given this analysis, the `apply_parity_rotation` function operates as expected, applying half-rotation precisely to indices determined by the refined `collapse_mask` and global prime indices, thereby fulfilling the subtask requirements.

## Review COLLAPSE_Q Function

### Subtask:
Confirm that the `COLLAPSE_Q` function continues to operate correctly by zeroing out only the specific primary units identified by the more granular collapse mask. No code changes are expected here.


### Confirmation of `COLLAPSE_Q` Function Operation

The `COLLAPSE_Q` function has been reviewed, and its operation aligns with the specified requirements:

1.  **Extraction of Relevant Collapse Flags**: The function extracts the collapse flags for the first six indices of the pair register, which hold the primary units, using `primary_collapse_flags = collapse_mask[:, 0:6]`. This ensures that only the collapse information directly corresponding to the `primaries` input is considered for zeroing.

2.  **Conditional Zeroing**: The extracted `primary_collapse_flags` are expanded to match the `[Q, 6, 2]` shape of the `primaries` tensor. Subsequently, `tf.where` is used to conditionally set the elements of `primaries` to `[0.0, 0.0]` where the corresponding flag in `primary_collapse_flags_float` is `> 0`. For primary units not marked for collapse, their original values are preserved.

This mechanism effectively zeroes out only the specific phase-dual primary units that are part of a collapsed block, as indicated by the granular collapse mask, without affecting other non-collapsed primary units. No unintended side effects were identified, confirming the correct and precise application of the zeroing operation.

## Document `correct_bits` function

### Subtask:
Add comments to the `correct_bits` function to clarify its intended operational strategy.


**Reasoning**:
The subtask requires adding comments to the `correct_bits` function to explain its operational strategy regarding local re-evaluation and lineage recording. I will provide the updated function within a code block.



In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
THETA_PHIPI = 0.001    # phi-pi tolerance; default `theta` of the uniqueness checks
TAU_HI = 1.0           # upper threshold used by collapse detection
TAU_LOW = -TAU_HI      # lower threshold, mirrored around zero
EPS = 1e-6             # near-zero buffer (bit tie-breaking, safe division)

# Thresholds for the advanced error-correction metrics
TAU_R_METRIC = 0.85    # real stability metric
TAU_U_METRIC = 0.85    # unreal stability metric
TAU_D_METRIC = 0.85    # real/unreal divergence metric

# 0/1 mask over indices 0..29 that is set exactly at the prime indices
PRIME_MASK = tf.constant(
    [1 if index in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) else 0 for index in range(30)],
    dtype=tf.int32,
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Add two phase-dual values component by component.

    The operands are combined with the plain ``+`` operator, so the real and
    unreal slots (assumed to sit in the last dimension) are summed
    independently of each other:
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|
    """
    total = a + b
    return total

def mul_phase_dual_component_wise(a, b):
    """
    Multiply two phase-dual values component by component.

    The operands are combined with the plain ``*`` operator, so the real and
    unreal slots (assumed to sit in the last dimension) are multiplied
    independently of each other:
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|
    """
    product = a * b
    return product

def neg_phase_dual(a):
    """
    Negate a phase-dual value component by component.

    Unary minus is applied to the whole operand, so both the real and the
    unreal slot (assumed to sit in the last dimension) change sign.
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Return the conceptual Nth identity n^k as a tensor of shape [1, 2].

    Args:
        order (int or str): Order of the identity. Only ``1`` has dedicated
            behaviour; every other value (0, 2, 'p', ...) currently yields the
            constant ``[[1.0, 0.0]]``.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) holding
            exactly two values. Consulted only when ``order == 1``.
            Defaults to None.

    Returns:
        tf.Tensor: A [1, 2] tensor. The constants are tf.float32; the
        normalised selector keeps the dtype of ``selector_primary``.
    """
    if order == 1:
        if selector_primary is None:
            # No selector supplied: default n^1 is the diagonal (1, 1) / sqrt(2).
            return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)

        # Derive n^1 from the supplied promoted primary by scaling it to
        # (approximately) unit length. EPS keeps the division finite when
        # the primary is all zeros.
        length = tf.norm(selector_primary, axis=-1, keepdims=True)
        return tf.reshape(selector_primary / (length + EPS), [1, 2])

    # n^0 (base identity), n^2 = ∏ n_|x_i, ξ_i| (product of two first-order
    # selectors) and all higher orders share this value; for n^2 and above
    # it is only a placeholder.
    return tf.constant([[1.0, 0.0]], dtype=tf.float32)

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Build the 30-index phase-dual pair register from the 6 primaries.

    The register starts with the primaries themselves (x, xi, y, yi, z, zi).
    It is followed, for each axis combination (x-y, x-z, y-z) and each choice
    of one member per axis, by the component-wise sum and then the
    component-wise product of the two members: 6 + 3 * 4 * 2 = 30 entries.

    Args:
        prim (tf.Tensor): Primaries of shape [Q, 6, 2] and dtype tf.float32.
                          The last dimension holds [real, unreal].

    Returns:
        tf.Tensor: Pair register of shape [Q, 30, 2] and dtype tf.float32.
    """
    assert (
        prim.shape.rank == 3
        and (tf.shape(prim)[-2] == 6).numpy().item()
        and (tf.shape(prim)[-1] == 2).numpy().item()
        and (prim.dtype == tf.float32)
    ), f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Split along the 6-dimension; every piece has shape [Q, 2].
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)
    x_axis, y_axis, z_axis = (x, xi), (y, yi), (z, zi)

    # Indices 0..5: the primaries in canonical order.
    entries = [x, xi, y, yi, z, zi]

    # Indices 6..29: (sum, product) for every cross-axis pairing. The loop
    # nesting fixes the canonical index order of the register.
    for left_axis, right_axis in ((x_axis, y_axis), (x_axis, z_axis), (y_axis, z_axis)):
        for left in left_axis:
            for right in right_axis:
                entries.append(add_phase_dual(left, right))
                entries.append(mul_phase_dual_component_wise(left, right))

    return tf.stack(entries, axis=-2)  # [Q, 30, 2]

def group_triplets(pairs):
    """
    Split the 30-index pair register into 10 consecutive triplets.

    Triplet k collects register indices 3k, 3k+1 and 3k+2. These triplets are
    the 'Nth Lines' of the ISA.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: Triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table [[0,1,2],[3,4,5],...,[27,28,29]], shape [10, 3].
    triplet_index = tf.reshape(tf.range(30, dtype=tf.int32), [10, 3])

    # Gathering along the 30-axis keeps the trailing [real, unreal] dimension.
    return tf.gather(pairs, triplet_index, axis=1)  # [Q, 10, 3, 2]

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW):
    """
    Detect collapse in each of the 10 triplets of the phase-dual pair register.

    A triplet (register indices 3k, 3k+1, 3k+2) collapses when, within that
    triplet, at least one value is >= tau_hi AND at least one value is
    <= tau_low. This test is evaluated separately for the real and for the
    unreal component; a collapse in either component collapses the triplet.
    When a triplet collapses, all 3 of its indices are marked.
    Implements the COLL(x, ξ) operation.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold, applied to both the real and the
            unreal component.
        tau_low (float): Low threshold, applied to both components (the
            default is -TAU_HI).

    Returns:
        tf.Tensor: Binary collapse mask of shape [Q, 30] and dtype tf.int32
                   (one flag per unit; the flag itself is not phase-dual).
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    batch = tf.shape(pairs)[0]

    # The triplets are contiguous runs of three indices, so a reshape groups
    # them without any gather/scatter round trips.
    triplets = tf.reshape(pairs, [batch, 10, 3, 2])  # [Q, 10, 3, 2]

    # Per triplet and per component: is there any high value / any low value?
    has_high = tf.reduce_any(triplets >= tau_hi, axis=2)  # [Q, 10, 2]
    has_low = tf.reduce_any(triplets <= tau_low, axis=2)  # [Q, 10, 2]

    # A component collapses when high and low coexist in the same triplet;
    # the triplet collapses if either component does.
    component_collapsed = tf.logical_and(has_high, has_low)  # [Q, 10, 2]
    triplet_collapsed = tf.reduce_any(component_collapsed, axis=-1)  # [Q, 10]

    # Expand each triplet flag back onto its three register indices.
    return tf.repeat(tf.cast(triplet_collapsed, tf.int32), repeats=3, axis=1)  # [Q, 30]

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    Apply the half-rotation (sign flip) PAR(x, π) to the pair register.

    An index is flipped when it is a prime index (same for every qubit) or
    when the collapse mask marks it for that qubit. The flip multiplies both
    the real and the unreal component by -1; all other units are unchanged.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): 0/1 mask of prime indices, shape [30] and dtype tf.int32.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): Rotated register, shape [Q, 30, 2], dtype tf.float32.
            - affected (tf.Tensor): 0/1 mask of flipped indices, shape [Q, 30], dtype tf.int32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # Replicate the global prime mask for every qubit.
    prime_rows = tf.broadcast_to(prime_mask, tf.shape(collapse_mask))  # [Q, 30]

    # Flip wherever the index is prime or the unit is marked as collapsed.
    flip = tf.logical_or(prime_rows > 0, collapse_mask > 0)  # [Q, 30], bool
    affected = tf.cast(flip, tf.int32)

    # -1.0 on flipped indices, +1.0 elsewhere; the extra trailing axis lets
    # the factor broadcast over [real, unreal].
    sign = tf.where(
        flip,
        tf.constant(-1.0, dtype=tf.float32),
        tf.constant(1.0, dtype=tf.float32),
    )
    rotated = pairs * sign[..., tf.newaxis]  # [Q, 30, 2]
    return rotated, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Reduce the phase-dual pair register to one bit per index.

    Only the real (leading) component is inspected: the bit is 1 when it is
    strictly greater than ``eps`` and 0 otherwise, so negatives and values
    within the near-zero buffer map to 0. The unreal component is ignored.

    Args:
        rotated_pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        eps (float): Near-zero buffer used as the strict lower bound for a 1 bit.

    Returns:
        tf.Tensor: Bitmap of shape [Q, 30] and dtype tf.int32.
    """
    assert (
        rotated_pairs.shape.rank == 3
        and (tf.shape(rotated_pairs)[-2] == 30).numpy().item()
        and (tf.shape(rotated_pairs)[-1] == 2).numpy().item()
        and (rotated_pairs.dtype == tf.float32)
    ), f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    leading = rotated_pairs[..., 0]  # real components, [Q, 30]
    is_positive = tf.greater(leading, eps)
    return tf.cast(is_positive, tf.int32)

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flag candidate phase-dual values that differ from every observed axis value.

    For each candidate, the Euclidean distance (``tf.norm`` over the
    [real, unreal] dimension) to each of the K observed values of the same
    qubit is computed. The candidate counts as unique when all K distances
    are strictly greater than ``theta``.

    Args:
        vals (tf.Tensor): Candidates, shape [Q, 2] (one per qubit) or
            [Q, 10, 2] (ten per qubit), dtype tf.float32.
        axis_vals (tf.Tensor): Observed values along the axis, shape [Q, K, 2],
            dtype tf.float32.
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: 0/1 flags of dtype tf.int32, shape [Q] or [Q, 10].

    Raises:
        ValueError: If ``vals`` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item(), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (tf.shape(vals)[0] == tf.shape(axis_vals)[0]).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    # Insert singleton axes so candidates and observations broadcast against
    # each other, leaving K as the second-to-last dimension.
    rank = vals.shape.rank
    if rank == 2:
        candidates = tf.expand_dims(vals, axis=1)  # [Q, 1, 2]
        observed = axis_vals  # [Q, K, 2]
    elif rank == 3:
        candidates = tf.expand_dims(vals, axis=2)  # [Q, 10, 1, 2]
        observed = tf.expand_dims(axis_vals, axis=1)  # [Q, 1, K, 2]
    else:
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    # Distance between each candidate and each observed phase-dual unit.
    distances = tf.norm(tf.abs(candidates - observed), axis=-1)  # [Q, K] or [Q, 10, K]

    # Unique only if the candidate clears the tolerance against all K values.
    clears_all = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(clears_all, tf.int32)

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Pick, per qubit, the first candidate whose uniqueness flag is set.

    ``tf.argmax`` over the 0/1 flags gives the position of the first 1. When
    no flag is set for a qubit it gives 0, so candidate 0 is returned in
    that case.

    Args:
        cand_bool (tf.Tensor): 0/1 uniqueness flags, shape [Q, 10], dtype tf.int32.
        vals (tf.Tensor): Candidate phase-dual values, shape [Q, 10, 2], dtype tf.float32.

    Returns:
        tf.Tensor: Selected phase-dual values, shape [Q, 2].
    """
    assert (
        cand_bool.shape.rank == 2
        and (tf.shape(cand_bool)[-1] == 10).numpy().item()
        and (cand_bool.dtype == tf.int32)
    ), f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert (
        vals.shape.rank == 3
        and (tf.shape(vals)[-2] == 10).numpy().item()
        and (tf.shape(vals)[-1] == 2).numpy().item()
        and (vals.dtype == tf.float32)
    ), f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    # Column of the first set flag for every qubit (0 when none is set).
    first_hit = tf.cast(tf.argmax(cand_bool, axis=1), tf.int64)  # [Q]
    qubit_rows = tf.range(tf.shape(vals)[0], dtype=tf.int64)  # [Q]

    # (row, column) coordinates select one [real, unreal] unit per qubit.
    coordinates = tf.stack([qubit_rows, first_hit], axis=1)  # [Q, 2]
    return tf.gather_nd(vals, coordinates)  # [Q, 2]

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Promote primaries from the triplets, preferring the final triplet.

    Two candidate sets of primaries are built per qubit, each laid out as
    (x, -x, y, -y, z, -z) with component-wise negation as the conjugate:

    * Triplet-first: the three members of the last triplet, used when every
      member is unique against its axis map.
    * Axis fallback: for each axis, the first of the 10 triplet members at
      that position that is unique against the axis map. The selection helper
      falls back to candidate 0 when none is unique.

    Handles phase-dual components. Implements the ASSOC(A, B, α) logic.

    Args:
        triplets (tf.Tensor): 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
        axis_maps (dict): Keys 'x', 'y', 'z'; each value is a tf.Tensor of
            observed values for that axis, shape [Q, K, 2] and dtype tf.float32.
        theta (float): Tolerance threshold for the uniqueness checks.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
    """
    assert triplets.shape.rank == 4 and (tf.shape(triplets)[-3] == 10).numpy().item() and (tf.shape(triplets)[-2] == 3).numpy().item() and (tf.shape(triplets)[-1] == 2).numpy().item(), \
        f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert triplets.dtype == tf.float32, \
        f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for k, v in axis_maps.items():
        assert isinstance(v, tf.Tensor) and v.dtype == tf.float32 and v.shape.rank == 3 and (tf.shape(v)[-1] == 2).numpy().item(), \
            f"axis_maps['{k}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {v.shape} and dtype {v.dtype}"
    assert (tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    # --- Triplet-first promotion ---
    final_triplet = triplets[:, -1, :, :]  # [Q, 3, 2]
    fx, fy, fz = final_triplet[:, 0, :], final_triplet[:, 1, :], final_triplet[:, 2, :]  # each [Q, 2]

    # Uniqueness of each member of the final triplet against its own axis map.
    ux_final = _value_unique_axis_phase_dual(fx, axis_maps['x'], theta)  # [Q]
    uy_final = _value_unique_axis_phase_dual(fy, axis_maps['y'], theta)  # [Q]
    uz_final = _value_unique_axis_phase_dual(fz, axis_maps['z'], theta)  # [Q]

    # The triplet is usable only if all three members are unique.
    triplet_unique = tf.cast(
        tf.logical_and(tf.logical_and(ux_final > 0, uy_final > 0), uz_final > 0),
        tf.int32,
    )  # [Q]

    # Primaries with their conjugates (both components negated).
    prim_trip = tf.stack(
        [fx, neg_phase_dual(fx), fy, neg_phase_dual(fy), fz, neg_phase_dual(fz)],
        axis=1,
    )  # [Q, 6, 2]

    # --- Axis-fallback promotion ---
    x_candidates = triplets[:, :, 0, :]  # [Q, 10, 2]
    y_candidates = triplets[:, :, 1, :]  # [Q, 10, 2]
    z_candidates = triplets[:, :, 2, :]  # [Q, 10, 2]

    # Uniqueness of all 10 candidates per axis.
    ux_all_candidates = _value_unique_axis_phase_dual(x_candidates, axis_maps['x'], theta)  # [Q, 10]
    # Fixed: this previously called the undefined name
    # `_value_unique_axis_axis_phase_dual` and raised NameError.
    uy_all_candidates = _value_unique_axis_phase_dual(y_candidates, axis_maps['y'], theta)  # [Q, 10]
    uz_all_candidates = _value_unique_axis_phase_dual(z_candidates, axis_maps['z'], theta)  # [Q, 10]

    # First unique candidate for each axis.
    x_sel = _first_unique_selection_phase_dual(ux_all_candidates, x_candidates)  # [Q, 2]
    y_sel = _first_unique_selection_phase_dual(uy_all_candidates, y_candidates)  # [Q, 2]
    z_sel = _first_unique_selection_phase_dual(uz_all_candidates, z_candidates)  # [Q, 2]

    prim_axis = tf.stack(
        [x_sel, neg_phase_dual(x_sel), y_sel, neg_phase_dual(y_sel), z_sel, neg_phase_dual(z_sel)],
        axis=1,
    )  # [Q, 6, 2]

    # --- Per-qubit choice between the two candidate sets ---
    # The flag is reshaped to [Q, 1, 1] so it broadcasts against [Q, 6, 2].
    choose_trip_expanded = tf.cast(
        tf.expand_dims(tf.expand_dims(triplet_unique, axis=-1), axis=-1),
        tf.float32,
    )  # [Q, 1, 1]

    primaries_out = tf.where(choose_trip_expanded > 0, prim_trip, prim_axis)  # [Q, 6, 2]

    return primaries_out

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Build one SHA256 resonance key per batch sample.

    The tensors are first materialised as NumPy arrays. For each sample, a
    textual lineage manifest is assembled from its bits, the global prime
    mask, its collapse mask and its parity mask. If a non-empty lineage
    string is supplied for the sample, it is appended to the manifest before
    hashing.

    Args:
        bits (tf.Tensor): Bitmap of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Global prime index mask of shape [30] and dtype tf.int32.
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask of shape [Q, 30] and dtype tf.int32.
        lineage_list (list[str], optional): One lineage string per batch sample.
            Empty entries are skipped. Defaults to None.

    Returns:
        list[str]: SHA256 hex digests, one per batch sample.
    """
    assert (
        bits.shape.rank == 2
        and (tf.shape(bits)[-1] == 30).numpy().item()
        and (bits.dtype == tf.int32)
    ), f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        parity_mask.shape.rank == 2
        and (tf.shape(parity_mask)[-1] == 30).numpy().item()
        and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (parity_mask.dtype == tf.int32)
    ), f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (
        (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item())
        and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item())
    ), f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert isinstance(lineage_list, list) and len(lineage_list) == tf.shape(bits)[0].numpy().item(), \
            f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    batch_size = tf.shape(bits)[0].numpy().item()

    # Materialise everything as NumPy so hashing runs in plain Python.
    bit_rows = bits.numpy()
    # The prime mask is global; replicate it so each sample has its own row.
    prime_rows = np.broadcast_to(prime_mask.numpy(), (batch_size, 30))
    collapse_rows = collapse_mask.numpy()
    parity_rows = parity_mask.numpy()

    def _manifest(q_idx):
        # Textual lineage manifest for one sample; the optional path suffix
        # is added only when a non-empty lineage string exists for it.
        text = f"bits:{bit_rows[q_idx].tolist()}|prime:{prime_rows[q_idx].tolist()}|collapse:{collapse_rows[q_idx].tolist()}|parity:{parity_rows[q_idx].tolist()}"
        if lineage_list and lineage_list[q_idx]:
            text += f"|path:{lineage_list[q_idx]}"
        return text

    return [
        hashlib.sha256(_manifest(q_idx).encode("utf-8")).hexdigest()
        for q_idx in range(batch_size)
    ]

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    Compute the NGFT-inspired info-energy of every qubit in a batch.

    The energy follows E_info = (k + 1) · a_U · I, where I is the sum, over the
    six primaries of a qubit, of the Euclidean norm of each (real, unreal) pair.

    Input validation converts tensors with `.numpy()`, so the function must be
    called in eager mode.

    Args:
        primaries_out (tf.Tensor): Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
        k_values (tf.Tensor): Per-qubit 'k' components of dtype tf.float32, given
            either as [Q, 1] or as [Q] (the latter is expanded to [Q, 1]).
        a_U_constant (tf.Tensor): Universal constant, a rank-0 tf.float32 tensor.

    Returns:
        tf.Tensor: Info-energy for each qubit, shape [Q] and dtype tf.float32.

    Raises:
        AssertionError: If any of the shape, rank or dtype requirements is violated.
    """
    def _truth(flag):
        # Collapse a scalar boolean tensor into a plain Python bool.
        return flag.numpy().item()

    assert primaries_out.shape.rank == 3 and _truth(tf.shape(primaries_out)[-1] == 2), \
        f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert primaries_out.dtype == tf.float32, \
        f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert _truth(tf.shape(primaries_out)[-2] == 6), \
        f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert k_values.dtype == tf.float32, \
        f"k_values must have dtype tf.float32, but got {k_values.dtype}"
    assert (
        (_truth(tf.rank(k_values) == 2) and _truth(tf.shape(k_values)[-1] == 1))
        or (_truth(tf.rank(k_values) == 1) and _truth(tf.shape(k_values)[0] == tf.shape(primaries_out)[0]))
    ), f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"
    assert a_U_constant.dtype == tf.float32, \
        f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert _truth(tf.rank(a_U_constant) == 0), \
        f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # Bring k into column form [Q, 1] so it lines up with the I column below.
    k_is_flat = _truth(tf.rank(k_values) == 1)
    k_column = tf.expand_dims(k_values, axis=-1) if k_is_flat else k_values

    # |primary| for each of the six phase-dual units -> [Q, 6],
    # then summed per qubit and kept as a column -> [Q, 1].
    per_primary_norm = tf.norm(primaries_out, axis=-1)
    I_component = tf.reduce_sum(per_primary_norm, axis=1, keepdims=True)

    # E_info = (k + 1) · I · a_U, still shaped [Q, 1].
    energy_column = (k_column + 1.0) * I_component * a_U_constant

    # Drop the trailing singleton axis to hand back shape [Q].
    return tf.squeeze(energy_column, axis=1)

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL op CURV: curvilinear damping of each phase-dual unit.

    X ← X / (1 + |kappa|·|X|), where |X| is the Euclidean norm taken over the
    last (real, unreal) axis and kept as a size-1 axis for broadcasting.

    Args:
        primaries (tf.Tensor): Input primaries, documented by callers as [Q, 6, 2].
        params_kappa (tf.Tensor): Kappa; a scalar or anything broadcastable
            against the [..., 1] norm tensor. It is cast to primaries' dtype.
    Returns:
        tf.Tensor: Transformed primaries with the same shape as the input.
    """
    kappa_abs = tf.abs(tf.cast(params_kappa, primaries.dtype))
    unit_norm = tf.norm(primaries, axis=-1, keepdims=True)  # [..., 1]
    denominator = 1.0 + kappa_abs * unit_norm
    return primaries / denominator

def GEOD(primaries, params_t):
    """
    NECL op GEOD: push every component away from zero by a fixed step.

    X ← X + t·sign(X). Components that are exactly zero stay unchanged because
    sign(0) is 0.

    Args:
        primaries (tf.Tensor): Input primaries, documented by callers as [Q, 6, 2].
        params_t (tf.Tensor): Step 't'; scalar or broadcastable to primaries.
            It is cast to primaries' dtype.
    Returns:
        tf.Tensor: Transformed primaries with the same shape as the input.
    """
    step = tf.cast(params_t, primaries.dtype)
    direction = tf.sign(primaries)
    return primaries + step * direction

def TWIST(primaries, params_theta):
    """
    NECL op TWIST: scale the unreal component by cos(theta).

    X[..., 1] ← X[..., 1]·cos(theta); the real component X[..., 0] passes
    through untouched.

    Args:
        primaries (tf.Tensor): Input primaries, documented by callers as [Q, 6, 2].
        params_theta (tf.Tensor): Angle 'theta'; scalar or broadcastable
            against primaries[..., 1]. It is cast to primaries' dtype.
    Returns:
        tf.Tensor: Transformed primaries, re-stacked along the last axis.
    """
    angle = tf.cast(params_theta, primaries.dtype)
    real_part = primaries[..., 0]
    unreal_part = primaries[..., 1]
    return tf.stack([real_part, unreal_part * tf.cos(angle)], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL op LIFT: uniform rescaling standing in for a projection
    to higher coordinates.

    X ← X·(1 + 0.1·d). This is a simplified placeholder; no invariant-preserving
    projection is actually performed.

    Args:
        primaries (tf.Tensor): Input primaries, documented by callers as [Q, 6, 2].
        params_d (tf.Tensor): Parameter 'd'; it is cast to primaries' dtype.
    Returns:
        tf.Tensor: Rescaled primaries with the same shape as the input.
    """
    scale = 1.0 + tf.cast(params_d, primaries.dtype) * 0.1
    return primaries * scale

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL op GLUE: mix each primary with its cyclic neighbour.

    X ← X + sigma·roll(X, +1, axis=1), i.e. the roll is along the second axis
    (the six primaries of a qubit when the input is [Q, 6, 2]).

    Args:
        primaries (tf.Tensor): Input primaries, documented by callers as [Q, 6, 2].
        params_sigma (tf.Tensor): Gluing strength; it is cast to primaries' dtype
            and multiplied element-wise/broadcast against the rolled tensor.
    Returns:
        tf.Tensor: Transformed primaries with the same shape as the input.
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    neighbour = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * neighbour

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL op SPLIT: divide every primary into two weighted copies.

    X ← concat(X·(1−tau), X·tau) along axis 1, so the second axis doubles in
    length (e.g. [Q, 6, 2] becomes [Q, 12, 2]).

    Args:
        primaries (tf.Tensor): Input primaries, documented by callers as [Q, 6, 2].
        params_tau (tf.Tensor): Split ratio; it is cast to primaries' dtype.
    Returns:
        tf.Tensor: Concatenated primaries with axis 1 twice as long as the input.
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    kept_share = primaries * (1.0 - ratio)
    moved_share = primaries * ratio
    return tf.concat([kept_share, moved_share], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically map one qubit's lineage hash to a conceptual state.

    The hash and the qubit index are re-hashed with SHA-256 to obtain a seed.
    A generator private to this call then produces three angles (theta, phi,
    twist) and D raw values. The real spin is the point
    (sinθ cosφ, sinθ sinφ, cosθ); the unreal spin is the real spin rotated
    about the z axis by the twist angle. The D raw values are scaled to unit
    Euclidean norm to form `i_vec`.

    The generator is local on purpose: the process-wide NumPy random state is
    left untouched, so calling this function does not disturb other code that
    draws from `np.random`. The produced values are the same as those obtained
    by seeding the global legacy generator with the same seed.

    Args:
        hex_hash_str (str): A 64-character hash string for one qubit.
        q_idx (int): The index of the qubit; mixed into the seed.
        D (int): Dimensionality for i_vec; must be at least 16.
        num_qubits (int): Total number of qubits. Currently unused.
        invariants (dict): Dictionary of invariant constants. Currently unused.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec (tf.Tensor): Spin vector of shape [1, 2, 3] and dtype tf.float32
              (row 0 = real components, row 1 = unreal components).
            - i_vec (tf.Tensor): Internal state vector of shape [1, D] and dtype tf.float32.

    Raises:
        AssertionError: If hex_hash_str is not a 64-character string or D < 16.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Seed = first 64 bits of SHA-256("<hash>-<q_idx>"), folded into the
    # 32-bit range accepted by the legacy NumPy seeding API.
    seed_digest = hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()
    seed_value = int(seed_digest[:16], 16)
    # A private RandomState reproduces the stream of np.random.seed(...) +
    # np.random.rand(...) without reseeding the shared global generator.
    rng = np.random.RandomState(seed_value % (2**32 - 1))

    # θ = 2π·r0, φ = 2π·r1, twist = 2π·r2
    r_vals = rng.rand(3)
    theta = 2 * math.pi * r_vals[0]
    phi = 2 * math.pi * r_vals[1]
    twist_angle = 2 * math.pi * r_vals[2]

    # Real spin: (x, y, z) = (sinθ cosφ, sinθ sinφ, cosθ)
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: 2D rotation of (x, y) about z by the twist angle; z is
    # unchanged by a rotation about its own axis.
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # I_vec: D uniform draws, scaled to unit Euclidean norm. The norm is
    # computed once; a (near-)zero vector is returned unscaled to avoid
    # dividing by zero.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): rescale each qubit's primaries to the invariant unit scale.

    For every qubit the Euclidean norms of its phase-dual units are summed
    (over axis 1) and all of that qubit's components are divided by this total
    plus EPS, then multiplied by invariants['units'] (1.0 when the key is
    absent). Qubits whose total is not above EPS are mapped to zeros.

    Args:
        primaries (tf.Tensor): Primaries, documented by callers as [Q, 6, 2].
        invariants (dict): Invariant constants; only 'units' is read here.
    Returns:
        tf.Tensor: Normalized primaries with the same shape as the input.
    """
    per_unit_norm = tf.norm(primaries, axis=-1, keepdims=True)             # [Q, 6, 1]
    qubit_total = tf.reduce_sum(per_unit_norm, axis=1, keepdims=True)      # [Q, 1, 1]

    target_units = invariants.get('units', 1.0)
    # Per-qubit multiplier: the unit scale where the total is usable, else 0.
    gate = tf.where(qubit_total > EPS, tf.cast(target_units, primaries.dtype), 0.0)

    # EPS in the denominator keeps the division finite for all-zero qubits.
    return primaries / (qubit_total + EPS) * gate

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π): multi-qubit wrapper around apply_parity_rotation.

    Builds the pair register with compute_pairs, derives its collapse mask
    with detect_collapse, applies the parity rotation, and returns the first
    six entries along axis 1 of the rotated register.

    Args:
        primaries (tf.Tensor): Primaries, documented by callers as [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask forwarded to apply_parity_rotation.
    Returns:
        tf.Tensor: The leading six rotated entries per qubit
            (presumably [Q, 6, 2], given a [Q, 30, 2] pair register).
    """
    pair_register = compute_pairs(primaries)
    collapsed = detect_collapse(pair_register)
    rotated_register, _parity_mask = apply_parity_rotation(pair_register, collapsed, prime_mask)
    # Only the slots that correspond to the primaries themselves are kept.
    return rotated_register[:, :6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ): multi-qubit wrapper around detect_collapse.

    Zeroes only those primary units whose collapse flag is set; all other
    units (and other qubits) keep their values.

    Args:
        primaries (tf.Tensor): Primaries, documented by callers as [Q, 6, 2].
    Returns:
        tf.Tensor: Primaries with flagged units replaced by zeros, same shape
            as the input.
    """
    collapse_mask = detect_collapse(compute_pairs(primaries))

    # Keep the six flags that line up with the primaries and add a trailing
    # axis so one flag covers both the real and the unreal component.
    primary_flags = collapse_mask[:, 0:6][..., tf.newaxis]
    is_collapsed = tf.cast(primary_flags, tf.float32) > 0

    return tf.where(is_collapsed, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit entry point that delegates to promote_primaries.

    All three arguments are forwarded unchanged, in the same order.

    Args:
        triplets (tf.Tensor): Triplets, documented by callers as [Q, 10, 3, 2].
        axis_maps (dict): Axis maps used by promote_primaries for uniqueness checks.
        theta_phipi (float): Tolerance used by promote_primaries.
    Returns:
        tf.Tensor: Whatever promote_primaries returns (promoted primaries,
            documented by callers as [Q, 6, 2]).
    """
    promoted = promote_primaries(triplets, axis_maps, theta_phipi)
    return promoted

def _necl_param_repr(op_params):
    """Render an NECL parameter as the "(value)" text stored in the program manifest."""
    # np.asarray accepts eager tensors, NumPy values and plain Python numbers.
    # For a scalar, tolist() yields the same Python number that
    # `.numpy().item()` yields for a scalar tensor, so existing checksums
    # are unchanged.
    return f"({np.asarray(op_params).tolist()})"


def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Applies a sequence of NECL operations to multi-qubit primaries.

    Operations are applied in list order. Every operation name is appended to
    a manifest string (followed by "(param)" for parameterised operations);
    the SHA-256 of that manifest is returned as the program checksum. Unknown
    names are reported with a printed warning, leave the primaries untouched,
    but are still part of the manifest.

    Parameters may be scalar tensors, NumPy scalars or plain Python numbers.

    Special cases:
        - 'GLUE' is skipped (with a printed warning) when the batch size Q is
          odd; its parameter is still written to the manifest.
        - 'SPLIT' is a deliberate no-op here so that the primaries keep their
          shape; only its parameter is written to the manifest.
        - 'PARITY_Q' and 'COLLAPSE_Q' take no parameter.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): List of NECL operation names to apply.
        params_dict (dict): Maps NECL op names to their parameter; missing
            entries fall back to built-in defaults.
        prime_mask (tf.Tensor): Global prime mask needed for PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Accepted for interface
            compatibility; no operation currently reads it.

    Returns:
        tf.Tensor: Final primaries after applying the NECL program.
        str: Checksum (SHA-256 hex digest) of the applied NECL program manifest.
    """
    current_primaries = primaries
    Q = tf.shape(primaries)[0].numpy().item()

    # Operations of the form X ← op(X, param), with their default parameter.
    parametric_ops = {
        'CURV': (CURV, 0.01),            # kappa
        'GEOD': (GEOD, 0.05),            # t
        'TWIST': (TWIST, math.pi/4),     # theta, in radians
        'LIFT': (LIFT, 0.5),             # d
    }

    # Manifest pieces are collected and joined once at the end.
    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name in ('CURV', 'GEOD', 'TWIST', 'LIFT'):
            op_fn, default_value = parametric_ops[op_name]
            op_params = params_dict.get(op_name, tf.constant(default_value, dtype=tf.float32))
            current_primaries = op_fn(current_primaries, op_params)
            manifest_parts.append(_necl_param_repr(op_params))
        elif op_name == 'GLUE':
            op_params = params_dict.get('GLUE', tf.constant(0.1, dtype=tf.float32)) # Sigma for gluing strength
            if Q % 2 != 0:
                print(f"Warning: GLUE operation skipped for odd Q ({Q})")
            else:
                # NOTE(review): GLUE's second argument is its strength 'sigma', yet a
                # full tensor (the primaries rolled along the qubit axis, times sigma)
                # is passed here, so the update is
                # X + sigma·roll(X, axis=0)·roll(X, axis=1). Kept as-is to preserve
                # results; confirm whether this cross-qubit coupling is intended.
                current_primaries = GLUE(current_primaries, tf.roll(current_primaries, shift=1, axis=0) * op_params)
            manifest_parts.append(_necl_param_repr(op_params))
        elif op_name == 'SPLIT':
            op_params = params_dict.get('SPLIT', tf.constant(0.5, dtype=tf.float32)) # Tau for split ratio
            # A real split would double the second axis; the main pipeline
            # expects a constant shape, so the primaries are left unchanged.
            manifest_parts.append(_necl_param_repr(op_params))
        elif op_name == 'PARITY_Q':
            current_primaries = PARITY_Q(current_primaries, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            current_primaries = COLLAPSE_Q(current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    program_manifest = "".join(manifest_parts)
    necl_checksum = hashlib.sha256(program_manifest.encode('utf-8')).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Quantifies real stability/cohesion based on variance of real parts of pairs.

    Computes 1 - variance / (max - min + EPS); a higher value implies higher
    stability. When the spread (max - min) is below EPS the values are treated
    as identical and the maximum score 1 is returned.

    Args:
        real_parts (tf.Tensor): Real components to score.

    Returns:
        tf.Tensor: Scalar tensor in the dtype of `real_parts`. A tensor is
            returned on every path, so callers can always use `.numpy()`.
    """
    max_val = tf.reduce_max(real_parts)
    min_val = tf.reduce_min(real_parts)
    spread = max_val - min_val
    if spread < EPS: # All values are effectively the same
        # Return a tensor rather than a Python float so that the return type
        # matches the general branch below.
        return tf.ones_like(spread)

    return 1.0 - (tf.math.reduce_variance(real_parts) / (spread + EPS))

def u_metric(unreal_parts):
    """
    Quantifies unreal stability/cohesion based on variance of unreal parts of pairs.

    Computes 1 - variance / (max - min + EPS); a higher value implies higher
    stability. When the spread (max - min) is below EPS the values are treated
    as identical and the maximum score 1 is returned.

    Args:
        unreal_parts (tf.Tensor): Unreal components to score.

    Returns:
        tf.Tensor: Scalar tensor in the dtype of `unreal_parts`. A tensor is
            returned on every path, so callers can always use `.numpy()`.
    """
    max_val = tf.reduce_max(unreal_parts)
    min_val = tf.reduce_min(unreal_parts)
    spread = max_val - min_val
    if spread < EPS: # All values are effectively the same
        # Return a tensor rather than a Python float so that the return type
        # matches the general branch below.
        return tf.ones_like(spread)

    return 1.0 - (tf.math.reduce_variance(unreal_parts) / (spread + EPS))

def dv_metric(pairs_q):
    """
    Quantifies real/unreal consistency of a pair register.

    For every pair, |real - unreal| is divided by the pair's Euclidean norm
    (plus EPS); pairs whose norm is not above EPS contribute 0. The result is
    1 minus the mean of these ratios, so a higher value implies lower
    divergence.

    Args:
        pairs_q (tf.Tensor): Phase-dual pairs with (real, unreal) on the last axis.

    Returns:
        tf.Tensor: Scalar consistency score.
    """
    real_component, unreal_component = pairs_q[..., 0], pairs_q[..., 1]
    pair_norm = tf.norm(pairs_q, axis=-1)
    gap = tf.abs(real_component - unreal_component)

    # Near-zero pairs are scored as "no divergence" instead of dividing by ~0.
    relative_gap = tf.where(pair_norm > EPS, gap / (pair_norm + EPS), tf.zeros_like(pair_norm))

    return 1.0 - tf.reduce_mean(relative_gap)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant test: do the primaries' magnitudes add up to 'units'?

    The first six rows of `pairs_q` are taken as the primaries; the sum of
    their Euclidean norms must differ from invariants['units'] (default 1.0)
    by less than invariants['tol'] (default EPS).

    Args:
        pairs_q (tf.Tensor): Pair register of one qubit, (real, unreal) on the last axis.
        triplets_q: Unused; kept for a uniform call signature.
        invariants (dict): Invariant constants; 'units' and 'tol' are read.

    Returns:
        tf.Tensor: Scalar boolean tensor, True when the invariant holds.
    """
    target_units = invariants.get('units', 1.0)
    tolerance = invariants.get('tol', EPS)
    primary_norm_total = tf.reduce_sum(tf.norm(pairs_q[:6, :], axis=-1))
    return tf.abs(primary_norm_total - target_units) < tolerance

def degenerate_check(primaries_q):
    """
    Conceptual degeneracy test for a set of primaries.

    Args:
        primaries_q (tf.Tensor): Primaries with (real, unreal) on the last axis.

    Returns:
        tf.Tensor: Scalar boolean tensor, True when every unit's Euclidean
            norm is below EPS (i.e. all primaries are effectively zero).
    """
    unit_norms = tf.norm(primaries_q, axis=-1)
    return tf.reduce_all(unit_norms < EPS)

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derive a corrected 30-bit pattern for one qubit from per-index scores and guards.

    Rule: b_i = 1 if r_i > TAU_R AND u_i > TAU_U AND dv_i > TAU_D AND the
    index's triplet block has mixed signs AND the invariant check passes AND
    the primaries are not degenerate; otherwise b_i = 0.

    Guards, applied in this order:
        1. If the binary entropy of the pattern is not above 0.3, TAU_R and
           TAU_U are multiplied by 1.2, TAU_D becomes max(0.9·TAU_D, 0.25),
           and the rule is evaluated again.
        2. If the pattern is all ones or all zeros, it is replaced by
           (|real_i| > EPS) AND (triplet block has mixed signs).

    Requires eager execution (tensors are used in Python `if` conditions).

    Args:
        pairs_q (tf.Tensor): Pair register of one qubit, [30, 2].
        triplets_q (tf.Tensor): Triplets of the qubit; only forwarded to the invariant check.
        invariants (dict): Invariant constants for the invariant check.
        initial_TAU_R, initial_TAU_U, initial_TAU_D: Starting thresholds.

    Returns:
        tuple: (bits as an int32 tensor of shape [30], TAU_R, TAU_U, TAU_D),
            the thresholds being the possibly adjusted values.
    """
    tau_r, tau_u, tau_d = initial_TAU_R, initial_TAU_U, initial_TAU_D

    real_part = pairs_q[:, 0]                 # [30]
    unreal_part = pairs_q[:, 1]               # [30]
    pair_norm = tf.norm(pairs_q, axis=-1)     # [30]
    has_norm = pair_norm > EPS
    zero_score = tf.zeros_like(pair_norm)

    # Per-index scores: share of the pair's norm carried by the real part,
    # by the unreal part, and by their difference. Near-zero pairs score 0.
    r_i = tf.where(has_norm, tf.abs(real_part) / pair_norm, zero_score)
    u_i = tf.where(has_norm, tf.abs(unreal_part) / pair_norm, zero_score)
    dv_i = tf.where(has_norm, tf.abs(real_part - unreal_part) / pair_norm, zero_score)

    # Triplet diversity: a block of three consecutive indices is "mixed" when
    # any real-part sign differs from the block's first one. The flag is
    # replicated onto all three indices of its block.
    real_signs = tf.sign(real_part)
    mix_flags = []
    for block_start in range(0, 30, 3):
        block_signs = real_signs[block_start:block_start + 3]
        block_is_mixed = tf.cast(tf.reduce_any(tf.not_equal(block_signs, block_signs[0])), tf.int32)
        mix_flags += [block_is_mixed, block_is_mixed, block_is_mixed]
    trip_mix = tf.convert_to_tensor(mix_flags, dtype=tf.int32)  # [30]

    # Qubit-wide gates, shared by every evaluation of the rule.
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))
    structural_gate = (trip_mix > 0) & invariant_ok & not_degenerate

    def bits_for(thr_r, thr_u, thr_d):
        # Evaluate the per-index rule for one set of thresholds.
        return tf.cast((r_i > thr_r) & (u_i > thr_u) & (dv_i > thr_d) & structural_gate, tf.int32)

    b = bits_for(tau_r, tau_u, tau_d)

    # Guard 1: minimum binary entropy of the bit pattern.
    ones_fraction = tf.reduce_mean(tf.cast(b, tf.float32))
    entropy = - (ones_fraction * tf.math.log(ones_fraction + EPS)
                 + (1.0 - ones_fraction) * tf.math.log(1.0 - ones_fraction + EPS))
    if not (entropy > 0.3):
        tau_r *= 1.2
        tau_u *= 1.2
        tau_d = max(tau_d * 0.9, 0.25)
        b = bits_for(tau_r, tau_u, tau_d)

    # Guard 2: a uniform pattern is never accepted as the final decision.
    if tf.reduce_all(b == 1) or tf.reduce_all(b == 0):
        b = tf.cast((tf.abs(real_part) > EPS) & (trip_mix > 0), tf.int32)

    return b, tau_r, tau_u, tau_d

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Error-correction hook for a single qubit.

    The current bit pattern is considered inconsistent when it is all ones,
    all zeros, has fewer than 5 ones, or has more than 25 ones. A consistent
    pattern is returned unchanged together with the unchanged key.

    For an inconsistent pattern the bits are re-derived with
    `derive_bits_advanced`, using only this qubit's `pairs_q`/`triplets_q` and
    the module thresholds TAU_R_METRIC, TAU_U_METRIC and TAU_D_METRIC. A new
    key is computed as SHA-256(old key + "REFactorBits" + str(new bits)), and
    one record describing the correction (metrics, thresholds used, new bits,
    old and new key) is appended to `TRACE`.

    Requires eager execution.

    Args:
        q_idx (int): The index of the current qubit being processed.
        pairs_q (tf.Tensor): The 30-index phase-dual pair register for the current qubit [30, 2].
        triplets_q (tf.Tensor): The 10 triplets for the current qubit [10, 3, 2].
        current_bits_q (tf.Tensor): The initially derived 30-bit pattern for the current qubit [30].
        resonance_key_q (str): The current resonance key string for the qubit.
        TRACE (list): Mutated in place; receives one dict per correction.
        invariants (dict): Dictionary of invariant constants.

    Returns:
        tuple[tf.Tensor, str]:
            - new_bits_q (tf.Tensor): The potentially corrected 30-bit pattern.
            - updated_resonance_key_q (str): The resonance key, re-hashed if a correction occurred.
    """
    num_ones = tf.reduce_sum(current_bits_q)
    # Each condition is reduced to a Python bool before combining, instead of
    # chaining `or` over tensors.
    is_inconsistent = (
        bool(tf.reduce_all(tf.equal(current_bits_q, 1)))    # all ones
        or bool(tf.reduce_all(tf.equal(current_bits_q, 0))) # all zeros
        or bool(num_ones < 5)                               # too sparse
        or bool(num_ones > 25)                              # too dense
    )

    if not is_inconsistent:
        return current_bits_q, resonance_key_q

    # Re-derive the bits; the thresholds actually used are logged below.
    new_bits_q, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC)
    corrected_bits_as_list = new_bits_q.numpy().tolist()

    # The updated key incorporates the correction lineage.
    updated_resonance_key_q = hashlib.sha256(
        (resonance_key_q + "REFactorBits" + str(corrected_bits_as_list)).encode("utf-8")
    ).hexdigest()

    # float()/bool() accept both scalar eager tensors and plain Python
    # numbers, so logging cannot fail when a metric returns a Python float.
    TRACE.append({'qubit': q_idx, 'reason':"binary_refactor", 'source':"tuplets",
                  'r_metric': float(r_metric(pairs_q[:,0])),
                  'u_metric': float(u_metric(pairs_q[:,1])),
                  'dv_metric': float(dv_metric(pairs_q)),
                  'invariant_pass': bool(invariant_check_conceptual(pairs_q, triplets_q, invariants)),
                  'degenerate_check': bool(degenerate_check(pairs_q[:6, :])),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': corrected_bits_as_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Number of virtual qubits
Q = 64 # Batch size used by every tensor built in this example

# NOTE(review): no tf.random / np.random seed is set in this section, so the
# generated values differ between runs unless a seed is set earlier in the
# file — confirm before relying on this example being reproducible.

# Dynamically generate initial_primaries
# Each primary (x, y, z) is a phase-dual [real, unreal]
# Need to generate Q sets of (x,y,z) then derive their negations.

# Generate random x, y, z components (each as a phase-dual [real, unreal]) for Q qubits
# Shape [Q, 3, 2] representing (x,y,z) base primaries, uniform in [-1, 1)
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# Construct initial_primaries = [x, -x, y, -y, z, -z]
# Where x, y, z are from base_primaries_xyz and -x is neg_phase_dual(x).
# Each [Q, 2] slice gets a new axis 1 so the six slices concatenate to [Q, 6, 2].
initial_primaries = tf.concat([
    base_primaries_xyz[:, 0, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 0, :])[:, tf.newaxis, :], # x, -x
    base_primaries_xyz[:, 1, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 1, :])[:, tf.newaxis, :], # y, -y
    base_primaries_xyz[:, 2, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 2, :])[:, tf.newaxis, :], # z, -z
], axis=1) # Shape [Q, 6, 2]

# Dynamically generate axis_maps
# axis_maps for each axis ('x', 'y', 'z') should be of shape [Q, K_max, 2]
# where K_max is the maximum K across all qubits and axes.

list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []

max_k_dynamic = 0
min_k_val = 3 # Smallest K that can be drawn (inclusive)
max_k_val = 11 # Exclusive upper bound for np.random.randint, so K is at most 10

for q_idx in range(Q):
    # Generate a random K for each qubit and for each axis map (for x, y, z separately)
    k_x = np.random.randint(min_k_val, max_k_val)
    k_y = np.random.randint(min_k_val, max_k_val)
    k_z = np.random.randint(min_k_val, max_k_val)

    # One [K, 2] map per axis, uniform in [-1, 1)
    list_of_axis_maps_x.append(tf.random.uniform(shape=[k_x, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_y.append(tf.random.uniform(shape=[k_y, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_z.append(tf.random.uniform(shape=[k_z, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))

    # Track the largest K seen so far; it becomes the padded length below
    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Zero-pad every map at the end of axis 0 up to max_k_dynamic rows, then stack
# the Q maps of each axis into one [Q, max_k_dynamic, 2] tensor
axis_maps = {
    'x': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_x]),
    'y': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_y]),
    'z': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_z]),
}

# k_values has shape [Q, 1] with random float32 values in [0.0, 1.0)
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# Define a_U_constant (from NGFT)
a_U_constant = tf.constant(10.0, dtype=tf.float32) # Scalar

# Dynamically generate lineage_hashes: one SHA-256 hex digest per qubit, built
# from the qubit index and a random integer in [0, 1000)
lineage_hashes = []
for q_idx in range(Q):
    lineage_hashes.append(hashlib.sha256(f"Q{q_idx}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest())

# Sample NECL program (list of operation strings) - NECL[q] = [op(args), ...]
# For this example, all qubits share the same NECL program.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Placeholder parameters for NECL operations (can be expanded).
# Entries for ops that are absent from necl_program_shared (GEOD, GLUE, SPLIT)
# are defined but not used by this program.
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32), # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32), # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),  # theta (radians)
    'LIFT': tf.constant(0.5, dtype=tf.float32),   # d (e.g., a scaling factor based on d)
    'GLUE': tf.constant(0.1, dtype=tf.float32),   # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),  # tau
}

# Invariants ν: {units, tol, ordering}
invariants = {
    'units': 1.0,
    'tol': 1e-5, # Tolerance used by the invariant check during error correction
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1 # Threshold for scores in error correction
}

# TRACE (lineage manifest) - list of dictionaries to log events
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν)
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL)       # default order: TWIST → CURV → PARITY_Q → COLLAPSE_Q
# (the shared program used here additionally ends with LIFT)
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q]) (This step implies per-qubit computation for pairs and triplets)
# In our vectorized setup, we compute for all Q simultaneously.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# We'll re-detect collapse and parity for the final state to generate initial bits for error correction.
final_collapse_mask = detect_collapse(all_pairs)
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

# Per-qubit results collected by the loop below.
corrected_bits_list = []
final_resonance_keys = []

# Loop through each qubit for error correction (if needed) and key generation
for q_idx in range(Q):
    # Extract per-qubit data
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Manual modification to force an 'inconsistent' state for Qubit 0 for demonstration
    if q_idx == 0:
        # Example: set Qubit 0's bits to be very sparse (e.g., only one '1')
        # The bitmap computed above is discarded for this qubit: bit 0 is 1, bits 1..29 are 0.
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # Error Correction (Step A & B from instructions)
    # TRACE is passed in so correct_bits can log to it (presumably appended in place — confirm in correct_bits).
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # The updated_key_q already contains the 'REFactorBits' lineage if correction occurred
    final_resonance_keys.append(updated_key_q)

# Convert corrected_bits_list back to a tensor for subsequent use if needed
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# This step uses the full triplets and axis maps to promote new primaries
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# This is done within the loop for correct_bits and then in make_keys
# The final_resonance_keys list already holds the updated keys after potential error correction.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Every qubit is decoded in the loop below, one hash per call.
# NOTE(review): Q_for_decode_example is not referenced anywhere in this section.
Q_for_decode_example = 1 # We decode for 1 qubit per hash call
D_for_decode_example = 16 # D ≥ 16 as per instruction

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    # NOTE(review): this decodes the initial lineage_hashes entry, not the
    # post-correction key in final_resonance_keys — confirm that is intended.
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Concatenate decoded spins and i_vecs to get [Q, 2, 3] and [Q, D]
# (assumes each decode call returns a leading batch dimension of 1 — TODO confirm in decode_lineage_hash)
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
# Dump the inputs, intermediate registers and outputs of the main cycle to stdout.
print("Primaries In:\n", initial_primaries.numpy())
print("\nPrimaries After NECL:\n", primaries_after_necl.numpy())
# Print pairs and triplets per-qubit, as they are part of the intermediate tuplet constructs
# (only qubit 0 is shown)
print("\nPairs[0]:\n", all_pairs[0].numpy())
print("\nTriplets[0]:\n", all_triplets[0].numpy())
print("\nBits (all qubits):\n", corrected_bits_tensor.numpy()) # Use corrected bits
print("\nPrimaries Out (promoted):\n", primaries_out_promoted.numpy())

# Conceptual Nth identities: {n^1, n^2, n^3, n^p} per qubit
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # Extract promoted_primary_x for the current qubit (index 0 of the promoted primaries)
    promoted_primary_x = primaries_out_promoted[q_idx, 0, :] # Shape [2]

    # Ensure promoted_primary_x is explicitly converted to a Tensor for n_identity
    promoted_primary_x_tensor = tf.convert_to_tensor(promoted_primary_x, dtype=tf.float32)

    # Only n^1 depends on the qubit; n^0, n^2 and n^p are fixed values returned by n_identity.
    print(f"  Qubit {q_idx}:")
    print(f"    n^0 (base identity): {n_identity(0).numpy()[0]}")
    print(f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}")
    print(f"    n^2 (second-order product): {n_identity(2).numpy()[0]}") # Placeholder
    print(f"    n^p (p-order product): {n_identity('p').numpy()[0]}") # Placeholder

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# NECL manifest + checksum per qubit - Conceptual: print TRACE log and a checksum of it
# The checksum is the SHA-256 of str() applied to the list of that qubit's TRACE
# entries, so it depends on the entries' Python repr.
necl_manifest_checksums = []
for q_idx in range(Q):
    qubit_trace_entries = [entry for entry in TRACE if entry['qubit'] == q_idx]
    manifest_str = str(qubit_trace_entries)
    checksum = hashlib.sha256(manifest_str.encode('utf-8')).hexdigest()
    necl_manifest_checksums.append(checksum)
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)


NameError: name '_value_unique_axis_axis_phase_dual' is not defined

**Reasoning**:
The previous execution failed due to a `NameError` caused by a typo in the `promote_primaries` function. I need to correct `_value_unique_axis_axis_phase_dual` to `_value_unique_axis_phase_dual` to resolve this, while preserving the added comments in `correct_bits`.



In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
THETA_PHIPI = 0.001  # phi-pi tolerance constant (default uniqueness tolerance in the promotion helpers)
TAU_HI      = 1.0    # high threshold center (for collapse detection)
TAU_LOW     = -TAU_HI # low threshold for negative values (for collapse detection)
EPS         = 1e-6   # near-zero buffer (bitmap tie-break and division guard in n_identity)

# Advanced error correction metrics thresholds
TAU_R_METRIC = 0.85  # Adjusted Threshold for real stability metric (higher for stricter stability)
TAU_U_METRIC = 0.85  # Adjusted Threshold for unreal stability metric (higher for stricter stability)
TAU_D_METRIC = 0.85  # Adjusted Threshold for real/unreal divergence metric (higher for stricter consistency)

# Prime index mask for 0..29 (2,3,5,7,11,13,17,19,23,29)
# Entry i is 1 when index i is prime, 0 otherwise.
PRIME_MASK = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Performs component-wise addition for phase-dual tensors.
    Assumes last dimension is phase-dual (real, unreal).
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|

    Args:
        a: First operand.
        b: Second operand.

    Returns:
        The result of `a + b`.
    """
    return a + b

def mul_phase_dual_component_wise(a, b):
    """
    Performs component-wise multiplication for phase-dual tensors.
    Assumes last dimension is phase-dual (real, unreal).
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|

    Args:
        a: First operand.
        b: Second operand.

    Returns:
        The result of `a * b` (real times real, unreal times unreal).
    """
    return a * b

def neg_phase_dual(a):
    """
    Performs component-wise negation for phase-dual tensors.
    Assumes last dimension is phase-dual (real, unreal).

    Args:
        a: Operand to negate.

    Returns:
        The result of `-a`, i.e. both the real and unreal components change sign.
    """
    return -a

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Return the conceptual Nth identity n^k as a [1, 2] float32 tensor.

    Only order 1 has non-trivial behaviour; every other order currently
    yields the same placeholder value [[1.0, 0.0]].

    Args:
        order (int or str): The order of the identity. Can be 0, 1, 2, or 'p' for placeholder.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) holding two values,
                                               used to derive n^1. Ignored unless order is 1.
                                               Defaults to None.
    Returns:
        tf.Tensor: A 1x2 tensor representing the conceptual Nth identity.
    """
    if order == 1:
        if selector_primary is None:
            # Default n^1 when no selector is given: the unit vector along (1, 1).
            return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0) # [1, 2]

        # Derive n^1 from the supplied primary by scaling it to (almost) unit length.
        # EPS in the denominator keeps the division finite for a zero-length selector.
        length = tf.norm(selector_primary, axis=-1, keepdims=True)
        unit_selector = selector_primary / (length + EPS)
        return tf.reshape(unit_selector, [1, 2]) # Ensure output shape is [1, 2]

    # n^0 = n_|1, 0| (base identity). n^2 (product of two first-order selectors)
    # and all other orders are placeholders that return the same constant.
    return tf.constant([[1.0, 0.0]], dtype=tf.float32) # [1, 2]

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Builds the 30-entry phase-dual pair register from the 6 primaries of each qubit.

    The register holds the 6 primaries (x, xi, y, yi, z, zi) followed by 24
    combinations. For each axis pairing (x with y, x with z, y with z) and for
    every choice of one member from each axis, it stores the component-wise sum
    and then the component-wise product.

    Args:
        prim (tf.Tensor): Input primaries of shape [Q, 6, 2] and dtype tf.float32.
                          The last dimension holds [real, unreal] components.

    Returns:
        tf.Tensor: The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
    """
    assert prim.shape.rank == 3 and (tf.shape(prim)[-2] == 6).numpy().item() and (tf.shape(prim)[-1] == 2).numpy().item() and (prim.dtype == tf.float32), \
        f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Split the 6-dimension; every piece has shape [Q, 2].
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)

    # Canonical order: the primaries first, then (sum, product) for each combination.
    register = [x, xi, y, yi, z, zi]
    axis_pairings = ((x, xi, y, yi), (x, xi, z, zi), (y, yi, z, zi))
    for first, first_alt, second, second_alt in axis_pairings:
        for lhs in (first, first_alt):
            for rhs in (second, second_alt):
                register.append(add_phase_dual(lhs, rhs))
                register.append(mul_phase_dual_component_wise(lhs, rhs))

    return tf.stack(register, axis=-2) # Stack along the 30-dimension

def group_triplets(pairs):
    """
    Splits the 30-index pair register into 10 consecutive triplets ('Nth Lines').

    Triplet t consists of register indices 3t, 3t+1 and 3t+2; the phase-dual
    (last) dimension is carried through unchanged.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table [[0,1,2],[3,4,5],...,[27,28,29]] of shape [10, 3].
    triplet_index = tf.reshape(tf.range(30, dtype=tf.int32), [10, 3])

    # Gathering along the 30-axis with a [10, 3] index yields [Q, 10, 3, 2].
    return tf.gather(pairs, triplet_index, axis=1)

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW):
    """
    Detects collapse across the 10 triplets within the phase-dual pair register.

    The register is viewed as 10 consecutive triplets (indices 3t..3t+2). A
    triplet collapses when, in its real components OR in its unreal components,
    at least one value is >= tau_hi and at least one value is <= tau_low, i.e.
    high and low values coexist inside the same block. When a triplet collapses,
    all 3 of its indices are marked. Implements the COLL operation.

    The computation is fully vectorized: one reshape, two reductions and one
    repeat, with no per-triplet Python loop.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold, applied to both the real and the unreal component.
        tau_low (float): Low threshold, applied to both components (should be negative).

    Returns:
        tf.Tensor: A binary collapse mask of shape [Q, 30] and dtype tf.int32.
                   (collapse is a per-unit binary flag, not phase-dual itself).
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # View the register as triplet blocks: element [q, t, j, c] is pairs[q, 3t + j, c].
    blocks = tf.reshape(pairs, [tf.shape(pairs)[0], 10, 3, 2]) # [Q, 10, 3, 2]

    # Per triplet and per component: does any member reach the high / low threshold?
    has_high = tf.reduce_any(blocks >= tau_hi, axis=2) # [Q, 10, 2]
    has_low = tf.reduce_any(blocks <= tau_low, axis=2) # [Q, 10, 2]

    # A component collapses when high and low coexist in the block; the triplet
    # collapses when either its real or its unreal component does.
    component_collapsed = tf.logical_and(has_high, has_low) # [Q, 10, 2]
    triplet_collapsed = tf.reduce_any(component_collapsed, axis=-1) # [Q, 10]

    # Spread each triplet flag over its 3 register indices.
    return tf.repeat(tf.cast(triplet_collapsed, tf.int32), repeats=3, axis=1) # [Q, 30]

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    Flips the sign of register entries that sit on a prime index or in a collapsed block.

    The flip (a half-rotation) multiplies both the real and the unreal component
    of an affected entry by -1; unaffected entries are multiplied by +1.
    PAR(x, π) operation.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        collapse_mask (tf.Tensor): The collapse mask of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): A 0/1 mask for prime indices, shape [30] and dtype tf.int32.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): The rotated phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
            - affected (tf.Tensor): A mask of affected indices of shape [Q, 30] and dtype tf.int32.
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert collapse_mask.shape.rank == 2 and (tf.shape(collapse_mask)[-1] == 30).numpy().item() and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item() and (collapse_mask.dtype == tf.int32), \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert prime_mask.shape.rank == 1 and (tf.shape(prime_mask)[-1] == 30).numpy().item() and (prime_mask.dtype == tf.int32), \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # Repeat the global prime mask for every qubit, then combine it with the collapse mask.
    prime_per_qubit = tf.broadcast_to(prime_mask, tf.shape(collapse_mask)) # [Q, 30]
    is_affected = tf.logical_or(prime_per_qubit > 0, collapse_mask > 0) # [Q, 30]
    affected = tf.cast(is_affected, tf.int32) # [Q, 30]

    # -1.0 where affected, +1.0 elsewhere; the trailing axis lets one sign cover (real, unreal).
    minus_one = tf.constant(-1.0, dtype=tf.float32)
    plus_one = tf.constant(1.0, dtype=tf.float32)
    sign = tf.where(affected > 0, minus_one, plus_one) # [Q, 30]

    rotated = pairs * tf.expand_dims(sign, axis=-1) # [Q, 30, 2]
    return rotated, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Reduces the phase-dual pair register to one bit per entry.

    Only the real (leading) component is inspected: the bit is 1 when it is
    strictly greater than `eps`, and 0 otherwise (negative and near-zero values).

    Args:
        rotated_pairs (tf.Tensor): The phase-dual pair register values of shape [Q, 30, 2] and dtype tf.float32.
        eps (float): Near-zero buffer for tie-breaking.

    Returns:
        tf.Tensor: A binary bitmap of shape [Q, 30] and dtype tf.int32.
    """
    assert rotated_pairs.shape.rank == 3 and (tf.shape(rotated_pairs)[-2] == 30).numpy().item() and (tf.shape(rotated_pairs)[-1] == 2).numpy().item() and (rotated_pairs.dtype == tf.float32), \
        f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    # Real component of every unit, shape [Q, 30].
    leading = rotated_pairs[:, :, 0]

    # Strict comparison: values equal to eps (and anything below) map to 0.
    return tf.cast(tf.greater(leading, eps), tf.int32)

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flags phase-dual values that differ from every observed value on an axis.

    A value counts as unique when its Euclidean distance (over the real/unreal
    pair) to each of the K observed values of the same qubit is strictly greater
    than `theta`. Accepts one value per qubit (`[Q, 2]`) or 10 candidates per
    qubit (`[Q, 10, 2]`).

    Args:
        vals (tf.Tensor): Candidate values for the axis, shape [Q, 2] or [Q, 10, 2].
        axis_vals (tf.Tensor): Observed values along the axis (from other qubits), shape [Q, K, 2].
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: int32 tensor of shape [Q] or [Q, 10]; 1 marks a unique value.

    Raises:
        ValueError: If `vals` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item(), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (tf.shape(vals)[0] == tf.shape(axis_vals)[0]).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    # Insert singleton axes so candidates broadcast against the K observed values.
    vals_rank = vals.shape.rank
    if vals_rank == 2:
        candidates = tf.expand_dims(vals, axis=1) # [Q, 1, 2]
        observed = axis_vals # [Q, K, 2]
    elif vals_rank == 3:
        candidates = tf.expand_dims(vals, axis=2) # [Q, 10, 1, 2]
        observed = tf.expand_dims(axis_vals, axis=1) # [Q, 1, K, 2]
    else:
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    # Distance between each candidate and each observed value.
    distances = tf.norm(tf.abs(candidates - observed), axis=-1) # [Q, K] or [Q, 10, K]

    # Unique only if the candidate is farther than theta from ALL K observed values.
    # NOTE(review): zero-filled padding rows in axis_vals are compared like real observations — confirm callers intend that.
    is_unique = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(is_unique, tf.int32) # [Q] or [Q, 10]

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Picks, per qubit, the first candidate whose uniqueness flag is set.

    When no candidate of a qubit is flagged, candidate 0 is returned for it.

    Args:
        cand_bool (tf.Tensor): Uniqueness flags (int32) of shape [Q, 10].
        vals (tf.Tensor): Phase-dual values from which to select, shape [Q, 10, 2].

    Returns:
        tf.Tensor: Selected phase-dual values of shape [Q, 2].
    """
    assert cand_bool.shape.rank == 2 and (tf.shape(cand_bool)[-1] == 10).numpy().item() and (cand_bool.dtype == tf.int32), \
        f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert vals.shape.rank == 3 and (tf.shape(vals)[-2] == 10).numpy().item() and (tf.shape(vals)[-1] == 2).numpy().item() and (vals.dtype == tf.float32), \
        f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    # argmax over the flags gives the position of the first 1, or 0 if the row is all zeros.
    first_hit = tf.argmax(cand_bool, axis=1) # [Q]

    # Batched gather: for qubit q take vals[q, first_hit[q], :].
    return tf.gather(vals, first_hit, batch_dims=1) # [Q, 2]

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Promotes new primaries from the triplets, preferring the final triplet.

    For each qubit, the last triplet is used when all three of its components
    are unique against the matching axis map. Otherwise each axis falls back to
    its first unique candidate among the 10 triplets (candidate 0 if none is
    unique). Every chosen value is emitted together with its negation, giving
    6 primaries. Implements ASSOC(A, B, α) logic.

    Args:
        triplets (tf.Tensor): 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
        axis_maps (dict): Dictionary with keys 'x', 'y', 'z' and values being tf.Tensor
                          of observed values from other qubits for that axis, shape [Q, K, 2] and dtype tf.float32.
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
    """
    assert triplets.shape.rank == 4 and (tf.shape(triplets)[-3] == 10).numpy().item() and (tf.shape(triplets)[-2] == 3).numpy().item() and (tf.shape(triplets)[-1] == 2).numpy().item(), \
        f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert triplets.dtype == tf.float32, \
        f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for k, v in axis_maps.items():
        assert isinstance(v, tf.Tensor) and v.dtype == tf.float32 and v.shape.rank == 3 and (tf.shape(v)[-1] == 2).numpy().item(), \
            f"axis_maps['{k}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {v.shape} and dtype {v.dtype}"
    assert (tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')

    def _with_negations(cx, cy, cz):
        # Interleave each value with its negation: [Q, 2] x 3 -> [Q, 6, 2].
        return tf.stack(
            [cx, neg_phase_dual(cx), cy, neg_phase_dual(cy), cz, neg_phase_dual(cz)],
            axis=1,
        )

    # --- Triplet-first path: components of the last triplet, each [Q, 2] ---
    last_triplet = triplets[:, -1, :, :] # [Q, 3, 2]
    last_components = [last_triplet[:, position, :] for position in range(3)]
    last_is_unique = [
        _value_unique_axis_phase_dual(component, axis_maps[name], theta) > 0 # [Q]
        for name, component in zip(axis_names, last_components)
    ]
    # The triplet qualifies only when all three components are unique.
    triplet_unique = tf.cast(
        tf.logical_and(tf.logical_and(last_is_unique[0], last_is_unique[1]), last_is_unique[2]),
        tf.int32,
    ) # [Q]
    prim_trip = _with_negations(*last_components) # [Q, 6, 2]

    # --- Axis-fallback path: 10 candidates per axis, each [Q, 10, 2] ---
    axis_candidates = [triplets[:, :, position, :] for position in range(3)]
    candidate_flags = [
        _value_unique_axis_phase_dual(candidates, axis_maps[name], theta) # [Q, 10]
        for name, candidates in zip(axis_names, axis_candidates)
    ]
    fallback_choice = [
        _first_unique_selection_phase_dual(flags, candidates) # [Q, 2]
        for flags, candidates in zip(candidate_flags, axis_candidates)
    ]
    prim_axis = _with_negations(*fallback_choice) # [Q, 6, 2]

    # Per-qubit switch, shaped [Q, 1, 1] so it broadcasts over the [Q, 6, 2] results.
    use_triplet = tf.cast(triplet_unique[:, tf.newaxis, tf.newaxis], tf.float32) > 0
    return tf.where(use_triplet, prim_trip, prim_axis)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Builds one SHA256 resonance key per qubit from its bit and mask rows.

    The tensors are converted to NumPy first; the hashing itself is plain
    Python. For each qubit a text manifest is assembled from its bits, the
    global prime mask, its collapse mask and its parity mask. If a non-empty
    lineage string is supplied for that qubit, it is appended as a `path`
    field before hashing.

    Args:
        bits (tf.Tensor): Bitmap of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask of shape [30] and dtype tf.int32 (global constant).
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask of shape [Q, 30] and dtype tf.int32.
        lineage_list (list[str], optional): A list of lineage strings for each batch sample. Defaults to None.

    Returns:
        list[str]: A list of SHA256 hex digests, one for each batch sample.
    """
    assert bits.shape.rank == 2 and (tf.shape(bits)[-1] == 30).numpy().item() and (bits.dtype == tf.int32), \
        f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert prime_mask.shape.rank == 1 and (tf.shape(prime_mask)[-1] == 30).numpy().item() and (prime_mask.dtype == tf.int32), \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert collapse_mask.shape.rank == 2 and (tf.shape(collapse_mask)[-1] == 30).numpy().item() and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item() and (collapse_mask.dtype == tf.int32), \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert parity_mask.shape.rank == 2 and (tf.shape(parity_mask)[-1] == 30).numpy().item() and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item() and (parity_mask.dtype == tf.int32), \
        f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item()) and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item()), \
        f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert isinstance(lineage_list, list) and len(lineage_list) == tf.shape(bits)[0].numpy().item(), \
            f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    batch_size = tf.shape(bits)[0].numpy().item()

    # Materialize everything once; the loop below is pure Python.
    bit_rows = bits.numpy()
    collapse_rows = collapse_mask.numpy()
    parity_rows = parity_mask.numpy()
    # The prime mask is global, so every qubit gets the same 30-entry list.
    prime_entries = prime_mask.numpy().tolist()

    keys = []
    for q_idx in range(batch_size):
        lineage_manifest = f"bits:{bit_rows[q_idx].tolist()}|prime:{prime_entries}|collapse:{collapse_rows[q_idx].tolist()}|parity:{parity_rows[q_idx].tolist()}"
        # Append the lineage only when a list was given and this entry is non-empty.
        if lineage_list and lineage_list[q_idx]:
            lineage_manifest += f"|path:{lineage_list[q_idx]}"
        keys.append(hashlib.sha256(lineage_manifest.encode("utf-8")).hexdigest())
    return keys

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    Evaluate the NGFT-inspired info-energy of every qubit in a batch.

    The information content I of a qubit is the sum of the Euclidean
    magnitudes of its six phase-dual primaries; the energy follows as
    E_info = (k+1) · a_U · I

    Args:
        primaries_out (tf.Tensor): Promoted primaries, shape [Q, 6, 2], dtype tf.float32.
        k_values (tf.Tensor): Per-qubit 'k' components, shape [Q, 1] or [Q], dtype tf.float32.
        a_U_constant (tf.Tensor): Universal constant, scalar tf.float32.

    Returns:
        tf.Tensor: Info-energy per qubit, shape [Q] and dtype tf.float32.

    Raises:
        AssertionError: If an argument has an unexpected shape, rank or dtype.
    """
    def _truth(flag):
        # Turn a scalar boolean tensor into a plain Python bool (eager execution).
        return flag.numpy().item()

    # ---- primaries_out checks ----
    assert primaries_out.shape.rank == 3 and _truth(tf.shape(primaries_out)[-1] == 2), \
        f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert primaries_out.dtype == tf.float32, f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert _truth(tf.shape(primaries_out)[-2] == 6), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"

    # ---- k_values checks ----
    assert k_values.dtype == tf.float32, f"k_values must have dtype tf.float32, but got {k_values.dtype}"
    k_rank = tf.rank(k_values).numpy().item()
    column_ok = k_rank == 2 and _truth(tf.shape(k_values)[-1] == 1)
    vector_ok = k_rank == 1 and _truth(tf.shape(k_values)[0] == tf.shape(primaries_out)[0])
    assert column_ok or vector_ok, \
           f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"

    # ---- a_U_constant checks ----
    assert a_U_constant.dtype == tf.float32, f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert _truth(tf.rank(a_U_constant) == 0), f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # A flat [Q] vector of k is lifted to a [Q, 1] column so it lines up with I below.
    k_column = tf.expand_dims(k_values, axis=-1) if k_rank == 1 else k_values

    # I: per-primary magnitudes [Q, 6], summed over the six primaries and kept as [Q, 1].
    info_content = tf.reduce_sum(tf.norm(primaries_out, axis=-1), axis=1, keepdims=True)

    # (k+1) * I * a_U, still a [Q, 1] column at this point.
    energy_column = (k_column + 1.0) * info_content * a_U_constant

    # Drop the trailing singleton axis to hand back shape [Q].
    return tf.squeeze(energy_column, axis=1)

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL op CURV: damp each phase-dual unit by its own magnitude.
    X ← X / (1 + |kappa|·|X|)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): Curvature kappa; scalar or broadcastable against [Q, 6, 1].

    Returns:
        tf.Tensor: Transformed primaries, same shape as the input.
    """
    abs_kappa = tf.abs(tf.cast(params_kappa, primaries.dtype))
    # |X|: Euclidean norm over the (real, unreal) axis, kept as [Q, 6, 1] for broadcasting.
    unit_norm = tf.norm(primaries, axis=-1, keepdims=True)
    damping = 1.0 + abs_kappa * unit_norm
    return primaries / damping

def GEOD(primaries, params_t):
    """
    NECL op GEOD: push every component away from zero by a fixed step.
    X ← X + t·sign(X)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): Step size 't'; scalar or broadcastable to primaries.

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    step = tf.cast(params_t, primaries.dtype)
    direction = tf.sign(primaries)
    return primaries + step * direction

def TWIST(primaries, params_theta):
    """
    NECL op TWIST: scale the unreal component by cos(theta), leaving the real one alone.
    X[...,1] ← X[...,1]·cos(theta)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): Angle 'theta'; scalar or broadcastable to [Q, 6].

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    angle = tf.cast(params_theta, primaries.dtype)
    real_part = primaries[..., 0]
    unreal_part = primaries[..., 1]
    # Re-assemble the phase-dual pair along the last axis.
    return tf.stack([real_part, unreal_part * tf.cos(angle)], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL op LIFT: stand-in for a projection to higher coordinates.

    In this software emulation the lift is a uniform rescale of the primaries
    by the factor (1 + 0.1·d).

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_d (tf.Tensor): Scalar 'd' parameter controlling the rescale.

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    d_value = tf.cast(params_d, primaries.dtype)
    # Placeholder gain; a real lift might use sqrt(d) or another invariant-preserving factor.
    gain = 1.0 + d_value * 0.1
    return primaries * gain

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL op GLUE: blend each primary unit with its predecessor.
    X ← X + sigma·roll(X, +1, axis=k)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_sigma (tf.Tensor): Gluing strength; anything broadcastable to primaries.

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    # Shift by one position along axis 1 (the six primary units) so every
    # unit receives a contribution from its neighbour, wrapping around.
    neighbour = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * neighbour

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL op SPLIT: divide every primary into two weighted shares.
    X ← concat(X·(1−tau), X·tau)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_tau (tf.Tensor): Split ratio tau.

    Returns:
        tf.Tensor: Primaries with axis 1 doubled in length ([Q, 12, 2] for a [Q, 6, 2] input).
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    kept_share = primaries * (1.0 - ratio)
    split_share = primaries * ratio
    # Concatenating along axis 1 changes the output shape relative to the input.
    return tf.concat([kept_share, split_share], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically map one qubit's lineage hash to a conceptual state.

    The hash and the qubit index are re-hashed into a 32-bit seed for a
    *private* pseudo-random generator; the spin vector and internal state
    vector are then drawn from that generator. The process-wide NumPy RNG
    is left untouched, so calling this function does not perturb other
    random draws in the program.

    Args:
        hex_hash_str (str): A SHA256 hex hash string (64 characters) for one qubit.
        q_idx (int): The index of the qubit; mixed into the seed.
        D (int): Dimensionality for i_vec (must be at least 16).
        num_qubits (int): Total number of qubits. Currently unused.
        invariants (dict): Dictionary of invariant constants. Currently unused.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec (tf.Tensor): Spin vector of shape [1, 2, 3], dtype tf.float32;
              row 0 is the real spin, row 1 the unreal (twisted) spin.
            - i_vec (tf.Tensor): Unit-norm internal state vector of shape [1, D],
              dtype tf.float32.

    Raises:
        AssertionError: If the hash is not a 64-character string or D < 16.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Re-hash the full lineage hash together with the qubit index so that every
    # qubit gets its own deterministic stream, then fold into a 32-bit seed.
    seed_digest = hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode("utf-8")).hexdigest()
    seed_value = int(seed_digest[:16], 16) % (2**32 - 1)

    # A local RandomState yields exactly the values that seeding the global
    # generator would, without clobbering global RNG state as a side effect.
    rng = np.random.RandomState(seed_value)

    # θ = 2π·r0, φ = 2π·r1, twist = 2π·r2
    r_vals = rng.rand(3)
    theta = 2 * math.pi * r_vals[0]
    phi = 2 * math.pi * r_vals[1]
    twist_angle = 2 * math.pi * r_vals[2]

    # Real spin: (x, y, z) = (sinθ cosφ, sinθ sinφ, cosθ)
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: the real spin rotated about the z axis by 'twist'
    # (2D rotation of the x/y components; z is unchanged).
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # I_vec: D further draws from the same stream, scaled to unit Euclidean norm.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:  # leave a (near-)zero vector as-is rather than divide by ~0
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): rescale each qubit so its summed primary magnitude matches 'units'.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        invariants (dict): Invariant constants; 'units' (default 1.0) is the target scale.

    Returns:
        tf.Tensor: Normalized primaries of shape [Q, 6, 2]. Qubits whose total
        magnitude does not exceed EPS come back as all zeros.
    """
    # Magnitude of every phase-dual unit, then the per-qubit total over axis 1.
    unit_magnitudes = tf.norm(primaries, axis=-1, keepdims=True)              # [Q, 6, 1]
    qubit_totals = tf.reduce_sum(unit_magnitudes, axis=1, keepdims=True)      # [Q, 1, 1]

    target_units = tf.cast(invariants.get('units', 1.0), primaries.dtype)
    # Target scale where the qubit carries magnitude, zero for (near-)empty qubits.
    scale = tf.where(qubit_totals > EPS, target_units, 0.0)

    # EPS in the denominator guards the division for zero-magnitude qubits.
    return primaries / (qubit_totals + EPS) * scale

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π): multi-qubit wrapper around apply_parity_rotation.

    The pair register and its collapse mask are derived internally from the
    primaries, the parity rotation is applied, and the leading six entries
    of the rotated register are returned as the updated primaries.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask [30].

    Returns:
        tf.Tensor: Primaries after parity rotation, shape [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)
    collapsed = detect_collapse(pair_register)
    rotated, _unused_parity_mask = apply_parity_rotation(pair_register, collapsed, prime_mask)
    # Only the first six register slots are handed back as primaries.
    primaries_slice = rotated[:, :6, :]
    return primaries_slice

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ): multi-qubit wrapper around detect_collapse.

    Only the primary units flagged by the collapse mask are zeroed; every
    other unit of the qubit is passed through unchanged.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].

    Returns:
        tf.Tensor: Primaries with flagged units set to [0.0, 0.0], shape [Q, 6, 2].
    """
    collapse_mask = detect_collapse(compute_pairs(primaries))  # [Q, 30]

    # Take the first six mask columns and add a trailing axis so the flag
    # broadcasts over both the real and unreal component: [Q, 6] -> [Q, 6, 1].
    unit_flags = tf.cast(collapse_mask[:, 0:6][..., tf.newaxis], tf.float32)
    is_collapsed = unit_flags > 0

    # Flagged units become zero; the rest keep their original value.
    return tf.where(is_collapsed, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit entry point that delegates to promote_primaries.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2].
        axis_maps (dict): Axis maps for uniqueness checks.
        theta_phipi (float): Tolerance for uniqueness.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2].
    """
    # Pure pass-through: all arguments are forwarded unchanged.
    promoted = promote_primaries(triplets, axis_maps, theta_phipi)
    return promoted

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Applies a sequence of NECL operations to multi-qubit primaries.
    Handles conceptual operations and integrated ISA steps like PARITY_Q and COLLAPSE_Q.

    Parameter values in params_dict may be scalar tensors or plain Python
    numbers; they are converted to tensors before use. Every op name in the
    program (including unknown ones) is appended to the checksum manifest,
    followed by "(<value>)" for the parameterised ops.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): List of NECL operation names to apply.
        params_dict (dict): Dictionary mapping NECL op names to their scalar parameters.
            Missing entries fall back to built-in defaults.
        prime_mask (tf.Tensor): Global prime mask needed for PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Accepted for interface
            compatibility; no operation currently reads it.

    Returns:
        tf.Tensor: Final primaries after applying the NECL program.
        str: SHA256 hex checksum of the applied NECL program manifest.
    """
    # Fallback parameter for each parameterised op (stored as float32 scalars).
    default_params = {
        'CURV': 0.01,          # kappa
        'GEOD': 0.05,          # t
        'TWIST': math.pi / 4,  # theta, radians
        'LIFT': 0.5,           # 'd' factor
        'GLUE': 0.1,           # sigma, gluing strength
        'SPLIT': 0.5,          # tau, split ratio
    }
    # Ops that are a direct call of the form op(primaries, param).
    elementwise_ops = {'CURV': CURV, 'GEOD': GEOD, 'TWIST': TWIST, 'LIFT': LIFT}

    def _resolve_param(name):
        # Tensors pass through unchanged; Python numbers become scalar tensors
        # so the manifest formatting below works for both.
        if name in params_dict:
            return tf.convert_to_tensor(params_dict[name])
        return tf.constant(default_params[name], dtype=tf.float32)

    current_primaries = primaries
    Q = tf.shape(primaries)[0].numpy().item()

    # Pieces of the program manifest; joined once at the end for the checksum.
    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)
        op_params = None

        if op_name in elementwise_ops:
            op_params = _resolve_param(op_name)
            current_primaries = elementwise_ops[op_name](current_primaries, op_params)
        elif op_name == 'GLUE':
            op_params = _resolve_param('GLUE')
            if Q % 2 != 0:
                print(f"Warning: GLUE operation skipped for odd Q ({Q})")
            else:
                # NOTE(review): GLUE receives a full [Q, 6, 2] tensor (the batch
                # rolled along the qubit axis, scaled by sigma) as its strength,
                # not the scalar its docstring describes. Kept as-is.
                sigma = tf.cast(op_params, current_primaries.dtype)
                rolled_batch = tf.roll(current_primaries, shift=1, axis=0)
                current_primaries = GLUE(current_primaries, rolled_batch * sigma)
        elif op_name == 'SPLIT':
            # SPLIT would double axis 1; inside a program it is recorded in the
            # manifest but deliberately leaves the primaries (and shape) untouched.
            op_params = _resolve_param('SPLIT')
        elif op_name == 'PARITY_Q':
            current_primaries = PARITY_Q(current_primaries, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            current_primaries = COLLAPSE_Q(current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

        if op_params is not None:
            manifest_parts.append(f"({op_params.numpy().item()})")

    program_manifest = "".join(manifest_parts)
    necl_checksum = hashlib.sha256(program_manifest.encode("utf-8")).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Quantifies real stability/cohesion based on variance of real parts of pairs.
    Higher value implies higher stability.

    Computed as 1 - variance / (range + EPS), where range = max - min.

    Args:
        real_parts (tf.Tensor): Real components to score.

    Returns:
        tf.Tensor: Scalar tensor; exactly 1 when all values are (effectively)
        identical. A tensor is returned on every path so callers can
        uniformly call `.numpy()` on the result.
    """
    max_val = tf.reduce_max(real_parts)
    min_val = tf.reduce_min(real_parts)
    value_range = max_val - min_val

    # No spread at all: report maximum stability as a scalar tensor of the
    # input dtype rather than a bare Python float.
    if value_range < EPS:
        return tf.ones_like(value_range)

    return 1.0 - (tf.math.reduce_variance(real_parts) / (value_range + EPS))

def u_metric(unreal_parts):
    """
    Quantifies unreal stability/cohesion based on variance of unreal parts of pairs.
    Higher value implies higher stability.

    Computed as 1 - variance / (range + EPS), where range = max - min.

    Args:
        unreal_parts (tf.Tensor): Unreal components to score.

    Returns:
        tf.Tensor: Scalar tensor; exactly 1 when all values are (effectively)
        identical. A tensor is returned on every path so callers can
        uniformly call `.numpy()` on the result.
    """
    max_val = tf.reduce_max(unreal_parts)
    min_val = tf.reduce_min(unreal_parts)
    value_range = max_val - min_val

    # No spread at all: report maximum stability as a scalar tensor of the
    # input dtype rather than a bare Python float.
    if value_range < EPS:
        return tf.ones_like(value_range)

    return 1.0 - (tf.math.reduce_variance(unreal_parts) / (value_range + EPS))

def dv_metric(pairs_q):
    """
    Score how closely the real and unreal components of each pair agree.

    For every pair the absolute real/unreal difference is taken relative to
    the pair's magnitude; the score is one minus the mean of those ratios,
    so a higher value means lower divergence (higher consistency).
    """
    component_gap = tf.abs(pairs_q[..., 0] - pairs_q[..., 1])
    pair_norms = tf.norm(pairs_q, axis=-1)

    # Pairs with (near-)zero magnitude contribute zero divergence instead of
    # a division by ~0.
    relative_gap = tf.where(
        pair_norms > EPS,
        component_gap / (pair_norms + EPS),
        tf.zeros_like(pair_norms),
    )
    return 1.0 - tf.reduce_mean(relative_gap)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant test for a single qubit.

    The invariant checked here: the summed magnitude of the first six pairs
    (the primaries) lies within invariants['tol'] (default EPS) of
    invariants['units'] (default 1.0). `triplets_q` is accepted but not used.

    Returns a scalar boolean tensor that is True when the invariant holds.
    """
    primary_units = pairs_q[:6, :]
    total_magnitude = tf.reduce_sum(tf.norm(primary_units, axis=-1))  # scalar
    target_units = invariants.get('units', 1.0)
    tolerance = invariants.get('tol', EPS)
    return tf.abs(total_magnitude - target_units) < tolerance

def degenerate_check(primaries_q):
    """
    Conceptual degeneracy test: are all primaries (near-)zero?

    Returns a scalar boolean tensor that is True when every unit's magnitude
    is below EPS, False otherwise.
    """
    unit_norms = tf.norm(primaries_q, axis=-1)
    every_unit_tiny = tf.reduce_all(unit_norms < EPS)
    return every_unit_tiny

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Re-derive a qubit's bit pattern from per-index metrics plus two guards.

    Rule: b_i=1 if r_i>TAU_R AND u_i>TAU_U AND dv_i>TAU_D AND trip_mix>0
    AND inv==True AND deg==False else 0.

    Guard 1 re-derives the bits once with adjusted thresholds when the
    pattern's binary entropy is not above 0.3. Guard 2 replaces an all-ones
    or all-zeros pattern with a fallback based on the real component and the
    triplet sign mix.

    Returns the bits together with the thresholds in effect after Guard 1.
    """
    tau_r = initial_TAU_R
    tau_u = initial_TAU_U
    tau_d = initial_TAU_D

    real_part = pairs_q[:, 0]      # [30]
    unreal_part = pairs_q[:, 1]    # [30]
    magnitude = tf.norm(pairs_q, axis=-1)
    has_magnitude = magnitude > EPS
    zero_fill = tf.zeros_like(magnitude)

    # Per-index ratios of |real|, |unreal| and |real - unreal| to the pair magnitude.
    # NOTE(review): wherever magnitude > EPS, r_i**2 + u_i**2 == 1, so r_i and
    # u_i cannot both exceed ~0.707; with thresholds of 0.85 the rule below
    # cannot fire and the Guard 2 fallback decides the result. Confirm intent.
    r_i = tf.where(has_magnitude, tf.abs(real_part) / magnitude, zero_fill)
    u_i = tf.where(has_magnitude, tf.abs(unreal_part) / magnitude, zero_fill)
    dv_i = tf.where(has_magnitude, tf.abs(real_part - unreal_part) / magnitude, zero_fill)

    # Triplet diversity: a block of three consecutive indices counts as mixed
    # when any real-part sign differs from the block's first sign; the flag is
    # shared by all three indices of the block.
    real_signs = tf.sign(pairs_q[:, 0])
    block_flags = []
    for start in range(0, 30, 3):
        block = real_signs[start:start + 3]
        block_is_mixed = tf.cast(tf.reduce_any(tf.not_equal(block, block[0])), tf.int32)
        block_flags += [block_is_mixed] * 3
    trip_mix = tf.convert_to_tensor(block_flags, dtype=tf.int32)  # [30]

    # Qubit-wide checks shared by every index.
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))

    def _bits_for(threshold_r, threshold_u, threshold_d):
        # Apply the per-index rule with the given thresholds.
        passes = (
            (r_i > threshold_r)
            & (u_i > threshold_u)
            & (dv_i > threshold_d)
            & (trip_mix > 0)
            & invariant_ok
            & not_degenerate
        )
        return tf.cast(passes, tf.int32)

    b = _bits_for(tau_r, tau_u, tau_d)

    # Guard 1: binary entropy of the fraction of set bits must exceed 0.3.
    ones_fraction = tf.reduce_mean(tf.cast(b, tf.float32))
    entropy = - (ones_fraction * tf.math.log(ones_fraction + EPS)
                 + (1.0 - ones_fraction) * tf.math.log(1.0 - ones_fraction + EPS))
    if not (entropy > 0.3):
        # Example adjustment: raise the R/U thresholds, lower D (floored at 0.25).
        tau_r *= 1.2
        tau_u *= 1.2
        tau_d = max(tau_d * 0.9, 0.25)
        b = _bits_for(tau_r, tau_u, tau_d)

    # Guard 2: a uniform pattern is replaced by the fallback rule.
    if tf.reduce_all(b == 1) or tf.reduce_all(b == 0):
        b = tf.cast((tf.abs(real_part) > EPS) & (trip_mix > 0), tf.int32)

    return b, tau_r, tau_u, tau_d

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Advanced Error Correction hook for a single qubit (q_idx).

    The incoming bit pattern is considered 'inconsistent' when it is all
    ones, all zeros, has fewer than 5 ones, or has more than 25 ones. In that
    case the bits are re-derived locally with `derive_bits_advanced`, using
    only this qubit's `pairs_q` / `triplets_q`, the resonance key is re-hashed
    to include the correction, and one lineage entry is appended to `TRACE`.
    Consistent patterns are returned untouched and nothing is logged.

    Args:
        q_idx (int): The index of the current qubit being processed.
        pairs_q (tf.Tensor): The 30-index phase-dual pair register for the current qubit [30, 2].
        triplets_q (tf.Tensor): The 10 triplets for the current qubit [10, 3, 2].
        current_bits_q (tf.Tensor): The initially derived 30-bit pattern for the current qubit [30].
        resonance_key_q (str): The current resonance key string for the qubit.
        TRACE (list): A list to append lineage information to if a correction is made
            (mutated in place).
        invariants (dict): Dictionary of invariant constants.

    Returns:
        tuple[tf.Tensor, str]:
            - new_bits_q (tf.Tensor): The potentially corrected 30-bit pattern.
            - updated_resonance_key_q (str): The updated resonance key string
              (re-hashed with the corrected bits if a correction occurred).
    """
    def _py_scalar(value):
        # Metric helpers may hand back either a scalar tensor or a plain
        # Python number; unwrap only when there is something to unwrap, so
        # trace logging never fails on the return type.
        return value.numpy().item() if hasattr(value, "numpy") else value

    # Inconsistency heuristics on the incoming pattern.
    num_ones = tf.reduce_sum(current_bits_q)
    is_all_ones = tf.reduce_all(tf.equal(current_bits_q, 1))
    is_all_zeros = tf.reduce_all(tf.equal(current_bits_q, 0))
    is_sparse = num_ones < 5   # Example: less than 5 bits are 1
    is_dense = num_ones > 25   # Example: more than 25 bits are 1

    is_inconsistent = bool(is_all_ones) or bool(is_all_zeros) or bool(is_sparse) or bool(is_dense)
    if not is_inconsistent:
        return current_bits_q, resonance_key_q

    # Re-derive the bits and capture the thresholds that were finally used.
    new_bits_q, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC)
    new_bits_as_list = new_bits_q.numpy().tolist()

    # The new key chains the old key with the corrected bit pattern.
    updated_resonance_key_q = hashlib.sha256(
        (resonance_key_q + "REFactorBits" + str(new_bits_as_list)).encode("utf-8")).hexdigest()

    # Lineage record: why, from what, with which metrics/thresholds, old -> new key.
    TRACE.append({'qubit': q_idx, 'reason': "binary_refactor", 'source': "tuplets",
                  'r_metric': _py_scalar(r_metric(pairs_q[:, 0])),
                  'u_metric': _py_scalar(u_metric(pairs_q[:, 1])),
                  'dv_metric': _py_scalar(dv_metric(pairs_q)),
                  'invariant_pass': _py_scalar(invariant_check_conceptual(pairs_q, triplets_q, invariants)),
                  'degenerate_check': _py_scalar(degenerate_check(pairs_q[:6, :])),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': new_bits_as_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Number of virtual qubits in the example batch
Q = 64

# Dedicated, explicitly seeded generators: every run of this example draws the
# same inputs, and the global NumPy / TensorFlow RNG state is left untouched.
EXAMPLE_SEED = 0
_example_np_rng = np.random.RandomState(EXAMPLE_SEED)
_example_tf_rng = tf.random.Generator.from_seed(EXAMPLE_SEED)

# Base primaries (x, y, z) for every qubit, each a phase-dual [real, unreal]
# drawn uniformly from [-1, 1). Shape [Q, 3, 2].
base_primaries_xyz = _example_tf_rng.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# initial_primaries = [x, -x, y, -y, z, -z] per qubit.
# Stacking each base primary with its negation gives [Q, 3, 2, 2]; flattening
# the (axis, sign) dimensions interleaves them in exactly that order.
initial_primaries = tf.reshape(
    tf.stack([base_primaries_xyz, neg_phase_dual(base_primaries_xyz)], axis=2),
    [Q, 6, 2],
)  # Shape [Q, 6, 2]

# Axis maps: for each axis ('x', 'y', 'z') a tensor of shape [Q, K_max, 2],
# where every qubit/axis gets its own random K and shorter maps are
# zero-padded up to the largest K drawn.
list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []

max_k_dynamic = 0
min_k_val = 3   # Minimum K (inclusive)
max_k_val = 11  # Upper bound for random K (exclusive, so K is at most 10)

for q_idx in range(Q):
    # Independent K per qubit and per axis
    k_x = _example_np_rng.randint(min_k_val, max_k_val)
    k_y = _example_np_rng.randint(min_k_val, max_k_val)
    k_z = _example_np_rng.randint(min_k_val, max_k_val)

    list_of_axis_maps_x.append(_example_tf_rng.uniform(shape=[k_x, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_y.append(_example_tf_rng.uniform(shape=[k_y, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_z.append(_example_tf_rng.uniform(shape=[k_z, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))

    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Zero-pad every map along its first axis to max_k_dynamic rows, then stack per axis.
axis_maps = {
    axis_name: tf.stack([
        tf.pad(t, [[0, max_k_dynamic - int(t.shape[0])], [0, 0]], "CONSTANT", constant_values=0.0)
        for t in per_qubit_maps
    ])
    for axis_name, per_qubit_maps in (
        ('x', list_of_axis_maps_x),
        ('y', list_of_axis_maps_y),
        ('z', list_of_axis_maps_z),
    )
}

# Per-qubit 'k' values, shape [Q, 1], float32 drawn uniformly from [0.0, 1.0)
k_values = _example_tf_rng.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# Universal constant a_U (from NGFT), scalar
a_U_constant = tf.constant(10.0, dtype=tf.float32)

# One lineage hash per qubit, derived from the qubit index and a random path id
lineage_hashes = [
    hashlib.sha256(f"Q{q_idx}_PathDynamic_{_example_np_rng.randint(0, 1000)}".encode('utf-8')).hexdigest()
    for q_idx in range(Q)
]

# NECL program for the example: an ordered list of operation names.
# Every qubit in the batch runs this same program.
necl_program_shared = [
    'TWIST',
    'CURV',
    'PARITY_Q',
    'COLLAPSE_Q',
    'LIFT',
]

# Scalar float32 parameter for each parameterised NECL operation.
necl_params = {
    op_name: tf.constant(op_value, dtype=tf.float32)
    for op_name, op_value in (
        ('CURV', 0.01),          # kappa
        ('GEOD', 0.05),          # t
        ('TWIST', math.pi/4),    # theta (radians)
        ('LIFT', 0.5),           # d (scaling factor based on d)
        ('GLUE', 0.1),           # sigma
        ('SPLIT', 0.5),          # tau
    )
}

# Invariants ν: {units, tol, ordering}, plus the error-correction score threshold.
invariants = dict(
    units=1.0,
    tol=1e-5,                      # tolerance used by error correction
    ordering='real_unreal_first',
    correction_threshold=0.1,      # threshold for scores in error correction
)

# TRACE (lineage manifest): list of dictionaries, one per logged event.
TRACE = list()

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν)
# NORMALIZE_Q is defined outside this cell's visible part; it receives the
# initial primaries and the invariants dictionary.
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL)       # default order: TWIST → CURV → PARITY_Q → COLLAPSE_Q
# NOTE(review): necl_program_shared above additionally ends with 'LIFT', which
# the default order listed here does not mention - confirm which is intended.
# necl_program_checksum is not read again in the visible part of this cell.
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q]) (This step implies per-qubit computation for pairs and triplets)
# In our vectorized setup, pairs and triplets are computed for all Q qubits at once.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# Collapse and parity are re-detected on the final pair register; the bitmap of
# the rotated pairs is the starting point for error correction.
# final_parity_mask is not read again in the visible part of this cell.
final_collapse_mask = detect_collapse(all_pairs)
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

# Per-qubit results collected by the loop below.
corrected_bits_list = []
final_resonance_keys = []

# Loop through each qubit for error correction (if needed) and key generation
for q_idx in range(Q):
    # Extract per-qubit data
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Manual modification to force an 'inconsistent' state for Qubit 0 for demonstration
    if q_idx == 0:
        # Replace Qubit 0's bits with a very sparse pattern: a single 1 followed by 29 zeros
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # Error Correction (Step A & B from instructions)
    # correct_bits is defined outside the visible part of this cell; it is handed
    # the shared TRACE list - presumably to log correction events (verify there).
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # The updated_key_q already contains the 'REFactorBits' lineage if correction occurred
    final_resonance_keys.append(updated_key_q)

# Stack the per-qubit corrected bits back into one tensor for subsequent use
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# Promotion uses the full triplets together with the axis maps.
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# No extra work here: final_resonance_keys was filled inside the correction
# loop above with the key returned by correct_bits for each qubit.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Every qubit's lineage hash is decoded, one decode_lineage_hash call per qubit.
# NOTE(review): Q_for_decode_example is assigned but not used in the visible code.
Q_for_decode_example = 1 # We decode for 1 qubit per hash call
D_for_decode_example = 16 # D ≥ 16 as per instruction

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    # NOTE(review): this decodes the original lineage_hashes entry, not the
    # possibly updated key in final_resonance_keys - confirm this is intended.
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Concatenate the per-qubit results along axis 0.
# Expected shapes are [Q, 2, 3] and [Q, D] - this assumes decode_lineage_hash
# returns a leading dimension of size 1 per call; TODO confirm.
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
# Dump the inputs and every intermediate product of the cycle as NumPy arrays.
print("Primaries In:\n", initial_primaries.numpy())
print("\nPrimaries After NECL:\n", primaries_after_necl.numpy())
# Pairs and triplets are intermediate tuplet constructs; only qubit 0 is shown.
print("\nPairs[0]:\n", all_pairs[0].numpy())
print("\nTriplets[0]:\n", all_triplets[0].numpy())
print("\nBits (all qubits):\n", corrected_bits_tensor.numpy()) # Use corrected bits
print("\nPrimaries Out (promoted):\n", primaries_out_promoted.numpy())

# Conceptual Nth identities: {n^1, n^2, n^3, n^p} per qubit
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # First promoted primary (index 0 on the 6-axis) of the current qubit
    promoted_primary_x = primaries_out_promoted[q_idx, 0, :] # Shape [2]

    # Explicitly convert to a float32 tensor before handing it to n_identity
    promoted_primary_x_tensor = tf.convert_to_tensor(promoted_primary_x, dtype=tf.float32)

    # n_identity results are indexed with [0] to print only their first row.
    print(f"  Qubit {q_idx}:")
    print(f"    n^0 (base identity): {n_identity(0).numpy()[0]}")
    print(f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}")
    print(f"    n^2 (second-order product): {n_identity(2).numpy()[0]}") # Placeholder
    print(f"    n^p (p-order product): {n_identity('p').numpy()[0]}") # Placeholder

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
# final_resonance_keys is a plain Python list, so it is printed without .numpy()
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# NECL manifest + checksum per qubit (conceptual): for every qubit, collect its
# TRACE entries and hash their string representation with SHA-256.
necl_manifest_checksums = []
for q_idx in range(Q):
    # TRACE entries are looked up by their 'qubit' key. The earlier spelling
    # 'quubit' raised KeyError on the first entry and aborted the cell.
    qubit_trace_entries = [entry for entry in TRACE if entry['qubit'] == q_idx]
    manifest_str = str(qubit_trace_entries)
    checksum = hashlib.sha256(manifest_str.encode('utf-8')).hexdigest()
    necl_manifest_checksums.append(checksum)
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)


Primaries In:
 [[[ 0.12260413 -0.264467  ]
  [-0.12260413  0.264467  ]
  [ 0.5133126  -0.17942071]
  [-0.5133126   0.17942071]
  [-0.84944797 -0.19150925]
  [ 0.84944797  0.19150925]]

 [[-0.49797988 -0.10051513]
  [ 0.49797988  0.10051513]
  [-0.04035234 -0.00245523]
  [ 0.04035234  0.00245523]
  [ 0.36817956  0.08633685]
  [-0.36817956 -0.08633685]]

 [[-0.04449058  0.59156895]
  [ 0.04449058 -0.59156895]
  [ 0.5117159  -0.8804784 ]
  [-0.5117159   0.8804784 ]
  [-0.7669594  -0.719141  ]
  [ 0.7669594   0.719141  ]]

 [[-0.5652113   0.17396641]
  [ 0.5652113  -0.17396641]
  [ 0.58107734  0.16662025]
  [-0.58107734 -0.16662025]
  [-0.46410036 -0.8560734 ]
  [ 0.46410036  0.8560734 ]]

 [[ 0.02902079  0.9663415 ]
  [-0.02902079 -0.9663415 ]
  [-0.57150507  0.7530916 ]
  [ 0.57150507 -0.7530916 ]
  [-0.66198707  0.45092034]
  [ 0.66198707 -0.45092034]]

 [[ 0.19305778  0.9569731 ]
  [-0.19305778 -0.9569731 ]
  [ 0.41442156  0.85904694]
  [-0.41442156 -0.85904694]
  [ 0.6224258   0.25845

KeyError: 'quubit'

**Reasoning**:
The previous execution failed with a `KeyError` because the final checksum loop read the `TRACE` entries with a misspelled key, 'quubit'. I need to correct 'quubit' to 'qubit' in that loop so the dictionary entries are accessed properly and the error is resolved.



In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
THETA_PHIPI = 0.001    # phi-pi tolerance constant
TAU_HI = 1.0           # high threshold center (collapse detection)
TAU_LOW = -TAU_HI      # low threshold, mirror image of TAU_HI (collapse detection)
EPS = 1e-6             # near-zero buffer

# Thresholds of the advanced error-correction metrics
TAU_R_METRIC = 0.85    # real stability metric (higher means stricter stability)
TAU_U_METRIC = 0.85    # unreal stability metric (higher means stricter stability)
TAU_D_METRIC = 0.85    # real/unreal divergence metric (higher means stricter consistency)

# Prime index mask over 0..29: 1 at the prime indices
# 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 and 0 everywhere else.
PRIME_MASK = tf.constant(
    [1 if index in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) else 0 for index in range(30)],
    dtype=tf.int32
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Add two phase-dual tensors component by component.

    The last dimension is assumed to hold the (real, unreal) pair, so
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|.
    """
    total = a + b
    return total

def mul_phase_dual_component_wise(a, b):
    """
    Multiply two phase-dual tensors component by component.

    The last dimension is assumed to hold the (real, unreal) pair, so
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|.
    """
    product = a * b
    return product

def neg_phase_dual(a):
    """
    Negate a phase-dual tensor component by component.

    The last dimension is assumed to hold the (real, unreal) pair; both
    components change sign.
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Return the conceptual Nth identity n^k as a [1, 2] float32 tensor.

    Args:
        order (int or str): Order of the identity: 0, 1, 2, or any other value
                            (e.g. 'p') for the higher-order placeholder.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) used to
                            derive n^1. Only consulted when order == 1.

    Returns:
        tf.Tensor: For order 1 with a selector, the selector divided by
                   (its norm + EPS), reshaped to [1, 2]. For order 1 without a
                   selector, [[1, 1]] / sqrt(2). For every other order,
                   including 0 and 2, the placeholder [[1.0, 0.0]].
    """
    if order == 1:
        if selector_primary is None:
            # Default first-order selector when nothing is supplied
            return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)

        # Scale the supplied primary towards unit length; EPS keeps the
        # division finite for an all-zero selector.
        selector_norm = tf.norm(selector_primary, axis=-1, keepdims=True)
        unit_selector = selector_primary / (selector_norm + EPS)
        return tf.reshape(unit_selector, [1, 2])

    # n^0 (base identity), n^2 and all higher orders currently share the same
    # placeholder value.
    return tf.constant([[1.0, 0.0]], dtype=tf.float32)

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Build the 30-index phase-dual pair register from the 6 primaries.

    The register starts with the primaries themselves (x, xi, y, yi, z, zi).
    Then, for the axis pairings (x, y), (x, z) and (y, z) in that order, each
    of the four member combinations contributes its component-wise sum
    followed by its component-wise product.

    Args:
        prim (tf.Tensor): Primaries of shape [Q, 6, 2] and dtype tf.float32;
                          the last dimension holds [real, unreal].

    Returns:
        tf.Tensor: Pair register of shape [Q, 30, 2] and dtype tf.float32.
    """
    assert (
        prim.shape.rank == 3
        and (tf.shape(prim)[-2] == 6).numpy().item()
        and (tf.shape(prim)[-1] == 2).numpy().item()
        and (prim.dtype == tf.float32)
    ), f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Split the 6-axis; every piece has shape [Q, 2]
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)

    # 6 primaries first, then 3 pairings * 4 combinations * (sum, product) = 24
    register = [x, xi, y, yi, z, zi]
    axis_pairings = (
        ((x, xi), (y, yi)),
        ((x, xi), (z, zi)),
        ((y, yi), (z, zi)),
    )
    for left_axis, right_axis in axis_pairings:
        for left in left_axis:
            for right in right_axis:
                register.append(add_phase_dual(left, right))
                register.append(mul_phase_dual_component_wise(left, right))

    return tf.stack(register, axis=-2)  # stack along the 30-dimension

def group_triplets(pairs):
    """
    Split the 30-index pair register into 10 consecutive triplets.

    Triplet g collects register indices 3g, 3g+1 and 3g+2. These triplets are
    the 'Nth Lines' in the context of the ISA.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: Triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table of shape [10, 3]: row g is [3g, 3g+1, 3g+2]
    triplet_index = tf.constant(
        [[3 * g, 3 * g + 1, 3 * g + 2] for g in range(10)],
        dtype=tf.int32,
    )

    # Gathering along axis 1 keeps the trailing (real, unreal) dimension intact
    return tf.gather(pairs, triplet_index, axis=1)

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW):
    """
    Detect collapse across the 10 triplets of the phase-dual pair register.

    A triplet (register indices 3g, 3g+1, 3g+2) collapses when, within that
    triplet, a high value (>= tau_hi) and a low value (<= tau_low) coexist in
    the real component, or likewise in the unreal component. When a triplet
    collapses, all 3 of its indices are marked. COLL(x, ξ) operation.

    The detection is a single vectorized reduction over a [Q, 10, 3] view of
    each component; no per-triplet Python loop is needed.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold, applied to both the real and the
                        unreal component.
        tau_low (float): Low threshold (should be negative), applied to both
                         the real and the unreal component.

    Returns:
        tf.Tensor: Binary collapse mask of shape [Q, 30] and dtype tf.int32
                   (a per-unit flag, not phase-dual itself).
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    def _high_and_low_coexist(component):
        """Per triplet, report whether a high and a low value are both present.

        component has shape [Q, 30]; the result is a bool tensor of shape [Q, 10].
        """
        # Consecutive index groups of 3 are exactly the 10 triplets
        blocks = tf.reshape(component, [-1, 10, 3])
        has_high = tf.reduce_any(blocks >= tau_hi, axis=-1)
        has_low = tf.reduce_any(blocks <= tau_low, axis=-1)
        return tf.logical_and(has_high, has_low)

    # A triplet collapses if EITHER component shows high/low coexistence
    triplet_collapsed = tf.logical_or(
        _high_and_low_coexist(pairs[..., 0]),  # real parts
        _high_and_low_coexist(pairs[..., 1]),  # unreal parts
    )  # [Q, 10]

    # Expand every triplet flag to its 3 member indices: [Q, 10] -> [Q, 30]
    collapse_mask = tf.repeat(tf.cast(triplet_collapsed, tf.int32), repeats=3, axis=1)
    return collapse_mask

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    Apply the half-rotation (sign flip) PAR(x, π) to the pair register.

    An index is flipped when it is a prime index or when it is marked in the
    collapse mask. The flip negates both the real and the unreal component.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask of shape [30] and dtype tf.int32.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): Rotated pair register, shape [Q, 30, 2], dtype tf.float32.
            - affected (tf.Tensor): Mask of flipped indices, shape [Q, 30], dtype tf.int32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # The [30] prime mask is broadcast to the [Q, 30] layout of the collapse mask
    is_prime = tf.broadcast_to(prime_mask, tf.shape(collapse_mask)) > 0
    is_collapsed = collapse_mask > 0
    flip = tf.logical_or(is_prime, is_collapsed)  # [Q, 30]

    affected = tf.cast(flip, tf.int32)

    # -1.0 where flipped, +1.0 elsewhere; a trailing axis lets the sign
    # broadcast over the (real, unreal) components.
    sign = tf.where(flip, tf.constant(-1.0, dtype=tf.float32), tf.constant(1.0, dtype=tf.float32))
    rotated = pairs * sign[..., tf.newaxis]  # [Q, 30, 2]
    return rotated, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Collapse the phase-dual pair register into a binary bitmap.

    Only the real (leading) component is inspected: the bit is 1 when it is
    strictly greater than eps, and 0 otherwise (negative and near-zero values).

    Args:
        rotated_pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        eps (float): Near-zero buffer used for tie-breaking.

    Returns:
        tf.Tensor: Bitmap of shape [Q, 30] and dtype tf.int32.
    """
    assert (
        rotated_pairs.shape.rank == 3
        and (tf.shape(rotated_pairs)[-2] == 30).numpy().item()
        and (tf.shape(rotated_pairs)[-1] == 2).numpy().item()
        and (rotated_pairs.dtype == tf.float32)
    ), f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    leading_values = rotated_pairs[..., 0]  # real component, [Q, 30]
    return tf.cast(tf.greater(leading_values, eps), tf.int32)

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flag phase-dual values that differ from every observed value on an axis.

    A value counts as unique when the norm (`tf.norm`) of its difference to
    each of the K observed values is strictly greater than theta. Accepts
    `vals` of shape `[Q, 2]` (individual primaries) or `[Q, 10, 2]` (candidates).

    Args:
        vals (tf.Tensor): Candidate values, shape [Q, 2] or [Q, 10, 2].
        axis_vals (tf.Tensor): Observed values along the axis, shape [Q, K, 2].
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: int32 tensor of shape [Q] or [Q, 10]; 1 marks a unique value.

    Raises:
        ValueError: If `vals` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item(), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (tf.shape(vals)[0] == tf.shape(axis_vals)[0]).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    vals_rank = vals.shape.rank
    if vals_rank == 2:
        # [Q, 1, 2] against [Q, K, 2] -> [Q, K, 2]
        observed = axis_vals
    elif vals_rank == 3:
        # [Q, 10, 1, 2] against [Q, 1, K, 2] -> [Q, 10, K, 2]
        observed = tf.expand_dims(axis_vals, axis=1)
    else:
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    # Inserting the K axis just before the (real, unreal) axis covers both ranks
    diffs = tf.abs(tf.expand_dims(vals, axis=-2) - observed)

    # Distance between phase-dual units: [Q, K] or [Q, 10, K]
    distances = tf.norm(diffs, axis=-1)

    # Unique only if the distance to EVERY observed value exceeds theta
    is_unique = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(is_unique, tf.int32)

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Pick, per batch row, the first candidate whose uniqueness flag is set.

    When no flag is set in a row, `tf.argmax` yields index 0, so the first
    candidate of that row is returned.

    Args:
        cand_bool (tf.Tensor): int32 uniqueness flags of shape [Q, 10].
        vals (tf.Tensor): Phase-dual candidates of shape [Q, 10, 2], dtype tf.float32.

    Returns:
        tf.Tensor: Selected phase-dual values of shape [Q, 2].
    """
    assert (
        cand_bool.shape.rank == 2
        and (tf.shape(cand_bool)[-1] == 10).numpy().item()
        and (cand_bool.dtype == tf.int32)
    ), f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert (
        vals.shape.rank == 3
        and (tf.shape(vals)[-2] == 10).numpy().item()
        and (tf.shape(vals)[-1] == 2).numpy().item()
        and (vals.dtype == tf.float32)
    ), f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    # Position of the first set flag in each row (0 when the row has none): [Q]
    first_hit = tf.argmax(cand_bool, axis=1)

    # Batched gather: row q of vals contributes its candidate first_hit[q],
    # reducing [Q, 10, 2] to [Q, 2].
    return tf.gather(vals, first_hit, axis=1, batch_dims=1)

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Promote new primaries, preferring the final triplet with a per-axis fallback.

    If all three components of a qubit's final triplet are unique against
    their axis maps, that triplet supplies the primaries. Otherwise each axis
    independently takes its first unique candidate among the 10 triplets.
    Either way, every chosen value is followed by its negation, giving 6
    primaries. Implements the ASSOC(A, B, α) logic.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
        axis_maps (dict): Keys 'x', 'y', 'z'; each value is a tf.Tensor of
                          observed values for that axis, shape [Q, K, 2] and
                          dtype tf.float32.
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
    """
    assert (
        triplets.shape.rank == 4
        and (tf.shape(triplets)[-3] == 10).numpy().item()
        and (tf.shape(triplets)[-2] == 3).numpy().item()
        and (tf.shape(triplets)[-1] == 2).numpy().item()
    ), f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert triplets.dtype == tf.float32, \
        f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for k, v in axis_maps.items():
        assert (
            isinstance(v, tf.Tensor)
            and v.dtype == tf.float32
            and v.shape.rank == 3
            and (tf.shape(v)[-1] == 2).numpy().item()
        ), f"axis_maps['{k}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {v.shape} and dtype {v.dtype}"
    assert (tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')

    def _with_conjugates(px, py, pz):
        """Interleave each [Q, 2] value with its negation into a [Q, 6, 2] tensor."""
        return tf.stack(
            [px, neg_phase_dual(px), py, neg_phase_dual(py), pz, neg_phase_dual(pz)],
            axis=1,
        )

    # --- Triplet-first path: components of the last triplet, each [Q, 2] ---
    last_triplet = triplets[:, -1, :, :]  # [Q, 3, 2]
    last_components = [last_triplet[:, slot, :] for slot in range(3)]
    component_unique = [
        _value_unique_axis_phase_dual(component, axis_maps[name], theta) > 0
        for name, component in zip(axis_names, last_components)
    ]
    # The triplet is unique only if all three of its components are unique
    triplet_unique = tf.cast(
        tf.logical_and(tf.logical_and(component_unique[0], component_unique[1]), component_unique[2]),
        tf.int32,
    )  # [Q]
    prim_trip = _with_conjugates(*last_components)  # [Q, 6, 2]

    # --- Axis-fallback path: first unique candidate among the 10 per axis ---
    fallback_choices = []
    for slot, name in enumerate(axis_names):
        candidates = triplets[:, :, slot, :]  # [Q, 10, 2]
        candidate_flags = _value_unique_axis_phase_dual(candidates, axis_maps[name], theta)  # [Q, 10]
        fallback_choices.append(_first_unique_selection_phase_dual(candidate_flags, candidates))  # [Q, 2]
    prim_axis = _with_conjugates(*fallback_choices)  # [Q, 6, 2]

    # Per-qubit switch, shaped [Q, 1, 1] so it broadcasts over [Q, 6, 2]
    use_triplet = triplet_unique[:, tf.newaxis, tf.newaxis] > 0
    return tf.where(use_triplet, prim_trip, prim_axis)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Build one SHA256 resonance key (hex digest) per batch sample.

    The tensors are materialized to NumPy and each sample is serialized into a
    text manifest of the form
    ``bits:[...]|prime:[...]|collapse:[...]|parity:[...]`` which is then hashed.
    When a non-empty lineage string is supplied for a sample, ``|path:<lineage>``
    is appended to that sample's manifest before hashing.

    Requires eager execution, since ``.numpy()`` is called on the inputs.

    Args:
        bits (tf.Tensor): Bitmap of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask of shape [30] and dtype tf.int32.
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask of shape [Q, 30] and dtype tf.int32.
        lineage_list (list[str], optional): One lineage string per batch sample.
            Empty/None entries are ignored. Defaults to None.

    Returns:
        list[str]: SHA256 hex digests, one per batch sample, in batch order.

    Raises:
        AssertionError: If any shape, dtype, or batch-size check fails.
    """
    def _as_bool(flag_tensor):
        # Turn a scalar boolean tensor into a plain Python bool.
        return flag_tensor.numpy().item()

    assert bits.shape.rank == 2 and _as_bool(tf.shape(bits)[-1] == 30) and (bits.dtype == tf.int32), \
        f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"

    # Batch size; reused by the remaining checks and by the hashing loop.
    Q = tf.shape(bits)[0].numpy().item()

    assert prime_mask.shape.rank == 1 and _as_bool(tf.shape(prime_mask)[-1] == 30) and (prime_mask.dtype == tf.int32), \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert collapse_mask.shape.rank == 2 and _as_bool(tf.shape(collapse_mask)[-1] == 30) and _as_bool(tf.shape(collapse_mask)[0] == tf.shape(bits)[0]) and (collapse_mask.dtype == tf.int32), \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert parity_mask.shape.rank == 2 and _as_bool(tf.shape(parity_mask)[-1] == 30) and _as_bool(tf.shape(parity_mask)[0] == tf.shape(bits)[0]) and (parity_mask.dtype == tf.int32), \
        f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (Q == tf.shape(collapse_mask)[0].numpy().item()) and (Q == tf.shape(parity_mask)[0].numpy().item()), \
        f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert isinstance(lineage_list, list) and len(lineage_list) == Q, \
            f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    # Materialize everything once; hashing happens in plain Python from here on.
    bits_rows = bits.numpy()
    collapse_rows = collapse_mask.numpy()
    parity_rows = parity_mask.numpy()
    # The prime mask is shared by every sample, so its list form is built once.
    prime_entries = prime_mask.numpy().tolist()

    digests = []
    for sample_idx, bit_row in enumerate(bits_rows):
        segments = [
            f"bits:{bit_row.tolist()}",
            f"prime:{prime_entries}",
            f"collapse:{collapse_rows[sample_idx].tolist()}",
            f"parity:{parity_rows[sample_idx].tolist()}",
        ]
        # Only a non-empty lineage entry contributes to the manifest.
        if lineage_list and lineage_list[sample_idx]:
            segments.append(f"path:{lineage_list[sample_idx]}")

        manifest_text = "|".join(segments)
        digests.append(hashlib.sha256(manifest_text.encode("utf-8")).hexdigest())
    return digests

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    Compute the per-qubit info-energy E_info = (k + 1) · a_U · I.

    ``I`` is the sum, over the six primaries of a qubit, of the Euclidean norm
    of each (real, unreal) pair. ``k`` is supplied per qubit and ``a_U`` is a
    scalar constant. Requires eager execution (the checks call ``.numpy()``).

    Args:
        primaries_out (tf.Tensor): Promoted primaries, shape [Q, 6, 2], dtype tf.float32.
        k_values (tf.Tensor): Per-qubit 'k', shape [Q, 1] or [Q], dtype tf.float32.
        a_U_constant (tf.Tensor): Scalar tf.float32 constant.

    Returns:
        tf.Tensor: Info-energy per qubit, shape [Q], dtype tf.float32.

    Raises:
        AssertionError: If a shape, rank, or dtype check fails.
    """
    assert primaries_out.shape.rank == 3 and (tf.shape(primaries_out)[-1] == 2).numpy().item(), \
        f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert (primaries_out.dtype == tf.float32), f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert (tf.shape(primaries_out)[-2] == 6).numpy().item(), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert (k_values.dtype == tf.float32), f"k_values must have dtype tf.float32, but got {k_values.dtype}"

    # Rank of k as a plain Python int; decides both the shape check and the reshape below.
    k_rank = tf.rank(k_values).numpy().item()
    column_form = k_rank == 2 and (tf.shape(k_values)[-1] == 1).numpy().item()
    vector_form = k_rank == 1 and (tf.shape(k_values)[0] == tf.shape(primaries_out)[0]).numpy().item()
    assert column_form or vector_form, \
           f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"
    assert (a_U_constant.dtype == tf.float32), f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert (tf.rank(a_U_constant) == 0).numpy().item(), f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # Bring k into column form [Q, 1] so it lines up with the I column below.
    k_column = tf.expand_dims(k_values, axis=-1) if k_rank == 1 else k_values

    # |primary| for each of the six units -> [Q, 6], then summed per qubit -> [Q].
    unit_norms = tf.norm(primaries_out, axis=-1)
    per_qubit_total = tf.reduce_sum(unit_norms, axis=1)
    info_column = tf.expand_dims(per_qubit_total, axis=-1)  # [Q, 1]

    energy_column = (k_column + 1.0) * info_column * a_U_constant  # [Q, 1]
    return tf.squeeze(energy_column, axis=1)

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL op CURV: curvature-dependent damping of the primaries.

    Implements X ← X / (1 + |kappa|·|X|), where |X| is the Euclidean norm over
    the last axis (kept as a size-1 axis so it broadcasts back onto X).

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): Scalar or broadcastable kappa; cast to the
            dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries, same shape as the input.
    """
    curvature = tf.abs(tf.cast(params_kappa, primaries.dtype))
    unit_norm = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    damping = 1.0 + curvature * unit_norm
    return primaries / damping

def GEOD(primaries, params_t):
    """
    NECL op GEOD: push every component away from zero by a fixed step.

    Implements X ← X + t·sign(X), element-wise. Components that are exactly
    zero have sign 0 and are therefore left unchanged.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): Scalar or broadcastable step 't'; cast to the
            dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries, same shape as the input.
    """
    step = tf.cast(params_t, primaries.dtype)
    signed_step = step * tf.sign(primaries)
    return primaries + signed_step

def TWIST(primaries, params_theta):
    """
    NECL op TWIST: scale the unreal component by cos(theta).

    Implements X[...,1] ← X[...,1]·cos(theta); the real component X[...,0]
    passes through untouched.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): Angle 'theta'; cast to the dtype of
            `primaries`. NOTE(review): a non-scalar theta is broadcast against
            the sliced unreal component (last axis already removed), not
            against the full primaries tensor.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    angle = tf.cast(params_theta, primaries.dtype)
    real_part = primaries[..., 0]
    unreal_part = primaries[..., 1] * tf.cos(angle)
    # Re-assemble the (real, unreal) pair on the last axis.
    return tf.stack([real_part, unreal_part], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL op LIFT: stand-in for projecting to higher coordinates.

    In this emulation the lift is a plain uniform rescale: every component is
    multiplied by (1 + 0.1·d).

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_d (tf.Tensor): Scalar 'd' parameter; cast to the dtype of
            `primaries`.
    Returns:
        tf.Tensor: Rescaled primaries, same shape as the input.
    """
    lift_amount = tf.cast(params_d, primaries.dtype)
    gain = 1.0 + lift_amount * 0.1  # simple placeholder scaling for the conceptual lift
    return primaries * gain

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL op GLUE: mix each primary with its neighbour.

    Implements X ← X + sigma·roll(X, +1, axis=1), i.e. every entry along
    axis 1 receives a sigma-weighted copy of the preceding entry (cyclically).

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_sigma (tf.Tensor): Gluing strength; cast to the dtype of
            `primaries`.
    Returns:
        tf.Tensor: Transformed primaries, same shape as the input.
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    # Cyclic shift by one position along axis 1.
    neighbour = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * neighbour

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL op SPLIT: divide each primary into two weighted copies.

    Implements X ← concat(X·(1−tau), X·tau) along axis 1, so the size of
    axis 1 doubles.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_tau (tf.Tensor): Split ratio; cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Primaries of shape [Q, 12, 2] for a [Q, 6, 2] input.
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    kept_share = primaries * (1.0 - ratio)
    split_share = primaries * ratio
    # Concatenating on axis 1 is what changes the output shape.
    return tf.concat([kept_share, split_share], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically derive a conceptual spin vector and internal state vector
    for one qubit from its SHA256 hex hash.

    The hash is not parsed byte-by-byte: it is combined with `q_idx`, re-hashed,
    and the result seeds a pseudo-random generator from which all values are
    drawn. The generator is private to this call, so the global NumPy random
    state is left untouched; the values drawn are the same as those produced by
    seeding the legacy global generator with the same seed.

    Args:
        hex_hash_str (str): A SHA256 hex hash string (64 characters) for one qubit.
        q_idx (int): The index of the qubit; mixed into the seed.
        D (int): Dimensionality for i_vec; must be at least 16.
        num_qubits (int): Total number of qubits. Currently unused.
        invariants (dict): Invariant constants. Currently unused.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec: shape [1, 2, 3], dtype tf.float32. Row 0 is the real spin
              (a unit vector from spherical angles), row 1 is the unreal spin
              (the real spin rotated about the z axis by a twist angle).
            - i_vec: shape [1, D], dtype tf.float32, scaled to unit Euclidean
              norm unless its norm is not above EPS.

    Raises:
        AssertionError: If `hex_hash_str` is not a 64-character string or D < 16.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Seed from the whole hash plus the qubit index for per-qubit determinism.
    seed_value = int(hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()[:16], 16)
    # Local generator: avoids reseeding the process-wide np.random state as a side effect.
    # The modulo keeps the seed inside the range RandomState accepts.
    rng = np.random.RandomState(seed_value % (2**32 - 1))

    # θ = 2π·r0, φ = 2π·r1, twist = 2π·r2
    r_vals = rng.rand(3)
    theta = 2 * math.pi * r_vals[0]
    phi = 2 * math.pi * r_vals[1]
    twist_angle = 2 * math.pi * r_vals[2]

    # Real spin: (x, y, z) = (sinθ cosφ, sinθ sinφ, cosθ)
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: rotate (x, y) about the z axis by the twist angle; z is unchanged.
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # I_vec: D further draws from the same generator, normalized to unit length.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:  # skip normalization for a (near-)zero vector to avoid div by zero
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): rescale each qubit's primaries to a canonical total magnitude.

    For every qubit, the Euclidean norms of its primary units are summed and
    all components are divided by that sum (plus EPS), then multiplied by the
    target scale `invariants['units']` (1.0 when the key is absent). Qubits
    whose summed magnitude does not exceed EPS are mapped to zeros.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        invariants (dict): Invariant constants; only 'units' is read here.
    Returns:
        tf.Tensor: Normalized primaries of shape [Q, 6, 2].
    """
    per_unit_norm = tf.norm(primaries, axis=-1, keepdims=True)          # [Q, 6, 1]
    qubit_total = tf.reduce_sum(per_unit_norm, axis=1, keepdims=True)   # [Q, 1, 1]

    target_scale = invariants.get('units', 1.0)
    # Scale factor is the target for non-trivial qubits and 0 for (near-)zero ones.
    scale = tf.where(qubit_total > EPS, tf.cast(target_scale, primaries.dtype), 0.0)

    # EPS in the denominator guards the division for zero-magnitude qubits.
    return primaries / (qubit_total + EPS) * scale

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π): multi-qubit wrapper around apply_parity_rotation.

    Derives the pair register and its collapse mask from `primaries` via
    compute_pairs and detect_collapse, applies the parity rotation, and returns
    the first six entries along axis 1 of the rotated pairs (the entries
    treated here as the primaries themselves).

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask [30].
    Returns:
        tf.Tensor: Primaries after parity rotation, shape [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)
    collapsed = detect_collapse(pair_register)
    # Second return value of apply_parity_rotation is not needed here.
    rotated, _unused = apply_parity_rotation(pair_register, collapsed, prime_mask)
    # Only the leading six pair slots map back onto the primaries.
    return rotated[:, :6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ): zero out the primary units flagged as collapsed.

    The collapse mask is computed from the pair register (compute_pairs →
    detect_collapse). Only the first six mask entries per qubit are used; a
    primary unit whose flag is positive has both its real and unreal component
    set to 0, all other units are returned unchanged.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
    Returns:
        tf.Tensor: Primaries with collapsed units zeroed, shape [Q, 6, 2].
    """
    collapse_mask = detect_collapse(compute_pairs(primaries))  # [Q, 30]

    # Flags for the six primary slots, with a trailing axis so they broadcast
    # over the (real, unreal) pair.
    unit_flags = tf.expand_dims(collapse_mask[:, 0:6], axis=-1)      # [Q, 6, 1]
    is_collapsed = tf.cast(unit_flags, tf.float32) > 0

    return tf.where(is_collapsed, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit entry point for primary promotion.

    Pure pass-through to promote_primaries; the arguments are forwarded
    positionally and the result is returned unchanged.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2].
        axis_maps (dict): Axis maps for uniqueness checks.
        theta_phipi (float): Tolerance for uniqueness.
    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2].
    """
    promoted = promote_primaries(triplets, axis_maps, theta_phipi)
    return promoted

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Applies a sequence of NECL operations to multi-qubit primaries and returns
    the result together with a SHA256 checksum of the program that was run.

    The checksum is taken over a manifest built by concatenating, for every
    entry of the program, the operation name followed by "(<param>)" for the
    parameterized operations (CURV, GEOD, TWIST, LIFT, GLUE, SPLIT). Names that
    are not recognized still contribute their name to the manifest.

    Parameters may be given as tensors or as plain Python/NumPy numbers; they
    are normalized with tf.convert_to_tensor. Requires eager execution.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): NECL operation names, applied in order.
        params_dict (dict): Maps an operation name to its parameter. Missing
            entries fall back to float32 defaults: CURV 0.01, GEOD 0.05,
            TWIST pi/4, LIFT 0.5, GLUE 0.1, SPLIT 0.5.
        prime_mask (tf.Tensor): Global prime mask needed for PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Accepted for interface
            compatibility; not used by the current implementation.

    Returns:
        tf.Tensor: Final primaries after applying the NECL program.
        str: Checksum (SHA256 hex digest) of the applied NECL program manifest.
    """
    default_params = {
        'CURV': 0.01,
        'GEOD': 0.05,
        'TWIST': math.pi / 4,  # radians
        'LIFT': 0.5,           # 'd' factor
        'GLUE': 0.1,           # gluing strength sigma
        'SPLIT': 0.5,          # split ratio tau
    }
    # Operations that simply map (primaries, param) -> primaries.
    simple_ops = {'CURV': CURV, 'GEOD': GEOD, 'TWIST': TWIST, 'LIFT': LIFT}

    def _resolve_param(name):
        # Tensors pass through unchanged; Python/NumPy numbers become tensors so
        # that .numpy() below works for every accepted input form.
        fallback = tf.constant(default_params[name], dtype=tf.float32)
        return tf.convert_to_tensor(params_dict.get(name, fallback))

    def _render_param(param):
        # Single-element parameters render as a bare scalar (keeps checksums of
        # scalar programs stable); larger ones render as nested lists.
        values = param.numpy()
        return f"({values.item() if values.size == 1 else values.tolist()})"

    current_primaries = primaries
    Q = tf.shape(primaries)[0].numpy().item()

    # Manifest pieces are collected and joined once at the end.
    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name in simple_ops:
            op_params = _resolve_param(op_name)
            current_primaries = simple_ops[op_name](current_primaries, op_params)
            manifest_parts.append(_render_param(op_params))
        elif op_name == 'GLUE':
            op_params = _resolve_param('GLUE')
            if Q % 2 != 0:
                print(f"Warning: GLUE operation skipped for odd Q ({Q})")
            else:
                # NOTE(review): the value passed as GLUE's sigma is the batch-rolled
                # primaries scaled by the parameter (a [Q, 6, 2] tensor), not the
                # scalar itself. Kept as-is; confirm this is the intended coupling.
                current_primaries = GLUE(current_primaries, tf.roll(current_primaries, shift=1, axis=0) * op_params)
            # The parameter is recorded even when the operation was skipped.
            manifest_parts.append(_render_param(op_params))
        elif op_name == 'SPLIT':
            op_params = _resolve_param('SPLIT')
            # Deliberate no-op: a real SPLIT would double axis 1, but the main
            # pipeline keeps the number of primaries constant.
            manifest_parts.append(_render_param(op_params))
        elif op_name == 'PARITY_Q':
            current_primaries = PARITY_Q(current_primaries, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            current_primaries = COLLAPSE_Q(current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    program_manifest = "".join(manifest_parts)
    necl_checksum = hashlib.sha256(program_manifest.encode("utf-8")).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Real stability/cohesion score: 1 - variance / (max - min + EPS).

    Higher means more stable. When the spread (max - min) is below EPS the
    values are treated as identical and the plain float 1.0 is returned;
    otherwise the result is a tensor. The spread test is evaluated as a Python
    condition, so this relies on eager execution.
    """
    value_spread = tf.reduce_max(real_parts) - tf.reduce_min(real_parts)
    if value_spread < EPS:
        # No meaningful variation: maximum stability.
        return 1.0

    scaled_variance = tf.math.reduce_variance(real_parts) / (value_spread + EPS)
    return 1.0 - scaled_variance

def u_metric(unreal_parts):
    """
    Unreal stability/cohesion score: 1 - variance / (max - min + EPS).

    Higher means more stable. When the spread (max - min) is below EPS the
    plain float 1.0 is returned; otherwise the result is a tensor. The spread
    test is evaluated as a Python condition, so this relies on eager execution.
    """
    highest = tf.reduce_max(unreal_parts)
    lowest = tf.reduce_min(unreal_parts)
    spread = highest - lowest
    if spread < EPS:
        # All values effectively equal: maximum stability.
        return 1.0

    variance = tf.math.reduce_variance(unreal_parts)
    return 1.0 - (variance / (spread + EPS))

def dv_metric(pairs_q):
    """
    Real/unreal consistency score: 1 - mean(|real - unreal| / |pair|).

    For each pair the absolute difference between its real and unreal
    component is divided by the pair's Euclidean norm (plus EPS); pairs whose
    norm does not exceed EPS contribute 0. Higher values mean lower divergence.
    """
    pair_norm = tf.norm(pairs_q, axis=-1)
    component_gap = tf.abs(pairs_q[..., 0] - pairs_q[..., 1])

    # (Near-)zero pairs are counted as having no divergence instead of dividing by ~0.
    relative_gap = tf.where(
        pair_norm > EPS,
        component_gap / (pair_norm + EPS),
        tf.zeros_like(pair_norm),
    )
    return 1.0 - tf.reduce_mean(relative_gap)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant check on the first six pairs of one qubit.

    The invariant holds when the summed Euclidean norms of the first six pairs
    differ from `invariants['units']` (default 1.0) by less than
    `invariants['tol']` (default EPS). `triplets_q` is accepted but not used.

    Returns a scalar boolean tensor.
    """
    # The first six pair slots are the primaries themselves.
    primary_norms = tf.norm(pairs_q[:6, :], axis=-1)
    total_norm = tf.reduce_sum(primary_norms)

    target_units = invariants.get('units', 1.0)
    deviation = tf.abs(total_norm - target_units)
    return deviation < invariants.get('tol', EPS)

def degenerate_check(primaries_q):
    """
    Conceptual degeneracy test: are all primaries (near-)zero?

    Returns a scalar boolean tensor that is True when the Euclidean norm of
    every primary unit (taken over the last axis) is below EPS.
    """
    unit_norms = tf.norm(primaries_q, axis=-1)
    near_zero = unit_norms < EPS
    return tf.reduce_all(near_zero)

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derive a corrected bit pattern for one qubit from its pair register.

    Per-index rule:
        b_i = 1 if r_i > TAU_R and u_i > TAU_U and dv_i > TAU_D and trip_mix_i > 0
                   and the global invariant holds and the primaries are not degenerate,
        else 0,
    where r_i, u_i and dv_i are |real|, |unreal| and |real - unreal| divided by
    the pair's Euclidean norm (0 where the norm does not exceed EPS), and
    trip_mix_i flags whether the real-part signs differ within the index's
    block of three consecutive pairs.

    Two guards follow:
      1. If the entropy of the bit pattern is not above 0.3, TAU_R and TAU_U
         are multiplied by 1.2, TAU_D becomes max(0.9 * TAU_D, 0.25), and the
         rule is re-applied.
      2. If the pattern is all ones or all zeros, it is replaced by
         (|real| > EPS) AND (trip_mix > 0).

    NOTE(review): since r_i**2 + u_i**2 == 1 wherever the norm exceeds EPS,
    r_i and u_i cannot both exceed thresholds above ~0.707, so with such
    thresholds the rule yields all zeros and guard 2 decides the result.
    Confirm whether this is intended.

    Conditions are evaluated as Python booleans, so eager execution is required.

    Returns:
        tuple: (bits [30] int32 tensor, TAU_R, TAU_U, TAU_D) with the thresholds
        as finally used (adjusted if guard 1 fired).
    """
    tau_r, tau_u, tau_d = initial_TAU_R, initial_TAU_U, initial_TAU_D

    real_part = pairs_q[:, 0]      # [30]
    unreal_part = pairs_q[:, 1]    # [30]
    magnitude = tf.norm(pairs_q, axis=-1)

    # Component ratios relative to the pair magnitude; 0 for (near-)zero pairs.
    has_magnitude = magnitude > EPS
    no_signal = tf.zeros_like(magnitude)
    r_i = tf.where(has_magnitude, tf.abs(real_part) / magnitude, no_signal)
    u_i = tf.where(has_magnitude, tf.abs(unreal_part) / magnitude, no_signal)
    dv_i = tf.where(has_magnitude, tf.abs(real_part - unreal_part) / magnitude, no_signal)

    # Triplet diversity: one flag per block of three pairs, repeated for each
    # index of the block. A block is "mixed" when any sign differs from the first.
    real_signs = tf.sign(real_part)
    mix_flags = []
    for block in range(10):
        block_signs = real_signs[block * 3:(block + 1) * 3]
        block_is_mixed = tf.cast(tf.reduce_any(tf.not_equal(block_signs, block_signs[0])), tf.int32)
        mix_flags += [block_is_mixed, block_is_mixed, block_is_mixed]
    trip_mix = tf.convert_to_tensor(mix_flags, dtype=tf.int32)  # [30]

    # Qubit-wide conditions, broadcast over all 30 indices in the rule below.
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))

    def _apply_rule(thr_r, thr_u, thr_d):
        # The per-index rule for a given set of thresholds.
        accepted = (r_i > thr_r) & (u_i > thr_u) & (dv_i > thr_d) & (trip_mix > 0) & invariant_ok & not_degenerate
        return tf.cast(accepted, tf.int32)

    b = _apply_rule(tau_r, tau_u, tau_d)

    # Guard 1: binary entropy of the fraction of ones must exceed 0.3.
    ones_fraction = tf.reduce_mean(tf.cast(b, tf.float32))
    entropy = - (ones_fraction * tf.math.log(ones_fraction + EPS)
                 + (1.0 - ones_fraction) * tf.math.log(1.0 - ones_fraction + EPS))
    if not (entropy > 0.3):
        tau_r *= 1.2
        tau_u *= 1.2
        tau_d = max(tau_d * 0.9, 0.25)
        b = _apply_rule(tau_r, tau_u, tau_d)

    # Guard 2: a uniform pattern (all ones or all zeros) is never returned as-is.
    if tf.reduce_all(b == 1) or tf.reduce_all(b == 0):
        b = tf.cast((tf.abs(real_part) > EPS) & (trip_mix > 0), tf.int32)

    return b, tau_r, tau_u, tau_d

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Re-derive one qubit's bit pattern when the supplied pattern looks inconsistent.

    A pattern is treated as inconsistent when it is all ones, all zeros, contains
    fewer than 5 ones, or contains more than 25 ones. A consistent pattern is
    returned unchanged together with the unchanged key.

    For an inconsistent pattern the bits are recomputed by `derive_bits_advanced`
    from this qubit's own `pairs_q` / `triplets_q`, starting from the module
    thresholds TAU_R_METRIC, TAU_U_METRIC and TAU_D_METRIC. A new key is then
    produced by hashing the old key, the marker "REFactorBits" and the corrected
    bits, and one lineage record describing the correction is appended to `TRACE`.

    Args:
        q_idx (int): Index of the qubit being processed (stored in the trace record).
        pairs_q (tf.Tensor): Phase-dual pair register of this qubit, expected shape [30, 2].
        triplets_q (tf.Tensor): Triplets of this qubit, expected shape [10, 3, 2].
        current_bits_q (tf.Tensor): Initially derived bit pattern, expected shape [30].
        resonance_key_q (str): Current resonance key of the qubit.
        TRACE (list): Lineage log; mutated in place when a correction happens.
        invariants (dict): Invariant constants forwarded to the helper checks.

    Returns:
        tuple[tf.Tensor, str]: The (possibly corrected) bit pattern and the
        (possibly re-hashed) resonance key.
    """
    ones_count = tf.reduce_sum(current_bits_q)
    inconsistency_flags = tf.stack([
        tf.reduce_all(tf.equal(current_bits_q, 1)),  # all ones
        tf.reduce_all(tf.equal(current_bits_q, 0)),  # all zeros
        ones_count < 5,                              # too sparse
        ones_count > 25,                             # too dense
    ])

    # Guard clause: nothing to correct, hand the inputs straight back.
    if not tf.reduce_any(inconsistency_flags).numpy().item():
        return current_bits_q, resonance_key_q

    # Local re-evaluation; the helper also reports the thresholds it ended up using.
    new_bits_q, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC
    )
    corrected_bits_as_list = new_bits_q.numpy().tolist()

    # The new key chains the previous key with the corrected bit pattern.
    key_material = resonance_key_q + "REFactorBits" + str(corrected_bits_as_list)
    updated_resonance_key_q = hashlib.sha256(key_material.encode("utf-8")).hexdigest()

    # Key order is kept stable because downstream code stringifies these records.
    lineage_record = {
        'qubit': q_idx,
        'reason': "binary_refactor",
        'source': "tuplets",
        'r_metric': r_metric(pairs_q[:, 0]).numpy().item(),
        'u_metric': u_metric(pairs_q[:, 1]).numpy().item(),
        'dv_metric': dv_metric(pairs_q).numpy().item(),
        'invariant_pass': invariant_check_conceptual(pairs_q, triplets_q, invariants).numpy().item(),
        'degenerate_check': degenerate_check(pairs_q[:6, :]).numpy().item(),
        'correction_threshold_r': adjusted_TAU_R,
        'correction_threshold_u': adjusted_TAU_U,
        'correction_threshold_d': adjusted_TAU_D,
        'corrected_bits': corrected_bits_as_list,
        'old_key': resonance_key_q,
        'new_key': updated_resonance_key_q,
    }
    TRACE.append(lineage_record)

    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Seed both random sources used below so that this "reproducible example"
# really produces the same primaries, axis maps and lineage hashes on every run.
tf.random.set_seed(0)
np.random.seed(0)

# Number of virtual qubits
Q = 64

# Random base primaries (x, y, z) for every qubit; each primary is a
# phase-dual [real, unreal] value drawn uniformly from [-1, 1).
# Shape [Q, 3, 2].
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# Build initial_primaries = [x, -x, y, -y, z, -z] per qubit.
# Stacking each primary next to its negation gives [Q, 3, 2, 2]; flattening the
# two middle axes interleaves them in exactly that order. Shape [Q, 6, 2].
initial_primaries = tf.reshape(
    tf.stack([base_primaries_xyz, neg_phase_dual(base_primaries_xyz)], axis=2),
    [Q, 6, 2],
)

# Axis maps: for every axis ('x', 'y', 'z') one [K, 2] tensor per qubit, with a
# random K per qubit and axis. They are zero-padded below to a common K so they
# can be stacked into [Q, max_k_dynamic, 2].
list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []

max_k_dynamic = 0
min_k_val = 3   # smallest K that can be drawn
max_k_val = 11  # exclusive upper bound of np.random.randint, so K <= max_k_val - 1

for q_idx in range(Q):
    # Independent random K for each of the three axis maps of this qubit
    k_x = np.random.randint(min_k_val, max_k_val)
    k_y = np.random.randint(min_k_val, max_k_val)
    k_z = np.random.randint(min_k_val, max_k_val)

    list_of_axis_maps_x.append(tf.random.uniform(shape=[k_x, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_y.append(tf.random.uniform(shape=[k_y, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_z.append(tf.random.uniform(shape=[k_z, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))

    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Zero-pad every per-qubit map along its K axis up to max_k_dynamic, then stack per axis.
axis_maps = {
    axis_name: tf.stack([
        tf.pad(t, [[0, max_k_dynamic - t.shape[0]], [0, 0]], "CONSTANT", constant_values=0.0)
        for t in per_qubit_maps
    ])
    for axis_name, per_qubit_maps in (
        ('x', list_of_axis_maps_x),
        ('y', list_of_axis_maps_y),
        ('z', list_of_axis_maps_z),
    )
}

# One random k in [0, 1) per qubit, shape [Q, 1]
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# Define a_U_constant (from NGFT)
a_U_constant = tf.constant(10.0, dtype=tf.float32) # Scalar

# One lineage hash per qubit: SHA-256 of a label containing the qubit index
# and a random integer in [0, 1000).
lineage_hashes = []
for q_idx in range(Q):
    lineage_hashes.append(hashlib.sha256(f"Q{q_idx}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest())

# Sample NECL program (list of operation strings) - NECL[q] = [op(args), ...]
# For this example, all qubits share the same NECL program.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Placeholder parameters for NECL operations (can be expanded)
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32), # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32), # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),  # theta (radians)
    'LIFT': tf.constant(0.5, dtype=tf.float32),   # d (e.g., a scaling factor based on d)
    'GLUE': tf.constant(0.1, dtype=tf.float32),   # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),  # tau
}

# Invariants ν: {units, tol, ordering}
invariants = {
    'units': 1.0,
    'tol': 1e-5, # Tolerance for error correction
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1 # Threshold for scores in error correction
}

# TRACE (lineage manifest) - list of dictionaries to log events
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν): normalize the initial primaries using the invariants dict.
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL): run the shared NECL program on the normalized primaries.
# NOTE(review): necl_program_shared lists TWIST, CURV, PARITY_Q, COLLAPSE_Q and LIFT;
# the order in which APPLY_NECL actually executes them is defined elsewhere - confirm.
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q])
# Computed for all Q qubits at once rather than per qubit.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# Collapse and parity are re-detected on the final pairs; the resulting bitmap is the
# starting bit pattern that the error-correction loop below inspects.
final_collapse_mask = detect_collapse(all_pairs)
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

# Per-qubit results of the error-correction pass, in qubit order.
corrected_bits_list = []
final_resonance_keys = []

# Run error correction (when needed) and key generation one qubit at a time
for q_idx in range(Q):
    # Slice out this qubit's data
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Demonstration only: overwrite qubit 0's bits with an 'inconsistent' pattern
    if q_idx == 0:
        # A single 1 followed by 29 zeros, so correct_bits' "fewer than 5 ones" check fires
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # Error correction: returns the inputs unchanged when the pattern is consistent,
    # otherwise corrected bits plus a re-hashed key (and appends a record to TRACE).
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # updated_key_q already carries the 'REFactorBits' lineage if a correction occurred
    final_resonance_keys.append(updated_key_q)

# Stack the per-qubit bit patterns back into one tensor
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# Promotion is done by ASSOC_Q from the full triplets, the axis maps and THETA_PHIPI.
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# Nothing to compute here: final_resonance_keys was filled by the correct_bits loop above
# and already holds the keys after any error correction.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Every qubit is decoded, one hash per call. The hashes decoded are the original
# lineage_hashes, not the post-correction final_resonance_keys.
Q_for_decode_example = 1 # NOTE(review): not used by the loop below, which passes num_qubits=Q
D_for_decode_example = 16 # D ≥ 16 as per instruction

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Join the per-qubit results along axis 0 (expected to yield [Q, 2, 3] and [Q, D] - TODO confirm
# against decode_lineage_hash's return shapes)
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
print("Primaries In:\n", initial_primaries.numpy())
print("\nPrimaries After NECL:\n", primaries_after_necl.numpy())
# Pairs and triplets are shown for qubit 0 only, as a sample of the intermediate tuplets
print("\nPairs[0]:\n", all_pairs[0].numpy())
print("\nTriplets[0]:\n", all_triplets[0].numpy())
print("\nBits (all qubits):\n", corrected_bits_tensor.numpy())  # bits after error correction
print("\nPrimaries Out (promoted):\n", primaries_out_promoted.numpy())

# Conceptual Nth identities {n^0, n^1, n^2, n^p}, reported once per qubit
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # n^1 is derived from this qubit's first promoted primary (a length-2 phase-dual value)
    selector = tf.convert_to_tensor(primaries_out_promoted[q_idx, 0, :], dtype=tf.float32)

    report_lines = (
        f"  Qubit {q_idx}:",
        f"    n^0 (base identity): {n_identity(0).numpy()[0]}",
        f"    n^1 (first-order selector): {n_identity(1, selector_primary=selector).numpy()[0]}",
        f"    n^2 (second-order product): {n_identity(2).numpy()[0]}",  # placeholder value
        f"    n^p (p-order product): {n_identity('p').numpy()[0]}",  # placeholder value
    )
    print("\n".join(report_lines))

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# Per-qubit manifest checksum: SHA-256 over the string form of that qubit's TRACE entries
necl_manifest_checksums = [
    hashlib.sha256(
        str([entry for entry in TRACE if entry['qubit'] == qubit]).encode('utf-8')
    ).hexdigest()
    for qubit in range(Q)
]
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)


Primaries In:
 [[[ 0.0742445   0.86564183]
  [-0.0742445  -0.86564183]
  [-0.82527566  0.44523573]
  [ 0.82527566 -0.44523573]
  [ 0.3293748  -0.38157153]
  [-0.3293748   0.38157153]]

 [[ 0.46982217  0.35681462]
  [-0.46982217 -0.35681462]
  [ 0.70410204 -0.68199396]
  [-0.70410204  0.68199396]
  [-0.9894619  -0.8150301 ]
  [ 0.9894619   0.8150301 ]]

 [[-0.2522223   0.7746639 ]
  [ 0.2522223  -0.7746639 ]
  [-0.7889981  -0.938113  ]
  [ 0.7889981   0.938113  ]
  [-0.32451677 -0.8098786 ]
  [ 0.32451677  0.8098786 ]]

 [[ 0.15753198 -0.70426416]
  [-0.15753198  0.70426416]
  [ 0.12965727  0.7186651 ]
  [-0.12965727 -0.7186651 ]
  [ 0.30446243  0.75358605]
  [-0.30446243 -0.75358605]]

 [[ 0.19514418 -0.40123606]
  [-0.19514418  0.40123606]
  [ 0.3935175   0.02179813]
  [-0.3935175  -0.02179813]
  [ 0.38572145  0.9292939 ]
  [-0.38572145 -0.9292939 ]]

 [[ 0.87045074  0.43117285]
  [-0.87045074 -0.43117285]
  [ 0.96099067  0.05321097]
  [-0.96099067 -0.05321097]
  [ 0.00996351 -0.38163

## Final Task

### Subtask:
Review the updated code block to ensure all refactoring instructions related to collapse scope and parity application have been correctly implemented and verify that the system behaves as expected with the new collapse rules.


## Summary:

### Data Analysis Key Findings

*   **Refactoring of `detect_collapse`**: The `detect_collapse` function was found to have already incorporated the specified refactoring instructions. This included the introduction of the `R_FOR_RATIO` constant (set to `2.0`) and the implementation of the new triplet-level predicate for collapse detection. The logic correctly evaluates if `[high(real_p) AND low(unreal_p)] OR [ratio(real_p / unreal_p) > R_FOR_RATIO]` is met for any index within a triplet, marking all indices of that triplet as collapsed.
*   **Review of `apply_parity_rotation`**: The `apply_parity_rotation` function was confirmed to operate correctly and precisely with the refined `collapse_mask` generated by the updated `detect_collapse`. It accurately applies half-rotation to elements corresponding to prime indices or those specifically marked as collapsed, without unintended propagation. No code changes were required.
*   **Review of `COLLAPSE_Q`**: The `COLLAPSE_Q` function was verified to correctly zero out only the specific primary units identified by the granular `collapse_mask`. Its logic ensures that only the affected real and unreal components are set to zero, preserving other non-collapsed primary units. No code changes were required.
*   **Documentation of `correct_bits`**: Comprehensive comments were successfully added to the `correct_bits` function, clarifying its operational strategy regarding local re-evaluation, lineage recording, and its scope (i.e., operating per-qubit without advancing across units unless local exhaustion occurs).
*   **Code Quality during Verification**: During the verification phase, two typos were identified and corrected: one caused a `NameError` in a function call within `promote_primaries`, and the other caused a `KeyError` when accessing dictionary elements in the final output logging. These corrections allowed the refactored and documented code to execute in full.

### Insights or Next Steps

*   **Insight**: The initial codebase demonstrated a high degree of alignment with the refactoring instructions for collapse detection and parity application, indicating a robust design for these core functions.
*   **Next Step**: Implement a formal unit testing suite for the error correction mechanisms, especially for `derive_bits_advanced` and `correct_bits`, to proactively catch logical errors or unexpected behavior (like the sparse/dense bit pattern checks) and ensure robustness against diverse edge cases.


# Task
All refactoring, review, and documentation tasks have been successfully completed and verified. The updated code now implements the new collapse detection predicate, and the dependent functions (`apply_parity_rotation` and `COLLAPSE_Q`) correctly utilize the refined `collapse_mask`. The `correct_bits` function has also been appropriately documented.

## Refactor Collapse Detection with New Predicate

### Subtask:
Update the global constant R_FOR_RATIO and modify the detect_collapse function to implement the new triplet-level predicate for collapse detection.


**Reasoning**:
The subtask requires updating the `R_FOR_RATIO` constant and refactoring the `detect_collapse` function to implement the new triplet-level predicate for collapse detection, removing the nested helper function, and applying the specified logic for marking collapsed indices. I will update the code to reflect these changes.



In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
THETA_PHIPI = 0.001  # phi-pi tolerance constant; default `theta` of _value_unique_axis_phase_dual
TAU_HI      = 1.0    # default `tau_hi` of detect_collapse: a real part >= this counts as "high"
TAU_LOW     = -TAU_HI # default `tau_low` of detect_collapse: an unreal part <= this counts as "low"
EPS         = 1e-6   # near-zero buffer (division guards and the bitmap tie-break)

R_FOR_RATIO = 64.0 # default `r_for_ratio` of detect_collapse: a real/unreal ratio above this triggers collapse

# Advanced error correction metric thresholds.
# correct_bits passes these to derive_bits_advanced as the starting thresholds.
TAU_R_METRIC = 0.85  # Threshold for real stability metric (higher for stricter stability)
TAU_U_METRIC = 0.85  # Threshold for unreal stability metric (higher for stricter stability)
TAU_D_METRIC = 0.85  # Threshold for real/unreal divergence metric (higher for stricter consistency)

# Prime index mask over indices 0..29: entry i is 1 when i is prime
# (2,3,5,7,11,13,17,19,23,29) and 0 otherwise.
PRIME_MASK = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Add two phase-dual values component by component.

    The operands are combined with the plain `+` operator, so real parts add to
    real parts and unreal parts add to unreal parts:
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|

    Args:
        a: Left operand (callers in this file pass tensors whose last axis is [real, unreal]).
        b: Right operand, broadcast-compatible with `a`.

    Returns:
        The component-wise sum `a + b`.
    """
    component_sum = a + b
    return component_sum

def mul_phase_dual_component_wise(a, b):
    """
    Multiply two phase-dual values component by component.

    The operands are combined with the plain `*` operator, so real parts multiply
    real parts and unreal parts multiply unreal parts:
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|

    Args:
        a: Left operand (callers in this file pass tensors whose last axis is [real, unreal]).
        b: Right operand, broadcast-compatible with `a`.

    Returns:
        The component-wise product `a * b`.
    """
    component_product = a * b
    return component_product

def neg_phase_dual(a):
    """
    Negate a phase-dual value component by component.

    Applies unary minus to the operand, flipping the sign of both the real and
    the unreal component.

    Args:
        a: Operand (callers in this file pass tensors whose last axis is [real, unreal]).

    Returns:
        The negated value `-a`.
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Return the conceptual Nth identity n^k as a [1, 2] float32 tensor.

    Only order 1 is non-trivial:
      * with `selector_primary`, the selector is scaled to (near) unit length by
        dividing by its norm plus EPS and reshaped to [1, 2];
      * without it, the default selector [1, 1] / sqrt(2) is returned.
    Every other order (0, 2, 'p', ...) currently yields the same constant [[1, 0]].

    Args:
        order (int or str): Order of the identity, e.g. 0, 1, 2 or 'p' as a placeholder.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) holding exactly
            two values, from which n^1 is derived. Defaults to None.

    Returns:
        tf.Tensor: A [1, 2] tensor representing the conceptual Nth identity.
    """
    if order == 1:
        if selector_primary is None:
            # Default first-order selector when the caller supplies none
            return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)

        # Normalize the supplied primary; EPS keeps a zero-length selector from dividing by zero
        length = tf.norm(selector_primary, axis=-1, keepdims=True)
        unit_selector = selector_primary / (length + EPS)
        return tf.reshape(unit_selector, [1, 2])

    # n^0 = n_|1, ξ| (base identity). n^2 and all higher / symbolic orders are
    # placeholders that share the same value for now.
    return tf.constant([[1.0, 0.0]], dtype=tf.float32)

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Expand 6 phase-dual primaries per qubit into the 30-index pair register.

    The register starts with the 6 primaries (x, xi, y, yi, z, zi) and is
    followed by 24 combinatorials. For each axis pairing (x-y, x-z, y-z) and for
    each combination of one member of the first axis with one member of the
    second, the component-wise sum and then the component-wise product are
    emitted, in that canonical order.

    Args:
        prim (tf.Tensor): Primaries of shape [Q, 6, 2] and dtype tf.float32.
                          The last dimension holds [real, unreal] components.

    Returns:
        tf.Tensor: Pair register of shape [Q, 30, 2] and dtype tf.float32.
    """
    assert (
        prim.shape.rank == 3
        and (tf.shape(prim)[-2] == 6).numpy().item()
        and (tf.shape(prim)[-1] == 2).numpy().item()
        and (prim.dtype == tf.float32)
    ), f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Six [Q, 2] tensors, one per primary
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)

    register = [x, xi, y, yi, z, zi]
    axis_pairings = (
        ((x, xi), (y, yi)),
        ((x, xi), (z, zi)),
        ((y, yi), (z, zi)),
    )
    for first_axis, second_axis in axis_pairings:
        for lhs in first_axis:
            for rhs in second_axis:
                # Sum first, product second - this fixes the register's index order
                register.append(add_phase_dual(lhs, rhs))
                register.append(mul_phase_dual_component_wise(lhs, rhs))

    # 6 primaries + 3 pairings * 4 combinations * 2 operations = 30 entries
    return tf.stack(register, axis=-2)

def group_triplets(pairs):
    """
    Split the 30-index pair register into 10 triplets of consecutive indices.

    Triplet t collects register indices 3t, 3t+1 and 3t+2; the trailing
    [real, unreal] axis is carried along unchanged. These are the 'Nth Lines'
    in the context of the ISA.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: Triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table [[0,1,2],[3,4,5],...,[27,28,29]], shape [10, 3]
    triplet_index_table = tf.reshape(tf.range(30, dtype=tf.int32), [10, 3])

    # Gathering along the 30-axis with a [10, 3] index table yields [Q, 10, 3, 2]
    return tf.gather(pairs, triplet_index_table, axis=1)

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW, r_for_ratio=R_FOR_RATIO):
    """
    Flag collapsed triplets in the phase-dual pair register. COLL(x, χ) operation.

    The 30 indices are treated as 10 triplets of consecutive indices
    (0-2, 3-5, ..., 27-29). An index p triggers collapse when

        [real_p >= tau_hi AND unreal_p <= tau_low] OR [real_p / unreal_p > r_for_ratio]

    where the ratio is taken to be 0 whenever |unreal_p| <= EPS. If any index of
    a triplet triggers, all three indices of that triplet are marked as collapsed.

    The whole register is evaluated in one vectorized pass; because the triplets
    are contiguous and disjoint, a reshape to [Q, 10, 3] followed by a reduction
    gives the same mask as updating the triplets one at a time.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold for the real component.
        tau_low (float): Low threshold for the unreal component (should be negative).
        r_for_ratio (float): Ratio threshold for collapse detection.

    Returns:
        tf.Tensor: Binary collapse mask of shape [Q, 30] and dtype tf.int32
                   (a per-index flag, not itself phase-dual).
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    real_parts = pairs[..., 0]    # [Q, 30]
    unreal_parts = pairs[..., 1]  # [Q, 30]

    # Condition 1: high(real_p) AND low(unreal_p)
    threshold_hit = tf.logical_and(real_parts >= tau_hi, unreal_parts <= tau_low)

    # Condition 2: real_p / unreal_p > r_for_ratio.
    # Near-zero denominators are swapped for 1 before dividing so that no inf/NaN
    # is ever produced; those positions are then forced to a ratio of 0.
    has_denominator = tf.abs(unreal_parts) > EPS
    safe_unreal = tf.where(has_denominator, unreal_parts, tf.ones_like(unreal_parts))
    ratio = tf.where(has_denominator, real_parts / safe_unreal, tf.zeros_like(real_parts))
    ratio_hit = ratio > r_for_ratio

    index_hit = tf.logical_or(threshold_hit, ratio_hit)  # [Q, 30]

    # A triplet collapses if any of its three indices triggered
    num_qubits = tf.shape(pairs)[0]
    triplet_hit = tf.reduce_any(tf.reshape(index_hit, [num_qubits, 10, 3]), axis=-1)  # [Q, 10]

    # Spread each triplet flag back over its three indices
    collapse_mask = tf.repeat(tf.cast(triplet_hit, tf.int32), repeats=3, axis=1)  # [Q, 30]
    return collapse_mask

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    Apply the half-rotation (sign flip) to selected entries of the pair register.
    PAR(x, π) operation.

    An index is affected when it is flagged in `prime_mask` or in `collapse_mask`.
    Affected entries have both their real and unreal component multiplied by -1;
    all other entries pass through unchanged.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Mask of prime indices (values > 0 mean "prime"),
                                shape [30] and dtype tf.int32.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): Rotated pair register, shape [Q, 30, 2], dtype tf.float32.
            - affected (tf.Tensor): Mask of affected indices, shape [Q, 30], dtype tf.int32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # The [30] prime mask broadcasts against the [Q, 30] collapse mask
    is_affected = tf.logical_or(prime_mask > 0, collapse_mask > 0)  # [Q, 30], bool
    affected = tf.cast(is_affected, tf.int32)

    # Map the flag to a multiplier: affected -> -1.0, untouched -> +1.0
    multiplier = 1.0 - 2.0 * tf.cast(is_affected, tf.float32)  # [Q, 30]

    # A trailing axis lets the multiplier apply to real and unreal alike
    rotated = pairs * multiplier[..., tf.newaxis]  # [Q, 30, 2]
    return rotated, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Reduce a phase-dual pair register to one bit per index.

    Only the real (leading) component of each unit is inspected. An index
    yields 1 when its real part is strictly greater than `eps`; negative
    values and values at or below `eps` both yield 0.

    Args:
        rotated_pairs (tf.Tensor): Phase-dual register of shape [Q, 30, 2] and dtype tf.float32.
        eps (float): Threshold the real part must strictly exceed to produce a 1.

    Returns:
        tf.Tensor: Bitmap of shape [Q, 30] and dtype tf.int32.
    """
    assert (
        rotated_pairs.shape.rank == 3
        and (tf.shape(rotated_pairs)[-2] == 30).numpy().item()
        and (tf.shape(rotated_pairs)[-1] == 2).numpy().item()
        and (rotated_pairs.dtype == tf.float32)
    ), f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    # Component 0 of the last axis is the real (leading) value -> [Q, 30]
    leading = rotated_pairs[..., 0]

    # Strict comparison: ties with eps fall to 0
    above_threshold = tf.greater(leading, eps)
    return tf.cast(above_threshold, tf.int32)

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flag candidates that are farther than `theta` from every observed axis value.

    The distance between two phase-dual units is the Euclidean norm of their
    component-wise difference. A candidate is unique only if that distance is
    strictly greater than `theta` for all K observed values of its batch row.

    Args:
        vals (tf.Tensor): Candidates, shape [Q, 2] (one per row) or [Q, 10, 2] (several per row).
        axis_vals (tf.Tensor): Observed values along the axis, shape [Q, K, 2].
        theta (float): Distance tolerance.

    Returns:
        tf.Tensor: int32 flags (1 = unique), shape [Q] for rank-2 `vals` or
            [Q, 10] for rank-3 `vals`.

    Raises:
        ValueError: If `vals` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item(), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (tf.shape(vals)[0] == tf.shape(axis_vals)[0]).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    vals_rank = vals.shape.rank
    if vals_rank not in (2, 3):
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    if vals_rank == 2:
        # [Q, 1, 2] against [Q, K, 2] -> [Q, K, 2]
        candidates = vals[:, tf.newaxis, :]
        observed = axis_vals
    else:
        # [Q, 10, 1, 2] against [Q, 1, K, 2] -> [Q, 10, K, 2]
        candidates = vals[:, :, tf.newaxis, :]
        observed = axis_vals[:, tf.newaxis, :, :]

    # Distance from every candidate to every observed value: [Q, K] or [Q, 10, K]
    distances = tf.norm(tf.abs(candidates - observed), axis=-1)

    # Collapse the K axis: every distance must clear the tolerance
    clear_of_all = tf.reduce_all(tf.greater(distances, theta), axis=-1)
    return tf.cast(clear_of_all, tf.int32)

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Pick, for every batch row, the first candidate flagged in `cand_bool`.

    The selected position is the smallest index holding the row maximum of
    `cand_bool`. For 0/1 flags that is the first flagged candidate, and it is
    index 0 when no candidate in the row is flagged.

    The index is computed explicitly rather than with `tf.argmax`, because
    TensorFlow does not guarantee which index `argmax` returns on ties, and
    ties are the normal case for 0/1 flags.

    Args:
        cand_bool (tf.Tensor): Uniqueness flags of shape [Q, 10] and dtype tf.int32.
        vals (tf.Tensor): Phase-dual candidates of shape [Q, 10, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: Selected phase-dual values of shape [Q, 2].
    """
    assert cand_bool.shape.rank == 2 and (tf.shape(cand_bool)[-1] == 10).numpy().item() and (cand_bool.dtype == tf.int32), \
        f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert vals.shape.rank == 3 and (tf.shape(vals)[-2] == 10).numpy().item() and (tf.shape(vals)[-1] == 2).numpy().item() and (vals.dtype == tf.float32), \
        f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    num_candidates = 10  # fixed by the shape assertions above
    positions = tf.range(num_candidates, dtype=tf.int64)  # [10]

    # Mark every position that holds its row's maximum flag value
    row_max = tf.reduce_max(cand_bool, axis=1, keepdims=True)  # [Q, 1]
    holds_max = tf.equal(cand_bool, row_max)  # [Q, 10]

    # Smallest marked position per row. Each row has at least one marked
    # position, so the out-of-range sentinel never survives the reduction.
    sentinel = tf.constant(num_candidates, dtype=tf.int64)
    idx = tf.reduce_min(tf.where(holds_max, positions, sentinel), axis=1)  # [Q]

    # Pair each row number with its chosen position and gather [Q, 2] from [Q, 10, 2]
    batch_indices = tf.stack([tf.range(tf.shape(vals)[0], dtype=tf.int64), idx], axis=1)  # [Q, 2]
    selected_vals = tf.gather_nd(vals, batch_indices)  # [Q, 2]
    return selected_vals

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Promote six primaries per qubit: triplet-first, with a per-axis fallback.

    Triplet-first: the last of the 10 triplets is used when each of its x, y
    and z components is unique against the corresponding axis map.
    Fallback: otherwise each axis independently takes the first of its 10
    candidates that is unique against that axis map.

    In both cases the output is laid out as (x, -x, y, -y, z, -z), where the
    negation applies to both the real and the unreal component.

    Args:
        triplets (tf.Tensor): 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
        axis_maps (dict): Keys 'x', 'y', 'z'; each value is a tf.float32 tensor of
            shape [Q, K, 2] holding observed values for that axis.
        theta (float): Uniqueness tolerance.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
    """
    assert (
        triplets.shape.rank == 4
        and (tf.shape(triplets)[-3] == 10).numpy().item()
        and (tf.shape(triplets)[-2] == 3).numpy().item()
        and (tf.shape(triplets)[-1] == 2).numpy().item()
    ), f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert triplets.dtype == tf.float32, \
        f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for k, v in axis_maps.items():
        assert isinstance(v, tf.Tensor) and v.dtype == tf.float32 and v.shape.rank == 3 and (tf.shape(v)[-1] == 2).numpy().item(), \
            f"axis_maps['{k}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {v.shape} and dtype {v.dtype}"
    assert (tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')

    def with_conjugates(components):
        # (x, y, z) -> stack of (x, -x, y, -y, z, -z) along axis 1: [Q, 6, 2]
        members = []
        for component in components:
            members.append(component)
            members.append(neg_phase_dual(component))
        return tf.stack(members, axis=1)

    # --- Triplet-first path: judge the last triplet as a whole -------------
    last_triplet = triplets[:, -1, :, :]  # [Q, 3, 2]
    last_components = [last_triplet[:, axis_idx, :] for axis_idx in range(3)]  # each [Q, 2]
    last_flags = [
        _value_unique_axis_phase_dual(component, axis_maps[name], theta)  # [Q]
        for name, component in zip(axis_names, last_components)
    ]
    # The triplet qualifies only when all three components are unique
    triplet_ok = tf.logical_and(
        tf.logical_and(last_flags[0] > 0, last_flags[1] > 0),
        last_flags[2] > 0,
    )  # [Q] bool
    from_triplet = with_conjugates(last_components)  # [Q, 6, 2]

    # --- Axis-fallback path: first unique candidate per axis ---------------
    per_axis_candidates = [triplets[:, :, axis_idx, :] for axis_idx in range(3)]  # each [Q, 10, 2]
    per_axis_flags = [
        _value_unique_axis_phase_dual(candidates, axis_maps[name], theta)  # [Q, 10]
        for name, candidates in zip(axis_names, per_axis_candidates)
    ]
    per_axis_choice = [
        _first_unique_selection_phase_dual(flags, candidates)  # [Q, 2]
        for flags, candidates in zip(per_axis_flags, per_axis_candidates)
    ]
    from_axes = with_conjugates(per_axis_choice)  # [Q, 6, 2]

    # Per-qubit switch, expanded to [Q, 1, 1] so it broadcasts over [Q, 6, 2]
    use_triplet = triplet_ok[:, tf.newaxis, tf.newaxis]
    return tf.where(use_triplet, from_triplet, from_axes)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Build one SHA256 resonance key per batch sample.

    For each sample a text manifest is assembled from its bitmap row, the
    shared prime mask, its collapse-mask row and its parity-mask row. If a
    non-empty lineage string exists for the sample it is appended as a
    trailing `path` field. The key is the hex SHA256 of the UTF-8 encoded
    manifest. Tensors are materialised with `.numpy()`, so hashing runs in
    plain Python.

    Args:
        bits (tf.Tensor): Bitmap of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask of shape [30] and dtype tf.int32.
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask of shape [Q, 30] and dtype tf.int32.
        lineage_list (list[str], optional): One lineage string per sample. Defaults to None.

    Returns:
        list[str]: SHA256 hex digests, one per batch sample.
    """
    assert bits.shape.rank == 2 and (tf.shape(bits)[-1] == 30).numpy().item() and (bits.dtype == tf.int32), \
        f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert prime_mask.shape.rank == 1 and (tf.shape(prime_mask)[-1] == 30).numpy().item() and (prime_mask.dtype == tf.int32), \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert collapse_mask.shape.rank == 2 and (tf.shape(collapse_mask)[-1] == 30).numpy().item() and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item() and (collapse_mask.dtype == tf.int32), \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert parity_mask.shape.rank == 2 and (tf.shape(parity_mask)[-1] == 30).numpy().item() and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item() and (parity_mask.dtype == tf.int32), \
        f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item()) and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item()), \
        f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert isinstance(lineage_list, list) and len(lineage_list) == tf.shape(bits)[0].numpy().item(), \
            f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    # Materialise everything as nested Python lists of ints up front
    bit_rows = bits.numpy().tolist()
    collapse_rows = collapse_mask.numpy().tolist()
    parity_rows = parity_mask.numpy().tolist()
    # The prime mask is global, so every sample's manifest carries the same row
    prime_row = prime_mask.numpy().tolist()

    digests = []
    for sample in range(len(bit_rows)):
        fields = [
            f"bits:{bit_rows[sample]}",
            f"prime:{prime_row}",
            f"collapse:{collapse_rows[sample]}",
            f"parity:{parity_rows[sample]}",
        ]
        # Only a non-empty lineage entry contributes a path field
        if lineage_list and lineage_list[sample]:
            fields.append(f"path:{lineage_list[sample]}")

        manifest = "|".join(fields)
        digests.append(hashlib.sha256(manifest.encode("utf-8")).hexdigest())
    return digests

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    Compute the NGFT-inspired Info-energy of each qubit.

    E_info = (k + 1) · a_U · I, where I is the sum over the six primaries of
    the Euclidean magnitude of each phase-dual unit.

    Args:
        primaries_out (tf.Tensor): Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
        k_values (tf.Tensor): Per-qubit 'k' component, tf.float32, shape [Q, 1] or [Q].
        a_U_constant (tf.Tensor): Universal constant, scalar tf.float32.

    Returns:
        tf.Tensor: Info-energy per qubit, shape [Q] and dtype tf.float32.
    """
    assert primaries_out.shape.rank == 3 and (tf.shape(primaries_out)[-1] == 2).numpy().item(), \
        f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert (primaries_out.dtype == tf.float32), f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert (tf.shape(primaries_out)[-2] == 6).numpy().item(), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert (k_values.dtype == tf.float32), f"k_values must have dtype tf.float32, but got {k_values.dtype}"
    assert ( (tf.rank(k_values) == 2).numpy().item() and (tf.shape(k_values)[-1] == 1).numpy().item() ) or \
           ( (tf.rank(k_values) == 1).numpy().item() and (tf.shape(k_values)[0] == tf.shape(primaries_out)[0]).numpy().item() ), \
           f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"
    assert (a_U_constant.dtype == tf.float32), f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert (tf.rank(a_U_constant) == 0).numpy().item(), f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # Bring k to column form [Q, 1]; a rank-1 input gains a trailing axis
    k_is_flat = (tf.rank(k_values) == 1).numpy().item()
    k_column = tf.expand_dims(k_values, axis=-1) if k_is_flat else k_values

    # I: magnitude of each phase-dual unit, summed over the six primaries
    unit_magnitudes = tf.norm(primaries_out, axis=-1)  # [Q, 6]
    info_content = tf.reduce_sum(unit_magnitudes, axis=1, keepdims=True)  # [Q, 1]

    energy_column = (k_column + 1.0) * info_content * a_U_constant  # [Q, 1]

    # Drop the trailing singleton axis -> [Q]
    return tf.squeeze(energy_column, axis=1)

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL op CURV: damp each phase-dual unit by its own magnitude.
    X ← X / (1 + |kappa|·|X|)

    |X| is the Euclidean norm over the last (real, unreal) axis. Kappa enters
    only through its absolute value, so its sign has no effect.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): Kappa; scalar or broadcastable against [Q, 6, 1].
    Returns:
        tf.Tensor: Transformed primaries, same shape as the input.
    """
    kappa_abs = tf.abs(tf.cast(params_kappa, primaries.dtype))
    # keepdims leaves a trailing singleton axis so the divisor broadcasts over (real, unreal)
    unit_magnitude = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    divisor = 1.0 + kappa_abs * unit_magnitude
    return primaries / divisor

def GEOD(primaries, params_t):
    """
    NECL op GEOD: shift every component by `t` in the direction of its sign.
    X ← X + t·sign(X)

    The sign is taken element-wise on the real and unreal components.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): Step 't'; scalar or broadcastable against `primaries`.
    Returns:
        tf.Tensor: Transformed primaries, same shape as the input.
    """
    step = tf.cast(params_t, primaries.dtype)
    direction = tf.sign(primaries)
    return primaries + step * direction

def TWIST(primaries, params_theta):
    """
    NECL op TWIST: scale the unreal component by cos(theta).
    X[...,1] ← X[...,1]·cos(theta)

    The real component (index 0 of the last axis) passes through unchanged.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): Angle 'theta'; scalar or broadcastable against
            the unreal slice `primaries[..., 1]`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    angle = tf.cast(params_theta, primaries.dtype)
    real_part = primaries[..., 0]
    unreal_part = primaries[..., 1]
    # Reassemble (real, twisted unreal) along a new last axis
    return tf.stack([real_part, unreal_part * tf.cos(angle)], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL op LIFT: stand-in for projection to higher coordinates.

    In this software emulation the lift is a uniform rescale of all
    components by the factor (1 + 0.1·d).

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_d (tf.Tensor): Scalar parameter 'd' for the higher dimension.
    Returns:
        tf.Tensor: Transformed primaries, same shape as the input.
    """
    d_value = tf.cast(params_d, primaries.dtype)
    # Placeholder gain; not claimed to preserve any particular invariant
    gain = 1.0 + d_value * 0.1
    return primaries * gain

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL op GLUE: blend each primary with its neighbour.
    X ← X + sigma·roll(X, +1, axis=k)

    The roll is cyclic along axis 1 (the selector axis), so slot j receives a
    contribution from slot j-1 and slot 0 from the last slot.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_sigma (tf.Tensor): Gluing strength 'sigma'.
    Returns:
        tf.Tensor: Transformed primaries, same shape as the input.
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    shifted_neighbour = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * shifted_neighbour

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL op SPLIT: divide every primary into two weighted shares.
    X ← concat(X·(1−tau), X·tau)

    The two shares are concatenated along axis 1, so that axis doubles in
    length; the (1−tau) shares come first.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_tau (tf.Tensor): Scalar split ratio 'tau'.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 12, 2].
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    retained_share = primaries * (1.0 - ratio)
    split_off_share = primaries * ratio
    return tf.concat([retained_share, split_off_share], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically map one qubit's hex hash to a conceptual spin vector and I-vector.

    The hash is not parsed byte-by-byte. Instead `"{hex_hash_str}-{q_idx}"` is
    re-hashed with SHA256 and the first 16 hex digits of that digest seed a
    pseudo-random generator, from which all values are drawn. The same
    (hash, q_idx, D) therefore always yields the same output.

    The generator is a private `np.random.RandomState`, so NumPy's global
    random state is left untouched. It produces the same stream that
    `np.random.seed(seed)` followed by `np.random.rand(...)` would produce.

    Args:
        hex_hash_str (str): A 64-character SHA256 hex digest for one qubit.
        q_idx (int): Index of the qubit; mixed into the seed.
        D (int): Length of the I-vector; must be at least 16.
        num_qubits (int): Total number of qubits. Currently unused.
        invariants (dict): Invariant constants. Currently unused.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec (tf.Tensor): Shape [1, 2, 3], dtype tf.float32. Row 0 is the
              real spin (x, y, z); row 1 is the unreal spin.
            - i_vec (tf.Tensor): Shape [1, D], dtype tf.float32, scaled to unit
              Euclidean norm unless its norm is not above EPS.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Per-qubit seed: re-hash so that the qubit index perturbs the whole seed
    seed_digest = hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()
    seed_value = int(seed_digest[:16], 16) % (2**32 - 1)  # fold into NumPy's 32-bit seed range
    rng = np.random.RandomState(seed_value)  # local generator: no global side effect

    # θ = 2π·r0, φ = 2π·r1, twist = 2π·r2
    r0, r1, r2 = rng.rand(3)
    theta = 2 * math.pi * r0
    phi = 2 * math.pi * r1
    twist_angle = 2 * math.pi * r2

    # Real spin: (x, y, z) = (sinθ cosφ, sinθ sinφ, cosθ)
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: rotate the real (x, y) about the z-axis by the twist angle; z is unchanged
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # I-vector: D further draws from the same stream, scaled to unit norm
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:  # leave a (near-)zero vector unscaled to avoid dividing by ~0
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): rescale each qubit's primaries towards a canonical unit scale.

    Every component of a qubit is divided by (S + EPS), where S is the sum of
    the magnitudes of that qubit's six phase-dual units, and then multiplied
    by `invariants['units']` (1.0 when the key is absent). Qubits whose S is
    not above EPS are mapped to all zeros.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        invariants (dict): Invariant constants; only 'units' is read here.
    Returns:
        tf.Tensor: Normalized primaries of shape [Q, 6, 2].
    """
    unit_magnitudes = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    qubit_total = tf.reduce_sum(unit_magnitudes, axis=1, keepdims=True)  # [Q, 1, 1]

    target_scale = tf.cast(invariants.get('units', 1.0), primaries.dtype)
    # Per-qubit multiplier: the target scale, or 0 for (near-)zero qubits
    multiplier = tf.where(qubit_total > EPS, target_scale, 0.0)

    # EPS in the divisor keeps the quotient finite even when the total is zero
    return primaries / (qubit_total + EPS) * multiplier

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π): multi-qubit wrapper around apply_parity_rotation.

    Builds the pair register and its collapse mask from `primaries`, applies
    the parity rotation, and returns the first six entries of the rotated
    register as the updated primaries.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask [30].
    Returns:
        tf.Tensor: Primaries after parity rotation, shape [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)
    collapsed = detect_collapse(pair_register)
    # The affected-index mask returned alongside is not needed here
    rotated_register, _unused_affected = apply_parity_rotation(pair_register, collapsed, prime_mask)
    # Assumes register entries 0..5 are the primaries themselves — TODO confirm against compute_pairs
    return rotated_register[:, :6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ): multi-qubit wrapper around detect_collapse.

    Zeroes individual primary units whose collapse flag is set, leaving the
    other units of the same qubit untouched. Both the real and the unreal
    component of a flagged unit become 0.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
    Returns:
        tf.Tensor: Primaries with collapsed units zeroed, shape [Q, 6, 2].
    """
    collapse_flags = detect_collapse(compute_pairs(primaries))  # [Q, 30]

    # Flags for register entries 0..5, with a trailing axis so one flag covers (real, unreal).
    # Assumes entries 0..5 correspond to the primaries — TODO confirm against compute_pairs
    primary_flags = tf.cast(collapse_flags[:, :6, tf.newaxis], tf.float32)  # [Q, 6, 1]
    unit_collapsed = primary_flags > 0

    return tf.where(unit_collapsed, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit wrapper that delegates to promote_primaries.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2].
        axis_maps (dict): Per-axis observed values used for the uniqueness checks.
        theta_phipi (float): Uniqueness tolerance, forwarded as `theta`.
    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2].
    """
    promoted = promote_primaries(triplets, axis_maps, theta=theta_phipi)
    return promoted

def _format_necl_param(op_params):
    """Render an op parameter as the "(value)" fragment used in the program manifest."""
    # Tensors expose .numpy(); plain Python numbers and sequences go through NumPy directly
    as_array = op_params.numpy() if hasattr(op_params, "numpy") else np.asarray(op_params)
    # A single element renders as a bare Python scalar; anything larger renders as a nested list
    rendered = as_array.item() if as_array.size == 1 else as_array.tolist()
    return f"({rendered})"

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Apply a sequence of NECL operations to multi-qubit primaries.

    Each name in `necl_program_list` is executed in order and recorded in a
    text manifest. Parameterised ops (CURV, GEOD, TWIST, LIFT, GLUE, SPLIT)
    also record their parameter value. The SHA256 of the manifest is returned
    as the program checksum.

    Per-op behaviour:
        - CURV / GEOD / TWIST / LIFT: call the op of the same name.
        - GLUE: skipped with a printed warning when Q is odd; its parameter is
          still recorded.
        - SPLIT: recorded only. The primaries are left unchanged so the
          selector axis keeps its length.
        - PARITY_Q / COLLAPSE_Q: call the wrapper of the same name; no parameter.
        - Any other name: a warning is printed; the name is still recorded.

    Parameters missing from `params_dict` fall back to float32 defaults:
    CURV 0.01, GEOD 0.05, TWIST π/4, LIFT 0.5, GLUE 0.1, SPLIT 0.5.
    Parameters may be tensors, plain Python numbers, or multi-element values.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): NECL operation names to apply, in order.
        params_dict (dict): Maps NECL op names to their parameters.
        prime_mask (tf.Tensor): Global prime mask, needed for PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Reserved target state for GEOD.
            Currently unused; accepted for interface compatibility.

    Returns:
        tuple[tf.Tensor, str]: Final primaries after the program, and the
            SHA256 hex checksum of the applied program manifest.
    """
    # Ops that are a plain call of the form op(primaries, parameter)
    simple_ops = {'CURV': CURV, 'GEOD': GEOD, 'TWIST': TWIST, 'LIFT': LIFT}
    # Default parameter per parameterised op (TWIST's default is in radians)
    default_params = {
        'CURV': 0.01,
        'GEOD': 0.05,
        'TWIST': math.pi / 4,
        'LIFT': 0.5,
        'GLUE': 0.1,
        'SPLIT': 0.5,
    }

    current_primaries = primaries
    Q = tf.shape(primaries)[0].numpy().item()

    # Manifest fragments, joined once at the end for the checksum
    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name in default_params:
            op_params = params_dict.get(op_name, tf.constant(default_params[op_name], dtype=tf.float32))

            if op_name in simple_ops:
                current_primaries = simple_ops[op_name](current_primaries, op_params)
            elif op_name == 'GLUE':
                if Q % 2 != 0:
                    print(f"Warning: GLUE operation skipped for odd Q ({Q})")
                else:
                    # NOTE(review): the value passed as GLUE's sigma is a full tensor (the
                    # primaries rolled along the Q axis, scaled by op_params), not the scalar
                    # strength GLUE documents. Kept as-is to preserve existing results;
                    # confirm this is the intended cross-qubit coupling.
                    current_primaries = GLUE(current_primaries, tf.roll(current_primaries, shift=1, axis=0) * op_params)
            # SPLIT falls through: primaries are intentionally left unchanged

            manifest_parts.append(_format_necl_param(op_params))
        elif op_name == 'PARITY_Q':
            current_primaries = PARITY_Q(current_primaries, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            current_primaries = COLLAPSE_Q(current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    program_manifest = "".join(manifest_parts)
    necl_checksum = hashlib.sha256(program_manifest.encode('utf-8')).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def _variance_stability_metric(values):
    """
    Shared stability score for r_metric and u_metric.

    Returns 1 - variance / (range + EPS) over all elements of `values`, where
    range is max - min. When the range is below EPS the values are treated as
    identical and the score is exactly 1.0.

    Note: the degenerate branch returns a Python float while the general
    branch returns a scalar tensor. The check uses a tensor in a Python `if`,
    so it needs eager execution.
    """
    value_range = tf.reduce_max(values) - tf.reduce_min(values)
    if value_range < EPS:  # all values effectively equal: no variance to measure
        return 1.0

    return 1.0 - (tf.math.reduce_variance(values) / (value_range + EPS))

def r_metric(real_parts):
    """
    Quantifies real stability/cohesion based on the variance of the real parts of pairs.
    Higher value implies higher stability; 1.0 means no spread at all.

    Args:
        real_parts (tf.Tensor): Real components to score; reduced over all elements.
    Returns:
        float or tf.Tensor: 1.0 (Python float) when max - min < EPS, otherwise a
            scalar tensor equal to 1 - variance / (max - min + EPS).
    """
    return _variance_stability_metric(real_parts)

def u_metric(unreal_parts):
    """
    Quantifies unreal stability/cohesion based on the variance of the unreal parts of pairs.
    Higher value implies higher stability; 1.0 means no spread at all.

    Args:
        unreal_parts (tf.Tensor): Unreal components to score; reduced over all elements.
    Returns:
        float or tf.Tensor: 1.0 (Python float) when max - min < EPS, otherwise a
            scalar tensor equal to 1 - variance / (max - min + EPS).
    """
    return _variance_stability_metric(unreal_parts)

def dv_metric(pairs_q):
    """
    Scores real/unreal consistency of a phase-dual register.

    For every entry the absolute gap |real - unreal| is divided by the entry's
    Euclidean norm (plus EPS); entries whose norm does not exceed EPS contribute
    zero. The result is one minus the mean of these relative gaps, so a higher
    value means lower divergence.

    Args:
        pairs_q (tf.Tensor): Tensor whose last axis holds [real, unreal].

    Returns:
        tf.Tensor: Scalar consistency score.
    """
    norms = tf.norm(pairs_q, axis=-1)
    gap = tf.abs(pairs_q[..., 0] - pairs_q[..., 1])

    # tf.where masks out near-zero-norm entries so they count as "no divergence".
    relative_gap = tf.where(norms > EPS, gap / (norms + EPS), tf.zeros_like(norms))

    return 1.0 - tf.reduce_mean(relative_gap)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant test: the summed magnitudes of the six primaries
    (the first six rows of `pairs_q`) must lie within a tolerance of `units`.

    Args:
        pairs_q (tf.Tensor): Pair register; only rows 0..5 are inspected.
        triplets_q: Accepted for signature symmetry; not used by this check.
        invariants (dict): Reads 'units' (default 1.0) and 'tol' (default EPS).

    Returns:
        tf.Tensor: Scalar boolean tensor, True when the invariant holds.
    """
    target_units = invariants.get('units', 1.0)
    tolerance = invariants.get('tol', EPS)

    primary_norms = tf.norm(pairs_q[:6, :], axis=-1)
    deviation = tf.abs(tf.reduce_sum(primary_norms) - target_units)
    return deviation < tolerance

def degenerate_check(primaries_q):
    """
    Conceptual degeneracy test: reports whether every primary is (near) zero.

    Args:
        primaries_q (tf.Tensor): Tensor whose last axis holds [real, unreal].

    Returns:
        tf.Tensor: Scalar boolean tensor, True when all norms along the last
                   axis are below EPS.
    """
    norms = tf.norm(primaries_q, axis=-1)
    is_near_zero = norms < EPS
    return tf.reduce_all(is_near_zero)

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derives corrected bits based on a per-index rule and guards.
    Rule: b_i=1 if r_i>TAU_R AND u_i>TAU_U AND dv_i>TAU_D AND trip_mix>0 AND inv==True AND deg==False else 0.

    Guards applied after the first derivation:
      1. If the bit pattern's binary entropy is not above 0.3, the thresholds are
         adjusted (R and U scaled by 1.2, D scaled by 0.9 but floored at 0.25) and
         the rule is evaluated once more.
      2. If the result is all ones or all zeros, fall back to marking indices whose
         real component magnitude exceeds EPS and whose triplet has a sign mix.

    Relies on eager execution (tensor values drive Python `if` statements).

    Args:
        pairs_q (tf.Tensor): Pair register for one qubit, indexed as [30, 2].
        triplets_q (tf.Tensor): Triplets for the same qubit; only forwarded to the
                                invariant check.
        invariants (dict): Invariant constants forwarded to the invariant check.
        initial_TAU_R (float): Starting threshold for r_i.
        initial_TAU_U (float): Starting threshold for u_i.
        initial_TAU_D (float): Starting threshold for dv_i.

    Returns:
        tuple: (bits, TAU_R, TAU_U, TAU_D) -- the int32 bit pattern of shape [30]
               and the thresholds in effect after guard 1.
    """
    current_TAU_R = initial_TAU_R
    current_TAU_U = initial_TAU_U
    current_TAU_D = initial_TAU_D

    real = pairs_q[:, 0]     # [30]
    unreal = pairs_q[:, 1]   # [30]
    mag = tf.norm(pairs_q, axis=-1)  # Magnitude of each pair_q unit
    has_mag = mag > EPS
    zeros = tf.zeros_like(mag)

    # Per-index ratios relative to the pair magnitude (zero where magnitude <= EPS).
    # NOTE(review): r_i**2 + u_i**2 == 1 wherever mag > EPS, so r_i and u_i cannot
    # both exceed a threshold above 1/sqrt(2) (~0.707). With thresholds such as 0.85
    # the primary rule never fires and the guard-2 fallback decides the bits --
    # confirm whether that is intended.
    r_i = tf.where(has_mag, tf.abs(real) / mag, zeros)
    u_i = tf.where(has_mag, tf.abs(unreal) / mag, zeros)
    dv_i = tf.where(has_mag, tf.abs(real - unreal) / mag, zeros)

    # Triplet diversity: require a sign mix (of the real parts) within each block.
    # The 10 triplets are the contiguous index blocks [0,1,2], [3,4,5], ..., so a
    # reshape to [10, 3] groups them without a Python loop or host round-trips.
    signs = tf.reshape(tf.sign(real), [10, 3])
    has_mix = tf.reduce_any(tf.not_equal(signs, signs[:, :1]), axis=1)  # [10]
    trip_mix = tf.repeat(tf.cast(has_mix, tf.int32), repeats=3)         # [30]

    # Global invariant checks
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))  # Check degeneracy of primaries

    def _apply_rule(tau_r, tau_u, tau_d):
        # Evaluate the per-index rule for one set of thresholds.
        return tf.cast(
            (r_i > tau_r) & (u_i > tau_u) & (dv_i > tau_d) & (trip_mix > 0) & invariant_ok & not_degenerate,
            tf.int32,
        )

    def min_entropy_ok(bits):
        # Binary entropy of the fraction of ones; EPS keeps log() finite at p in {0, 1}.
        p = tf.reduce_mean(tf.cast(bits, tf.float32))
        H = - (p * tf.math.log(p + EPS) + (1.0 - p) * tf.math.log(1.0 - p + EPS))
        return H > 0.3  # Example entropy threshold

    # Initial bit derivation using provided thresholds
    b = _apply_rule(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 1: Minimum entropy check. If current bit pattern has low entropy, adjust thresholds
    if not bool(min_entropy_ok(b)):
        current_TAU_R *= 1.2
        current_TAU_U *= 1.2
        current_TAU_D = max(current_TAU_D * 0.9, 0.25)  # Example adjustments
        b = _apply_rule(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 2: Never allow all-ones or all-zeros final decision, if it happens, fallback
    if bool(tf.reduce_all(tf.equal(b, 1))) or bool(tf.reduce_all(tf.equal(b, 0))):
        # Fallback to marking indices where the real component magnitude exceeds EPS and triplet mix holds
        b = tf.cast((tf.abs(real) > EPS) & (trip_mix > 0), tf.int32)

    return b, current_TAU_R, current_TAU_U, current_TAU_D  # Return adjusted thresholds

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Advanced Error Correction hook for a single qubit (q_idx). This function performs a local
    re-evaluation of the bit pattern for the current qubit if the initial derivation
    is deemed 'inconsistent'.

    A pattern is inconsistent when it is all ones, all zeros, has fewer than 5 ones,
    or has more than 25 ones.

    This function is designed to:
    - Advance *only* within the same triplet (or within the primaries 6-set) for local re-evaluation.
      It uses the `pairs_q` and `triplets_q` already derived for this specific qubit `q_idx`.
      It does not implicitly advance to other qubits or triplets; its scope is limited to the
      current qubit's local tuplet structure.
    - Record lineage for any local adjustments made. If a correction occurs, a specific
      entry is added to the `TRACE` log, detailing the reason, source, metrics, and new key.
    - *Not* advance across different units (triplets or qubits) unless the current local unit
      has been exhausted. The `derive_bits_advanced` function, called internally,
      operates solely on the provided `pairs_q` and `triplets_q` for the current qubit.

    Args:
        q_idx (int): The index of the current qubit being processed.
        pairs_q (tf.Tensor): The 30-index phase-dual pair register for the current qubit [30, 2].
        triplets_q (tf.Tensor): The 10 triplets for the current qubit [10, 3, 2].
        current_bits_q (tf.Tensor): The initially derived 30-bit pattern for the current qubit [30].
        resonance_key_q (str): The current resonance key string for the qubit.
        TRACE (list): A list to append lineage information if corrections are made (mutated in place).
        invariants (dict): Dictionary of invariant constants.

    Returns:
        tuple[tf.Tensor, str]:
            - new_bits_q (tf.Tensor): The potentially corrected 30-bit pattern.
            - updated_resonance_key_q (str): The updated resonance key string (with lineage if corrected).
    """
    # Inconsistency predicates are combined with a TF reduction rather than Python `or`
    # on tensors, then converted once to a Python bool.
    num_ones = tf.reduce_sum(current_bits_q)
    inconsistency_flags = tf.stack([
        tf.reduce_all(tf.equal(current_bits_q, 1)),  # all ones
        tf.reduce_all(tf.equal(current_bits_q, 0)),  # all zeros
        num_ones < 5,                                # Example: less than 5 bits are 1
        num_ones > 25,                               # Example: more than 25 bits are 1
    ])
    is_inconsistent = bool(tf.reduce_any(inconsistency_flags))

    if not is_inconsistent:
        return current_bits_q, resonance_key_q

    # Call the advanced bit derivation function and capture adjusted thresholds
    corrected_bits, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC
    )
    new_bits_q = corrected_bits
    new_bits_list = new_bits_q.numpy().tolist()

    # The updated key incorporates the correction lineage.
    updated_resonance_key_q = hashlib.sha256(
        (resonance_key_q + "REFactorBits" + str(new_bits_list)).encode("utf-8")
    ).hexdigest()

    # float()/bool() accept both scalar eager tensors and plain Python scalars, so
    # logging does not fail if a metric helper returns a Python float (r_metric and
    # u_metric may do so when all inputs are effectively equal).
    TRACE.append({'qubit': q_idx, 'reason': "binary_refactor", 'source': "tuplets",
                  'r_metric': float(r_metric(pairs_q[:, 0])),
                  'u_metric': float(u_metric(pairs_q[:, 1])),
                  'dv_metric': float(dv_metric(pairs_q)),
                  'invariant_pass': bool(invariant_check_conceptual(pairs_q, triplets_q, invariants)),
                  'degenerate_check': bool(degenerate_check(pairs_q[:6, :])),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': new_bits_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Seed both random sources used below so that this "reproducible example" yields the
# same primaries, axis maps, k-values and lineage hashes on every run.
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Number of virtual qubits
Q = 64

# Dynamically generate initial_primaries
# Each primary (x, y, z) is a phase-dual [real, unreal].
# Generate random x, y, z components for Q qubits: shape [Q, 3, 2].
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# Construct initial_primaries = [x, -x, y, -y, z, -z] by stacking along a new axis 1.
initial_primaries = tf.stack([
    base_primaries_xyz[:, 0, :], neg_phase_dual(base_primaries_xyz[:, 0, :]),  # x, -x
    base_primaries_xyz[:, 1, :], neg_phase_dual(base_primaries_xyz[:, 1, :]),  # y, -y
    base_primaries_xyz[:, 2, :], neg_phase_dual(base_primaries_xyz[:, 2, :]),  # z, -z
], axis=1)  # Shape [Q, 6, 2]

# Dynamically generate axis_maps
# axis_maps for each axis ('x', 'y', 'z') has shape [Q, K_max, 2]
# where K_max is the maximum K drawn across all qubits and axes.

list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []

max_k_dynamic = 0
min_k_val = 3   # Minimum K (inclusive)
max_k_val = 11  # Upper bound for K; np.random.randint excludes it, so K is drawn from 3..10

for q_idx in range(Q):
    # Draw an independent K for each qubit and each axis map (x, y, z)
    k_x = np.random.randint(min_k_val, max_k_val)
    k_y = np.random.randint(min_k_val, max_k_val)
    k_z = np.random.randint(min_k_val, max_k_val)

    list_of_axis_maps_x.append(tf.random.uniform(shape=[k_x, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_y.append(tf.random.uniform(shape=[k_y, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_z.append(tf.random.uniform(shape=[k_z, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))

    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Zero-pad every per-qubit map along its first axis up to max_k_dynamic, then stack per axis.
axis_maps = {
    axis_name: tf.stack([
        tf.pad(t, [[0, max_k_dynamic - t.shape[0]], [0, 0]], "CONSTANT", constant_values=0.0)
        for t in per_qubit_maps
    ])
    for axis_name, per_qubit_maps in (
        ('x', list_of_axis_maps_x),
        ('y', list_of_axis_maps_y),
        ('z', list_of_axis_maps_z),
    )
}

# k_values: shape [Q, 1], random float32 values in [0.0, 1.0)
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# Define a_U_constant (from NGFT)
a_U_constant = tf.constant(10.0, dtype=tf.float32)  # Scalar

# Dynamically generate lineage_hashes (one SHA-256 hex digest per qubit)
lineage_hashes = []
for q_idx in range(Q):
    lineage_hashes.append(hashlib.sha256(f"Q{q_idx}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest())

# Sample NECL program (list of operation strings) - NECL[q] = [op(args), ...]
# For this example, all qubits share the same NECL program.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Placeholder parameters for NECL operations (can be expanded)
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32),         # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32),         # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),   # theta (radians)
    'LIFT': tf.constant(0.5, dtype=tf.float32),          # d (e.g., a scaling factor based on d)
    'GLUE': tf.constant(0.1, dtype=tf.float32),          # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),         # tau
}

# Invariants ν: {units, tol, ordering}
invariants = {
    'units': 1.0,
    'tol': 1e-5,  # Tolerance used by the invariant check in error correction
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1  # Threshold for scores in error correction
}

# TRACE (lineage manifest) - list of dictionaries to log events
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν)
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL)
# The operations run in the order listed in necl_program_shared
# (here: TWIST → CURV → PARITY_Q → COLLAPSE_Q → LIFT).
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q]) (This step implies per-qubit computation for pairs and triplets)
# In our vectorized setup, we compute for all Q simultaneously.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# We'll re-detect collapse and parity for the final state to generate initial bits for error correction.
final_collapse_mask = detect_collapse(all_pairs) # Uses the default thresholds (including R_FOR_RATIO) from the constants
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

# Per-qubit results of the error-correction pass, filled in index order by the loop below.
corrected_bits_list = []
final_resonance_keys = []

# Loop through each qubit for error correction (if needed) and key generation
for q_idx in range(Q):
    # Extract per-qubit data
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Manual modification to force an 'inconsistent' state for Qubit 0 for demonstration
    if q_idx == 0:
        # Set Qubit 0's bits to a single leading '1' followed by 29 zeros, which
        # correct_bits treats as sparse (fewer than 5 ones).
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # Error Correction (Step A & B from instructions); appends to TRACE when a correction happens.
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # The updated_key_q already contains the 'REFactorBits' lineage if correction occurred;
    # otherwise it is the unchanged lineage hash.
    final_resonance_keys.append(updated_key_q)

# Convert corrected_bits_list back to a tensor for subsequent use if needed
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# This step uses the full triplets and axis maps to promote new primaries
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# This is done within the loop for correct_bits and then in make_keys
# The final_resonance_keys list already holds the updated keys after potential error correction.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Every qubit is decoded, one hash per call.
# NOTE(review): the original lineage_hashes are decoded here, not final_resonance_keys -- confirm intended.
Q_for_decode_example = 1 # One qubit is decoded per hash call (this name is not referenced in the code shown here)
D_for_decode_example = 16 # D ≥ 16 as per instruction

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Concatenate the per-qubit decoded spins and i_vecs along axis 0
# (presumably yielding [Q, 2, 3] and [Q, D] -- depends on decode_lineage_hash's return shapes; TODO confirm)
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
# Dump the inputs, intermediate tuplets and outputs of the run to stdout.
print("Primaries In:\n", initial_primaries.numpy())
print("\nPrimaries After NECL:\n", primaries_after_necl.numpy())
# Print pairs and triplets for qubit 0 only, as they are part of the intermediate tuplet constructs
print("\nPairs[0]:\n", all_pairs[0].numpy())
print("\nTriplets[0]:\n", all_triplets[0].numpy())
print("\nBits (all qubits):\n", corrected_bits_tensor.numpy()) # Use corrected bits
print("\nPrimaries Out (promoted):\n", primaries_out_promoted.numpy())

# Conceptual Nth identities: {n^1, n^2, n^3, n^p} per qubit
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # Extract the first promoted primary (index 0) for the current qubit
    promoted_primary_x = primaries_out_promoted[q_idx, 0, :] # Shape [2]

    # Ensure promoted_primary_x is explicitly converted to a Tensor for n_identity
    promoted_primary_x_tensor = tf.convert_to_tensor(promoted_primary_x, dtype=tf.float32)

    # Only n^1 depends on the qubit; n^0, n^2 and n^p are called without a selector.
    print(f"  Qubit {q_idx}:")
    print(f"    n^0 (base identity): {n_identity(0).numpy()[0]}")
    print(f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}")
    print(f"    n^2 (second-order product): {n_identity(2).numpy()[0]}") # Placeholder
    print(f"    n^p (p-order product): {n_identity('p').numpy()[0]}") # Placeholder

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# NECL manifest + checksum per qubit - Conceptual: print TRACE log and a checksum of it
# Each checksum is the SHA-256 of the string form of that qubit's TRACE entries, so
# qubits without any TRACE entry all share the checksum of "[]".
necl_manifest_checksums = []
for q_idx in range(Q):
    qubit_trace_entries = [entry for entry in TRACE if entry['qubit'] == q_idx]
    manifest_str = str(qubit_trace_entries)
    checksum = hashlib.sha256(manifest_str.encode('utf-8')).hexdigest()
    necl_manifest_checksums.append(checksum)
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)

Primaries In:
 [[[-0.417485   -0.5565157 ]
  [ 0.417485    0.5565157 ]
  [-0.6204579   0.6172738 ]
  [ 0.6204579  -0.6172738 ]
  [-0.00817251  0.95951533]
  [ 0.00817251 -0.95951533]]

 [[ 0.08210111 -0.0605588 ]
  [-0.08210111  0.0605588 ]
  [ 0.44932246 -0.06976032]
  [-0.44932246  0.06976032]
  [-0.5854747  -0.03292513]
  [ 0.5854747   0.03292513]]

 [[ 0.4349773   0.715029  ]
  [-0.4349773  -0.715029  ]
  [-0.8964765  -0.5290761 ]
  [ 0.8964765   0.5290761 ]
  [-0.9421675  -0.13442492]
  [ 0.9421675   0.13442492]]

 [[ 0.7020836   0.42065287]
  [-0.7020836  -0.42065287]
  [ 0.66939545 -0.8689885 ]
  [-0.66939545  0.8689885 ]
  [ 0.54820776  0.28364325]
  [-0.54820776 -0.28364325]]

 [[-0.5596812  -0.27298903]
  [ 0.5596812   0.27298903]
  [-0.18120861 -0.27927017]
  [ 0.18120861  0.27927017]
  [ 0.01503325 -0.27959418]
  [-0.01503325  0.27959418]]

 [[ 0.19639373 -0.41473103]
  [-0.19639373  0.41473103]
  [ 0.77364206 -0.82868147]
  [-0.77364206  0.82868147]
  [-0.19839263  0.23978

**Reasoning**:
The subtask requires updating the `R_FOR_RATIO` constant and refactoring the `detect_collapse` function to implement the new triplet-level predicate for collapse detection, removing the nested helper function, and applying the specified logic for marking collapsed indices. I will update the code to reflect these changes.



In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
THETA_PHIPI = 0.001   # phi-pi tolerance constant
TAU_HI = 1.0          # high threshold center (for collapse detection)
TAU_LOW = -TAU_HI     # low threshold for negative values (for collapse detection)
EPS = 1e-6            # near-zero buffer

# Ratio threshold used by collapse detection (real / unreal must exceed this value)
R_FOR_RATIO = 64.0

# Thresholds handed to the advanced error-correction bit derivation
TAU_R_METRIC = 0.85   # real stability threshold (higher is stricter)
TAU_U_METRIC = 0.85   # unreal stability threshold (higher is stricter)
TAU_D_METRIC = 0.85   # real/unreal divergence threshold (higher is stricter)

# Prime index mask over 0..29: 1 at the prime positions, 0 elsewhere
PRIME_MASK = tf.constant(
    [1 if index in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) else 0 for index in range(30)],
    dtype=tf.int32
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Adds two phase-dual values element by element.

    The last dimension is taken to be the phase-dual axis (real, unreal), so
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|.

    Args:
        a: Left operand.
        b: Right operand.

    Returns:
        The result of `a + b`.
    """
    total = a + b
    return total

def mul_phase_dual_component_wise(a, b):
    """
    Multiplies two phase-dual values element by element.

    The last dimension is taken to be the phase-dual axis (real, unreal), so
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|.

    Args:
        a: Left operand.
        b: Right operand.

    Returns:
        The result of `a * b`.
    """
    product = a * b
    return product

def neg_phase_dual(a):
    """
    Negates a phase-dual value element by element.

    The last dimension is taken to be the phase-dual axis (real, unreal); both
    components change sign.

    Args:
        a: Value to negate.

    Returns:
        The result of `-a`.
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Conceptual Nth identity n^k.

    Only order 1 has a non-trivial value: either the supplied selector scaled to
    (approximately) unit length, or the default [[1, 1]] / sqrt(2). Every other
    order -- 0, 2, 'p', or anything else -- yields the placeholder [[1.0, 0.0]].

    Args:
        order (int or str): The order of the identity. Can be 0, 1, 2, or 'p' for placeholder.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) from which
                                               n^1 is derived; must hold two values.
                                               Ignored unless order is 1.
    Returns:
        tf.Tensor: A 1x2 tensor representing the conceptual Nth identity.
    """
    if order == 1:
        if selector_primary is None:
            # Default n^1 when no specific selector is provided
            return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)

        # Scale the selector by its norm; EPS guards against division by zero
        norm = tf.norm(selector_primary, axis=-1, keepdims=True)
        unit_selector = selector_primary / (norm + EPS)
        return tf.reshape(unit_selector, [1, 2])

    # n^0 (base identity) and the placeholders for n^2 and higher orders coincide
    return tf.constant([[1.0, 0.0]], dtype=tf.float32)

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Builds the 30-index phase-dual pair register from 6 primary phase-dual values.

    The register holds the 6 primaries followed by 24 combinatorials. For each
    axis pairing (x with y, x with z, y with z) and each sign choice of the two
    operands, the component-wise sum and then the component-wise product are
    emitted, in canonical index order.

    Args:
        prim (tf.Tensor): Input primaries of shape [Q, 6, 2] and dtype tf.float32.
                          The last dimension holds [real, unreal] components.

    Returns:
        tf.Tensor: The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
    """
    assert (
        prim.shape.rank == 3
        and (tf.shape(prim)[-2] == 6).numpy().item()
        and (tf.shape(prim)[-1] == 2).numpy().item()
        and (prim.dtype == tf.float32)
    ), f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Split the 6-axis; each piece has shape [Q, 2]
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)

    # Start with the 6 primaries, then append the 24 combinatorials
    register = [x, xi, y, yi, z, zi]
    axis_pairings = (
        ((x, xi), (y, yi)),
        ((x, xi), (z, zi)),
        ((y, yi), (z, zi)),
    )
    for left_variants, right_variants in axis_pairings:
        for left in left_variants:
            for right in right_variants:
                register.append(add_phase_dual(left, right))
                register.append(mul_phase_dual_component_wise(left, right))

    # Stack along the 30-dimension
    return tf.stack(register, axis=-2)

def group_triplets(pairs):
    """
    Splits the 30-index phase-dual pair register into 10 triplets of 3 phase-dual values each.
    Triplet t collects indices 3t, 3t+1 and 3t+2. These are 'Nth Lines' in the
    context of the ISA.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table [[0,1,2],[3,4,5],...,[27,28,29]] of shape [10, 3]
    triplet_index_table = tf.reshape(tf.range(30, dtype=tf.int32), [10, 3])

    # Gathering along axis 1 keeps the trailing phase-dual dimension intact
    return tf.gather(pairs, triplet_index_table, axis=1)

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW, r_for_ratio=R_FOR_RATIO):
    """
    Detects collapse across the 10 triplets within the phase-dual pair register.
    A triplet block collapses if, for any index 'p' within the triplet,
    the condition [high(real_p) AND low(unreal_p)] OR [ratio(real_p / unreal_p) > R_FOR_RATIO] is met.
    If this condition is true for *any* index within the triplet, all indices i,j,k
    of that triplet are marked as collapsed.
    COLL(x, χ) operation.

    The triplets are the contiguous index blocks [0,1,2], [3,4,5], ..., [27,28,29],
    so the register is viewed as [Q, 10, 3] and all blocks are evaluated at once.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold for real component (real >= tau_hi counts as high).
        tau_low (float): Low threshold for unreal component (unreal <= tau_low counts as low;
                         should be negative).
        r_for_ratio (float): Ratio threshold for collapse detection.

    Returns:
        tf.Tensor: A binary collapse mask of shape [Q, 30] and dtype tf.int32.
                   (collapse is a per-unit binary flag, not phase-dual itself).
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Group the 30 indices into 10 contiguous triplet blocks: [Q, 30] -> [Q, 10, 3]
    real_blocks = tf.reshape(pairs[..., 0], [-1, 10, 3])
    unreal_blocks = tf.reshape(pairs[..., 1], [-1, 10, 3])

    # Condition 1: high(real_p) AND low(unreal_p)
    cond1 = tf.logical_and(real_blocks >= tau_hi, unreal_blocks <= tau_low)

    # Condition 2: ratio(real_p / unreal_p) > r_for_ratio.
    # Where unreal_p is within EPS of zero the ratio is set to 0, which avoids
    # NaN/inf and makes the condition false for that index.
    ratio_term = tf.where(
        tf.abs(unreal_blocks) > EPS,
        real_blocks / unreal_blocks,
        tf.zeros_like(real_blocks),
    )
    cond2 = ratio_term > r_for_ratio

    # A triplet collapses when either condition holds for any of its 3 indices
    triplet_collapsed = tf.reduce_any(tf.logical_or(cond1, cond2), axis=2)  # [Q, 10]

    # Expand each triplet flag back onto its 3 member indices
    collapse_mask = tf.repeat(tf.cast(triplet_collapsed, tf.int32), repeats=3, axis=1)  # [Q, 30]
    return collapse_mask

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    PAR(x, π): flip the sign of selected entries of a phase-dual pair register.

    An entry is flipped when its index is flagged in `prime_mask` or when it is
    flagged in `collapse_mask`. The flip multiplies both the real and the unreal
    component by -1; all other entries pass through unchanged.

    Args:
        pairs (tf.Tensor): Pair register, shape [Q, 30, 2], dtype tf.float32.
        collapse_mask (tf.Tensor): Per-qubit collapse flags, shape [Q, 30], dtype tf.int32.
        prime_mask (tf.Tensor): Flags for prime indices, shape [30], dtype tf.int32
            (entries > 0 count as set).

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated: sign-adjusted register, shape [Q, 30, 2], dtype tf.float32.
            - affected: 0/1 mask of flipped entries, shape [Q, 30], dtype tf.int32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # The [30] prime flags broadcast against the [Q, 30] collapse flags.
    is_affected = tf.logical_or(collapse_mask > 0, prime_mask > 0)  # [Q, 30] bool
    affected = tf.cast(is_affected, tf.int32)

    # Map flag 0 -> +1.0 and flag 1 -> -1.0, then add a trailing axis so the
    # same sign is applied to the real and unreal components.
    sign = 1.0 - 2.0 * tf.cast(is_affected, tf.float32)  # [Q, 30]
    rotated = pairs * sign[..., tf.newaxis]  # [Q, 30, 2]
    return rotated, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Reduce a phase-dual pair register to one bit per index.

    Only the real (leading) component is inspected: the bit is 1 when it is
    strictly greater than `eps`, and 0 for negative values and for values
    inside the near-zero buffer.

    Args:
        rotated_pairs (tf.Tensor): Pair register, shape [Q, 30, 2], dtype tf.float32.
        eps (float): Threshold the real component must exceed to yield a 1.

    Returns:
        tf.Tensor: 0/1 bitmap, shape [Q, 30], dtype tf.int32.
    """
    assert (
        rotated_pairs.shape.rank == 3
        and (tf.shape(rotated_pairs)[-2] == 30).numpy().item()
        and (tf.shape(rotated_pairs)[-1] == 2).numpy().item()
        and (rotated_pairs.dtype == tf.float32)
    ), f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    leading_component = rotated_pairs[..., 0]  # [Q, 30]
    return tf.cast(tf.greater(leading_component, eps), tf.int32)

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flag candidate phase-dual values that differ from every observed axis value.

    A candidate counts as unique when the Euclidean distance (over the
    real/unreal pair) to each of the K observed values is strictly greater
    than `theta`.

    Args:
        vals (tf.Tensor): Candidates, shape [Q, 2] (one per qubit) or [Q, 10, 2].
        axis_vals (tf.Tensor): Observed values for the axis, shape [Q, K, 2].
        theta (float): Distance tolerance.

    Returns:
        tf.Tensor: int32 flags (1 = unique), shape [Q] or [Q, 10] to match `vals`.

    Raises:
        ValueError: If `vals` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert (
        axis_vals.shape.rank == 3
        and (tf.shape(axis_vals)[-1] == 2).numpy().item()
    ), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (
        tf.shape(vals)[0] == tf.shape(axis_vals)[0]
    ).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    vals_rank = vals.shape.rank
    if vals_rank not in (2, 3):
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    if vals_rank == 2:
        # [Q, 1, 2] against [Q, K, 2] -> [Q, K, 2]
        candidates = tf.expand_dims(vals, axis=1)
        observed = axis_vals
    else:
        # [Q, 10, 1, 2] against [Q, 1, K, 2] -> [Q, 10, K, 2]
        candidates = tf.expand_dims(vals, axis=2)
        observed = tf.expand_dims(axis_vals, axis=1)

    # Distance between each candidate and each observed phase-dual unit.
    distances = tf.norm(tf.abs(candidates - observed), axis=-1)  # [Q, K] or [Q, 10, K]

    # Unique only if the candidate clears the tolerance against all K observations.
    clears_all = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(clears_all, tf.int32)

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Pick, per qubit, the first candidate whose uniqueness flag is set.

    The lowest index i with `cand_bool[q, i] > 0` is selected. When no flag is
    set for a qubit, candidate 0 is returned for it.

    The first-set index is computed explicitly rather than with `tf.argmax`:
    the flags are 0/1, so several entries usually tie for the maximum, and
    `tf.argmax` does not document which tied index it returns.

    Args:
        cand_bool (tf.Tensor): Uniqueness flags, shape [Q, 10], dtype tf.int32.
        vals (tf.Tensor): Phase-dual candidates, shape [Q, 10, 2], dtype tf.float32.

    Returns:
        tf.Tensor: Selected phase-dual values, shape [Q, 2].
    """
    assert cand_bool.shape.rank == 2 and (tf.shape(cand_bool)[-1] == 10).numpy().item() and (cand_bool.dtype == tf.int32), \
        f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert vals.shape.rank == 3 and (tf.shape(vals)[-2] == 10).numpy().item() and (tf.shape(vals)[-1] == 2).numpy().item() and (vals.dtype == tf.float32), \
        f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    # Replace every unset flag by an out-of-range sentinel (10) and take the
    # row minimum: that is the lowest flagged position, or the sentinel if
    # nothing is flagged.
    positions = tf.range(10, dtype=tf.int64)  # [10]
    sentinel = tf.constant(10, dtype=tf.int64)
    flagged_positions = tf.where(cand_bool > 0, positions, sentinel)  # [Q, 10]
    first_flagged = tf.reduce_min(flagged_positions, axis=1)  # [Q]

    # No flagged candidate -> fall back to index 0.
    idx = tf.where(first_flagged < sentinel, first_flagged, tf.zeros_like(first_flagged))  # [Q]

    # Pair each batch row with its chosen candidate index and gather [Q, 2] from [Q, 10, 2].
    batch_indices = tf.stack([tf.range(tf.shape(vals)[0], dtype=tf.int64), idx], axis=1)  # [Q, 2]
    selected_vals = tf.gather_nd(vals, batch_indices)  # [Q, 2]
    return selected_vals

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    ASSOC(A, B, α): derive six promoted primaries per qubit from ten triplets.

    Two candidate results are built and one is chosen per qubit:

    * Triplet-first: the last triplet's x, y, z values, used when each of them
      is unique (within `theta`) against the corresponding axis map.
    * Axis fallback: for each axis independently, the candidate chosen by
      `_first_unique_selection_phase_dual` among the ten triplets' values.

    Each result is laid out as (x, -x, y, -y, z, -z), the negation applying to
    both real and unreal components.

    Args:
        triplets (tf.Tensor): Shape [Q, 10, 3, 2], dtype tf.float32.
        axis_maps (dict): Keys 'x', 'y', 'z'; each value is a tf.Tensor of
            observed values for that axis, shape [Q, K, 2], dtype tf.float32.
        theta (float): Tolerance threshold for the uniqueness checks.

    Returns:
        tf.Tensor: Promoted primaries, shape [Q, 6, 2], dtype tf.float32.
    """
    assert (
        triplets.shape.rank == 4
        and (tf.shape(triplets)[-3] == 10).numpy().item()
        and (tf.shape(triplets)[-2] == 3).numpy().item()
        and (tf.shape(triplets)[-1] == 2).numpy().item()
    ), f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert (
        triplets.dtype == tf.float32
    ), f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for axis_name, observed in axis_maps.items():
        assert (
            isinstance(observed, tf.Tensor)
            and observed.dtype == tf.float32
            and observed.shape.rank == 3
            and (tf.shape(observed)[-1] == 2).numpy().item()
        ), f"axis_maps['{axis_name}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {observed.shape} and dtype {observed.dtype}"
    assert (
        tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]
    ).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')

    def _stack_with_negations(per_axis_units):
        # [x, y, z] (each [Q, 2]) -> [Q, 6, 2] ordered x, -x, y, -y, z, -z.
        units = []
        for unit in per_axis_units:
            units.append(unit)
            units.append(neg_phase_dual(unit))
        return tf.stack(units, axis=1)

    # --- Triplet-first path: inspect only the last of the ten triplets. ---
    last_triplet = triplets[:, -1, :, :]  # [Q, 3, 2]
    final_units = [last_triplet[:, pos, :] for pos in range(3)]  # each [Q, 2]
    final_flags = [
        _value_unique_axis_phase_dual(unit, axis_maps[name], theta)  # [Q]
        for unit, name in zip(final_units, axis_names)
    ]
    # The triplet as a whole is unique only if x, y and z are all unique.
    whole_triplet_unique = tf.logical_and(
        tf.logical_and(final_flags[0] > 0, final_flags[1] > 0),
        final_flags[2] > 0,
    )  # [Q] bool
    prim_from_triplet = _stack_with_negations(final_units)  # [Q, 6, 2]

    # --- Axis-fallback path: choose a candidate per axis across all triplets. ---
    axis_candidates = [triplets[:, :, pos, :] for pos in range(3)]  # each [Q, 10, 2]
    candidate_flags = [
        _value_unique_axis_phase_dual(cands, axis_maps[name], theta)  # [Q, 10]
        for cands, name in zip(axis_candidates, axis_names)
    ]
    selected_units = [
        _first_unique_selection_phase_dual(flags, cands)  # [Q, 2]
        for flags, cands in zip(candidate_flags, axis_candidates)
    ]
    prim_from_axes = _stack_with_negations(selected_units)  # [Q, 6, 2]

    # Per-qubit choice, broadcast from [Q, 1, 1] over the [Q, 6, 2] results.
    use_triplet = whole_triplet_unique[:, tf.newaxis, tf.newaxis]
    return tf.where(use_triplet, prim_from_triplet, prim_from_axes)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Build one SHA256 resonance key per batch sample.

    For every sample a text manifest of the form
    ``bits:[...]|prime:[...]|collapse:[...]|parity:[...]`` is assembled from the
    materialized tensors; if a non-empty lineage string is supplied for that
    sample, ``|path:<lineage>`` is appended. The key is the SHA256 hex digest of
    the UTF-8 encoded manifest. Hashing happens in plain Python after the
    tensors are converted with `.numpy()`.

    Args:
        bits (tf.Tensor): Bitmap, shape [Q, 30], dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask shared by all samples, shape [30], dtype tf.int32.
        collapse_mask (tf.Tensor): Collapse mask, shape [Q, 30], dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask, shape [Q, 30], dtype tf.int32.
        lineage_list (list[str], optional): One lineage string per sample. Defaults to None.

    Returns:
        list[str]: SHA256 hex digests, one per sample, in batch order.
    """
    assert (
        bits.shape.rank == 2
        and (tf.shape(bits)[-1] == 30).numpy().item()
        and (bits.dtype == tf.int32)
    ), f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        parity_mask.shape.rank == 2
        and (tf.shape(parity_mask)[-1] == 30).numpy().item()
        and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (parity_mask.dtype == tf.int32)
    ), f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (
        (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item())
        and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item())
    ), f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert (
            isinstance(lineage_list, list)
            and len(lineage_list) == tf.shape(bits)[0].numpy().item()
        ), f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    batch_size = tf.shape(bits)[0].numpy().item()

    # Materialize everything once; the hashing below is pure Python.
    bit_rows = bits.numpy()
    collapse_rows = collapse_mask.numpy()
    parity_rows = parity_mask.numpy()
    # The prime mask is identical for every sample, so render it a single time.
    prime_entries = prime_mask.numpy().tolist()

    def _manifest_for(row):
        text = f"bits:{bit_rows[row].tolist()}|prime:{prime_entries}|collapse:{collapse_rows[row].tolist()}|parity:{parity_rows[row].tolist()}"
        # Lineage is optional both globally and per sample (empty strings are skipped).
        if lineage_list and lineage_list[row]:
            text += f"|path:{lineage_list[row]}"
        return text

    return [
        hashlib.sha256(_manifest_for(row).encode("utf-8")).hexdigest()
        for row in range(batch_size)
    ]

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    Compute the per-qubit Info-energy E_info = (k + 1) · a_U · I.

    I is the sum, over the six promoted primaries of a qubit, of the Euclidean
    norm of each (real, unreal) pair.

    Args:
        primaries_out (tf.Tensor): Promoted primaries, shape [Q, 6, 2], dtype tf.float32.
        k_values (tf.Tensor): Per-qubit 'k' component, shape [Q, 1] or [Q], dtype tf.float32.
        a_U_constant (tf.Tensor): Scalar tf.float32 constant.

    Returns:
        tf.Tensor: Info-energy per qubit, shape [Q], dtype tf.float32.
    """
    assert (
        primaries_out.shape.rank == 3
        and (tf.shape(primaries_out)[-1] == 2).numpy().item()
    ), f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert primaries_out.dtype == tf.float32, f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert (
        tf.shape(primaries_out)[-2] == 6
    ).numpy().item(), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert k_values.dtype == tf.float32, f"k_values must have dtype tf.float32, but got {k_values.dtype}"
    assert (
        (
            (tf.rank(k_values) == 2).numpy().item()
            and (tf.shape(k_values)[-1] == 1).numpy().item()
        )
        or (
            (tf.rank(k_values) == 1).numpy().item()
            and (tf.shape(k_values)[0] == tf.shape(primaries_out)[0]).numpy().item()
        )
    ), f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"
    assert a_U_constant.dtype == tf.float32, f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert (
        tf.rank(a_U_constant) == 0
    ).numpy().item(), f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # Bring k into column form [Q, 1] regardless of which accepted shape was passed.
    k_is_flat = (tf.rank(k_values) == 1).numpy().item()
    k_column = tf.expand_dims(k_values, axis=-1) if k_is_flat else k_values

    # I: per-primary magnitude [Q, 6], summed over the six primaries and kept as [Q, 1].
    unit_norms = tf.norm(primaries_out, axis=-1)
    i_column = tf.reduce_sum(unit_norms, axis=1, keepdims=True)

    energy_column = (k_column + 1.0) * i_column * a_U_constant  # [Q, 1]
    return tf.squeeze(energy_column, axis=1)  # [Q]

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL CURV: curvilinear damping of each primary unit.
    X ← X / (1 + |kappa|·|X|), where |X| is the norm over the last axis.
    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): kappa; scalar or any shape that broadcasts
            against [Q, 6, 1]. It is cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    kappa_abs = tf.abs(tf.cast(params_kappa, primaries.dtype))
    # Norm of each (real, unreal) unit, kept as [Q, 6, 1] so it divides both components.
    unit_norm = tf.norm(primaries, axis=-1, keepdims=True)
    damping = 1.0 + kappa_abs * unit_norm
    return primaries / damping

def GEOD(primaries, params_t):
    """
    NECL GEOD: shift every component by `t` in the direction of its own sign.
    X ← X + t·sign(X)  (components equal to zero stay zero, since sign(0) = 0)
    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): Step size 't'; scalar or broadcastable to `primaries`.
            It is cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    step = tf.cast(params_t, primaries.dtype)
    displacement = step * tf.sign(primaries)
    return primaries + displacement

def TWIST(primaries, params_theta):
    """
    NECL TWIST: scale the unreal component by cos(theta); the real component
    is passed through untouched.
    X[...,1] ← X[...,1]·cos(theta)
    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): Angle 'theta' (passed to tf.cos); scalar or
            broadcastable against the [Q, 6] unreal slice.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    angle = tf.cast(params_theta, primaries.dtype)
    real_part = primaries[..., 0]
    unreal_part = primaries[..., 1]
    # Re-assemble the (real, unreal) pair on the last axis.
    return tf.stack([real_part, unreal_part * tf.cos(angle)], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL LIFT: stand-in for a projection to higher coordinates.
    In this emulation it is a uniform rescale: X ← X·(1 + 0.1·d).
    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_d (tf.Tensor): Parameter 'd'; cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    d_value = tf.cast(params_d, primaries.dtype)
    # Placeholder scaling law; 0.1 is a fixed gain per unit of d.
    lift_gain = 1.0 + d_value * 0.1
    return primaries * lift_gain

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL GLUE: mix each primary with its predecessor along axis 1.
    X ← X + sigma·roll(X, +1, axis=1)
    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_sigma (tf.Tensor): Gluing strength; cast to the dtype of
            `primaries` and multiplied element-wise with the rolled tensor.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    # Cyclic shift by one along the selector axis: slot i receives slot i-1.
    shifted = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * shifted

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL SPLIT: divide every primary into two weighted copies.
    X ← concat(X·(1−tau), X·tau) along axis 1
    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_tau (tf.Tensor): Split ratio; cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries with axis 1 doubled ([Q, 12, 2] for a
        [Q, 6, 2] input).
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    retained = primaries * (1.0 - ratio)
    split_off = primaries * ratio
    # Output has twice as many entries on axis 1 as the input.
    return tf.concat([retained, split_off], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically map one qubit's lineage hash to a conceptual state.

    The hash string and qubit index are re-hashed into a seed for a private
    pseudo-random generator, from which three angles and a D-vector are drawn.
    The mapping is conceptual: bytes of the hash are not decoded directly.
    A local generator is used so that calling this function does not reseed
    NumPy's process-wide random state; the values drawn are the same as those
    obtained by seeding the global generator with the same seed.

    Args:
        hex_hash_str (str): A SHA256 hex hash string (64 characters) for one qubit.
        q_idx (int): The index of the qubit; mixed into the seed.
        D (int): Dimensionality for i_vec; must be at least 16.
        num_qubits (int): Total number of qubits. Currently unused.
        invariants (dict): Invariant constants (e.g., 'units', 'tol', 'ordering').
            Currently unused.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec (tf.Tensor): Shape [1, 2, 3], dtype tf.float32. Row 0 is the
              real spin (x, y, z); row 1 is the unreal spin.
            - i_vec (tf.Tensor): Shape [1, D], dtype tf.float32, scaled to unit
              Euclidean norm unless its norm is not above EPS.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Seed from the first 64 bits of SHA256("<hash>-<q_idx>"), reduced into
    # the range accepted as a legacy NumPy seed. The reduction is kept exactly
    # as-is so previously generated states stay reproducible.
    seed_digest = hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()
    seed_value = int(seed_digest[:16], 16)
    rng = np.random.RandomState(seed_value % (2**32 - 1))

    # θ = 2π·r0, φ = 2π·r1, twist = 2π·r2
    r_vals = rng.rand(3)
    theta = 2 * math.pi * r_vals[0]
    phi = 2 * math.pi * r_vals[1]
    twist_angle = 2 * math.pi * r_vals[2]

    # Real spin: (x, y, z) = (sinθ cosφ, sinθ sinφ, cosθ)
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: the real spin rotated about the z axis by 'twist'
    # (2D rotation of x, y; z is unchanged).
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # I_vec: D further draws from the same stream, normalized to unit length.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:  # leave a (near-)zero vector untouched to avoid div by zero
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): rescale each qubit's primaries by their summed magnitude.

    For every qubit the norms of its primary units are added up; all of the
    qubit's components are divided by that total (plus EPS) and multiplied by
    the target scale `invariants['units']` (1.0 if absent). Qubits whose total
    magnitude is not above EPS come out as all zeros.
    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        invariants (dict): Invariant constants; only the 'units' key is read here.
    Returns:
        tf.Tensor: Normalized primaries of shape [Q, 6, 2].
    """
    unit_norms = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    qubit_total = tf.reduce_sum(unit_norms, axis=1, keepdims=True)  # [Q, 1, 1]

    target_scale = tf.cast(invariants.get('units', 1.0), primaries.dtype)
    # Gain is the target scale for non-degenerate qubits and 0 otherwise.
    gain = tf.where(qubit_total > EPS, target_scale, 0.0)  # [Q, 1, 1]

    # EPS in the denominator guards the division for zero-magnitude qubits.
    return primaries / (qubit_total + EPS) * gain

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π): multi-qubit wrapper around apply_parity_rotation.

    The pair register and its collapse mask are derived from `primaries` via
    compute_pairs and detect_collapse, the parity rotation is applied to the
    register, and the first six entries of the rotated register are returned.
    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask [30].
    Returns:
        tf.Tensor: Slice [:, 0:6, :] of the rotated register, shape [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)
    collapse_flags = detect_collapse(pair_register)
    rotated_register, _affected = apply_parity_rotation(pair_register, collapse_flags, prime_mask)
    # The register has 30 entries per qubit; only the leading six are handed
    # back as the updated primaries.
    return rotated_register[:, :6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ): multi-qubit wrapper around detect_collapse.

    Zeroes individual primary units (both real and unreal components) whose
    entry in the first six columns of the collapse mask is set; all other
    units are returned unchanged.
    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
    Returns:
        tf.Tensor: Primaries with collapsed units zeroed, shape [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)
    collapse_flags = detect_collapse(pair_register)  # [Q, 30]

    # Flags for the six primary slots, with a trailing axis so one flag
    # covers both components of its unit: [Q, 6] -> [Q, 6, 1].
    primary_flags = tf.cast(collapse_flags[:, :6], tf.float32)[..., tf.newaxis]
    is_collapsed = primary_flags > 0

    return tf.where(is_collapsed, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit entry point that delegates to promote_primaries.
    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2].
        axis_maps (dict): Per-axis observed values used for the uniqueness checks.
        theta_phipi (float): Uniqueness tolerance, forwarded as `theta`.
    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2].
    """
    promoted = promote_primaries(triplets, axis_maps, theta=theta_phipi)
    return promoted

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Applies a sequence of NECL operations to multi-qubit primaries and returns
    the result together with a checksum of the program that was requested.

    Supported op names: 'CURV', 'GEOD', 'TWIST', 'LIFT', 'GLUE', 'SPLIT',
    'PARITY_Q', 'COLLAPSE_Q'. Behaviour worth knowing:

    * Parameterised ops read their parameter from `params_dict[op_name]`, falling
      back to a built-in default. Parameters may be tf tensors or plain
      Python/NumPy numbers; non-tensors are converted to tf.float32. The
      parameter must be a scalar because it is written into the checksum manifest.
    * 'GLUE' is skipped (with a printed warning) when the batch size Q is odd.
    * 'SPLIT' leaves the primaries unchanged so that the K dimension stays
      constant through the pipeline; it is still recorded in the manifest.
    * Unknown op names print a warning, change nothing, and are still recorded
      in the manifest.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): List of NECL operation names to apply, in order.
        params_dict (dict): Dictionary mapping NECL op names to their parameters.
        prime_mask (tf.Tensor): Global prime mask needed for PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Reserved as a target state for
            GEOD. Currently unused; accepted for interface compatibility.

    Returns:
        tf.Tensor: Final primaries after applying the NECL program.
        str: SHA256 hex digest of the program manifest (op names, each followed
            by "(<param>)" for parameterised ops, concatenated without separators).
    """
    # Built-in parameter defaults for the parameterised ops.
    default_params = {
        'CURV': 0.01,
        'GEOD': 0.05,
        'TWIST': math.pi / 4,  # radians
        'LIFT': 0.5,           # 'd' factor
        'GLUE': 0.1,           # sigma, gluing strength
        'SPLIT': 0.5,          # tau, split ratio
    }
    # Ops that are a plain call of the form op(primaries, param).
    direct_ops = {'CURV': CURV, 'GEOD': GEOD, 'TWIST': TWIST, 'LIFT': LIFT}

    current_primaries = primaries
    Q = tf.shape(primaries)[0].numpy().item()

    # Pieces of the program manifest, joined and hashed at the end.
    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name in default_params:
            op_params = params_dict.get(op_name, default_params[op_name])
            # Accept plain numbers as well as tensors; the manifest entry below
            # needs a tensor to call .numpy() on.
            if not tf.is_tensor(op_params):
                op_params = tf.constant(op_params, dtype=tf.float32)

            if op_name in direct_ops:
                current_primaries = direct_ops[op_name](current_primaries, op_params)
            elif op_name == 'GLUE':
                if Q % 2 != 0:
                    print(f"Warning: GLUE operation skipped for odd Q ({Q})")
                else:
                    # NOTE(review): the strength handed to GLUE is a full tensor
                    # (the batch-rolled primaries scaled by sigma), not the scalar
                    # GLUE documents, so the update is
                    # X + sigma·roll(X, axis=0)·roll(X, axis=1). Kept as-is to
                    # preserve results; confirm this is the intended coupling.
                    current_primaries = GLUE(current_primaries, tf.roll(current_primaries, shift=1, axis=0) * op_params)
            # 'SPLIT' falls through on purpose: primaries are left unchanged.

            manifest_parts.append(f"({op_params.numpy().item()})")
        elif op_name == 'PARITY_Q':
            current_primaries = PARITY_Q(current_primaries, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            current_primaries = COLLAPSE_Q(current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    program_manifest = "".join(manifest_parts)
    necl_checksum = hashlib.sha256(program_manifest.encode('utf-8')).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Quantifies real stability/cohesion from the spread of the real parts of pairs.

    The score is ``1 - variance / (range + EPS)`` with ``range = max - min``.
    Higher value implies higher stability; when all inputs are effectively
    identical (range below EPS) the score is exactly 1.

    Args:
        real_parts (tf.Tensor): Real components to score.

    Returns:
        tf.Tensor: Scalar tensor with the stability score. A tensor is returned
            on every path (including the zero-spread case), so callers can
            uniformly call ``.numpy()`` on the result.
    """
    max_val = tf.reduce_max(real_parts)
    min_val = tf.reduce_min(real_parts)
    spread = max_val - min_val

    # EPS in the denominator keeps the division finite even when spread == 0.
    score = 1.0 - (tf.math.reduce_variance(real_parts) / (spread + EPS))

    # No spread means no variance: report maximum stability. tf.where (rather
    # than a Python `if` returning a bare float) keeps the return type a tensor.
    return tf.where(spread < EPS, tf.ones_like(score), score)

def u_metric(unreal_parts):
    """
    Quantifies unreal stability/cohesion from the spread of the unreal parts of pairs.

    The score is ``1 - variance / (range + EPS)`` with ``range = max - min``.
    Higher value implies higher stability; when all inputs are effectively
    identical (range below EPS) the score is exactly 1.

    Args:
        unreal_parts (tf.Tensor): Unreal components to score.

    Returns:
        tf.Tensor: Scalar tensor with the stability score. A tensor is returned
            on every path (including the zero-spread case), so callers can
            uniformly call ``.numpy()`` on the result.
    """
    max_val = tf.reduce_max(unreal_parts)
    min_val = tf.reduce_min(unreal_parts)
    spread = max_val - min_val

    # EPS in the denominator keeps the division finite even when spread == 0.
    score = 1.0 - (tf.math.reduce_variance(unreal_parts) / (spread + EPS))

    # No spread means no variance: report maximum stability. tf.where (rather
    # than a Python `if` returning a bare float) keeps the return type a tensor.
    return tf.where(spread < EPS, tf.ones_like(score), score)

def dv_metric(pairs_q):
    """
    Scores real/unreal consistency of a phase-dual register.

    For every entry the absolute difference between component 0 (real) and
    component 1 (unreal) is divided by the entry's norm along the last axis;
    entries whose norm does not exceed EPS contribute zero divergence.
    The result is one minus the mean of those ratios, so a higher value
    means lower divergence.

    Args:
        pairs_q (tf.Tensor): Register whose last axis holds [real, unreal].

    Returns:
        tf.Tensor: Scalar consistency score.
    """
    re_component = pairs_q[..., 0]
    un_component = pairs_q[..., 1]
    norms = tf.norm(pairs_q, axis=-1)

    # The +EPS keeps the quotient finite; tf.where then zeroes near-null entries.
    relative_gap = tf.abs(re_component - un_component) / (norms + EPS)
    per_entry = tf.where(norms > EPS, relative_gap, tf.zeros_like(norms))

    return 1.0 - tf.reduce_mean(per_entry)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant test on the primaries of one register.

    The invariant holds when the summed norms of the first six rows of
    `pairs_q` (the primaries) differ from `invariants['units']` by less
    than `invariants['tol']`.

    Args:
        pairs_q (tf.Tensor): Pair register; only rows 0..5 are inspected.
        triplets_q: Accepted for interface symmetry; not used here.
        invariants (dict): May provide 'units' (default 1.0) and 'tol' (default EPS).

    Returns:
        tf.Tensor: Scalar boolean tensor, True when the invariant holds.
    """
    target = invariants.get('units', 1.0)
    tolerance = invariants.get('tol', EPS)

    # Total magnitude carried by the six primaries.
    primary_total = tf.reduce_sum(tf.norm(pairs_q[:6, :], axis=-1))

    deviation = tf.abs(primary_total - target)
    return deviation < tolerance

def degenerate_check(primaries_q):
    """
    Conceptual degeneracy test: are all primaries (near) zero?

    Args:
        primaries_q (tf.Tensor): Primaries whose last axis is reduced by a norm.

    Returns:
        tf.Tensor: Scalar boolean tensor, True when every norm is below EPS.
    """
    norms = tf.norm(primaries_q, axis=-1)
    is_tiny = norms < EPS
    return tf.reduce_all(is_tiny)

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derives corrected bits based on a per-index rule and guards.

    Rule: b_i=1 if r_i>TAU_R AND u_i>TAU_U AND dv_i>TAU_D AND trip_mix>0
    AND inv==True AND deg==False else 0.

    Guards applied after the first derivation:
      1. If the bit pattern's binary entropy is not above 0.3, the thresholds
         are adjusted once (R and U scaled by 1.2, D scaled by 0.9 with a floor
         of 0.25) and the bits are re-derived.
      2. If the result is all ones or all zeros, fall back to marking indices
         whose real component exceeds EPS in magnitude and whose triplet has
         mixed signs.

    Runs eagerly: the guards branch in Python on tensor values.

    Args:
        pairs_q (tf.Tensor): Pair register for one qubit; rows are grouped into
            10 consecutive triplets, so exactly 30 rows are expected.
        triplets_q: Forwarded to `invariant_check_conceptual`; not otherwise used.
        invariants (dict): Invariant constants forwarded to the invariant check.
        initial_TAU_R, initial_TAU_U, initial_TAU_D: Starting thresholds.

    Returns:
        tuple: (bits [30] int32 tensor, final TAU_R, final TAU_U, final TAU_D).
    """
    current_TAU_R = initial_TAU_R
    current_TAU_U = initial_TAU_U
    current_TAU_D = initial_TAU_D

    real = pairs_q[:, 0]             # [30]
    unreal = pairs_q[:, 1]           # [30]
    mag = tf.norm(pairs_q, axis=-1)  # Magnitude of each pair_q unit

    # Per-index ratios of component magnitude (or component gap) to total
    # magnitude; near-zero entries score 0.
    # NOTE(review): when the last axis holds exactly [real, unreal],
    # r_i**2 + u_i**2 == 1, so r_i and u_i can both exceed a threshold only if
    # it is below ~0.707. With thresholds such as 0.85 the rule yields all
    # zeros and Guard 2's fallback decides the result - confirm this is intended.
    zeros = tf.zeros_like(mag)
    r_i = tf.where(mag > EPS, tf.abs(real) / mag, zeros)
    u_i = tf.where(mag > EPS, tf.abs(unreal) / mag, zeros)
    dv_i = tf.where(mag > EPS, tf.abs(real - unreal) / mag, zeros)

    # Triplet diversity: a triplet (rows 3k..3k+2) has a sign mix when any of
    # its real-part signs differs from the first one. Computed for all ten
    # triplets at once instead of a Python loop with a host sync per triplet.
    sign_blocks = tf.reshape(tf.sign(real), [10, 3])
    has_mix = tf.reduce_any(tf.not_equal(sign_blocks, sign_blocks[:, :1]), axis=1)  # [10]
    trip_mix = tf.repeat(tf.cast(has_mix, tf.int32), repeats=3)                     # [30]

    # Global checks (scalar booleans broadcast across all indices).
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))  # primaries only

    def derive(tau_r, tau_u, tau_d):
        # The per-index rule from the docstring, evaluated for given thresholds.
        return tf.cast(
            (r_i > tau_r) & (u_i > tau_u) & (dv_i > tau_d)
            & (trip_mix > 0) & invariant_ok & not_degenerate,
            tf.int32,
        )

    def min_entropy_ok(bits):
        # Binary entropy (natural log) of the fraction of set bits; EPS guards log(0).
        p = tf.reduce_mean(tf.cast(bits, tf.float32))
        H = - (p * tf.math.log(p + EPS) + (1.0 - p) * tf.math.log(1.0 - p + EPS))
        return H > 0.3  # Example entropy threshold

    # Initial bit derivation using provided thresholds
    b = derive(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 1: low-entropy pattern -> adjust thresholds once and re-derive.
    if not min_entropy_ok(b):
        current_TAU_R *= 1.2
        current_TAU_U *= 1.2
        current_TAU_D = max(current_TAU_D * 0.9, 0.25)  # Example adjustments
        b = derive(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 2: never return an all-ones or all-zeros decision.
    if tf.reduce_all(b == 1) or tf.reduce_all(b == 0):
        b = tf.cast((tf.abs(real) > EPS) & (trip_mix > 0), tf.int32)

    return b, current_TAU_R, current_TAU_U, current_TAU_D  # Return adjusted thresholds

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Advanced Error Correction hook for a single qubit (q_idx).

    If the incoming bit pattern is deemed 'inconsistent' (fewer than 5 or more
    than 25 ones, which includes the all-zeros and all-ones patterns), the bits
    are re-derived with `derive_bits_advanced` from this qubit's own
    `pairs_q` / `triplets_q`; no other qubit's data is consulted. When a
    correction happens, a new resonance key is computed as the SHA-256 of the
    old key, the marker "REFactorBits" and the corrected bit list, and a
    lineage entry (reason, source, metrics, thresholds, bits, old/new key) is
    appended to `TRACE`.

    Runs eagerly: tensor values are converted to Python values for branching
    and logging.

    Args:
        q_idx (int): The index of the current qubit being processed.
        pairs_q (tf.Tensor): The 30-index phase-dual pair register for the current qubit [30, 2].
        triplets_q (tf.Tensor): The 10 triplets for the current qubit [10, 3, 2].
        current_bits_q (tf.Tensor): The initially derived 30-bit pattern for the current qubit [30].
        resonance_key_q (str): The current resonance key string for the qubit.
        TRACE (list): A list to append lineage information if corrections are made.
        invariants (dict): Dictionary of invariant constants.

    Returns:
        tuple[tf.Tensor, str]:
            - new_bits_q (tf.Tensor): The potentially corrected 30-bit pattern.
            - updated_resonance_key_q (str): The updated resonance key string (with lineage if corrected).
    """
    # Inconsistency heuristics on the count of set bits.
    num_ones = tf.reduce_sum(current_bits_q)
    is_all_ones = tf.reduce_all(tf.equal(current_bits_q, 1))
    is_all_zeros = tf.reduce_all(tf.equal(current_bits_q, 0))
    is_sparse = num_ones < 5   # Example: less than 5 bits are 1
    is_dense = num_ones > 25   # Example: more than 25 bits are 1

    is_inconsistent = bool(tf.reduce_any(tf.stack([is_all_ones, is_all_zeros, is_sparse, is_dense])))

    if not is_inconsistent:
        return current_bits_q, resonance_key_q

    # Re-derive the bits and capture the thresholds that were finally used.
    new_bits_q, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC)
    new_bits_list = new_bits_q.numpy().tolist()

    # The updated key incorporates the correction lineage.
    updated_resonance_key_q = hashlib.sha256(
        (resonance_key_q + "REFactorBits" + str(new_bits_list)).encode("utf-8")).hexdigest()

    # float() accepts either a scalar tensor or a plain Python float, so the
    # logging does not depend on the exact return type of the metric helpers.
    TRACE.append({'qubit': q_idx, 'reason': "binary_refactor", 'source': "tuplets",
                  'r_metric': float(r_metric(pairs_q[:, 0])),
                  'u_metric': float(u_metric(pairs_q[:, 1])),
                  'dv_metric': float(dv_metric(pairs_q)),
                  'invariant_pass': invariant_check_conceptual(pairs_q, triplets_q, invariants).numpy().item(),
                  'degenerate_check': degenerate_check(pairs_q[:6, :]).numpy().item(),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': new_bits_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Number of virtual qubits
Q = 64

# ---- Initial primaries -------------------------------------------------
# Each primary (x, y, z) is a phase-dual [real, unreal] value. We draw Q sets
# of (x, y, z) and then derive their negations.
# NOTE(review): no RNG seed is set in this section (neither TensorFlow nor
# NumPy), so unless one is set earlier the "reproducible" example produces
# different values on every run.

# Random x, y, z components for Q qubits, uniform in [-1, 1).
# Shape [Q, 3, 2] representing (x,y,z) base primaries
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# Construct initial_primaries = [x, -x, y, -y, z, -z]
# Where x, y, z are from base_primaries_xyz and -x is neg_phase_dual(x).
# Each slice is [Q, 2]; tf.newaxis restores the primary axis before concatenation.
initial_primaries = tf.concat([
    base_primaries_xyz[:, 0, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 0, :])[:, tf.newaxis, :], # x, -x
    base_primaries_xyz[:, 1, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 1, :])[:, tf.newaxis, :], # y, -y
    base_primaries_xyz[:, 2, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 2, :])[:, tf.newaxis, :], # z, -z
], axis=1) # Shape [Q, 6, 2]

# ---- Axis maps -----------------------------------------------------------
# axis_maps for each axis ('x', 'y', 'z') should be of shape [Q, K_max, 2]
# where K_max is the maximum K across all qubits and axes.

list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []

max_k_dynamic = 0
min_k_val = 3 # Minimum K
max_k_val = 11 # Upper bound passed to randint (exclusive there, so K is at most 10)

for q_idx in range(Q):
    # Generate a random K for each qubit and for each axis map (for x, y, z separately).
    # np.random.randint excludes the upper bound: K is drawn from min_k_val..max_k_val-1.
    k_x = np.random.randint(min_k_val, max_k_val)
    k_y = np.random.randint(min_k_val, max_k_val)
    k_z = np.random.randint(min_k_val, max_k_val)

    # One [K, 2] map per axis, uniform in [-1, 1).
    list_of_axis_maps_x.append(tf.random.uniform(shape=[k_x, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_y.append(tf.random.uniform(shape=[k_y, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_z.append(tf.random.uniform(shape=[k_z, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))

    # Track the largest K seen so every map can be padded to a common length.
    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Zero-pad every map along its first axis up to max_k_dynamic rows, then stack per axis.
axis_maps = {
    'x': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_x]),
    'y': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_y]),
    'z': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_z]),
}

# k_values: shape [Q, 1], random float32 values in [0.0, 1.0)
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# Define a_U_constant (from NGFT)
a_U_constant = tf.constant(10.0, dtype=tf.float32) # Scalar

# ---- Lineage hashes --------------------------------------------------------
# One SHA-256 hex digest per qubit, salted with a random integer in 0..999.
lineage_hashes = []
for q_idx in range(Q):
    lineage_hashes.append(hashlib.sha256(f"Q{q_idx}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest())

# Sample NECL program (list of operation strings) - NECL[q] = [op(args), ...]
# For this example, all qubits share the same NECL program.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Placeholder parameters for NECL operations (can be expanded).
# Entries for operations not in the program above are simply not looked up.
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32), # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32), # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),  # theta (radians)
    'LIFT': tf.constant(0.5, dtype=tf.float32),   # d (e.g., a scaling factor based on d)
    'GLUE': tf.constant(0.1, dtype=tf.float32),   # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),  # tau
}

# Invariants ν: {units, tol, ordering}
invariants = {
    'units': 1.0,
    'tol': 1e-5, # Tolerance used by the invariant check in error correction
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1 # Threshold for scores in error correction
}

# TRACE (lineage manifest) - list of dictionaries to log events
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν)
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL)
# The shared program used here is TWIST → CURV → PARITY_Q → COLLAPSE_Q → LIFT.
# APPLY_NECL also returns a SHA-256 checksum of the program manifest.
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q])
# In our vectorized setup, we compute for all Q simultaneously.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# We'll re-detect collapse and parity for the final state to generate initial bits for error correction.
final_collapse_mask = detect_collapse(all_pairs) # thresholds come from detect_collapse's defaults
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

corrected_bits_list = []
final_resonance_keys = []

# Loop through each qubit for error correction (if needed) and key generation
for q_idx in range(Q):
    # Extract per-qubit data
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Demonstration only: force an 'inconsistent' state for Qubit 0 so that the
    # correction path is exercised at least once.
    if q_idx == 0:
        # Very sparse pattern: a single '1' followed by 29 zeros
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # Error correction: returns the (possibly re-derived) bits and the (possibly
    # updated) key; correct_bits appends to TRACE when it corrects.
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # The updated_key_q already contains the 'REFactorBits' lineage if correction occurred
    final_resonance_keys.append(updated_key_q)

# Convert corrected_bits_list back to a tensor for subsequent use if needed
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# This step uses the full triplets and axis maps to promote new primaries
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# Already done inside the per-qubit loop above: final_resonance_keys holds the
# keys after potential error correction.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Decoded for every qubit, one hash per call.
# NOTE(review): Q_for_decode_example is assigned but not used in the code below.
Q_for_decode_example = 1 # We decode for 1 qubit per hash call
D_for_decode_example = 16 # D ≥ 16 as per instruction

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    # NOTE(review): this decodes the original lineage_hashes, not the
    # post-correction final_resonance_keys - confirm which one is intended.
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Concatenate the per-qubit results along axis 0
# (presumably yielding [Q, 2, 3] and [Q, D] - depends on decode_lineage_hash's output shapes)
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
# Dump inputs, intermediate registers and outputs of the run to stdout.
print("Primaries In:\n", initial_primaries.numpy())
print("\nPrimaries After NECL:\n", primaries_after_necl.numpy())
# Pairs and triplets are shown for qubit 0 only, as representative intermediate tuplet constructs
print("\nPairs[0]:\n", all_pairs[0].numpy())
print("\nTriplets[0]:\n", all_triplets[0].numpy())
print("\nBits (all qubits):\n", corrected_bits_tensor.numpy()) # Use corrected bits
print("\nPrimaries Out (promoted):\n", primaries_out_promoted.numpy())

# Conceptual Nth identities: {n^1, n^2, n^3, n^p} per qubit
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # Promoted primary at index 0 for the current qubit; it seeds n^1
    promoted_primary_x = primaries_out_promoted[q_idx, 0, :] # Shape [2]

    # Ensure promoted_primary_x is explicitly converted to a Tensor for n_identity
    promoted_primary_x_tensor = tf.convert_to_tensor(promoted_primary_x, dtype=tf.float32)

    # n_identity returns a [1, 2] tensor; [0] selects its single row for display.
    print(f"  Qubit {q_idx}:")
    print(f"    n^0 (base identity): {n_identity(0).numpy()[0]}")
    print(f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}")
    print(f"    n^2 (second-order product): {n_identity(2).numpy()[0]}") # Placeholder
    print(f"    n^p (p-order product): {n_identity('p').numpy()[0]}") # Placeholder

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# Per-qubit manifest checksum (conceptual): SHA-256 over the string form of the
# TRACE entries recorded for that qubit. Qubits without entries all hash the
# string form of an empty list and therefore share one checksum.
necl_manifest_checksums = []
for q_idx in range(Q):
    qubit_trace_entries = [entry for entry in TRACE if entry['qubit'] == q_idx]
    manifest_str = str(qubit_trace_entries)
    checksum = hashlib.sha256(manifest_str.encode('utf-8')).hexdigest()
    necl_manifest_checksums.append(checksum)
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)


Primaries In:
 [[[-0.5303898   0.97767615]
  [ 0.5303898  -0.97767615]
  [-0.14960694 -0.28227234]
  [ 0.14960694  0.28227234]
  [ 0.62253284 -0.69853115]
  [-0.62253284  0.69853115]]

 [[-0.8621762  -0.98005486]
  [ 0.8621762   0.98005486]
  [-0.43148446  0.30608058]
  [ 0.43148446 -0.30608058]
  [ 0.6449883   0.6924815 ]
  [-0.6449883  -0.6924815 ]]

 [[ 0.13599253  0.17977476]
  [-0.13599253 -0.17977476]
  [-0.05618954 -0.18951511]
  [ 0.05618954  0.18951511]
  [-0.34906554 -0.61202264]
  [ 0.34906554  0.61202264]]

 [[-0.2563963  -0.51245975]
  [ 0.2563963   0.51245975]
  [ 0.0891223  -0.83596015]
  [-0.0891223   0.83596015]
  [-0.94762063 -0.7429695 ]
  [ 0.94762063  0.7429695 ]]

 [[-0.41530132 -0.10287023]
  [ 0.41530132  0.10287023]
  [ 0.99464726  0.91437817]
  [-0.99464726 -0.91437817]
  [-0.31025314  0.2120173 ]
  [ 0.31025314 -0.2120173 ]]

 [[ 0.16973257  0.9585984 ]
  [-0.16973257 -0.9585984 ]
  [-0.7913661   0.09617186]
  [ 0.7913661  -0.09617186]
  [ 0.71075034  0.53398

**Reasoning**:
The provided code block already incorporates the requested changes to the `R_FOR_RATIO` constant and the `detect_collapse` function. I will re-execute the code to confirm that the implementation is correct and that all instructions have been met.



In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
# Tolerances and collapse-detection thresholds
THETA_PHIPI = 0.001     # phi-pi tolerance constant
TAU_HI = 1.0            # high threshold center (collapse detection)
TAU_LOW = -TAU_HI       # mirrored low threshold for negative values
EPS = 1e-6              # near-zero buffer

# Ratio threshold used by collapse detection
R_FOR_RATIO = 64.0

# Thresholds for the advanced error-correction metrics
TAU_R_METRIC = 0.85     # real stability
TAU_U_METRIC = 0.85     # unreal stability
TAU_D_METRIC = 0.85     # real/unreal divergence

# Mask over indices 0..29 that is 1 exactly at the prime indices
# 2, 3, 5, 7, 11, 13, 17, 19, 23 and 29.
PRIME_MASK = tf.constant(
    [1 if i in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) else 0 for i in range(30)],
    dtype=tf.int32,
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Adds two phase-dual values component by component.

    The operands are combined with the `+` operator, so for tensors whose
    last dimension is (real, unreal) this gives
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|.
    """
    total = a + b
    return total

def mul_phase_dual_component_wise(a, b):
    """
    Multiplies two phase-dual values component by component.

    The operands are combined with the `*` operator, so for tensors whose
    last dimension is (real, unreal) this gives
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|.
    """
    product = a * b
    return product

def neg_phase_dual(a):
    """
    Negates a phase-dual value component by component.

    Applies unary minus to the operand, flipping the sign of both the real
    and the unreal component when the last dimension is (real, unreal).
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Conceptual Nth identity n^k.

    Args:
        order (int or str): Order of the identity: 0, 1, 2, or any other value
            (e.g. 'p') for the generic placeholder.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) from which
            n^1 is derived. Only consulted when `order` is 1. Defaults to None.

    Returns:
        tf.Tensor: A [1, 2] float32 tensor representing the conceptual identity.
    """
    if order == 1:
        if selector_primary is None:
            # Default first-order selector: unit vector along (1, 1).
            return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)

        # Derive n^1 by scaling the promoted primary to (approximately) unit
        # length; EPS in the denominator guards against a zero-length input.
        length = tf.norm(selector_primary, axis=-1, keepdims=True)
        unit_selector = selector_primary / (length + EPS)
        return tf.reshape(unit_selector, [1, 2])

    # n^0 is the base identity n_|1, ξ|. Order 2 and every other order are
    # currently placeholders that return the same [[1, 0]] value.
    return tf.constant([[1.0, 0.0]], dtype=tf.float32)

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Builds the 30-index phase-dual pair register from 6 primaries per qubit.

    The register starts with the six primaries (x, xi, y, yi, z, zi) and is
    followed by 24 combinatorials: for each axis pairing (x,y), (x,z), (y,z)
    and every choice of primary/negated member on both sides, the
    component-wise sum and then the component-wise product.

    Args:
        prim (tf.Tensor): Primaries of shape [Q, 6, 2], dtype tf.float32.
                          The last dimension holds [real, unreal] components.

    Returns:
        tf.Tensor: Pair register of shape [Q, 30, 2], dtype tf.float32.
    """
    assert (
        prim.shape.rank == 3
        and (tf.shape(prim)[-2] == 6).numpy().item()
        and (tf.shape(prim)[-1] == 2).numpy().item()
        and (prim.dtype == tf.float32)
    ), f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Split the primary axis; every piece has shape [Q, 2].
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)

    register = [x, xi, y, yi, z, zi]
    axis_pairings = (
        ((x, xi), (y, yi)),
        ((x, xi), (z, zi)),
        ((y, yi), (z, zi)),
    )
    # Canonical order: left member outer, right member inner, sum before product.
    for left_members, right_members in axis_pairings:
        for left in left_members:
            for right in right_members:
                register.append(add_phase_dual(left, right))
                register.append(mul_phase_dual_component_wise(left, right))

    return tf.stack(register, axis=-2)  # [Q, 30, 2]

def group_triplets(pairs):
    """
    Splits the 30-index phase-dual pair register into 10 triplets ('Nth Lines').

    Triplet k collects register indices 3k, 3k+1 and 3k+2; the trailing
    [real, unreal] dimension is carried through unchanged.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2], dtype tf.float32.

    Returns:
        tf.Tensor: Triplets of shape [Q, 10, 3, 2], dtype tf.float32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table [[0,1,2],[3,4,5],...,[27,28,29]] of shape [10, 3].
    triplet_index = tf.reshape(tf.range(30, dtype=tf.int32), [10, 3])

    # Gathering along the register axis yields [Q, 10, 3, 2].
    return tf.gather(pairs, triplet_index, axis=1)

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW, r_for_ratio=R_FOR_RATIO):
    """
    Detects collapse across the 10 triplets within the phase-dual pair register.

    An index 'p' triggers collapse when
    [real_p >= tau_hi AND unreal_p <= tau_low] OR [real_p / unreal_p > r_for_ratio].
    If that holds for *any* index of a triplet (register indices 3k..3k+2),
    all three indices of the triplet are marked as collapsed.
    COLL(x, χ) operation.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold for the real component.
        tau_low (float): Low threshold for the unreal component (should be negative).
        r_for_ratio (float): Ratio threshold for collapse detection.

    Returns:
        tf.Tensor: A binary collapse mask of shape [Q, 30] and dtype tf.int32.
                   (collapse is a per-unit binary flag, not phase-dual itself).
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    real_parts = pairs[..., 0]    # [Q, 30]
    unreal_parts = pairs[..., 1]  # [Q, 30]

    # Condition 1: high(real_p) AND low(unreal_p)
    cond1 = tf.logical_and(real_parts >= tau_hi, unreal_parts <= tau_low)

    # Condition 2: ratio(real_p / unreal_p) > r_for_ratio.
    # Where unreal_p is ~0 the ratio is replaced by 0, which makes the
    # condition false there and keeps NaN/inf out of the comparison.
    ratio_term = tf.where(tf.abs(unreal_parts) > EPS, real_parts / unreal_parts, tf.zeros_like(real_parts))
    cond2 = ratio_term > r_for_ratio

    triggered = tf.logical_or(cond1, cond2)  # [Q, 30]

    # The triplets are consecutive, non-overlapping index runs, so the whole
    # register can be evaluated at once: fold to [Q, 10, 3], reduce each
    # triplet with "any", then expand each triplet flag back to its 3 indices.
    num_qubits = tf.shape(pairs)[0]
    triplet_collapsed = tf.reduce_any(tf.reshape(triggered, [num_qubits, 10, 3]), axis=-1)  # [Q, 10]
    collapse_mask = tf.repeat(tf.cast(triplet_collapsed, tf.int32), repeats=3, axis=1)      # [Q, 30]

    return collapse_mask

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    PAR(x, π): negate every register entry that is flagged as prime or collapsed.

    An entry is "affected" when its index is set in `prime_mask` or when it is
    set in `collapse_mask` for that qubit. Affected entries have both their
    real and unreal components multiplied by -1; all others are left as-is.

    Args:
        pairs (tf.Tensor): Phase-dual pair register, shape [Q, 30, 2], dtype tf.float32.
        collapse_mask (tf.Tensor): Per-qubit collapse flags, shape [Q, 30], dtype tf.int32.
        prime_mask (tf.Tensor): Prime-index flags, shape [30], dtype tf.int32.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): Sign-adjusted register, shape [Q, 30, 2], dtype tf.float32.
            - affected (tf.Tensor): 0/1 mask of negated entries, shape [Q, 30], dtype tf.int32.

    Raises:
        AssertionError: If any input fails the shape/dtype checks below.
    """
    # The checks read concrete values via .numpy(), so this runs eagerly only.
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # Replicate the global prime flags for every qubit, then OR with the collapse flags.
    prime_per_qubit = tf.broadcast_to(prime_mask, tf.shape(collapse_mask))  # [Q, 30]
    is_affected = tf.logical_or(prime_per_qubit > 0, collapse_mask > 0)  # [Q, 30] bool
    affected = tf.cast(is_affected, tf.int32)

    # -1 where affected, +1 elsewhere; the trailing axis lets it broadcast over (real, unreal).
    minus_one = tf.constant(-1.0, dtype=tf.float32)
    plus_one = tf.constant(1.0, dtype=tf.float32)
    sign_per_entry = tf.expand_dims(tf.where(is_affected, minus_one, plus_one), axis=-1)  # [Q, 30, 1]

    return pairs * sign_per_entry, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Reduce a phase-dual pair register to one bit per entry.

    Only the real (leading) component is inspected: the bit is 1 when it is
    strictly greater than `eps` and 0 otherwise, so negative values and values
    within the near-zero buffer both map to 0.

    Args:
        rotated_pairs (tf.Tensor): Pair register, shape [Q, 30, 2], dtype tf.float32.
        eps (float): Threshold the real component must exceed to yield a 1 bit.

    Returns:
        tf.Tensor: Bitmap of shape [Q, 30], dtype tf.int32.

    Raises:
        AssertionError: If `rotated_pairs` fails the shape/dtype check.
    """
    assert (
        rotated_pairs.shape.rank == 3
        and (tf.shape(rotated_pairs)[-2] == 30).numpy().item()
        and (tf.shape(rotated_pairs)[-1] == 2).numpy().item()
        and (rotated_pairs.dtype == tf.float32)
    ), f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    leading_component = rotated_pairs[..., 0]  # [Q, 30]
    return tf.cast(tf.greater(leading_component, eps), tf.int32)

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flag which candidate phase-dual values are distinct from every observed value.

    A candidate counts as unique when the Euclidean distance (over the
    real/unreal pair) between it and each of the K observed values on the axis
    is strictly greater than `theta`.

    Args:
        vals (tf.Tensor): Candidates, shape [Q, 2] (one per qubit) or
            [Q, 10, 2] (ten per qubit), dtype tf.float32.
        axis_vals (tf.Tensor): Observed values to compare against, shape [Q, K, 2],
            dtype tf.float32.
        theta (float): Distance tolerance.

    Returns:
        tf.Tensor: int32 0/1 flags, shape [Q] for rank-2 `vals` or [Q, 10] for rank-3 `vals`.

    Raises:
        AssertionError: On dtype, rank, or batch-size mismatch.
        ValueError: If `vals` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert (
        axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item()
    ), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (
        (tf.shape(vals)[0] == tf.shape(axis_vals)[0]).numpy().item()
    ), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    # Insert singleton axes so candidates and observations broadcast against each other.
    vals_rank = vals.shape.rank
    if vals_rank == 2:
        candidates = tf.expand_dims(vals, axis=1)  # [Q, 1, 2]
        observed = axis_vals  # [Q, K, 2]
    elif vals_rank == 3:
        candidates = tf.expand_dims(vals, axis=2)  # [Q, 10, 1, 2]
        observed = tf.expand_dims(axis_vals, axis=1)  # [Q, 1, K, 2]
    else:
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    # Distance from each candidate to each observed value: [Q, K] or [Q, 10, K].
    distances = tf.norm(tf.abs(candidates - observed), axis=-1)

    # A candidate is unique only if it clears the tolerance against all K observations.
    clears_all = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(clears_all, tf.int32)

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Pick, per qubit, the first candidate whose uniqueness flag is set.

    For each row of `cand_bool` the lowest index holding a value > 0 is located
    and the matching phase-dual value is taken from `vals`. If a row has no
    flag set, candidate 0 is returned for that row.

    The index is computed explicitly (minimum over flagged positions) rather
    than with `tf.argmax`, because `tf.argmax` does not guarantee which index
    it returns when several entries tie for the maximum. For 0/1 flags the
    result equals "first maximal index".

    Args:
        cand_bool (tf.Tensor): 0/1 uniqueness flags, shape [Q, 10], dtype tf.int32.
        vals (tf.Tensor): Candidate phase-dual values, shape [Q, 10, 2], dtype tf.float32.

    Returns:
        tf.Tensor: Selected phase-dual values, shape [Q, 2].

    Raises:
        AssertionError: If the inputs fail the shape/dtype/batch checks.
    """
    assert cand_bool.shape.rank == 2 and (tf.shape(cand_bool)[-1] == 10).numpy().item() and (cand_bool.dtype == tf.int32), \
        f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert vals.shape.rank == 3 and (tf.shape(vals)[-2] == 10).numpy().item() and (tf.shape(vals)[-1] == 2).numpy().item() and (vals.dtype == tf.float32), \
        f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    candidate_count = 10  # fixed by the shape assertions above
    positions = tf.range(candidate_count, dtype=tf.int64)  # [10]
    sentinel = tf.constant(candidate_count, dtype=tf.int64)

    # Unflagged slots get the out-of-range sentinel, so the row minimum is the
    # first flagged position (or the sentinel when nothing is flagged).
    first_flagged = tf.reduce_min(tf.where(cand_bool > 0, positions, sentinel), axis=1)  # [Q]

    # No flag set in a row -> fall back to candidate 0.
    idx = tf.where(first_flagged < sentinel, first_flagged, tf.zeros_like(first_flagged))  # [Q]

    # Pair each row number with its chosen candidate index and gather [Q, 2] from [Q, 10, 2].
    row_numbers = tf.range(tf.shape(vals)[0], dtype=tf.int64)
    batch_indices = tf.stack([row_numbers, idx], axis=1)  # [Q, 2]
    return tf.gather_nd(vals, batch_indices)

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Build the six promoted primaries (x, -x, y, -y, z, -z) for each qubit.

    Two candidate results are computed and one is chosen per qubit:

    * Triplet-first: the last of the ten triplets supplies x, y, z. It is used
      when all three of its components are unique against their axis maps.
    * Axis fallback: for each axis independently, the first of the ten
      candidates that is unique against that axis map is used (candidate 0 if
      none is unique).

    Args:
        triplets (tf.Tensor): Ten triplets per qubit, shape [Q, 10, 3, 2], dtype tf.float32.
        axis_maps (dict): Keys 'x', 'y', 'z'; each value a tf.Tensor of observed
            values for that axis, shape [Q, K, 2], dtype tf.float32.
        theta (float): Uniqueness tolerance passed to the axis checks.

    Returns:
        tf.Tensor: Promoted primaries, shape [Q, 6, 2], dtype tf.float32.

    Raises:
        AssertionError: If `triplets` or any axis map fails the shape/dtype checks.
    """
    assert (
        triplets.shape.rank == 4
        and (tf.shape(triplets)[-3] == 10).numpy().item()
        and (tf.shape(triplets)[-2] == 3).numpy().item()
        and (tf.shape(triplets)[-1] == 2).numpy().item()
    ), f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert triplets.dtype == tf.float32, \
        f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for axis_name, observed in axis_maps.items():
        assert (
            isinstance(observed, tf.Tensor)
            and observed.dtype == tf.float32
            and observed.shape.rank == 3
            and (tf.shape(observed)[-1] == 2).numpy().item()
        ), f"axis_maps['{axis_name}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {observed.shape} and dtype {observed.dtype}"
    assert (
        (tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]).numpy().item()
    ), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')

    def _with_conjugates(x_val, y_val, z_val):
        # Interleave each axis value with its negation: [Q, 6, 2].
        return tf.stack(
            [x_val, neg_phase_dual(x_val), y_val, neg_phase_dual(y_val), z_val, neg_phase_dual(z_val)],
            axis=1,
        )

    # --- Triplet-first path: components of the final triplet, each [Q, 2]. ---
    final_triplet = triplets[:, -1, :, :]  # [Q, 3, 2]
    final_components = [final_triplet[:, pos, :] for pos in range(3)]
    final_flags = [
        _value_unique_axis_phase_dual(component, axis_maps[name], theta)  # [Q]
        for name, component in zip(axis_names, final_components)
    ]
    # The triplet qualifies only when x, y and z are all unique.
    triplet_is_unique = tf.logical_and(
        tf.logical_and(final_flags[0] > 0, final_flags[1] > 0),
        final_flags[2] > 0,
    )  # [Q] bool
    prim_trip = _with_conjugates(*final_components)

    # --- Axis-fallback path: per axis, first unique among the ten candidates. ---
    axis_candidates = [triplets[:, :, pos, :] for pos in range(3)]  # each [Q, 10, 2]
    candidate_flags = [
        _value_unique_axis_phase_dual(candidates, axis_maps[name], theta)  # [Q, 10]
        for name, candidates in zip(axis_names, axis_candidates)
    ]
    fallback_components = [
        _first_unique_selection_phase_dual(flags, candidates)  # [Q, 2]
        for flags, candidates in zip(candidate_flags, axis_candidates)
    ]
    prim_axis = _with_conjugates(*fallback_components)

    # Per-qubit switch, shaped [Q, 1, 1] so it broadcasts over the [Q, 6, 2] results.
    use_triplet = triplet_is_unique[:, tf.newaxis, tf.newaxis]
    return tf.where(use_triplet, prim_trip, prim_axis)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Produce one SHA-256 hex digest ("resonance key") per batch sample.

    The tensors are materialized to NumPy and, for each sample, a text manifest
    of the form ``bits:[...]|prime:[...]|collapse:[...]|parity:[...]`` is built.
    When a non-empty lineage string is supplied for the sample, ``|path:<lineage>``
    is appended. The UTF-8 encoding of the manifest is then hashed.

    Args:
        bits (tf.Tensor): Bitmap, shape [Q, 30], dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask, shape [30], dtype tf.int32.
        collapse_mask (tf.Tensor): Collapse mask, shape [Q, 30], dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask, shape [Q, 30], dtype tf.int32.
        lineage_list (list[str], optional): One lineage string per sample.
            Falsy entries (and a falsy list) add nothing to the manifest.

    Returns:
        list[str]: SHA-256 hex digests, one per batch sample, in batch order.

    Raises:
        AssertionError: If any tensor fails its shape/dtype check, or if
            `lineage_list` is given but is not a list of length Q.
    """
    assert (
        bits.shape.rank == 2
        and (tf.shape(bits)[-1] == 30).numpy().item()
        and (bits.dtype == tf.int32)
    ), f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        parity_mask.shape.rank == 2
        and (tf.shape(parity_mask)[-1] == 30).numpy().item()
        and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (parity_mask.dtype == tf.int32)
    ), f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (
        (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item())
        and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item())
    ), f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert (
            isinstance(lineage_list, list)
            and len(lineage_list) == tf.shape(bits)[0].numpy().item()
        ), f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    batch_size = tf.shape(bits)[0].numpy().item()

    # Leave TensorFlow behind: hashing is done on plain NumPy/Python values.
    bit_rows = bits.numpy()
    prime_row = prime_mask.numpy()
    collapse_rows = collapse_mask.numpy()
    parity_rows = parity_mask.numpy()

    # Every sample sees the same global prime mask.
    prime_rows = np.broadcast_to(prime_row, (batch_size, 30))

    digests = []
    per_sample = zip(bit_rows, prime_rows, collapse_rows, parity_rows)
    for sample_idx, (bit_row, prime_vals, collapse_row, parity_row) in enumerate(per_sample):
        fields = [
            f"bits:{bit_row.tolist()}",
            f"prime:{prime_vals.tolist()}",
            f"collapse:{collapse_row.tolist()}",
            f"parity:{parity_row.tolist()}",
        ]
        # Lineage is optional both at list level and per entry.
        if lineage_list and lineage_list[sample_idx]:
            fields.append(f"path:{lineage_list[sample_idx]}")

        manifest = "|".join(fields)
        digests.append(hashlib.sha256(manifest.encode("utf-8")).hexdigest())
    return digests

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    Compute the per-qubit info-energy  E_info = (k + 1) · I · a_U.

    `I` is the sum, over the six primaries of a qubit, of the Euclidean
    magnitude of each (real, unreal) pair.

    Args:
        primaries_out (tf.Tensor): Promoted primaries, shape [Q, 6, 2], dtype tf.float32.
        k_values (tf.Tensor): Per-qubit `k`, shape [Q, 1] or [Q], dtype tf.float32.
        a_U_constant (tf.Tensor): Scalar tf.float32 constant.

    Returns:
        tf.Tensor: Info-energy per qubit, shape [Q], dtype tf.float32.

    Raises:
        AssertionError: If any argument fails the shape/dtype checks.
    """
    assert (
        primaries_out.shape.rank == 3 and (tf.shape(primaries_out)[-1] == 2).numpy().item()
    ), f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert (primaries_out.dtype == tf.float32), f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert (tf.shape(primaries_out)[-2] == 6).numpy().item(), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert (k_values.dtype == tf.float32), f"k_values must have dtype tf.float32, but got {k_values.dtype}"
    assert (
        ((tf.rank(k_values) == 2).numpy().item() and (tf.shape(k_values)[-1] == 1).numpy().item())
        or ((tf.rank(k_values) == 1).numpy().item() and (tf.shape(k_values)[0] == tf.shape(primaries_out)[0]).numpy().item())
    ), f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"
    assert (a_U_constant.dtype == tf.float32), f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert (tf.rank(a_U_constant) == 0).numpy().item(), f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # Bring k to column form [Q, 1] regardless of which accepted shape was passed.
    k_is_flat = (tf.rank(k_values) == 1).numpy().item()
    k_column = tf.expand_dims(k_values, axis=-1) if k_is_flat else k_values

    # I: magnitude of each primary ([Q, 6]) summed over the six primaries, kept as [Q, 1].
    unit_magnitudes = tf.norm(primaries_out, axis=-1)
    total_magnitude = tf.reduce_sum(unit_magnitudes, axis=1, keepdims=True)

    energy_column = (k_column + 1.0) * total_magnitude * a_U_constant  # [Q, 1]
    return tf.squeeze(energy_column, axis=1)  # [Q]

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL CURV: damp each primary by its own magnitude.

    X ← X / (1 + |kappa|·|X|), where |X| is the Euclidean norm of the
    (real, unreal) pair, so both components of a unit share one divisor.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): Kappa; scalar or broadcastable against [Q, 6, 1].
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    kappa_abs = tf.abs(tf.cast(params_kappa, primaries.dtype))
    # keepdims so the per-unit magnitude broadcasts back over (real, unreal).
    unit_magnitude = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    divisor = 1.0 + kappa_abs * unit_magnitude
    return primaries / divisor

def GEOD(primaries, params_t):
    """
    NECL GEOD: shift every component by `t` in the direction of its own sign.

    X ← X + t·sign(X). Components that are exactly zero have sign 0 and are
    therefore left unchanged.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): Step size `t`; scalar or broadcastable to `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    step = tf.cast(params_t, primaries.dtype)
    direction = tf.sign(primaries)
    return primaries + step * direction

def TWIST(primaries, params_theta):
    """
    NECL TWIST: scale the unreal component by cos(theta); keep the real one.

    X[...,1] ← X[...,1]·cos(theta)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): Angle `theta` (passed to tf.cos); scalar or
            broadcastable against the [Q, 6] unreal slice.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    angle = tf.cast(params_theta, primaries.dtype)
    real_component = primaries[..., 0]
    unreal_component = primaries[..., 1] * tf.cos(angle)
    # Re-pair the untouched real part with the scaled unreal part.
    return tf.stack([real_component, unreal_component], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL LIFT: uniform rescale standing in for a projection to
    higher coordinates.

    In this emulation the operation is simply X ← X·(1 + 0.1·d).

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_d (tf.Tensor): Scalar parameter `d`; cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    d_value = tf.cast(params_d, primaries.dtype)
    # Placeholder gain: each unit of d adds 10% to the scale.
    gain = 1.0 + d_value * 0.1
    return primaries * gain

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL GLUE: mix each primary with its cyclic predecessor.

    X ← X + sigma·roll(X, +1) where the roll is along axis 1 (the six
    selectors), so slot i receives a contribution from slot i-1 and slot 0
    from the last slot.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_sigma (tf.Tensor): Gluing strength; cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    neighbour = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * neighbour

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL SPLIT: divide every primary into two weighted parts.

    X ← concat(X·(1−tau), X·tau) along axis 1, so the selector dimension of
    the result is twice that of the input.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_tau (tf.Tensor): Split ratio `tau`; cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 12, 2] (doubles k dimension).
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    retained_share = primaries * (1.0 - ratio)
    split_off_share = primaries * ratio
    # Note the output is wider than the input along the selector axis.
    return tf.concat([retained_share, split_off_share], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically derive a conceptual spin vector and internal state vector
    for ONE qubit from its lineage hash.

    The hash is not parsed byte-by-byte. Instead, SHA-256 of
    ``"<hex_hash_str>-<q_idx>"`` seeds a pseudo-random generator, and all
    values are drawn from it:

    1. Three uniform draws give the angles θ, φ and a twist (each scaled by 2π).
    2. Real spin is (sinθ·cosφ, sinθ·sinφ, cosθ).
    3. Unreal spin is the real spin with its (x, y) part rotated about z by the
       twist angle; z is unchanged.
    4. `i_vec` is D further uniform draws, scaled to unit Euclidean norm when
       its norm exceeds EPS.

    A private ``np.random.RandomState`` is used so that calling this function
    does not reseed NumPy's process-wide generator. The values drawn are the
    same as those produced by seeding the global generator with the same seed.

    Args:
        hex_hash_str (str): A 64-character SHA256 hex hash string for one qubit.
        q_idx (int): The index of the qubit; mixed into the seed.
        D (int): Dimensionality for i_vec; must be at least 16.
        num_qubits (int): Accepted for interface compatibility; not used here.
        invariants (dict): Accepted for interface compatibility; not used here.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec (tf.Tensor): Shape [1, 2, 3], dtype tf.float32; row 0 is
              the real spin, row 1 the unreal spin.
            - i_vec (tf.Tensor): Shape [1, D], dtype tf.float32.

    Raises:
        AssertionError: If `hex_hash_str` is not a 64-character string or D < 16.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Per-qubit seed: first 16 hex digits of SHA-256(hash + index), folded into
    # the range accepted by NumPy's legacy seeding.
    seed_digest = hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()
    seed_value = int(seed_digest[:16], 16)
    rng = np.random.RandomState(seed_value % (2**32 - 1))

    # θ = 2π·r0, φ = 2π·r1, twist = 2π·r2
    r0, r1, r2 = rng.rand(3)
    theta = 2 * math.pi * r0
    phi = 2 * math.pi * r1
    twist_angle = 2 * math.pi * r2

    # Real spin from spherical angles.
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: 2D rotation of (x, y) by the twist; z is the rotation axis.
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # Internal state: D draws, normalized to unit length unless degenerate.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:  # avoid dividing by (near) zero
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): rescale each qubit's primaries by their combined magnitude.

    For every qubit, the Euclidean magnitudes of its primary units are summed
    and all components are divided by that total (plus EPS), then multiplied by
    ``invariants['units']`` (1.0 when the key is absent). Qubits whose total
    magnitude does not exceed EPS are set to zero.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        invariants (dict): Invariant constants; only the optional 'units' entry is read.
    Returns:
        tf.Tensor: Normalized primaries of shape [Q, 6, 2].
    """
    unit_magnitudes = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    qubit_totals = tf.reduce_sum(unit_magnitudes, axis=1, keepdims=True)  # [Q, 1, 1]

    target_scale = tf.cast(invariants.get('units', 1.0), primaries.dtype)
    # The scale doubles as a gate: it is 0 for qubits with (near-)zero total magnitude.
    gated_scale = tf.where(qubit_totals > EPS, target_scale, 0.0)

    # EPS in the divisor keeps the division finite even where the gate is 0.
    return primaries / (qubit_totals + EPS) * gated_scale

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π) for multi-qubit primaries, via apply_parity_rotation.

    The pair register and its collapse mask are derived from `primaries`, the
    parity rotation is applied to the register, and the first six register
    entries are returned as the updated primaries.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask [30].
    Returns:
        tf.Tensor: Primaries updated based on parity rotation [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)
    collapsed = detect_collapse(pair_register)
    rotated_register, _affected = apply_parity_rotation(pair_register, collapsed, prime_mask)
    # Register is [Q, 30, 2]; entries 0..5 are taken to be the primaries
    # themselves (relies on compute_pairs' layout — verify there).
    return rotated_register[:, :6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ) for multi-qubit primaries, via detect_collapse.

    Only the primary units whose collapse flag is set are zeroed (both real and
    unreal components); every other unit is passed through untouched.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
    Returns:
        tf.Tensor: Primaries updated based on collapse detection [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)
    collapse_flags = detect_collapse(pair_register)  # [Q, 30]

    # Keep the flags for register entries 0..5 (the slots used for the
    # primaries) and add a trailing axis so one flag covers both components.
    unit_collapsed = tf.expand_dims(collapse_flags[:, 0:6], axis=-1) > 0  # [Q, 6, 1] bool

    return tf.where(unit_collapsed, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit entry point that delegates to promote_primaries.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2].
        axis_maps (dict): Per-axis observed values used for the uniqueness checks.
        theta_phipi (float): Uniqueness tolerance, forwarded as `theta`.
    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2].
    """
    promoted = promote_primaries(triplets, axis_maps, theta=theta_phipi)
    return promoted

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Run a NECL program (a list of operation names) over multi-qubit primaries
    and return the result together with a checksum of the program.

    Supported operations and their default parameter when `params_dict` has no
    entry for them:

    * 'CURV' (0.01), 'GEOD' (0.05), 'TWIST' (π/4), 'LIFT' (0.5): applied directly.
    * 'GLUE' (0.1): applied only when Q is even; GLUE is called with the
      primaries rolled by one along the qubit axis, times the parameter, as its
      strength. For odd Q a warning is printed and the state is unchanged.
    * 'SPLIT' (0.5): recorded in the checksum but leaves the state unchanged, so
      the selector dimension stays constant through the pipeline.
    * 'PARITY_Q', 'COLLAPSE_Q': applied; they take no parameter.

    Any other name prints a warning and is skipped, but still contributes its
    name to the checksum.

    The checksum is SHA-256 over the concatenation, in program order, of each
    op name followed by "(<param>)" for parameterized ops.

    Parameters may be scalar tensors or plain Python/NumPy numbers; plain
    numbers are converted to tf.float32 scalars before use.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): NECL operation names, applied in order.
        params_dict (dict): Maps op name to its scalar parameter.
        prime_mask (tf.Tensor): Global prime mask, forwarded to PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Accepted for interface
            compatibility; no operation currently reads it.

    Returns:
        tuple[tf.Tensor, str]:
            - Final primaries after applying the program.
            - SHA-256 hex checksum of the applied program.
    """
    def _scalar_param(name, default):
        # Tensors pass through untouched; bare numbers become float32 scalars so
        # the `.numpy().item()` used for the manifest works for both.
        value = params_dict.get(name, default)
        return value if tf.is_tensor(value) else tf.constant(value, dtype=tf.float32)

    # Ops that are a plain "state = OP(state, param)" step, with their defaults.
    direct_ops = {
        'CURV': (CURV, 0.01),
        'GEOD': (GEOD, 0.05),
        'TWIST': (TWIST, math.pi / 4),  # radians
        'LIFT': (LIFT, 0.5),
    }

    current_primaries = primaries
    Q = tf.shape(primaries)[0].numpy().item()

    # Pieces of the program manifest; joined once at the end for the checksum.
    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name in direct_ops:
            op_fn, default_value = direct_ops[op_name]
            op_params = _scalar_param(op_name, default_value)
            current_primaries = op_fn(current_primaries, op_params)
            manifest_parts.append(f"({op_params.numpy().item()})")
        elif op_name == 'GLUE':
            op_params = _scalar_param('GLUE', 0.1)
            if Q % 2 != 0:
                print(f"Warning: GLUE operation skipped for odd Q ({Q})")
            else:
                # Strength is the neighbouring qubit's state (roll along Q) scaled
                # by the parameter, giving a conceptual cross-qubit interaction.
                current_primaries = GLUE(current_primaries, tf.roll(current_primaries, shift=1, axis=0) * op_params)
            manifest_parts.append(f"({op_params.numpy().item()})")
        elif op_name == 'SPLIT':
            # Deliberately a no-op on the state: a real split would double the
            # selector dimension, which the main pipeline keeps constant.
            op_params = _scalar_param('SPLIT', 0.5)
            manifest_parts.append(f"({op_params.numpy().item()})")
        elif op_name == 'PARITY_Q':
            current_primaries = PARITY_Q(current_primaries, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            current_primaries = COLLAPSE_Q(current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    necl_checksum = hashlib.sha256("".join(manifest_parts).encode('utf-8')).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Quantifies real stability/cohesion from the spread of the real parts of pairs.

    The score is ``1 - variance / (range + EPS)`` with ``range = max - min``.
    Higher values mean lower variance (higher stability). When all values are
    effectively identical (range below EPS) the score is exactly 1.

    Args:
        real_parts (tf.Tensor): Floating-point tensor of real components.

    Returns:
        tf.Tensor: Scalar stability score. A tensor is returned on every path
        (including the degenerate one) so callers can uniformly use ``.numpy()``.
    """
    spread = tf.reduce_max(real_parts) - tf.reduce_min(real_parts)

    # NOTE(review): the variance is divided by the range, not the squared range,
    # so the score is not scale-invariant and can drop below 0 for wide ranges.
    score = 1.0 - tf.math.reduce_variance(real_parts) / (spread + EPS)

    # No spread means no variance: report maximum stability. tf.where keeps the
    # return type a tensor and avoids branching on a tensor value in Python.
    return tf.where(spread < EPS, tf.ones_like(score), score)

def u_metric(unreal_parts):
    """
    Quantifies unreal stability/cohesion from the spread of the unreal parts of pairs.

    The score is ``1 - variance / (range + EPS)`` with ``range = max - min``.
    Higher values mean lower variance (higher stability). When all values are
    effectively identical (range below EPS) the score is exactly 1.

    Args:
        unreal_parts (tf.Tensor): Floating-point tensor of unreal components.

    Returns:
        tf.Tensor: Scalar stability score. A tensor is returned on every path
        (including the degenerate one) so callers can uniformly use ``.numpy()``.
    """
    spread = tf.reduce_max(unreal_parts) - tf.reduce_min(unreal_parts)

    # NOTE(review): the variance is divided by the range, not the squared range,
    # so the score is not scale-invariant and can drop below 0 for wide ranges.
    score = 1.0 - tf.math.reduce_variance(unreal_parts) / (spread + EPS)

    # No spread means no variance: report maximum stability. tf.where keeps the
    # return type a tensor and avoids branching on a tensor value in Python.
    return tf.where(spread < EPS, tf.ones_like(score), score)

def dv_metric(pairs_q):
    """
    Scores how consistent the real and unreal components of each pair are.

    For every pair the absolute gap |real - unreal| is divided by the pair's
    Euclidean magnitude; pairs whose magnitude does not exceed EPS contribute
    zero divergence. The result is one minus the mean of those relative gaps,
    so a higher value means lower divergence (higher consistency).

    Args:
        pairs_q (tf.Tensor): Tensor whose last axis holds [real, unreal].

    Returns:
        tf.Tensor: Scalar consistency score.
    """
    pair_norm = tf.norm(pairs_q, axis=-1)
    component_gap = tf.abs(pairs_q[..., 0] - pairs_q[..., 1])

    # Near-zero pairs are treated as non-divergent instead of dividing by ~0.
    relative_gap = tf.where(
        pair_norm > EPS,
        component_gap / (pair_norm + EPS),
        tf.zeros_like(pair_norm),
    )
    return 1.0 - tf.reduce_mean(relative_gap)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant test on the six primaries of a single qubit.

    The invariant used here: the summed Euclidean magnitudes of the first six
    pairs (the primaries) must lie within ``invariants['tol']`` (default EPS)
    of ``invariants['units']`` (default 1.0).

    Args:
        pairs_q (tf.Tensor): Pair register; rows 0..5 are read as the primaries.
        triplets_q: Accepted for interface symmetry; not used by this check.
        invariants (dict): May provide 'units' and 'tol'.

    Returns:
        tf.Tensor: Scalar boolean, True when the invariant holds.
    """
    primary_block = pairs_q[:6, :]
    total_magnitude = tf.reduce_sum(tf.norm(primary_block, axis=-1))
    target_units = invariants.get('units', 1.0)
    deviation = tf.abs(total_magnitude - target_units)
    return deviation < invariants.get('tol', EPS)

def degenerate_check(primaries_q):
    """
    Conceptual degeneracy test: are all primaries (near-)zero?

    Args:
        primaries_q (tf.Tensor): Tensor whose last axis holds [real, unreal].

    Returns:
        tf.Tensor: Scalar boolean, True when every entry's Euclidean magnitude
        along the last axis is below EPS.
    """
    magnitudes = tf.norm(primaries_q, axis=-1)
    is_negligible = magnitudes < EPS
    return tf.reduce_all(is_negligible)

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derives corrected bits based on a per-index rule and two guards.

    Rule: b_i = 1 if r_i > TAU_R AND u_i > TAU_U AND dv_i > TAU_D AND trip_mix_i > 0
    AND invariant holds AND primaries are not degenerate, else 0, where
      r_i  = |real_i| / |pair_i|,
      u_i  = |unreal_i| / |pair_i|,
      dv_i = |real_i - unreal_i| / |pair_i|
    (all zero when |pair_i| <= EPS) and trip_mix_i is 1 when the real parts of the
    triplet containing index i do not all share the same sign.

    Guard 1: if the binary entropy of the bit pattern is not above 0.3, TAU_R and
    TAU_U are multiplied by 1.2, TAU_D is lowered to max(0.9 * TAU_D, 0.25), and
    the bits are re-derived.
    Guard 2: if the pattern is all ones or all zeros, fall back to
    b_i = (|real_i| > EPS) AND trip_mix_i > 0. The fallback itself is not
    re-checked, so it can still be uniform.

    NOTE(review): for non-zero pairs r_i**2 + u_i**2 == 1, so r_i and u_i cannot
    both exceed a threshold above ~0.707. With thresholds of 0.85 the primary rule
    therefore yields all zeros and the Guard 2 fallback decides the bits.

    This function branches on tensor values in Python and therefore expects
    eager execution.

    Args:
        pairs_q (tf.Tensor): Pair register of one qubit, shape [30, 2].
        triplets_q (tf.Tensor): Triplets of the qubit; only forwarded to
            invariant_check_conceptual.
        invariants (dict): Invariant constants for invariant_check_conceptual.
        initial_TAU_R, initial_TAU_U, initial_TAU_D (float): Starting thresholds.

    Returns:
        tuple: (bits, TAU_R, TAU_U, TAU_D) where bits is an int32 tensor of
        shape [30] and the thresholds are the (possibly adjusted) values used.
    """
    current_TAU_R = initial_TAU_R
    current_TAU_U = initial_TAU_U
    current_TAU_D = initial_TAU_D

    real = pairs_q[:, 0]     # [30]
    unreal = pairs_q[:, 1]   # [30]
    mag = tf.norm(pairs_q, axis=-1)  # [30] magnitude of each pair
    has_magnitude = mag > EPS
    zeros = tf.zeros_like(mag)

    # Per-index ratios; near-zero pairs score 0 instead of dividing by ~0.
    r_i = tf.where(has_magnitude, tf.abs(real) / mag, zeros)
    u_i = tf.where(has_magnitude, tf.abs(unreal) / mag, zeros)
    dv_i = tf.where(has_magnitude, tf.abs(real - unreal) / mag, zeros)

    # Triplet diversity, vectorised: triplets are the consecutive index blocks
    # [0,1,2], [3,4,5], ..., [27,28,29], so a reshape groups them directly.
    signs_by_triplet = tf.reshape(tf.sign(real), [10, 3])
    has_mix = tf.reduce_any(
        tf.not_equal(signs_by_triplet, signs_by_triplet[:, :1]), axis=1
    )  # [10]
    # Broadcast each triplet's flag back onto its three member indices.
    trip_mix = tf.repeat(tf.cast(has_mix, tf.int32), repeats=3)  # [30]

    # Global invariant checks (scalar booleans, broadcast over all indices).
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))  # primaries only
    threshold_free_gate = (trip_mix > 0) & invariant_ok & not_degenerate

    def derive(tau_r, tau_u, tau_d):
        # Single definition of the per-index rule, reused after threshold adjustment.
        passes = (r_i > tau_r) & (u_i > tau_u) & (dv_i > tau_d) & threshold_free_gate
        return tf.cast(passes, tf.int32)

    def min_entropy_ok(bits):
        # Binary entropy (natural log) of the fraction of set bits.
        p = tf.reduce_mean(tf.cast(bits, tf.float32))
        H = - (p * tf.math.log(p + EPS) + (1.0 - p) * tf.math.log(1.0 - p + EPS))
        return H > 0.3  # Example entropy threshold

    # Initial bit derivation using provided thresholds
    b = derive(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 1: low-entropy pattern -> adjust thresholds and derive again.
    if not min_entropy_ok(b):
        current_TAU_R *= 1.2
        current_TAU_U *= 1.2
        current_TAU_D = max(current_TAU_D * 0.9, 0.25)  # Example adjustments
        b = derive(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 2: uniform pattern -> fall back to a magnitude/sign-mix rule.
    if tf.reduce_all(b == 1) or tf.reduce_all(b == 0):
        b = tf.cast((tf.abs(real) > EPS) & (trip_mix > 0), tf.int32)

    return b, current_TAU_R, current_TAU_U, current_TAU_D  # Return adjusted thresholds

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Advanced Error Correction hook for a single qubit (q_idx). This function performs a local
    re-evaluation of the bit pattern for the current qubit if the initial derivation
    is deemed 'inconsistent'.

    A pattern is inconsistent when it is all ones, all zeros, has fewer than 5 ones,
    or has more than 25 ones. In that case:
    - The bits are re-derived by `derive_bits_advanced` from the `pairs_q` and `triplets_q`
      supplied for this qubit only; no other qubit's data is consulted.
    - A new resonance key is computed as SHA-256 of the old key, the marker
      "REFactorBits" and the corrected bit list.
    - A lineage entry (reason, source, metrics, adjusted thresholds, corrected bits,
      old and new key) is appended to `TRACE`.
    Otherwise the inputs are returned unchanged and `TRACE` is not modified.

    Expects eager execution (tensor values are converted to Python scalars).

    Args:
        q_idx (int): The index of the current qubit being processed.
        pairs_q (tf.Tensor): The 30-index phase-dual pair register for the current qubit [30, 2].
        triplets_q (tf.Tensor): The 10 triplets for the current qubit [10, 3, 2].
        current_bits_q (tf.Tensor): The initially derived 30-bit pattern for the current qubit [30].
        resonance_key_q (str): The current resonance key string for the qubit.
        TRACE (list): A list to append lineage information if corrections are made.
        invariants (dict): Dictionary of invariant constants.

    Returns:
        tuple[tf.Tensor, str]:
            - new_bits_q (tf.Tensor): The potentially corrected 30-bit pattern.
            - updated_resonance_key_q (str): The updated resonance key string (with lineage if corrected).
    """
    num_ones = tf.reduce_sum(current_bits_q)
    inconsistency_flags = tf.stack([
        tf.reduce_all(tf.equal(current_bits_q, 1)),  # all ones
        tf.reduce_all(tf.equal(current_bits_q, 0)),  # all zeros
        num_ones < 5,                                # Example: less than 5 bits are 1
        num_ones > 25,                               # Example: more than 25 bits are 1
    ])
    # Combine with a tensor op rather than Python `or` on tensors.
    is_inconsistent = bool(tf.reduce_any(inconsistency_flags).numpy())

    if not is_inconsistent:
        return current_bits_q, resonance_key_q

    # Re-derive the bits and capture the thresholds that were actually used.
    new_bits_q, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC
    )
    corrected_bits_as_list = new_bits_q.numpy().tolist()

    # The updated key incorporates the correction lineage.
    updated_resonance_key_q = hashlib.sha256(
        (resonance_key_q + "REFactorBits" + str(corrected_bits_as_list)).encode("utf-8")
    ).hexdigest()

    # float()/bool() accept both scalar tensors and plain Python numbers, so the
    # trace entry is built even if a metric returns a Python float rather than
    # a tensor (calling .numpy() on a float would raise AttributeError).
    TRACE.append({'qubit': q_idx, 'reason': "binary_refactor", 'source': "tuplets",
                  'r_metric': float(r_metric(pairs_q[:, 0])),
                  'u_metric': float(u_metric(pairs_q[:, 1])),
                  'dv_metric': float(dv_metric(pairs_q)),
                  'invariant_pass': bool(invariant_check_conceptual(pairs_q, triplets_q, invariants)),
                  'degenerate_check': bool(degenerate_check(pairs_q[:6, :])),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': corrected_bits_as_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Number of virtual qubits
Q = 64

# Seed both random number generators so this "reproducible example" really
# produces the same primaries, axis maps and lineage hashes on every run.
EXAMPLE_SEED = 0
np.random.seed(EXAMPLE_SEED)
tf.random.set_seed(EXAMPLE_SEED)

# Random base primaries (x, y, z) for every qubit; each is a phase-dual
# [real, unreal] value drawn uniformly from [-1, 1). Shape [Q, 3, 2].
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# Construct initial_primaries = [x, -x, y, -y, z, -z].
# Stacking each primary with its negation on a new axis and flattening that axis
# into the primary axis interleaves them in exactly that order. Shape [Q, 6, 2].
initial_primaries = tf.reshape(
    tf.stack([base_primaries_xyz, neg_phase_dual(base_primaries_xyz)], axis=2),
    [Q, 6, 2],
)

# Dynamically generate axis_maps.
# Each axis ('x', 'y', 'z') ends up with shape [Q, K_max, 2], where K_max is the
# largest K drawn for any qubit and axis; shorter maps are zero-padded.


def _random_axis_map(k):
    """Return a random [k, 2] phase-dual axis map with entries in [-1, 1)."""
    return tf.random.uniform(shape=[k, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)


def _pad_axis_maps(maps, k_max):
    """Zero-pad every [k, 2] map to [k_max, 2] and stack them into [Q, k_max, 2]."""
    return tf.stack([
        tf.pad(m, [[0, k_max - m.shape[0]], [0, 0]], "CONSTANT", constant_values=0.0)
        for m in maps
    ])


list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []

max_k_dynamic = 0
min_k_val = 3   # Minimum K as per problem description
max_k_val = 11  # Exclusive upper bound for np.random.randint, so K is drawn from 3..10

for q_idx in range(Q):
    # Independent random K per qubit and per axis.
    k_x = np.random.randint(min_k_val, max_k_val)
    k_y = np.random.randint(min_k_val, max_k_val)
    k_z = np.random.randint(min_k_val, max_k_val)

    list_of_axis_maps_x.append(_random_axis_map(k_x))
    list_of_axis_maps_y.append(_random_axis_map(k_y))
    list_of_axis_maps_z.append(_random_axis_map(k_z))

    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Pad all generated axis map tensors to max_k_dynamic
axis_maps = {
    'x': _pad_axis_maps(list_of_axis_maps_x, max_k_dynamic),
    'y': _pad_axis_maps(list_of_axis_maps_y, max_k_dynamic),
    'z': _pad_axis_maps(list_of_axis_maps_z, max_k_dynamic),
}

# k_values: one random float32 in [0, 1) per qubit, shape [Q, 1]
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# Define a_U_constant (from NGFT)
a_U_constant = tf.constant(10.0, dtype=tf.float32) # Scalar

# Dynamically generate lineage_hashes: SHA-256 of a per-qubit label that embeds
# a random integer in [0, 1000).
lineage_hashes = [
    hashlib.sha256(f"Q{q_idx}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest()
    for q_idx in range(Q)
]

# Sample NECL program (list of operation strings) - NECL[q] = [op(args), ...]
# For this example, all qubits share the same NECL program.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Placeholder parameters for NECL operations (can be expanded)
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32), # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32), # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),  # theta (radians)
    'LIFT': tf.constant(0.5, dtype=tf.float32),   # d (e.g., a scaling factor based on d)
    'GLUE': tf.constant(0.1, dtype=tf.float32),   # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),  # tau
}

# Invariants ν: {units, tol, ordering}
invariants = {
    'units': 1.0,
    'tol': 1e-5, # A new tolerance for error correction
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1 # Threshold for scores in error correction
}

# TRACE (lineage manifest) - list of dictionaries to log events
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν)
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL)
# Runs the shared NECL program on the normalized primaries and also returns a
# checksum of the program manifest. The program defined for this example is
# TWIST → CURV → PARITY_Q → COLLAPSE_Q → LIFT.
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q]) (This step implies per-qubit computation for pairs and triplets)
# In our vectorized setup, we compute for all Q simultaneously.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# Collapse and parity are re-detected on the final pairs to generate the initial
# bits that feed error correction.
final_collapse_mask = detect_collapse(all_pairs) # Thresholds are left at detect_collapse's defaults
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

# Per-qubit results of the error-correction pass below.
corrected_bits_list = []
final_resonance_keys = []

# Loop through each qubit for error correction (if needed) and key generation
for q_idx in range(Q):
    # Extract per-qubit data
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Manual modification to force an 'inconsistent' state for Qubit 0 for demonstration
    if q_idx == 0:
        # Qubit 0's bits are replaced by a very sparse pattern: a single '1' followed by 29 '0's
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # Error Correction (Step A & B from instructions)
    # correct_bits appends to TRACE when it corrects a qubit.
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # The updated_key_q already contains the 'REFactorBits' lineage if correction occurred
    final_resonance_keys.append(updated_key_q)

# Convert corrected_bits_list back to a tensor for subsequent use if needed
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# This step uses the full triplets and axis maps to promote new primaries
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# Already done inside the correct_bits loop above: final_resonance_keys holds the
# per-qubit keys after potential error correction.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Every qubit is decoded, one hash per call.
# NOTE(review): the hashes decoded here are the original lineage_hashes, not the
# post-correction final_resonance_keys — confirm this is intended.
Q_for_decode_example = 1 # NOTE(review): not referenced anywhere in the visible part of this cell
D_for_decode_example = 16 # D ≥ 16 as per instruction

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Concatenate the per-qubit results along axis 0
# (intended shapes per the original note: [Q, 2, 3] for spins and [Q, D] for i_vecs)
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
# Headline tensors. Pairs and triplets are shown for qubit 0 only, since they
# are intermediate tuplet constructs; the bits shown are the corrected ones.
for _heading, _tensor in (
    ("Primaries In:\n", initial_primaries),
    ("\nPrimaries After NECL:\n", primaries_after_necl),
    ("\nPairs[0]:\n", all_pairs[0]),
    ("\nTriplets[0]:\n", all_triplets[0]),
    ("\nBits (all qubits):\n", corrected_bits_tensor),
    ("\nPrimaries Out (promoted):\n", primaries_out_promoted),
):
    print(_heading, _tensor.numpy())

# Conceptual Nth identities: {n^1, n^2, n^3, n^p} per qubit
print("\nNth Identities (Conceptual, per qubit):\n")

# n^0, n^2 and n^p take no per-qubit input, so they are evaluated once up front.
_n0_row = n_identity(0).numpy()[0]
_n2_row = n_identity(2).numpy()[0]  # Placeholder
_np_row = n_identity('p').numpy()[0]  # Placeholder

for q_idx in range(Q):
    # The promoted x primary of this qubit (shape [2]) drives the n^1 selector.
    promoted_primary_x = primaries_out_promoted[q_idx, 0, :]
    promoted_primary_x_tensor = tf.convert_to_tensor(promoted_primary_x, dtype=tf.float32)

    print(f"  Qubit {q_idx}:")
    print(f"    n^0 (base identity): {_n0_row}")
    print(f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}")
    print(f"    n^2 (second-order product): {_n2_row}")
    print(f"    n^p (p-order product): {_np_row}")

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
for _heading, _tensor in (
    ("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor),
    ("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor),
):
    print(_heading, _tensor.numpy())

# NECL manifest + checksum per qubit - Conceptual: each checksum is the SHA-256
# of the string form of that qubit's TRACE entries (an empty list if it has none).
necl_manifest_checksums = [
    hashlib.sha256(
        str([entry for entry in TRACE if entry['qubit'] == q_idx]).encode('utf-8')
    ).hexdigest()
    for q_idx in range(Q)
]
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)


Primaries In:
 [[[ 0.90209603 -0.53428197]
  [-0.90209603  0.53428197]
  [ 0.70395136 -0.36591053]
  [-0.70395136  0.36591053]
  [-0.31954408  0.98262954]
  [ 0.31954408 -0.98262954]]

 [[-0.8982675  -0.265064  ]
  [ 0.8982675   0.265064  ]
  [-0.1840229   0.80294514]
  [ 0.1840229  -0.80294514]
  [-0.6169443  -0.99405   ]
  [ 0.6169443   0.99405   ]]

 [[-0.8802948   0.7524471 ]
  [ 0.8802948  -0.7524471 ]
  [-0.79389167 -0.8303001 ]
  [ 0.79389167  0.8303001 ]
  [-0.77415824 -0.585233  ]
  [ 0.77415824  0.585233  ]]

 [[ 0.6575589   0.05274415]
  [-0.6575589  -0.05274415]
  [ 0.2511468   0.30640817]
  [-0.2511468  -0.30640817]
  [-0.12385678 -0.60668516]
  [ 0.12385678  0.60668516]]

 [[-0.22904539  0.8970206 ]
  [ 0.22904539 -0.8970206 ]
  [ 0.04963422 -0.24383497]
  [-0.04963422  0.24383497]
  [-0.689733    0.71573305]
  [ 0.689733   -0.71573305]]

 [[-0.61197925  0.02721548]
  [ 0.61197925 -0.02721548]
  [ 0.17395735 -0.85962915]
  [-0.17395735  0.85962915]
  [ 0.17013168 -0.23248

# Task
The refactoring of the `detect_collapse` function with the new triplet-level predicate (including the updated `R_FOR_RATIO` constant), the confirmation of `apply_parity_rotation` and `COLLAPSE_Q` functions, and the documentation of `correct_bits` have all been successfully completed and verified. The system now incorporates the specified collapse rules, and its behavior has been confirmed to be as expected.

## Refactor Collapse Detection with New Predicate

### Subtask:
Update the global constant R_FOR_RATIO to 64.0 and modify the detect_collapse function to implement the new triplet-level predicate for collapse detection.


**Reasoning**:
The subtask requires modifying the `detect_collapse` function to implement a new triplet-level predicate and removing a nested helper function. I need to update the `R_FOR_RATIO` constant and rewrite the logic for determining collapsed indices directly within the loop.



In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
# Tolerances
THETA_PHIPI = 0.001  # phi-pi tolerance constant
EPS = 1e-6           # near-zero buffer

# Collapse detection thresholds
TAU_HI = 1.0         # high threshold center
TAU_LOW = -TAU_HI    # low threshold for negative values (mirror of TAU_HI)
R_FOR_RATIO = 64.0   # ratio threshold, updated to 64.0 as per instructions

# Advanced error correction metric thresholds: real stability, unreal stability
# and real/unreal divergence all share the same (stricter) value.
TAU_R_METRIC = TAU_U_METRIC = TAU_D_METRIC = 0.85

# Prime index mask for 0..29: 1 at the prime indices, 0 everywhere else.
PRIME_MASK = tf.constant(
    [1 if index in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) else 0 for index in range(30)],
    dtype=tf.int32
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Adds two phase-dual values component by component.

    The operands are combined with the ``+`` operator, so for tensors whose last
    dimension is (real, unreal) both components are summed independently:
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|
    """
    total = a + b
    return total

def mul_phase_dual_component_wise(a, b):
    """
    Multiplies two phase-dual values component by component.

    The operands are combined with the ``*`` operator, so for tensors whose last
    dimension is (real, unreal) both components are multiplied independently:
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|
    """
    product = a * b
    return product

def neg_phase_dual(a):
    """
    Negates a phase-dual value component by component.

    Unary minus is applied to the operand, so for tensors whose last dimension
    is (real, unreal) both components change sign.
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Conceptual Nth identity n^k.

    Args:
        order (int or str): Order of the identity: 0, 1, 2, or any other value
                            (e.g. 'p') for the placeholder identity.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) holding two
                            components, used to derive n^1. Ignored unless order == 1.
    Returns:
        tf.Tensor: A [1, 2] float32 tensor:
            - order 0: the base identity [[1, 0]];
            - order 1 with a selector: the selector divided by (its norm + EPS);
            - order 1 without a selector: [[1, 1]] / sqrt(2);
            - any other order: the placeholder [[1, 0]].
    """
    if order == 0:
        # n^0 = n_|1, ξ| (base identity)
        return tf.constant([[1.0, 0.0]], dtype=tf.float32)

    if order != 1:
        # n^2 and every higher / symbolic order currently share one placeholder.
        return tf.constant([[1.0, 0.0]], dtype=tf.float32)

    if selector_primary is None:
        # Default unit selector when no promoted primary is supplied.
        return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)

    # Normalise the promoted primary into a unit selector; EPS guards against a
    # zero-length selector.
    length = tf.norm(selector_primary, axis=-1, keepdims=True)
    unit_selector = selector_primary / (length + EPS)
    return tf.reshape(unit_selector, [1, 2])

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Builds the 30-index phase-dual pair register from the 6 primaries of each qubit.

    Layout of the 30 entries (canonical order):
      0..5   the primaries themselves: x, xi, y, yi, z, zi
      6..29  for each axis pairing (x,y), (x,z), (y,z), and for every combination of
             a member of the first axis with a member of the second (in the order
             (p,q), (p,qi), (pi,q), (pi,qi)): the component-wise sum, then the
             component-wise product.

    Args:
        prim (tf.Tensor): Primaries of shape [Q, 6, 2], dtype tf.float32.
                          The last dimension holds [real, unreal] components.

    Returns:
        tf.Tensor: Pair register of shape [Q, 30, 2], dtype tf.float32.
    """
    shape_ok = prim.shape.rank == 3 and prim.shape.as_list()[-2:] == [6, 2]
    assert shape_ok and (prim.dtype == tf.float32), \
        f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Six [Q, 2] tensors, grouped per axis as (value, inverse).
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)
    axes = ((x, xi), (y, yi), (z, zi))

    entries = [x, xi, y, yi, z, zi]
    for first in range(len(axes)):
        for second in range(first + 1, len(axes)):
            for lhs in axes[first]:
                for rhs in axes[second]:
                    entries.append(add_phase_dual(lhs, rhs))
                    entries.append(mul_phase_dual_component_wise(lhs, rhs))

    # Stack along a new second-to-last axis to obtain the 30-dimension.
    return tf.stack(entries, axis=-2)

def group_triplets(pairs):
    """
    Splits the 30-index phase-dual pair register into 10 triplets ('Nth Lines').

    Triplet t is made of the consecutive pair indices 3t, 3t+1, 3t+2; the
    [real, unreal] components of every pair are carried along unchanged.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2], dtype tf.float32.

    Returns:
        tf.Tensor: Triplets of shape [Q, 10, 3, 2], dtype tf.float32.
    """
    shape_ok = pairs.shape.rank == 3 and pairs.shape.as_list()[-2:] == [30, 2]
    assert shape_ok and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table [[0,1,2],[3,4,5],...,[27,28,29]], shape [10, 3].
    triplet_index = tf.reshape(tf.range(30, dtype=tf.int32), [10, 3])

    # Gathering along the 30-axis with a [10, 3] index yields [Q, 10, 3, 2].
    return tf.gather(pairs, triplet_index, axis=1)

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW, r_for_ratio=R_FOR_RATIO):
    """
    Detects collapse across the 10 triplets within the phase-dual pair register.
    COLL(x, χ) operation.

    An index 'p' triggers collapse when
        [real_p >= tau_hi AND unreal_p <= tau_low] OR [real_p / unreal_p > r_for_ratio].
    The ratio is treated as 0 (never triggering) when |unreal_p| <= EPS, which
    avoids division by zero. If *any* index of a triplet triggers, all three
    indices of that triplet are marked as collapsed. Triplets are the consecutive
    index blocks [0,1,2], [3,4,5], ..., [27,28,29].

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold for real component.
        tau_low (float): Low threshold for unreal component (should be negative).
        r_for_ratio (float): Ratio threshold for collapse detection.

    Returns:
        tf.Tensor: A binary collapse mask of shape [Q, 30] and dtype tf.int32.
                   (collapse is a per-unit binary flag, not phase-dual itself).
    """
    shape_ok = pairs.shape.rank == 3 and pairs.shape.as_list()[-2:] == [30, 2]
    assert shape_ok and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    real_parts = pairs[..., 0]    # [Q, 30]
    unreal_parts = pairs[..., 1]  # [Q, 30]

    # Condition 1: high(real_p) AND low(unreal_p)
    high_and_low = tf.logical_and(real_parts >= tau_hi, unreal_parts <= tau_low)

    # Condition 2: real_p / unreal_p > r_for_ratio, with the ratio forced to 0
    # where unreal_p is ~0 so the condition is simply false there.
    ratio = tf.where(
        tf.abs(unreal_parts) > EPS,
        real_parts / unreal_parts,
        tf.zeros_like(real_parts),
    )
    exceeds_ratio = ratio > r_for_ratio

    index_triggers = tf.logical_or(high_and_low, exceeds_ratio)  # [Q, 30]

    # Triplets are contiguous, so a reshape groups them without gathers or a
    # Python loop: [Q, 30] -> [Q, 10, 3] -> any over the triplet -> [Q, 10].
    triplet_collapsed = tf.reduce_any(tf.reshape(index_triggers, [-1, 10, 3]), axis=-1)

    # Spread each triplet flag back over its three member indices -> [Q, 30].
    collapse_mask = tf.repeat(tf.cast(triplet_collapsed, tf.int32), repeats=3, axis=1)
    return collapse_mask

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    PAR(x, π): negate every pair that sits on a prime index or is flagged as collapsed.

    The sign flip is applied to the real and unreal components together.

    Args:
        pairs (tf.Tensor): Phase-dual pair register, shape [Q, 30, 2], dtype tf.float32.
        collapse_mask (tf.Tensor): Collapse flags, shape [Q, 30], dtype tf.int32.
        prime_mask (tf.Tensor): Prime-index flags, shape [30], dtype tf.int32.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): Sign-adjusted register, shape [Q, 30, 2], dtype tf.float32.
            - affected (tf.Tensor): 1 where the sign was flipped, shape [Q, 30], dtype tf.int32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # Replicate the global prime flags for every qubit so they line up with collapse_mask.
    prime_per_qubit = tf.broadcast_to(prime_mask, tf.shape(collapse_mask))  # [Q, 30]

    # An index is flipped when it is prime OR flagged as collapsed.
    flip = tf.logical_or(prime_per_qubit > 0, collapse_mask > 0)  # [Q, 30] bool
    affected = tf.cast(flip, tf.int32)

    # -1 on flipped indices, +1 elsewhere; the trailing axis lets one sign cover (real, unreal).
    sign = tf.where(
        flip,
        tf.constant(-1.0, dtype=tf.float32),
        tf.constant(1.0, dtype=tf.float32),
    )
    rotated = pairs * sign[..., tf.newaxis]  # [Q, 30, 2]
    return rotated, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Threshold the real component of every pair into a 0/1 bit.

    A bit is 1 only when the real (leading) component is strictly greater than
    `eps`; negative values and values within the near-zero buffer map to 0.
    The unreal component is not consulted.

    Args:
        rotated_pairs (tf.Tensor): Phase-dual pair register, shape [Q, 30, 2], dtype tf.float32.
        eps (float): Near-zero buffer used as the threshold.

    Returns:
        tf.Tensor: Binary bitmap of shape [Q, 30] and dtype tf.int32.
    """
    assert (
        rotated_pairs.shape.rank == 3
        and (tf.shape(rotated_pairs)[-2] == 30).numpy().item()
        and (tf.shape(rotated_pairs)[-1] == 2).numpy().item()
        and (rotated_pairs.dtype == tf.float32)
    ), f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    leading = rotated_pairs[:, :, 0]  # real components, [Q, 30]
    return tf.cast(tf.greater(leading, eps), tf.int32)

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flag candidates whose distance to every observed axis value exceeds `theta`.

    The distance is the Euclidean norm of the (real, unreal) difference between
    a candidate and an observed value. A candidate counts as unique only when it
    is farther than `theta` from all K observed values of its own batch row.

    Args:
        vals (tf.Tensor): Candidate values, shape [Q, 2] or [Q, 10, 2], dtype tf.float32.
        axis_vals (tf.Tensor): Observed values along the axis, shape [Q, K, 2], dtype tf.float32.
        theta (float): Tolerance threshold.

    Returns:
        tf.Tensor: int32 flags (1 = unique) of shape [Q] or [Q, 10].

    Raises:
        ValueError: If `vals` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert (
        axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item()
    ), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (
        tf.shape(vals)[0] == tf.shape(axis_vals)[0]
    ).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    # Insert singleton axes so every candidate is compared with every observed value.
    rank = vals.shape.rank
    if rank == 2:
        # [Q, 1, 2] against [Q, K, 2] -> [Q, K, 2]
        candidates, observed = tf.expand_dims(vals, axis=1), axis_vals
    elif rank == 3:
        # [Q, 10, 1, 2] against [Q, 1, K, 2] -> [Q, 10, K, 2]
        candidates, observed = tf.expand_dims(vals, axis=2), tf.expand_dims(axis_vals, axis=1)
    else:
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    # Distance between phase-dual units: [Q, K] or [Q, 10, K]
    distances = tf.norm(tf.abs(candidates - observed), axis=-1)

    # Unique only if clear of every one of the K observed values.
    is_unique = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(is_unique, tf.int32)

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Pick, per batch row, the first candidate whose flag in `cand_bool` is set.

    If no flag is set in a row, candidate 0 is returned for that row, because
    `tf.argmax` over an all-zero row yields index 0.

    Args:
        cand_bool (tf.Tensor): int32 flags of shape [Q, 10] (1 = candidate is unique).
        vals (tf.Tensor): Phase-dual candidates, shape [Q, 10, 2], dtype tf.float32.

    Returns:
        tf.Tensor: Selected phase-dual values of shape [Q, 2].
    """
    assert (
        cand_bool.shape.rank == 2
        and (tf.shape(cand_bool)[-1] == 10).numpy().item()
        and (cand_bool.dtype == tf.int32)
    ), f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert (
        vals.shape.rank == 3
        and (tf.shape(vals)[-2] == 10).numpy().item()
        and (tf.shape(vals)[-1] == 2).numpy().item()
        and (vals.dtype == tf.float32)
    ), f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (
        tf.shape(cand_bool)[0] == tf.shape(vals)[0]
    ).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    # Position of the first 1 in each row (0 when the row has none).
    first_hit = tf.argmax(cand_bool, axis=1)  # [Q], int64

    # Batched gather: row q of `vals` is indexed with first_hit[q], giving [Q, 2].
    return tf.gather(vals, first_hit, batch_dims=1)

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Promote six phase-dual primaries per qubit, triplet-first with a per-axis fallback.

    If all three components of the final triplet are unique against their axis
    maps, the primaries are built from that triplet. Otherwise each axis
    independently contributes its first unique candidate among the 10 triplets.
    Either way the output is laid out as [x, -x, y, -y, z, -z].

    Args:
        triplets (tf.Tensor): 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
        axis_maps (dict): Keys 'x', 'y', 'z'; each value is a tf.Tensor of observed
                          values for that axis, shape [Q, K, 2] and dtype tf.float32.
        theta (float): Tolerance threshold for the uniqueness test.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
    """
    assert (
        triplets.shape.rank == 4
        and (tf.shape(triplets)[-3] == 10).numpy().item()
        and (tf.shape(triplets)[-2] == 3).numpy().item()
        and (tf.shape(triplets)[-1] == 2).numpy().item()
    ), f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert (
        triplets.dtype == tf.float32
    ), f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for k, v in axis_maps.items():
        assert (
            isinstance(v, tf.Tensor)
            and v.dtype == tf.float32
            and v.shape.rank == 3
            and (tf.shape(v)[-1] == 2).numpy().item()
        ), f"axis_maps['{k}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {v.shape} and dtype {v.dtype}"
    assert (
        tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]
    ).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')

    def _with_conjugates(parts):
        # Interleave each axis value with its negation -> [Q, 6, 2].
        interleaved = []
        for part in parts:
            interleaved.extend((part, neg_phase_dual(part)))
        return tf.stack(interleaved, axis=1)

    # --- Triplet-first path: test the last of the 10 triplets component by component.
    last_triplet = triplets[:, -1, :, :]  # [Q, 3, 2]
    final_parts = [last_triplet[:, i, :] for i in range(3)]  # each [Q, 2]
    final_is_unique = [
        _value_unique_axis_phase_dual(part, axis_maps[name], theta) > 0
        for part, name in zip(final_parts, axis_names)
    ]  # each [Q] bool
    use_triplet = tf.logical_and(
        tf.logical_and(final_is_unique[0], final_is_unique[1]),
        final_is_unique[2],
    )  # [Q]
    prim_trip = _with_conjugates(final_parts)

    # --- Axis-fallback path: first unique candidate per axis across all 10 triplets.
    candidates = [triplets[:, :, i, :] for i in range(3)]  # each [Q, 10, 2]
    candidate_flags = [
        _value_unique_axis_phase_dual(cand, axis_maps[name], theta)
        for cand, name in zip(candidates, axis_names)
    ]  # each [Q, 10]
    selected = [
        _first_unique_selection_phase_dual(flags, cand)
        for flags, cand in zip(candidate_flags, candidates)
    ]  # each [Q, 2]
    prim_axis = _with_conjugates(selected)

    # Per-qubit choice; the [Q, 1, 1] condition broadcasts over the [Q, 6, 2] operands.
    return tf.where(use_triplet[:, tf.newaxis, tf.newaxis], prim_trip, prim_axis)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Build one SHA256 resonance key per batch sample.

    The tensors are materialized to NumPy, then each sample's bits, prime mask,
    collapse mask and parity mask are rendered into a text manifest. When a
    non-empty lineage string is supplied for the sample it is appended to the
    manifest as a `|path:` field before hashing.

    Args:
        bits (tf.Tensor): Bitmap of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask of shape [30] and dtype tf.int32 (global constant).
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask of shape [Q, 30] and dtype tf.int32.
        lineage_list (list[str], optional): One lineage string per batch sample. Defaults to None.

    Returns:
        list[str]: SHA256 hex digests, one per batch sample, in batch order.
    """
    assert (
        bits.shape.rank == 2
        and (tf.shape(bits)[-1] == 30).numpy().item()
        and (bits.dtype == tf.int32)
    ), f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    ), f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    ), f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert (
        parity_mask.shape.rank == 2
        and (tf.shape(parity_mask)[-1] == 30).numpy().item()
        and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item()
        and (parity_mask.dtype == tf.int32)
    ), f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (
        (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item())
        and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item())
    ), f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert (
            isinstance(lineage_list, list) and len(lineage_list) == tf.shape(bits)[0].numpy().item()
        ), f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    batch_size = tf.shape(bits)[0].numpy().item()

    # Materialize everything up front; hashing below is pure Python/NumPy.
    bit_rows = bits.numpy()
    prime_row = prime_mask.numpy()
    collapse_rows = collapse_mask.numpy()
    parity_rows = parity_mask.numpy()

    # The prime mask is global, so every sample sees the same row.
    prime_rows = np.broadcast_to(prime_row, (batch_size, 30))

    digests = []
    per_sample = zip(bit_rows, prime_rows, collapse_rows, parity_rows)
    for sample_idx, (bit_row, p_row, c_row, par_row) in enumerate(per_sample):
        manifest = f"bits:{bit_row.tolist()}|prime:{p_row.tolist()}|collapse:{c_row.tolist()}|parity:{par_row.tolist()}"
        # Only a present, non-empty lineage string extends the manifest.
        if lineage_list and lineage_list[sample_idx]:
            manifest += f"|path:{lineage_list[sample_idx]}"
        digests.append(hashlib.sha256(manifest.encode("utf-8")).hexdigest())
    return digests

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    Compute the per-qubit Info-energy E_info = (k + 1) · a_U · I.

    I is the sum, over the six primaries of a qubit, of the Euclidean magnitude
    of each (real, unreal) unit.

    Args:
        primaries_out (tf.Tensor): Promoted primaries of shape [Q, 6, 2] (phase-dual) and dtype tf.float32.
        k_values (tf.Tensor): Batch-wise 'k' components, shape [Q, 1] or [Q], dtype tf.float32.
        a_U_constant (tf.Tensor): A universal constant, scalar tf.float32.

    Returns:
        tf.Tensor: Computed Info-energy for each qubit, shape [Q] and dtype tf.float32.
    """
    assert (
        primaries_out.shape.rank == 3 and (tf.shape(primaries_out)[-1] == 2).numpy().item()
    ), f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert (primaries_out.dtype == tf.float32), f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert (tf.shape(primaries_out)[-2] == 6).numpy().item(), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert (k_values.dtype == tf.float32), f"k_values must have dtype tf.float32, but got {k_values.dtype}"
    assert (
        ((tf.rank(k_values) == 2).numpy().item() and (tf.shape(k_values)[-1] == 1).numpy().item())
        or ((tf.rank(k_values) == 1).numpy().item() and (tf.shape(k_values)[0] == tf.shape(primaries_out)[0]).numpy().item())
    ), f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"
    assert (a_U_constant.dtype == tf.float32), f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert (tf.rank(a_U_constant) == 0).numpy().item(), f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # Bring k to column form [Q, 1] so it lines up with the per-qubit totals below.
    k_is_flat = (tf.rank(k_values) == 1).numpy().item()
    k_column = tf.expand_dims(k_values, axis=-1) if k_is_flat else k_values

    # I: magnitude of each phase-dual unit ([Q, 6]) summed per qubit, kept as [Q, 1].
    unit_magnitudes = tf.norm(primaries_out, axis=-1)
    i_component = tf.reduce_sum(unit_magnitudes, axis=1, keepdims=True)

    energy_column = (k_column + 1.0) * i_component * a_U_constant  # [Q, 1]
    return tf.squeeze(energy_column, axis=1)  # [Q]

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL CURV: damp each primary by its own magnitude.
    X ← X / (1 + |kappa|·|X|)

    |X| is the Euclidean norm over the last (real, unreal) axis, so both
    components of a unit are divided by the same factor.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): Scalar or broadcastable tensor for kappa parameter.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    kappa_abs = tf.abs(tf.cast(params_kappa, primaries.dtype))
    unit_norm = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    damping = 1.0 + kappa_abs * unit_norm
    return primaries / damping

def GEOD(primaries, params_t):
    """
    NECL GEOD: shift every component by `t` in the direction of its own sign.
    X ← X + t·sign(X)

    Components that are exactly zero stay zero, since sign(0) is 0.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): Scalar or broadcastable tensor for 't' parameter.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    step = tf.cast(params_t, primaries.dtype) * tf.sign(primaries)
    return primaries + step

def TWIST(primaries, params_theta):
    """
    NECL TWIST: scale the unreal component by cos(theta); the real component is untouched.
    X[...,1] ← X[...,1]·cos(theta)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): Scalar or broadcastable tensor for 'theta' angle.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    cos_theta = tf.cos(tf.cast(params_theta, primaries.dtype))
    real_part = primaries[..., 0]
    unreal_part = tf.multiply(primaries[..., 1], cos_theta)
    return tf.stack((real_part, unreal_part), axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL LIFT: uniform rescale standing in for a projection to higher coordinates.
    X ← X·(1 + 0.1·d)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_d (tf.Tensor): Scalar parameter for higher dimension 'd'.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    d = tf.cast(params_d, primaries.dtype)
    # Placeholder gain; the 0.1 coefficient is a fixed constant of this emulation.
    gain = 1.0 + 0.1 * d
    return primaries * gain

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL GLUE: mix each primary with its cyclic predecessor along axis 1.
    X ← X + sigma·roll(X, +1, axis=1)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_sigma (tf.Tensor): Scalar parameter for gluing strength.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    # Slot i receives slot i-1 (wrapping around) from the selector axis.
    neighbour = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * neighbour

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL SPLIT: divide every primary into a (1−tau) share and a tau share.
    X ← concat(X·(1−tau), X·tau)

    The two shares are concatenated along axis 1, so the output has twice as
    many primaries as the input.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_tau (tf.Tensor): Scalar parameter for split ratio.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 12, 2] (doubles k dimension).
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    kept_share = primaries * (1.0 - ratio)
    split_share = primaries * ratio
    return tf.concat((kept_share, split_share), axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically derive a conceptual spin vector and internal-state vector
    for one qubit from its lineage hash.

    The hash and the qubit index are re-hashed into a seed for a private
    pseudo-random generator. The generator is local to this call, so the
    process-wide NumPy random state is left untouched. The same
    (hex_hash_str, q_idx, D) always yields the same outputs.

    Args:
        hex_hash_str (str): A SHA256 hex hash string (64 characters) for one qubit.
        q_idx (int): The index of the qubit; mixed into the seed.
        D (int): Dimensionality for i_vec; must be at least 16.
        num_qubits (int): Total number of qubits. Currently unused; kept for interface compatibility.
        invariants (dict): Dictionary of invariant constants. Currently unused; kept for interface compatibility.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec (tf.Tensor): Shape [1, 2, 3], dtype tf.float32. Row 0 is the real
              spin (a unit vector), row 1 the unreal spin (row 0 rotated about z).
            - i_vec (tf.Tensor): Shape [1, D], dtype tf.float32, scaled to unit Euclidean norm.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Per-qubit seed: first 64 bits of SHA256("<hash>-<q_idx>"), reduced to NumPy's legacy seed range.
    seed_digest = hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()
    seed_value = int(seed_digest[:16], 16)
    # A private RandomState produces the same stream as seeding the global generator
    # would, without clobbering random state that other code may depend on.
    rng = np.random.RandomState(seed_value % (2**32 - 1))

    # θ = 2π·r0, φ = 2π·r1, twist = 2π·r2
    r0, r1, r2 = rng.rand(3)
    theta = 2 * math.pi * r0
    phi = 2 * math.pi * r1
    twist_angle = 2 * math.pi * r2

    # Real spin: (x, y, z) = (sinθ cosφ, sinθ sinφ, cosθ)
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: rotate (x, y) about the z-axis by the twist angle; z is unchanged.
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # I_vec: D further draws from the same stream, normalized to unit length.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:  # leave a (near-)zero vector as is rather than divide by ~0
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): rescale each qubit's primaries by the qubit's total magnitude.

    The total is the sum of the Euclidean magnitudes of the qubit's primary
    units. Primaries are divided by that total (plus EPS) and multiplied by
    invariants['units'] (default 1.0). A qubit whose total does not exceed EPS
    is mapped to all zeros.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        invariants (dict): Dictionary of invariant constants (e.g., 'units', 'tol', 'ordering').
    Returns:
        tf.Tensor: Normalized primaries of shape [Q, 6, 2].
    """
    unit_magnitudes = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    qubit_totals = tf.reduce_sum(unit_magnitudes, axis=1, keepdims=True)  # [Q, 1, 1]

    target_scale = invariants.get('units', 1.0)
    # Target scale for qubits with a usable total, 0 for (near-)empty ones.
    gated_scale = tf.where(
        qubit_totals > EPS,
        tf.cast(target_scale, primaries.dtype),
        0.0,
    )
    # EPS in the denominator keeps the division finite even for the gated-out qubits.
    return primaries / (qubit_totals + EPS) * gated_scale

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π): multi-qubit wrapper around apply_parity_rotation.

    Builds the pair register and its collapse mask from the primaries, applies
    the parity rotation, and returns the first six rotated entries.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask [30].
    Returns:
        tf.Tensor: Primaries updated based on parity rotation [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)
    rotated, _affected = apply_parity_rotation(
        pair_register, detect_collapse(pair_register), prime_mask
    )
    # The rotated register is [Q, 30, 2]; slots 0..5 are taken as the primaries
    # themselves (assumes compute_pairs places them first — TODO confirm).
    return rotated[:, :6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ): multi-qubit wrapper around detect_collapse.

    Zeroes both components of exactly those primary units whose collapse flag
    is set; all other units, and the rest of the qubit, are left unchanged.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
    Returns:
        tf.Tensor: Primaries updated based on collapse detection [Q, 6, 2].
    """
    full_mask = detect_collapse(compute_pairs(primaries))  # [Q, 30]

    # Only the first six flags are used, one per primary unit; the trailing
    # axis lets a single flag cover (real, unreal).
    unit_flags = tf.expand_dims(full_mask[:, 0:6], axis=-1)  # [Q, 6, 1]
    is_collapsed = tf.cast(unit_flags, tf.float32) > 0

    return tf.where(is_collapsed, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit wrapper that delegates to promote_primaries.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2].
        axis_maps (dict): Axis maps for uniqueness checks.
        theta_phipi (float): Tolerance for uniqueness.
    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2].
    """
    promoted = promote_primaries(triplets=triplets, axis_maps=axis_maps, theta=theta_phipi)
    return promoted

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Apply a sequence of NECL operations to multi-qubit primaries and checksum the program.

    Operations are applied in list order. For each one its name is appended to a
    program manifest; parameterised operations (CURV, GEOD, TWIST, LIFT, GLUE,
    SPLIT) also append their parameter value as "(value)". The checksum is the
    SHA256 of that manifest.

    Per-operation behavior:
        - CURV / GEOD / TWIST / LIFT: applied with params_dict[name], or a built-in default.
        - GLUE: applied only when the number of qubits is even; otherwise a warning is
          printed and the primaries are left unchanged. The parameter is still recorded.
        - SPLIT: intentionally a no-op here so the primaries keep their shape; the
          parameter is still recorded.
        - PARITY_Q / COLLAPSE_Q: applied without parameters.
        - Anything else: a warning is printed; the name is still part of the manifest.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): List of NECL operation names to apply.
        params_dict (dict): Maps NECL op names to their parameter tensors (scalar
            tensors; each value must support `.numpy().item()`).
        prime_mask (tf.Tensor): Global prime mask needed for PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Accepted for interface
            compatibility; no operation in this function uses it.

    Returns:
        tf.Tensor: Final primaries after applying the NECL program.
        str: Checksum of the applied NECL program.
    """
    Q = tf.shape(primaries)[0].numpy().item()

    # Ops of the form fn(primaries, param) that are always applied: name -> (default, fn).
    parameterised_ops = {
        'CURV': (0.01, CURV),
        'GEOD': (0.05, GEOD),
        'TWIST': (math.pi / 4, TWIST),  # radians
        'LIFT': (0.5, LIFT),
    }
    # Defaults for the ops that need bespoke handling below.
    special_defaults = {'GLUE': 0.1, 'SPLIT': 0.5}

    def _param_for(name, default):
        # Caller-supplied parameter if present, otherwise the default as a float32 scalar.
        return params_dict.get(name, tf.constant(default, dtype=tf.float32))

    current_primaries = primaries
    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name in parameterised_ops:
            default, transform = parameterised_ops[op_name]
            op_params = _param_for(op_name, default)
            current_primaries = transform(current_primaries, op_params)
            manifest_parts.append(f"({op_params.numpy().item()})")
        elif op_name == 'GLUE':
            op_params = _param_for(op_name, special_defaults[op_name])
            if Q % 2 != 0:
                print(f"Warning: GLUE operation skipped for odd Q ({Q})")
            else:
                # Gluing strength is the qubit-rolled primaries scaled by the parameter,
                # so each qubit is coupled to its predecessor along the Q dimension.
                current_primaries = GLUE(
                    current_primaries,
                    tf.roll(current_primaries, shift=1, axis=0) * op_params,
                )
            manifest_parts.append(f"({op_params.numpy().item()})")
        elif op_name == 'SPLIT':
            # A real SPLIT would double the primary count; the pipeline needs a
            # constant shape, so only the parameter is recorded.
            op_params = _param_for(op_name, special_defaults[op_name])
            manifest_parts.append(f"({op_params.numpy().item()})")
        elif op_name == 'PARITY_Q':
            current_primaries = PARITY_Q(current_primaries, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            current_primaries = COLLAPSE_Q(current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    program_manifest = "".join(manifest_parts)
    necl_checksum = hashlib.sha256(program_manifest.encode('utf-8')).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Quantifies real stability/cohesion as ``1 - variance / (range + EPS)``.

    Higher value implies higher stability. When every element is effectively
    identical (range below EPS) the metric is exactly 1.0.

    Args:
        real_parts (tf.Tensor): Floating-point tensor holding the real components.

    Returns:
        tf.Tensor: Scalar tensor with the dtype of ``real_parts``. A tensor is
                   returned in every case (including the zero-range case) so
                   callers can uniformly call ``.numpy()`` on the result.
    """
    spread = tf.reduce_max(real_parts) - tf.reduce_min(real_parts)
    # EPS in the denominator keeps this finite even when spread is ~0; that
    # value is discarded by the tf.where below in the degenerate case anyway.
    score = 1.0 - tf.math.reduce_variance(real_parts) / (spread + EPS)
    # tf.where instead of a Python `if`: no host-side branch on a tensor, and the
    # degenerate branch yields a tensor rather than a bare Python float.
    return tf.where(spread < EPS, tf.ones_like(score), score)

def u_metric(unreal_parts):
    """
    Quantifies unreal stability/cohesion as ``1 - variance / (range + EPS)``.

    Higher value implies higher stability. When every element is effectively
    identical (range below EPS) the metric is exactly 1.0.

    Args:
        unreal_parts (tf.Tensor): Floating-point tensor holding the unreal components.

    Returns:
        tf.Tensor: Scalar tensor with the dtype of ``unreal_parts``. A tensor is
                   returned in every case (including the zero-range case) so
                   callers can uniformly call ``.numpy()`` on the result.
    """
    spread = tf.reduce_max(unreal_parts) - tf.reduce_min(unreal_parts)
    # EPS in the denominator keeps this finite even when spread is ~0; that
    # value is discarded by the tf.where below in the degenerate case anyway.
    score = 1.0 - tf.math.reduce_variance(unreal_parts) / (spread + EPS)
    # tf.where instead of a Python `if`: no host-side branch on a tensor, and the
    # degenerate branch yields a tensor rather than a bare Python float.
    return tf.where(spread < EPS, tf.ones_like(score), score)

def dv_metric(pairs_q):
    """
    Scores real/unreal consistency as ``1 - mean(|real - unreal| / magnitude)``.

    The last axis of ``pairs_q`` is read as (real, unreal). Entries whose
    magnitude does not exceed EPS contribute zero divergence. A higher result
    means the two components diverge less.

    Args:
        pairs_q (tf.Tensor): Tensor whose last dimension holds [real, unreal].

    Returns:
        tf.Tensor: Scalar consistency score.
    """
    gap = tf.abs(pairs_q[..., 0] - pairs_q[..., 1])
    norms = tf.norm(pairs_q, axis=-1)

    # Near-zero magnitudes are mapped to zero divergence instead of dividing by ~0.
    relative_gap = tf.where(norms > EPS, gap / (norms + EPS), tf.zeros_like(norms))
    return 1.0 - tf.reduce_mean(relative_gap)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant test: the magnitudes of the first six pairs (the
    primaries) must sum to ``invariants['units']`` within ``invariants['tol']``.

    Args:
        pairs_q (tf.Tensor): Pair register; only rows 0..5 are inspected.
        triplets_q: Accepted for signature symmetry; not used by this check.
        invariants (dict): Optional keys 'units' (default 1.0) and 'tol' (default EPS).

    Returns:
        tf.Tensor: Scalar boolean tensor, True when the invariant holds.
    """
    primary_norms = tf.norm(pairs_q[:6, :], axis=-1)  # one magnitude per primary
    total_norm = tf.reduce_sum(primary_norms)
    expected_total = invariants.get('units', 1.0)
    tolerance = invariants.get('tol', EPS)
    deviation = tf.abs(total_norm - expected_total)
    return deviation < tolerance

def degenerate_check(primaries_q):
    """
    Conceptual degeneracy test: reports whether every primary is (near) zero.

    Args:
        primaries_q (tf.Tensor): Tensor whose last dimension is reduced to a magnitude.

    Returns:
        tf.Tensor: Scalar boolean tensor, True when all magnitudes are below EPS.
    """
    norms = tf.norm(primaries_q, axis=-1)
    is_negligible = norms < EPS
    return tf.reduce_all(is_negligible)

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derives corrected bits based on a per-index rule and guards.
    Rule: b_i=1 if r_i>TAU_R AND u_i>TAU_U AND dv_i>TAU_D AND trip_mix>0 AND inv==True AND deg==False else 0.

    Guards applied after the first derivation:
      1. If the binary entropy of the bit pattern is not above 0.3, the R/U
         thresholds are raised by 20%, the D threshold is lowered by 10%
         (floored at 0.25), and the rule is re-evaluated once.
      2. If the result is all ones or all zeros, the bits fall back to
         ``|real| > EPS AND trip_mix > 0``.

    NOTE(review): r_i and u_i are |real|/mag and |unreal|/mag with mag the
    Euclidean norm of (real, unreal), so r_i**2 + u_i**2 == 1 wherever mag > EPS.
    They therefore cannot both exceed ~0.7071; with R/U thresholds above that
    value the main rule always yields all zeros and Guard 2 decides the result.
    Confirm whether that is intended.

    Args:
        pairs_q (tf.Tensor): Pair register for one qubit, [30, 2] (row count must be a multiple of 3).
        triplets_q (tf.Tensor): Triplets for the same qubit; only forwarded to the invariant check.
        invariants (dict): Invariant constants forwarded to the invariant check.
        initial_TAU_R, initial_TAU_U, initial_TAU_D (float): Starting thresholds.

    Returns:
        tuple: (bits [30] int32 tensor, final TAU_R, final TAU_U, final TAU_D).
    """
    current_TAU_R = initial_TAU_R
    current_TAU_U = initial_TAU_U
    current_TAU_D = initial_TAU_D

    real = pairs_q[:, 0]             # [30]
    unreal = pairs_q[:, 1]           # [30]
    mag = tf.norm(pairs_q, axis=-1)  # Magnitude of each pair_q unit
    zeros = tf.zeros_like(mag)

    # Per-index stability/divergence ratios; indices with ~zero magnitude score 0.
    r_i = tf.where(mag > EPS, tf.abs(real) / mag, zeros)
    u_i = tf.where(mag > EPS, tf.abs(unreal) / mag, zeros)
    dv_i = tf.where(mag > EPS, tf.abs(real - unreal) / mag, zeros)

    # Triplet diversity: a block of three consecutive indices "mixes" when any
    # real-part sign differs from the block's first sign. Computed in one
    # vectorized pass (no per-triplet Python loop / host round-trip), then
    # expanded back so every index carries its block's flag.
    sign_blocks = tf.reshape(tf.sign(real), [-1, 3])                                   # [10, 3]
    block_has_mix = tf.reduce_any(tf.not_equal(sign_blocks, sign_blocks[:, :1]), axis=1)  # [10]
    trip_mix = tf.repeat(tf.cast(block_has_mix, tf.int32), repeats=3)                  # [30]

    # Global (per-qubit) guards shared by every index.
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))  # Check degeneracy of primaries

    def bits_for(tau_r, tau_u, tau_d):
        # Single definition of the per-index rule so both evaluations stay in sync.
        return tf.cast(
            (r_i > tau_r) & (u_i > tau_u) & (dv_i > tau_d) & (trip_mix > 0) & invariant_ok & not_degenerate,
            tf.int32,
        )

    b = bits_for(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 1: minimum entropy check on the fraction of set bits.
    def min_entropy_ok(bits):
        p = tf.reduce_mean(tf.cast(bits, tf.float32))
        H = - (p * tf.math.log(p + EPS) + (1.0 - p) * tf.math.log(1.0 - p + EPS))
        return H > 0.3  # Example entropy threshold

    if not min_entropy_ok(b):
        current_TAU_R *= 1.2
        current_TAU_U *= 1.2
        current_TAU_D = max(current_TAU_D * 0.9, 0.25)  # Example adjustments
        b = bits_for(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 2: never allow an all-ones or all-zeros final decision.
    if tf.reduce_all(b == 1) or tf.reduce_all(b == 0):
        # Fallback: mark indices whose real component is non-negligible and whose triplet mixes signs.
        b = tf.cast((tf.abs(real) > EPS) & (trip_mix > 0), tf.int32)

    return b, current_TAU_R, current_TAU_U, current_TAU_D  # Return adjusted thresholds

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Advanced Error Correction hook for a single qubit (q_idx). Re-derives the bit
    pattern for the qubit when the incoming pattern is deemed 'inconsistent'.

    A pattern is inconsistent when it is all ones, all zeros, has fewer than 5
    ones, or has more than 25 ones. In that case the bits are re-derived with
    `derive_bits_advanced` (starting from TAU_R_METRIC / TAU_U_METRIC /
    TAU_D_METRIC), a new resonance key is computed as the SHA-256 of
    ``old_key + "REFactorBits" + str(bits)``, and one lineage entry describing
    the correction is appended to `TRACE`. Otherwise the inputs are returned
    unchanged and `TRACE` is not touched.

    Scope: only the `pairs_q` / `triplets_q` passed in for this qubit are read;
    no other qubit's data is consulted.

    Args:
        q_idx (int): The index of the current qubit being processed.
        pairs_q (tf.Tensor): The 30-index phase-dual pair register for the current qubit [30, 2].
        triplets_q (tf.Tensor): The 10 triplets for the current qubit [10, 3, 2].
        current_bits_q (tf.Tensor): The initially derived 30-bit pattern for the current qubit [30].
        resonance_key_q (str): The current resonance key string for the qubit.
        TRACE (list): A list to append lineage information if corrections are made (mutated in place).
        invariants (dict): Dictionary of invariant constants.

    Returns:
        tuple[tf.Tensor, str]:
            - new_bits_q (tf.Tensor): The potentially corrected 30-bit pattern.
            - updated_resonance_key_q (str): The updated resonance key string (with lineage if corrected).
    """
    num_ones = tf.reduce_sum(current_bits_q)
    is_all_ones = tf.reduce_all(tf.equal(current_bits_q, 1))
    is_all_zeros = tf.reduce_all(tf.equal(current_bits_q, 0))
    is_sparse = num_ones < 5   # Example: less than 5 bits are 1
    is_dense = num_ones > 25   # Example: more than 25 bits are 1

    # Combine as tensors, then convert once to a Python bool for the branch below.
    is_inconsistent = bool(is_all_ones | is_all_zeros | is_sparse | is_dense)

    if not is_inconsistent:
        return current_bits_q, resonance_key_q

    # Re-derive the bits and capture the thresholds that were finally used.
    new_bits_q, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC
    )
    corrected_bits_as_list = new_bits_q.numpy().tolist()

    # The updated key incorporates the correction lineage.
    updated_resonance_key_q = hashlib.sha256(
        (resonance_key_q + "REFactorBits" + str(corrected_bits_as_list)).encode("utf-8")
    ).hexdigest()

    # float()/bool() accept both scalar eager tensors and plain Python scalars, so
    # logging does not break when a metric helper returns a bare float (r_metric /
    # u_metric do so for zero-range input) instead of a tensor.
    TRACE.append({'qubit': q_idx, 'reason': "binary_refactor", 'source': "tuplets",
                  'r_metric': float(r_metric(pairs_q[:, 0])),
                  'u_metric': float(u_metric(pairs_q[:, 1])),
                  'dv_metric': float(dv_metric(pairs_q)),
                  'invariant_pass': bool(invariant_check_conceptual(pairs_q, triplets_q, invariants)),
                  'degenerate_check': bool(degenerate_check(pairs_q[:6, :])),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': corrected_bits_as_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Seed both random sources used below so the example is actually reproducible
# from run to run (NumPy drives the K sizes and lineage salts, TensorFlow the
# tensor values).
EXAMPLE_SEED = 0
np.random.seed(EXAMPLE_SEED)
tf.random.set_seed(EXAMPLE_SEED)

# Number of virtual qubits
Q = 64

# Random base primaries (x, y, z) per qubit, each a phase-dual [real, unreal].
# Shape [Q, 3, 2]
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# initial_primaries = [x, -x, y, -y, z, -z] per qubit.
# Stacking (value, negation) on a new axis right after the xyz axis and then
# flattening those two axes interleaves them in exactly that order.
initial_primaries = tf.reshape(
    tf.stack([base_primaries_xyz, neg_phase_dual(base_primaries_xyz)], axis=2),  # [Q, 3, 2, 2]
    [Q, 6, 2],
)

# Axis maps: for every qubit and each axis ('x', 'y', 'z') a [K, 2] tensor with
# its own random K, later zero-padded to a common length so they can be stacked
# into [Q, max_k_dynamic, 2].
list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []
_axis_map_lists = {'x': list_of_axis_maps_x, 'y': list_of_axis_maps_y, 'z': list_of_axis_maps_z}

max_k_dynamic = 0
min_k_val = 3   # Minimum K (inclusive)
max_k_val = 11  # Upper bound for K; np.random.randint excludes it, so K is at most max_k_val - 1

for q_idx in range(Q):
    # Dict insertion order gives x, y, z, so the draw order per qubit is fixed.
    for ragged_maps in _axis_map_lists.values():
        k = np.random.randint(min_k_val, max_k_val)
        ragged_maps.append(tf.random.uniform(shape=[k, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
        max_k_dynamic = max(max_k_dynamic, k)

# Zero-pad every map along its K axis up to max_k_dynamic, then stack per axis.
axis_maps = {
    axis_name: tf.stack([
        tf.pad(t, [[0, max_k_dynamic - t.shape[0]], [0, 0]], "CONSTANT", constant_values=0.0)
        for t in ragged_maps
    ])
    for axis_name, ragged_maps in _axis_map_lists.items()
}

# k_values: shape [Q, 1], random float32 values in [0.0, 1.0)
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# Define a_U_constant (from NGFT)
a_U_constant = tf.constant(10.0, dtype=tf.float32)  # Scalar

# One lineage hash per qubit: SHA-256 of a label containing the qubit index and a random salt.
lineage_hashes = [
    hashlib.sha256(f"Q{q_idx}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest()
    for q_idx in range(Q)
]

# Sample NECL program (list of operation strings) - NECL[q] = [op(args), ...]
# For this example, all qubits share the same NECL program.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Placeholder parameters for NECL operations (can be expanded)
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32),         # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32),         # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),   # theta (radians)
    'LIFT': tf.constant(0.5, dtype=tf.float32),          # d (e.g., a scaling factor based on d)
    'GLUE': tf.constant(0.1, dtype=tf.float32),          # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),         # tau
}

# Invariants ν: {units, tol, ordering}
invariants = {
    'units': 1.0,
    'tol': 1e-5,  # Tolerance used by the error-correction invariant check
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1  # Threshold for scores in error correction
}

# TRACE (lineage manifest) - list of dictionaries to log events
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# Step 1: X ← NORM(X, ν)
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# Step 2: X ← APPLY_NECL(X, NECL)   (default order: TWIST → CURV → PARITY_Q → COLLAPSE_Q)
primaries_after_necl, necl_program_checksum = APPLY_NECL(
    primaries_normalized,
    necl_program_shared,
    necl_params,
    PRIME_MASK,
)

# Step 3: Pairs[q], Triplets[q] ← compute_tuplets(X[q]), evaluated for all Q qubits at once.
all_pairs = compute_pairs(primaries_after_necl)  # [Q, 30, 2]
all_triplets = group_triplets(all_pairs)         # [Q, 10, 3, 2]

# Step 4: Bits[q] ← bitmap(X[q].real)
# Collapse and parity are re-detected on the final pairs; the resulting bitmap
# is the starting bit pattern handed to error correction.
final_collapse_mask = detect_collapse(all_pairs)  # thresholds come from the function's defaults
final_rotated_pairs, final_parity_mask = apply_parity_rotation(
    all_pairs, final_collapse_mask, PRIME_MASK
)
initial_bits = bitmap(final_rotated_pairs)  # [Q, 30]

corrected_bits_list = []
final_resonance_keys = []

# Per-qubit error correction (when needed) and resonance-key update.
for q_idx in range(Q):
    pairs_q = all_pairs[q_idx]                    # [30, 2]
    triplets_q = all_triplets[q_idx]              # [10, 3, 2]
    current_bits_q = initial_bits[q_idx]          # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    if q_idx == 0:
        # Demonstration only: force qubit 0 into a very sparse pattern (a single '1')
        # so that the correction path is exercised.
        sparse_bits_for_q0 = tf.concat(
            [tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)],
            axis=0,
        )
        current_bits_q = sparse_bits_for_q0

    corrected_bits_q, updated_key_q = correct_bits(
        q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants
    )
    corrected_bits_list.append(corrected_bits_q)
    # updated_key_q already carries the 'REFactorBits' lineage when a correction happened.
    final_resonance_keys.append(updated_key_q)

# Re-assemble the per-qubit bit patterns into a single tensor.
corrected_bits_tensor = tf.stack(corrected_bits_list)

# Step 5: PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# Promotion uses the full triplets together with the axis maps.
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# Step 6: InfoEnergy[q] ← (k+1)·a_U·I   (I from tuplet entropy)
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# Step 7: ResonanceKey[q] ← hash(lineage_manifest)
# Nothing to do here: final_resonance_keys was filled in the correction loop above.

# Step 8: Spin[q], I_vec[q] ← decode_hash(H[q])
Q_for_decode_example = 1   # We decode for 1 qubit per hash call
D_for_decode_example = 16  # D ≥ 16 as per instruction

decoded_per_qubit = [
    decode_lineage_hash(
        lineage_hashes[q_idx],
        q_idx,
        D=D_for_decode_example,
        num_qubits=Q,
        invariants=invariants,
    )
    for q_idx in range(Q)
]
all_spin_vecs_decoded = [spin_vec for spin_vec, _ in decoded_per_qubit]
all_i_vecs_decoded = [i_vec for _, i_vec in decoded_per_qubit]

# Join the per-qubit results along the leading axis.
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
# Tensor-valued results first; pairs/triplets are shown for qubit 0 only.
for heading, tensor in (
    ("Primaries In:\n", initial_primaries),
    ("\nPrimaries After NECL:\n", primaries_after_necl),
    ("\nPairs[0]:\n", all_pairs[0]),
    ("\nTriplets[0]:\n", all_triplets[0]),
    ("\nBits (all qubits):\n", corrected_bits_tensor),  # corrected bits, not the initial ones
    ("\nPrimaries Out (promoted):\n", primaries_out_promoted),
):
    print(heading, tensor.numpy())

# Conceptual Nth identities: {n^1, n^2, n^3, n^p} per qubit
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # The first promoted primary of this qubit ([2]) serves as the n^1 selector.
    promoted_primary_x_tensor = tf.convert_to_tensor(
        primaries_out_promoted[q_idx, 0, :], dtype=tf.float32
    )
    identity_report = (
        f"  Qubit {q_idx}:",
        f"    n^0 (base identity): {n_identity(0).numpy()[0]}",
        f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}",
        f"    n^2 (second-order product): {n_identity(2).numpy()[0]}",  # Placeholder
        f"    n^p (p-order product): {n_identity('p').numpy()[0]}",  # Placeholder
    )
    for report_line in identity_report:
        print(report_line)

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# Per-qubit manifest checksum: SHA-256 over the string form of that qubit's TRACE entries.
necl_manifest_checksums = [
    hashlib.sha256(
        str([entry for entry in TRACE if entry['qubit'] == q_idx]).encode('utf-8')
    ).hexdigest()
    for q_idx in range(Q)
]
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)


Primaries In:
 [[[ 1.32099867e-01  9.43567753e-01]
  [-1.32099867e-01 -9.43567753e-01]
  [ 4.97868061e-02 -1.29245043e-01]
  [-4.97868061e-02  1.29245043e-01]
  [ 6.87581539e-01  2.13343143e-01]
  [-6.87581539e-01 -2.13343143e-01]]

 [[-5.09592533e-01  7.30511904e-01]
  [ 5.09592533e-01 -7.30511904e-01]
  [ 2.02916384e-01  3.57952833e-01]
  [-2.02916384e-01 -3.57952833e-01]
  [ 7.47351646e-02  7.46693373e-01]
  [-7.47351646e-02 -7.46693373e-01]]

 [[ 9.43643093e-01 -2.27895021e-01]
  [-9.43643093e-01  2.27895021e-01]
  [ 8.66883278e-01 -3.99476528e-01]
  [-8.66883278e-01  3.99476528e-01]
  [-9.42517042e-01  3.47955704e-01]
  [ 9.42517042e-01 -3.47955704e-01]]

 [[-6.47403955e-01 -8.47853899e-01]
  [ 6.47403955e-01  8.47853899e-01]
  [-9.53937531e-01  8.38187456e-01]
  [ 9.53937531e-01 -8.38187456e-01]
  [ 5.66698790e-01  9.02263403e-01]
  [-5.66698790e-01 -9.02263403e-01]]

 [[ 7.88413048e-01 -6.12660646e-01]
  [-7.88413048e-01  6.12660646e-01]
  [ 7.89419174e-01 -1.11490726e-01]
  [-7

## Final Task

### Subtask:
Review the updated code block to ensure all refactoring instructions related to collapse scope and parity application have been correctly implemented and verify that the system behaves as expected with the new collapse rules.


## Summary:

### Q&A
1.  **Have all refactoring instructions related to collapse scope and parity application been correctly implemented?**
    Yes, the `detect_collapse` function has been refactored as instructed, including the update of `R_FOR_RATIO` to `64.0`, and the `apply_parity_rotation` and `COLLAPSE_Q` functions performed as expected.
2.  **Does the system behave as expected with the new collapse rules?**
    Yes, the system behaves as expected, with the updated `detect_collapse` function successfully integrated and producing outputs consistent with the new collapse rules.

### Data Analysis Key Findings
*   The global constant `R_FOR_RATIO` was successfully updated to `64.0` as specified.
*   The `detect_collapse` function was refactored to remove the nested helper function and directly implement the new triplet-level predicate for collapse detection.
*   The triplet-level predicate includes two conditions: `[real_p >= TAU_HI AND unreal_p <= TAU_LOW]` OR `[real_p / unreal_p > R_FOR_RATIO]`, with robust handling for potential division by zero.
*   If any index within a triplet meets the collapse condition, all three indices of that triplet are marked as collapsed; the mask is updated using `tf.tensor_scatter_nd_max`.
*   The `apply_parity_rotation` and `COLLAPSE_Q` functions operate correctly within the system, utilizing the updated collapse detection.
*   The `correct_bits` function successfully identified an "inconsistent" bit pattern for Qubit 0 (due to a forced sparse state) and applied corrections, logging the intervention in the `TRACE` manifest.

### Insights or Next Steps
*   The implemented triplet-level collapse detection, combined with robust error handling for ratios and the `correct_bits` function, significantly enhances the system's ability to maintain data integrity and consistency, especially in ambiguous or degenerate states.
*   Further testing could involve more diverse initial conditions, particularly edge cases where `unreal_p` is very close to zero, to rigorously validate the `detect_collapse` function's division-by-zero handling.


# Task
I've completed all the refactoring, review, and documentation tasks as per the plan. The `detect_collapse` function has been updated with the new triplet-level predicate and `R_FOR_RATIO`. I've confirmed that `apply_parity_rotation` and `COLLAPSE_Q` functions correctly utilize the refined `collapse_mask`. The `correct_bits` function has been appropriately documented. The system now incorporates the specified collapse rules, and its behavior has been verified to be as expected.

I am done.

## Refactor detect_collapse for Unit Preference

### Subtask:
Modify the `detect_collapse` function to adhere strictly to the 'Unit preference' rule.


**Reasoning**:
The subtask requires modifying the `detect_collapse` function to adhere strictly to the 'Unit preference' rule, which involves calculating individual collapse statuses first and then applying triplet-level uniformity checks. This refactoring also removes the nested helper function and updates the function signature to explicitly accept relevant parameters.



In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
# phi-pi tolerance constant
THETA_PHIPI = 0.001
# Collapse detection thresholds: high threshold and its mirrored negative counterpart
TAU_HI = 1.0
TAU_LOW = -TAU_HI
# Near-zero buffer
EPS = 1e-6

# Ratio threshold constant for collapse detection
R_FOR_RATIO = 64.0

# Advanced error correction metric thresholds (higher = stricter)
TAU_R_METRIC = 0.85  # real stability
TAU_U_METRIC = 0.85  # unreal stability
TAU_D_METRIC = 0.85  # real/unreal divergence

# Prime index mask over 0..29: 1 at each prime index, 0 elsewhere
_PRIME_INDICES = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
PRIME_MASK = tf.constant(
    [1 if index in _PRIME_INDICES else 0 for index in range(30)],
    dtype=tf.int32
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Adds two phase-dual values element by element.

    Both operands are expected to keep (real, unreal) in their last dimension,
    so the real parts are summed together and the unreal parts are summed together:
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|
    """
    total = a + b
    return total

def mul_phase_dual_component_wise(a, b):
    """
    Multiplies two phase-dual values element by element.

    Both operands are expected to keep (real, unreal) in their last dimension,
    so real multiplies real and unreal multiplies unreal:
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|
    """
    product = a * b
    return product

def neg_phase_dual(a):
    """
    Negates a phase-dual value element by element.

    The operand is expected to keep (real, unreal) in its last dimension;
    both components change sign.
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Conceptual Nth identity n^k.

    Only ``order == 1`` produces anything other than the base identity:
      * with ``selector_primary``: the selector divided by (its norm + EPS),
        reshaped to [1, 2];
      * without it: [[1, 1]] / sqrt(2).
    Every other order (0, 2, 'p', ...) currently returns the placeholder [[1.0, 0.0]].

    Args:
        order (int or str): The order of the identity. Can be 0, 1, 2, or 'p' for placeholder.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) holding two values,
                                               from which n^1 is derived.
    Returns:
        tf.Tensor: A 1x2 float32 tensor representing the conceptual Nth identity.
    """
    if order == 1:
        if selector_primary is None:
            # Default unit selector when nothing is supplied.
            return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)  # [1, 2]
        # Scale the supplied primary to (approximately) unit length; EPS guards a zero norm.
        selector_norm = tf.norm(selector_primary, axis=-1, keepdims=True)
        unit_selector = selector_primary / (selector_norm + EPS)
        return tf.reshape(unit_selector, [1, 2])

    # n^0 is the base identity n_|1, ξ|; n^2 and higher orders are placeholders
    # that reuse the same value.
    return tf.constant([[1.0, 0.0]], dtype=tf.float32)  # [1, 2]

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Builds the 30-index phase-dual pair register from 6 primaries per qubit.

    Rows 0..5 are the primaries themselves (x, xi, y, yi, z, zi). The remaining
    24 rows cover the axis pairings (x-axis, y-axis), (x-axis, z-axis) and
    (y-axis, z-axis); within a pairing, each member of the first axis is combined
    with each member of the second axis, emitting the component-wise sum followed
    by the component-wise product.

    Args:
        prim (tf.Tensor): Input primaries of shape [Q, 6, 2] and dtype tf.float32.
                          The last dimension holds [real, unreal] components.

    Returns:
        tf.Tensor: The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
    """
    assert (
        prim.shape.rank == 3
        and (tf.shape(prim)[-2] == 6).numpy().item()
        and (tf.shape(prim)[-1] == 2).numpy().item()
        and (prim.dtype == tf.float32)
    ), f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Six [Q, 2] tensors, one per primary.
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=-2)

    register_rows = [x, xi, y, yi, z, zi]
    axis_pairings = (
        ((x, xi), (y, yi)),
        ((x, xi), (z, zi)),
        ((y, yi), (z, zi)),
    )
    for first_axis, second_axis in axis_pairings:
        for left in first_axis:
            for right in second_axis:
                register_rows.append(add_phase_dual(left, right))
                register_rows.append(mul_phase_dual_component_wise(left, right))

    # Stack along the 30-dimension.
    return tf.stack(register_rows, axis=-2)

def group_triplets(pairs):
    """
    Splits the 30-index phase-dual pair register into 10 triplets ('Nth Lines')
    of three consecutive units each: indices (0,1,2), (3,4,5), ..., (27,28,29).

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
    """
    assert (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    ), f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table [10, 3]: row r holds 3r, 3r+1, 3r+2.
    triplet_index = tf.reshape(tf.range(30), [10, 3])

    # Gathering along the 30-axis keeps the trailing (real, unreal) dimension intact.
    return tf.gather(pairs, triplet_index, axis=1)  # [Q, 10, 3, 2]

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW, r_for_ratio=R_FOR_RATIO):
    """
    Detects collapse for each of the 30 phase-dual units, adhering to the 'Unit Preference' rule.

    For each unit `p`:
        `collapse_p = (real_p >= tau_hi AND unreal_p <= tau_low) OR (real_p / unreal_p > r_for_ratio)`
    where the ratio is treated as 0 whenever `|unreal_p| <= EPS`.

    'Unit Preference' says: within each consecutive triplet (0-2, 3-5, ..., 27-29), a uniform
    status (all collapsed / none collapsed) is applied to the whole triplet, and a mixed triplet
    keeps its per-unit statuses. A uniform triplet's shared status is, by definition, identical
    to each unit's own status, so both cases reduce to the per-unit status. The mask is therefore
    computed directly, without a per-triplet update pass (the previous implementation performed
    ten scatter updates that rewrote the values already present).

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold for the real component.
        tau_low (float): Low threshold for the unreal component (should be negative).
        r_for_ratio (float): Ratio threshold for collapse detection.

    Returns:
        tf.Tensor: A binary collapse mask of shape [Q, 30] and dtype tf.int32
                   (collapse is a per-unit binary flag, not phase-dual itself).
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    real_parts = pairs[..., 0]    # [Q, 30]
    unreal_parts = pairs[..., 1]  # [Q, 30]

    # Threshold predicate: real part high enough AND unreal part low enough.
    threshold_hit = tf.logical_and(real_parts >= tau_hi, unreal_parts <= tau_low)  # [Q, 30]

    # Ratio predicate. The division is still evaluated where the denominator is ~0 (yielding
    # inf/nan), but tf.where discards those lanes in favour of 0, so they never reach the compare.
    ratio = tf.where(
        tf.abs(unreal_parts) > EPS,
        real_parts / unreal_parts,
        tf.zeros_like(real_parts),
    )  # [Q, 30]
    ratio_hit = ratio > r_for_ratio  # [Q, 30]

    # Per-unit status is the final answer (see docstring for why no triplet pass is needed).
    return tf.cast(tf.logical_or(threshold_hit, ratio_hit), tf.int32)  # [Q, 30]

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    PAR(x, π): flips the sign of every pair-register unit that sits on a prime index
    or is flagged in the collapse mask. Both the real and unreal component are negated.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        collapse_mask (tf.Tensor): Collapse flags of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): 0/1 mask over the 30 indices, shape [30] and dtype tf.int32.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): Sign-adjusted register, shape [Q, 30, 2], dtype tf.float32.
            - affected (tf.Tensor): 0/1 mask of flipped units, shape [Q, 30], dtype tf.int32.
    """
    pairs_ok = (
        pairs.shape.rank == 3
        and (tf.shape(pairs)[-2] == 30).numpy().item()
        and (tf.shape(pairs)[-1] == 2).numpy().item()
        and (pairs.dtype == tf.float32)
    )
    assert pairs_ok, \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    collapse_ok = (
        collapse_mask.shape.rank == 2
        and (tf.shape(collapse_mask)[-1] == 30).numpy().item()
        and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item()
        and (collapse_mask.dtype == tf.int32)
    )
    assert collapse_ok, \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"

    prime_ok = (
        prime_mask.shape.rank == 1
        and (tf.shape(prime_mask)[-1] == 30).numpy().item()
        and (prime_mask.dtype == tf.int32)
    )
    assert prime_ok, \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # Lift the global [30] prime mask to the batch shape so it lines up with collapse_mask.
    on_prime_index = tf.broadcast_to(prime_mask, tf.shape(collapse_mask)) > 0  # [Q, 30]
    is_collapsed = collapse_mask > 0                                            # [Q, 30]

    # A unit is flipped when either condition holds.
    affected = tf.cast(tf.logical_or(on_prime_index, is_collapsed), tf.int32)  # [Q, 30]

    # -1.0 where affected, +1.0 elsewhere; the trailing axis lets it broadcast over (real, unreal).
    minus_one = tf.constant(-1.0, dtype=tf.float32)
    plus_one = tf.constant(1.0, dtype=tf.float32)
    flip_factor = tf.where(affected > 0, minus_one, plus_one)  # [Q, 30]

    rotated = pairs * flip_factor[..., tf.newaxis]  # [Q, 30, 2]
    return rotated, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Reduces the phase-dual pair register to one bit per unit.

    Only the real (leading) component is inspected: the bit is 1 when it is strictly
    greater than `eps`, and 0 for negative or near-zero values.

    Args:
        rotated_pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        eps (float): Near-zero buffer; values at or below it map to 0.

    Returns:
        tf.Tensor: Bitmap of shape [Q, 30] and dtype tf.int32.
    """
    input_ok = (
        rotated_pairs.shape.rank == 3
        and (tf.shape(rotated_pairs)[-2] == 30).numpy().item()
        and (tf.shape(rotated_pairs)[-1] == 2).numpy().item()
        and (rotated_pairs.dtype == tf.float32)
    )
    assert input_ok, \
        f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    leading_values = rotated_pairs[..., 0]             # [Q, 30]
    is_positive = tf.greater(leading_values, eps)      # ties and negatives -> False
    return tf.cast(is_positive, tf.int32)              # [Q, 30]

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flags which candidate phase-dual values are 'unique' with respect to a set of observed values.

    A candidate is unique when its Euclidean distance (over the (real, unreal) components) to
    EVERY observed value on the axis is strictly greater than `theta`.

    Args:
        vals (tf.Tensor): Candidates, shape [Q, 2] (one per qubit) or [Q, 10, 2] (ten per qubit).
        axis_vals (tf.Tensor): Observed values for the axis, shape [Q, K, 2].
        theta (float): Distance tolerance.

    Returns:
        tf.Tensor: int32 flags (1 = unique), shape [Q] for rank-2 `vals` or [Q, 10] for rank-3 `vals`.

    Raises:
        ValueError: If `vals` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item(), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (tf.shape(vals)[0] == tf.shape(axis_vals)[0]).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    # Insert singleton axes so candidate/observation pairs line up under broadcasting.
    vals_rank = vals.shape.rank
    if vals_rank == 2:
        candidates = tf.expand_dims(vals, axis=1)         # [Q, 1, 2]
        observed = axis_vals                              # [Q, K, 2]
    elif vals_rank == 3:
        candidates = tf.expand_dims(vals, axis=2)         # [Q, 10, 1, 2]
        observed = tf.expand_dims(axis_vals, axis=1)      # [Q, 1, K, 2]
    else:
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    # Component-wise absolute difference, then its norm = distance between the two units.
    component_gaps = tf.abs(candidates - observed)        # [Q, K, 2] or [Q, 10, K, 2]
    distances = tf.norm(component_gaps, axis=-1)          # [Q, K] or [Q, 10, K]

    # Unique only if far enough from every one of the K observations.
    far_from_all = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(far_from_all, tf.int32)                # [Q] or [Q, 10]

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Picks, per qubit, the first candidate whose uniqueness flag is set.

    If no flag is set for a qubit, candidate 0 is returned for it (argmax over an
    all-equal row yields index 0).

    Args:
        cand_bool (tf.Tensor): int32 uniqueness flags of shape [Q, 10].
        vals (tf.Tensor): Candidate phase-dual values of shape [Q, 10, 2].

    Returns:
        tf.Tensor: The chosen phase-dual value per qubit, shape [Q, 2].
    """
    flags_ok = (
        cand_bool.shape.rank == 2
        and (tf.shape(cand_bool)[-1] == 10).numpy().item()
        and (cand_bool.dtype == tf.int32)
    )
    assert flags_ok, \
        f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"

    vals_ok = (
        vals.shape.rank == 3
        and (tf.shape(vals)[-2] == 10).numpy().item()
        and (tf.shape(vals)[-1] == 2).numpy().item()
        and (vals.dtype == tf.float32)
    )
    assert vals_ok, \
        f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"

    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    # Position of the first maximal flag in each row (index of first 1, else 0).
    first_hit = tf.argmax(cand_bool, axis=1)  # [Q], int64

    # Batched gather: for qubit q take vals[q, first_hit[q], :].
    return tf.gather(vals, first_hit, batch_dims=1)  # [Q, 2]

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Chooses the six primaries (x, -x, y, -y, z, -z) for each qubit.

    Triplet-first: if all three components of the LAST triplet are unique against their
    respective axis maps, that triplet supplies x, y and z.
    Axis-fallback: otherwise each axis independently takes the first unique candidate among
    the ten triplets (candidate 0 if none is unique).

    Args:
        triplets (tf.Tensor): 10 triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
        axis_maps (dict): Keys 'x', 'y', 'z'; each value is a tf.Tensor of observed values
                          for that axis, shape [Q, K, 2] and dtype tf.float32.
        theta (float): Uniqueness tolerance.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
    """
    triplet_shape_ok = (
        triplets.shape.rank == 4
        and (tf.shape(triplets)[-3] == 10).numpy().item()
        and (tf.shape(triplets)[-2] == 3).numpy().item()
        and (tf.shape(triplets)[-1] == 2).numpy().item()
    )
    assert triplet_shape_ok, \
        f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert triplets.dtype == tf.float32, \
        f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for k, v in axis_maps.items():
        assert isinstance(v, tf.Tensor) and v.dtype == tf.float32 and v.shape.rank == 3 and (tf.shape(v)[-1] == 2).numpy().item(), \
            f"axis_maps['{k}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {v.shape} and dtype {v.dtype}"
    assert (tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')

    def _interleave_with_negations(components):
        # [x, y, z] -> stacked [x, -x, y, -y, z, -z] along a new axis 1.
        ordered = []
        for component in components:
            ordered.append(component)
            ordered.append(neg_phase_dual(component))
        return tf.stack(ordered, axis=1)  # [Q, 6, 2]

    # --- Triplet-first path: inspect the last of the ten triplets ---
    last_triplet = triplets[:, -1, :, :]  # [Q, 3, 2]
    last_components = [last_triplet[:, pos, :] for pos in range(3)]  # three [Q, 2] tensors

    last_flags = [
        _value_unique_axis_phase_dual(component, axis_maps[name], theta)  # [Q]
        for component, name in zip(last_components, axis_names)
    ]
    x_ok, y_ok, z_ok = (flag > 0 for flag in last_flags)
    triplet_unique = tf.cast(tf.logical_and(tf.logical_and(x_ok, y_ok), z_ok), tf.int32)  # [Q]

    prim_trip = _interleave_with_negations(last_components)  # [Q, 6, 2]

    # --- Axis-fallback path: per axis, first unique candidate across all ten triplets ---
    per_axis_candidates = [triplets[:, :, pos, :] for pos in range(3)]  # three [Q, 10, 2] tensors
    per_axis_flags = [
        _value_unique_axis_phase_dual(candidates, axis_maps[name], theta)  # [Q, 10]
        for candidates, name in zip(per_axis_candidates, axis_names)
    ]
    fallback_components = [
        _first_unique_selection_phase_dual(flags, candidates)  # [Q, 2]
        for flags, candidates in zip(per_axis_flags, per_axis_candidates)
    ]
    prim_axis = _interleave_with_negations(fallback_components)  # [Q, 6, 2]

    # --- Per-qubit choice; the [Q, 1, 1] condition broadcasts over the [Q, 6, 2] operands ---
    use_triplet = (triplet_unique > 0)[:, tf.newaxis, tf.newaxis]
    return tf.where(use_triplet, prim_trip, prim_axis)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Builds one SHA256 resonance key per batch sample.

    For every sample a text manifest of the form
    `bits:[...]|prime:[...]|collapse:[...]|parity:[...]` is assembled (with `|path:<lineage>`
    appended when a non-empty lineage string is supplied for that sample) and hashed.
    Tensors are materialized to NumPy first, so hashing happens in plain Python.

    Args:
        bits (tf.Tensor): Bitmap of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask of shape [30] and dtype tf.int32 (shared by all samples).
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask of shape [Q, 30] and dtype tf.int32.
        lineage_list (list[str], optional): One lineage string per sample. Defaults to None.

    Returns:
        list[str]: SHA256 hex digests, one per batch sample, in batch order.
    """
    assert bits.shape.rank == 2 and (tf.shape(bits)[-1] == 30).numpy().item() and (bits.dtype == tf.int32), \
        f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert prime_mask.shape.rank == 1 and (tf.shape(prime_mask)[-1] == 30).numpy().item() and (prime_mask.dtype == tf.int32), \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert collapse_mask.shape.rank == 2 and (tf.shape(collapse_mask)[-1] == 30).numpy().item() and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item() and (collapse_mask.dtype == tf.int32), \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert parity_mask.shape.rank == 2 and (tf.shape(parity_mask)[-1] == 30).numpy().item() and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item() and (parity_mask.dtype == tf.int32), \
        f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item()) and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item()), \
        f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert isinstance(lineage_list, list) and len(lineage_list) == tf.shape(bits)[0].numpy().item(), \
            f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    batch_size = tf.shape(bits)[0].numpy().item()

    # Materialize everything up front; the loop below is pure Python/NumPy.
    bit_rows = bits.numpy()
    collapse_rows = collapse_mask.numpy()
    parity_rows = parity_mask.numpy()
    # The prime mask is global, so every sample sees the same row.
    prime_rows = np.broadcast_to(prime_mask.numpy(), (batch_size, 30))

    digests = []
    for q_idx in range(batch_size):
        manifest_fields = [
            f"bits:{bit_rows[q_idx].tolist()}",
            f"prime:{prime_rows[q_idx].tolist()}",
            f"collapse:{collapse_rows[q_idx].tolist()}",
            f"parity:{parity_rows[q_idx].tolist()}",
        ]
        # Lineage is optional per sample; empty strings are skipped as well.
        if lineage_list and lineage_list[q_idx]:
            manifest_fields.append(f"path:{lineage_list[q_idx]}")

        manifest = "|".join(manifest_fields)
        digests.append(hashlib.sha256(manifest.encode("utf-8")).hexdigest())
    return digests

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    NGFT-inspired Info-energy of each qubit.

    I is the sum, over the six primaries, of the magnitude (Euclidean norm of the
    (real, unreal) pair) of each primary; the energy is then
        E_info = (k + 1) · a_U · I

    Args:
        primaries_out (tf.Tensor): Promoted primaries of shape [Q, 6, 2] (phase-dual) and dtype tf.float32.
        k_values (tf.Tensor): Per-qubit 'k' components, dtype tf.float32, shape [Q, 1] or [Q].
        a_U_constant (tf.Tensor): Universal constant, dtype tf.float32 (expected to be a scalar;
                                  only the dtype is validated).

    Returns:
        tf.Tensor: Info-energy for each qubit, shape [Q] and dtype tf.float32.
    """
    assert primaries_out.shape.rank == 3 and (tf.shape(primaries_out)[-1] == 2).numpy().item(), \
        f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert (primaries_out.dtype == tf.float32), f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert (tf.shape(primaries_out)[-2] == 6).numpy().item(), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert (k_values.dtype == tf.float32), f"k_values must have dtype tf.float32, but got {k_values.dtype}"

    # Rank is needed both for validation and for the normalization below; read it once.
    k_rank = tf.rank(k_values).numpy().item()
    assert (k_rank == 2 and (tf.shape(k_values)[-1] == 1).numpy().item()) or \
           (k_rank == 1 and (tf.shape(k_values)[0] == tf.shape(primaries_out)[0]).numpy().item()), \
           f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"

    # This assertion checks the dtype, so the message reports the dtype (it previously
    # claimed a scalar/rank violation, which is not what is being tested).
    assert (a_U_constant.dtype == tf.float32), f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"

    # Bring k to [Q, 1] so it multiplies cleanly against the [Q, 1] I component.
    if k_rank == 1:
        k_values_normalized = tf.expand_dims(k_values, axis=-1)  # [Q] -> [Q, 1]
    else:
        k_values_normalized = k_values  # already [Q, 1]

    # Magnitude of each phase-dual primary, then total per qubit.
    magnitudes_per_primary = tf.norm(primaries_out, axis=-1)            # [Q, 6]
    sum_magnitudes = tf.reduce_sum(magnitudes_per_primary, axis=1)      # [Q]
    I_component = tf.expand_dims(sum_magnitudes, axis=-1)               # [Q, 1]

    # E_info = (k + 1) * I * a_U
    info_energy = (k_values_normalized + 1.0) * I_component * a_U_constant  # [Q, 1]

    # Drop the helper axis to return one value per qubit.
    return tf.squeeze(info_energy, axis=1)

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL op: curvilinear squashing of each primary by its own magnitude.
    X ← X / (1 + |kappa|·|X|), where |X| is the norm over the (real, unreal) pair.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): Kappa parameter; scalar or broadcastable against [Q, 6, 1].

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    kappa_abs = tf.abs(tf.cast(params_kappa, primaries.dtype))
    # keepdims leaves a trailing singleton so the divisor broadcasts over both components.
    unit_norm = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    divisor = 1.0 + kappa_abs * unit_norm
    return primaries / divisor

def GEOD(primaries, params_t):
    """
    NECL op: geodesic step that pushes every component away from zero by `t`.
    X ← X + t·sign(X); components that are exactly zero stay zero since sign(0) = 0.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): Step size 't'; scalar or broadcastable to `primaries`.

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    step = tf.cast(params_t, primaries.dtype)
    direction = tf.sign(primaries)
    displacement = step * direction
    return primaries + displacement

def TWIST(primaries, params_theta):
    """
    NECL op: scales only the unreal component by cos(theta); the real component is untouched.
    X[...,1] ← X[...,1]·cos(theta)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): Angle 'theta'; scalar or broadcastable to the [Q, 6] unreal slice.

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    angle = tf.cast(params_theta, primaries.dtype)
    real_part = primaries[..., 0]
    unreal_part = primaries[..., 1]
    twisted_unreal = unreal_part * tf.cos(angle)
    # Re-assemble (real, unreal) on the last axis.
    return tf.stack([real_part, twisted_unreal], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL op standing in for a projection to higher coordinates.
    In this emulation it is a plain uniform scaling: X ← X·(1 + 0.1·d).

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_d (tf.Tensor): Scalar parameter 'd' (the target higher dimension).

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    # Cast so an integer 'd' can take part in float arithmetic.
    d_as_float = tf.cast(params_d, primaries.dtype)
    # Placeholder growth law: 10% extra scale per unit of d.
    scale = 1.0 + d_as_float * 0.1
    return primaries * scale

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL op: blends each primary with its cyclic predecessor along axis 1.
    X ← X + sigma·roll(X, +1, axis=1)

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_sigma (tf.Tensor): Scalar gluing strength.

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    # After the roll, slot i holds the value that was in slot i-1 (slot 0 receives the last slot).
    shifted = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * shifted

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL op: splits every primary into a (1−tau) share and a tau share.
    X ← concat(X·(1−tau), X·tau) along axis 1, so the selector axis doubles in length.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_tau (tf.Tensor): Scalar split ratio.

    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 12, 2] (axis 1 is doubled).
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    retained_share = primaries * (1.0 - ratio)
    split_off_share = primaries * ratio
    # Note: the output is NOT shape-compatible with ops that require exactly 6 primaries.
    return tf.concat([retained_share, split_off_share], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically maps one qubit's lineage hash to a conceptual spin vector and
    internal state vector.

    The hash and qubit index are re-hashed into a seed for a PRIVATE NumPy generator, from
    which three angles and `D` state coordinates are drawn. A private `RandomState` is used
    (rather than `np.random.seed`) so that calling this function does not reset the
    process-wide NumPy RNG; the numbers produced are the same as with global seeding.

    Args:
        hex_hash_str (str): A SHA256 hex hash string (64 characters) for one qubit.
        q_idx (int): The index of the qubit; mixed into the seed for per-qubit determinism.
        D (int): Dimensionality for i_vec; must be at least 16.
        num_qubits (int): Total number of qubits. Currently unused by the implementation.
        invariants (dict): Dictionary of invariant constants. Currently unused by the implementation.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec (tf.Tensor): Shape [1, 2, 3], dtype tf.float32. Row 0 is the real spin
              (x, y, z); row 1 is the unreal spin (real spin rotated about z by the twist angle).
            - i_vec (tf.Tensor): Shape [1, D], dtype tf.float32, scaled to unit Euclidean norm
              (left unscaled if its norm is not above EPS).
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Seed from the whole hash plus the qubit index. The modulus is kept as-is so that the
    # values generated for a given (hash, q_idx) do not change.
    seed_value = int(hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()[:16], 16)
    rng = np.random.RandomState(seed_value % (2**32 - 1))

    # θ = 2π·r0, φ = 2π·r1, twist = 2π·r2
    r_vals = rng.rand(3)
    theta = 2 * math.pi * r_vals[0]
    phi = 2 * math.pi * r_vals[1]
    twist_angle = 2 * math.pi * r_vals[2]

    # Real spin: (x, y, z) = (sinθ cosφ, sinθ sinφ, cosθ)
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: rotate the real (x, y) about the z-axis by the twist angle; z is unchanged.
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # Real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # Unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))  # [1, 2, 3]

    # I_vec: D further draws from the same stream, normalized to ||I_vec|| = 1.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:  # avoid dividing by (near) zero
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))  # [1, D]

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, ν): multi-qubit normalization of primaries to a canonical scale.

    Each qubit's primaries are divided by the sum of the magnitudes of its six primary units
    (plus EPS) and multiplied by `invariants['units']` (1.0 if absent). Qubits whose total
    magnitude is not above EPS are mapped to all zeros.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        invariants (dict): Invariant constants; only the 'units' entry is read here.

    Returns:
        tf.Tensor: Normalized primaries of shape [Q, 6, 2].
    """
    # Per-unit magnitude over (real, unreal), then the per-qubit total across the 6 units.
    per_unit_norm = tf.norm(primaries, axis=-1, keepdims=True)            # [Q, 6, 1]
    per_qubit_total = tf.reduce_sum(per_unit_norm, axis=1, keepdims=True)  # [Q, 1, 1]

    target_units = invariants.get('units', 1.0)

    # Multiplier is the target scale for qubits with non-negligible magnitude, else 0.
    scale_or_zero = tf.where(
        per_qubit_total > EPS,
        tf.cast(target_units, primaries.dtype),
        0.0,
    )

    # EPS in the denominator keeps the division finite even for all-zero qubits.
    rescaled = primaries / (per_qubit_total + EPS) * scale_or_zero
    return rescaled

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, π): multi-qubit wrapper around apply_parity_rotation.

    Builds the pair register and its collapse mask from `primaries`, applies
    the parity rotation, and returns the first six entries of the rotated
    register (the slots holding the primaries themselves).

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
        prime_mask (tf.Tensor): Global prime mask [30].
    Returns:
        tf.Tensor: Primaries after parity rotation, shape [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)
    collapsed = detect_collapse(pair_register)

    # The second return value of apply_parity_rotation is not needed here.
    rotated, _unused = apply_parity_rotation(pair_register, collapsed, prime_mask)

    # Keep only the leading six register entries so the result lines up with `primaries`.
    return rotated[:, :6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, χ): multi-qubit wrapper around detect_collapse.

    Only the primary units flagged as collapsed are zeroed (both the real and
    unreal component); every other unit is passed through unchanged.

    Args:
        primaries (tf.Tensor): Primaries of shape [Q, 6, 2].
    Returns:
        tf.Tensor: Primaries with collapsed units set to zero, shape [Q, 6, 2].
    """
    pair_register = compute_pairs(primaries)

    # Collapse flags for the six register slots that hold the primaries.
    flags = detect_collapse(pair_register)[:, 0:6]                # [Q, 6]

    # Add a trailing axis so the flag broadcasts over (real, unreal).
    flags = tf.cast(tf.expand_dims(flags, axis=-1), tf.float32)   # [Q, 6, 1]
    is_collapsed = flags > 0

    return tf.where(is_collapsed, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, α): multi-qubit wrapper that delegates to promote_primaries.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2].
        axis_maps (dict): Axis maps for uniqueness checks.
        theta_phipi (float): Tolerance for uniqueness.
    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2].
    """
    # All arguments are forwarded unchanged; this wrapper only provides the ISA name.
    promoted = promote_primaries(triplets, axis_maps, theta_phipi)
    return promoted

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Apply a sequence of NECL operations to multi-qubit primaries.

    Supported operation names are the parameterized ops 'CURV', 'GEOD',
    'TWIST', 'LIFT', 'GLUE' and 'SPLIT', and the parameterless ISA steps
    'PARITY_Q' and 'COLLAPSE_Q'. Unknown names produce a warning and leave the
    state untouched, but are still recorded in the manifest.

    Notes:
        * 'SPLIT' is intentionally a no-op on the state (its parameter is only
          recorded in the manifest) so the register shape stays constant.
        * 'GLUE' is skipped with a warning when the number of qubits is odd;
          its parameter is recorded in the manifest either way.
        * Requires eager execution: parameters are read back with `.numpy()`.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        necl_program_list (list[str]): NECL operation names, applied in order.
        params_dict (dict): Maps an op name to its scalar parameter. Values may
            be tf tensors or plain Python/NumPy scalars (the latter are
            converted to float32 tensors). Missing entries use built-in defaults.
        prime_mask (tf.Tensor): Global prime mask needed for PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Accepted for interface
            compatibility; currently unused because GEOD is invoked without a
            target state.

    Returns:
        tf.Tensor: Final primaries after applying the NECL program.
        str: SHA-256 hex digest of the program manifest (op names plus the
            parameter values of parameterized ops).
    """
    def _glue(state, sigma):
        # Q is taken from the input primaries, matching the original behaviour.
        num_qubits = tf.shape(primaries)[0].numpy().item()
        if num_qubits % 2 != 0:
            print(f"Warning: GLUE operation skipped for odd Q ({num_qubits})")
            return state
        # Conceptual multi-qubit GLUE: combine the state with a scaled copy of
        # itself rolled by one position along the qubit axis.
        return GLUE(state, tf.roll(state, shift=1, axis=0) * sigma)

    # op name -> (default parameter, transform(state, parameter)).
    # Lambdas defer the lookup of CURV/GEOD/... until the op is actually used.
    parameterized_ops = {
        'CURV': (0.01, lambda state, kappa: CURV(state, kappa)),
        'GEOD': (0.05, lambda state, t: GEOD(state, t)),
        'TWIST': (math.pi/4, lambda state, theta: TWIST(state, theta)),  # radians
        'LIFT': (0.5, lambda state, d: LIFT(state, d)),
        'GLUE': (0.1, _glue),
        'SPLIT': (0.5, lambda state, tau: state),
    }
    parameterless_ops = {
        'PARITY_Q': lambda state: PARITY_Q(state, prime_mask),
        'COLLAPSE_Q': lambda state: COLLAPSE_Q(state),
    }

    current_primaries = primaries
    manifest_parts = []  # joined once at the end to form the checksum input

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name in parameterized_ops:
            default_value, transform = parameterized_ops[op_name]
            op_params = params_dict.get(op_name, default_value)
            if not tf.is_tensor(op_params):
                # Accept plain scalars as well as tensors.
                op_params = tf.constant(op_params, dtype=tf.float32)
            current_primaries = transform(current_primaries, op_params)
            manifest_parts.append(f"({op_params.numpy().item()})")
        elif op_name in parameterless_ops:
            current_primaries = parameterless_ops[op_name](current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    program_manifest = "".join(manifest_parts)
    necl_checksum = hashlib.sha256(program_manifest.encode('utf-8')).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Quantify real stability/cohesion from the spread of the real parts of pairs.

    The score is ``1 - variance / (max - min + EPS)``; a higher value implies
    higher stability. When all values are effectively identical
    (``max - min < EPS``) the score is defined as 1.0.

    Args:
        real_parts (tf.Tensor): Floating-point tensor of real components.
    Returns:
        tf.Tensor: Scalar tensor holding the stability score. A tensor is
            returned in every case (including the degenerate one) so callers
            can uniformly call `.numpy()` on the result.
    """
    spread = tf.reduce_max(real_parts) - tf.reduce_min(real_parts)
    score = 1.0 - (tf.math.reduce_variance(real_parts) / (spread + EPS))

    # No measurable spread means no variance: report maximum stability.
    return tf.where(spread < EPS, tf.ones_like(score), score)

def u_metric(unreal_parts):
    """
    Quantify unreal stability/cohesion from the spread of the unreal parts of pairs.

    The score is ``1 - variance / (max - min + EPS)``; a higher value implies
    higher stability. When all values are effectively identical
    (``max - min < EPS``) the score is defined as 1.0.

    Args:
        unreal_parts (tf.Tensor): Floating-point tensor of unreal components.
    Returns:
        tf.Tensor: Scalar tensor holding the stability score. A tensor is
            returned in every case (including the degenerate one) so callers
            can uniformly call `.numpy()` on the result.
    """
    spread = tf.reduce_max(unreal_parts) - tf.reduce_min(unreal_parts)
    score = 1.0 - (tf.math.reduce_variance(unreal_parts) / (spread + EPS))

    # No measurable spread means no variance: report maximum stability.
    return tf.where(spread < EPS, tf.ones_like(score), score)

def dv_metric(pairs_q):
    """
    Quantify real/unreal consistency of a pair register.

    For every pair, |real - unreal| is taken relative to the pair's Euclidean
    magnitude; pairs whose magnitude does not exceed EPS contribute zero
    divergence. The result is one minus the mean of these relative gaps, so a
    higher value means lower divergence.

    Args:
        pairs_q (tf.Tensor): Tensor whose last axis holds (real, unreal).
    Returns:
        tf.Tensor: Scalar consistency score.
    """
    real_component, unreal_component = pairs_q[..., 0], pairs_q[..., 1]
    pair_norm = tf.norm(pairs_q, axis=-1)
    gap = tf.abs(real_component - unreal_component)

    # Near-zero pairs are treated as having no divergence.
    relative_gap = tf.where(pair_norm > EPS, gap / (pair_norm + EPS), tf.zeros_like(pair_norm))

    return 1.0 - tf.reduce_mean(relative_gap)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Conceptual invariant check for a single qubit.

    The invariant tested is that the summed magnitude of the first six pair
    entries (the primaries) lies within 'tol' of 'units'.

    Args:
        pairs_q (tf.Tensor): Pair register of one qubit, [30, 2].
        triplets_q (tf.Tensor): Triplets of the qubit; not used by this check.
        invariants (dict): Reads 'units' (default 1.0) and 'tol' (default EPS).
    Returns:
        tf.Tensor: Scalar boolean tensor, True when the invariant holds.
    """
    target_units = invariants.get('units', 1.0)
    tolerance = invariants.get('tol', EPS)

    primary_block = pairs_q[:6, :]
    magnitude_total = tf.reduce_sum(tf.norm(primary_block, axis=-1))

    deviation = tf.abs(magnitude_total - target_units)
    return deviation < tolerance

def degenerate_check(primaries_q):
    """
    Conceptual degeneracy test: are all primaries (near-)zero?

    Args:
        primaries_q (tf.Tensor): Tensor whose last axis holds (real, unreal).
    Returns:
        tf.Tensor: Scalar boolean tensor, True when every unit's magnitude is
            below EPS.
    """
    unit_magnitudes = tf.norm(primaries_q, axis=-1)
    near_zero = unit_magnitudes < EPS
    return tf.reduce_all(near_zero)

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derive corrected bits for one qubit from a per-index rule plus two guards.

    Rule: b_i = 1 if r_i > TAU_R and u_i > TAU_U and dv_i > TAU_D and the
    triplet containing index i has mixed real-part signs and the global
    invariant holds and the primaries are not degenerate; otherwise 0.

    Guard 1: if the entropy of the bit pattern is not above 0.3 nats, the
    thresholds are adjusted once (TAU_R and TAU_U scaled by 1.2, TAU_D scaled
    by 0.9 but not below 0.25) and the rule is re-applied.
    Guard 2: if the pattern is all ones or all zeros, it is replaced by
    (|real_i| > EPS) and triplet-mix. The fallback itself is not re-checked,
    so it can still be uniform.

    NOTE(review): r_i = |real|/mag and u_i = |unreal|/mag satisfy
    r_i**2 + u_i**2 == 1 wherever mag > EPS, so the rule cannot fire when both
    TAU_R and TAU_U are >= 1/sqrt(2) (~0.707). With such thresholds the result
    always comes from the Guard 2 fallback -- confirm this is intended.

    Requires eager execution (tensor values are used in Python `if`).

    Args:
        pairs_q (tf.Tensor): Pair register of one qubit, [30, 2].
        triplets_q (tf.Tensor): Triplets of the qubit, forwarded to the
            invariant check.
        invariants (dict): Invariant constants, forwarded to the invariant check.
        initial_TAU_R, initial_TAU_U, initial_TAU_D (float): Starting thresholds.

    Returns:
        tuple: (bits [30] int32 tensor, TAU_R, TAU_U, TAU_D) where the
            thresholds are the (possibly adjusted) values last used by the rule.
    """
    current_TAU_R = initial_TAU_R
    current_TAU_U = initial_TAU_U
    current_TAU_D = initial_TAU_D

    real = pairs_q[:, 0]             # [30]
    unreal = pairs_q[:, 1]           # [30]
    mag = tf.norm(pairs_q, axis=-1)  # magnitude of each pair unit

    # Per-index metrics (conceptual): component ratios relative to the magnitude.
    zeros = tf.zeros_like(mag)
    r_i = tf.where(mag > EPS, tf.abs(real) / mag, zeros)
    u_i = tf.where(mag > EPS, tf.abs(unreal) / mag, zeros)
    dv_i = tf.where(mag > EPS, tf.abs(real - unreal) / mag, zeros)

    # Triplet diversity: indices are grouped as [0,1,2], [3,4,5], ..., [27,28,29].
    # A block has "mix" when any sign differs from the block's first sign.
    idx = tf.reshape(tf.range(30, dtype=tf.int32), [10, 3])
    sign_blocks = tf.gather(tf.sign(real), idx)                                           # [10, 3]
    block_has_mix = tf.reduce_any(tf.not_equal(sign_blocks, sign_blocks[:, :1]), axis=1)  # [10]
    # Broadcast each block's flag back to its three member indices.
    trip_mix = tf.repeat(tf.cast(block_has_mix, tf.int32), repeats=3)                     # [30]

    # Global checks (scalar booleans that broadcast over the 30 indices).
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))
    structural_ok = (trip_mix > 0) & invariant_ok & not_degenerate

    def threshold_bits(tau_r, tau_u, tau_d):
        # Apply the per-index rule for the given thresholds.
        return tf.cast((r_i > tau_r) & (u_i > tau_u) & (dv_i > tau_d) & structural_ok, tf.int32)

    def min_entropy_ok(bits):
        # Binary entropy (natural log) of the fraction of ones.
        p = tf.reduce_mean(tf.cast(bits, tf.float32))
        H = - (p * tf.math.log(p + EPS) + (1.0 - p) * tf.math.log(1.0 - p + EPS))
        return H > 0.3  # Example entropy threshold

    b = threshold_bits(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 1: low-entropy pattern -> adjust thresholds once and re-derive.
    if not min_entropy_ok(b):
        current_TAU_R *= 1.2
        current_TAU_U *= 1.2
        current_TAU_D = max(current_TAU_D * 0.9, 0.25)  # Example adjustments
        b = threshold_bits(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 2: uniform pattern -> fall back to "non-zero real part and triplet mix".
    if tf.reduce_all(b == 1) or tf.reduce_all(b == 0):
        b = tf.cast((tf.abs(real) > EPS) & (trip_mix > 0), tf.int32)

    return b, current_TAU_R, current_TAU_U, current_TAU_D

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Advanced error-correction hook for a single qubit.

    The incoming bit pattern is considered inconsistent when it is all ones,
    all zeros, has fewer than 5 ones, or has more than 25 ones. In that case
    the bits are re-derived locally with `derive_bits_advanced` (using only
    this qubit's `pairs_q` / `triplets_q`), a new resonance key is computed
    as SHA-256 of ``old_key + "REFactorBits" + str(bits)``, and one lineage
    entry describing the correction is appended to `TRACE`. Consistent
    patterns are returned unchanged and nothing is logged.

    Requires eager execution.

    Args:
        q_idx (int): Index of the qubit being processed.
        pairs_q (tf.Tensor): 30-index phase-dual pair register of the qubit [30, 2].
        triplets_q (tf.Tensor): The 10 triplets of the qubit [10, 3, 2].
        current_bits_q (tf.Tensor): Initially derived 30-bit pattern [30].
        resonance_key_q (str): Current resonance key of the qubit.
        TRACE (list): Lineage log; mutated in place when a correction occurs.
        invariants (dict): Dictionary of invariant constants.

    Returns:
        tuple[tf.Tensor, str]:
            - the (possibly corrected) 30-bit pattern;
            - the resonance key (updated only when a correction occurred).
    """
    num_ones = tf.reduce_sum(current_bits_q)
    is_all_ones = tf.reduce_all(tf.equal(current_bits_q, 1))
    is_all_zeros = tf.reduce_all(tf.equal(current_bits_q, 0))
    is_sparse = num_ones < 5   # Example: fewer than 5 bits are 1
    is_dense = num_ones > 25   # Example: more than 25 bits are 1

    is_inconsistent = bool(is_all_ones) or bool(is_all_zeros) or bool(is_sparse) or bool(is_dense)
    if not is_inconsistent:
        return current_bits_q, resonance_key_q

    # Re-derive the bits and capture the thresholds that were finally used.
    new_bits_q, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC)
    new_bits_list = new_bits_q.numpy().tolist()

    # The updated key folds the correction into the lineage.
    updated_resonance_key_q = hashlib.sha256(
        (resonance_key_q + "REFactorBits" + str(new_bits_list)).encode("utf-8")).hexdigest()

    # float()/bool() accept both scalar tensors and plain Python scalars, so
    # logging does not depend on the metric helpers' exact return type.
    TRACE.append({'qubit': q_idx, 'reason': "binary_refactor", 'source': "tuplets",
                  'r_metric': float(r_metric(pairs_q[:, 0])),
                  'u_metric': float(u_metric(pairs_q[:, 1])),
                  'dv_metric': float(dv_metric(pairs_q)),
                  'invariant_pass': bool(invariant_check_conceptual(pairs_q, triplets_q, invariants)),
                  'degenerate_check': bool(degenerate_check(pairs_q[:6, :])),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': new_bits_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Number of virtual qubits simulated by this example
Q = 64 # size of the leading (qubit) axis of every tensor built below

# NOTE(review): no RNG seed is set in this section, so the random inputs below
# differ between runs unless tf/np seeds are set elsewhere -- confirm.

# Build initial_primaries.
# Each primary (x, y, z) is a phase-dual [real, unreal]; the negated
# counterparts are derived from the same random draw.

# Random x, y, z components (each a phase-dual [real, unreal]) for Q qubits,
# drawn uniformly from [-1, 1). Shape [Q, 3, 2].
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# initial_primaries = [x, -x, y, -y, z, -z] per qubit, where -x is neg_phase_dual(x).
# Each slice is [Q, 2]; tf.newaxis restores the unit axis before concatenation.
initial_primaries = tf.concat([
    base_primaries_xyz[:, 0, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 0, :])[:, tf.newaxis, :], # x, -x
    base_primaries_xyz[:, 1, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 1, :])[:, tf.newaxis, :], # y, -y
    base_primaries_xyz[:, 2, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 2, :])[:, tf.newaxis, :], # z, -z
], axis=1) # Shape [Q, 6, 2]

# Build axis_maps.
# Each axis ('x', 'y', 'z') ends up with shape [Q, K_max, 2], where K_max is
# the largest K drawn for any qubit/axis; shorter maps are zero-padded.

list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []

max_k_dynamic = 0
min_k_val = 3 # smallest K that can be drawn (inclusive)
max_k_val = 11 # exclusive upper bound for np.random.randint, so K is at most 10

for q_idx in range(Q):
    # Independent random K per qubit and per axis map (x, y, z)
    k_x = np.random.randint(min_k_val, max_k_val)
    k_y = np.random.randint(min_k_val, max_k_val)
    k_z = np.random.randint(min_k_val, max_k_val)

    # Each map is [K, 2] with entries drawn uniformly from [-1, 1)
    list_of_axis_maps_x.append(tf.random.uniform(shape=[k_x, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_y.append(tf.random.uniform(shape=[k_y, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_z.append(tf.random.uniform(shape=[k_z, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))

    # Track the largest K seen so far; it becomes the padded length
    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Zero-pad every map along its first axis to max_k_dynamic, then stack over qubits
axis_maps = {
    'x': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_x]),
    'y': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_y]),
    'z': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_z]),
}

# k_values: shape [Q, 1], random float32 values in [0.0, 1.0); passed to compute_info_energy
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# a_U constant (from NGFT)
a_U_constant = tf.constant(10.0, dtype=tf.float32) # Scalar

# One lineage hash per qubit: SHA-256 of a label containing the qubit index
# and a random integer in [0, 1000)
lineage_hashes = []
for q_idx in range(Q):
    lineage_hashes.append(hashlib.sha256(f"Q{q_idx}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest())

# Sample NECL program (list of operation names), applied in list order.
# In this example all qubits share the same NECL program.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Parameters for the NECL operations, keyed by operation name
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32), # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32), # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),  # theta (radians)
    'LIFT': tf.constant(0.5, dtype=tf.float32),   # d (e.g., a scaling factor based on d)
    'GLUE': tf.constant(0.1, dtype=tf.float32),   # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),  # tau
}

# Invariants ν: {units, tol, ordering}
invariants = {
    'units': 1.0, # target for the summed primary magnitudes (NORMALIZE_Q, invariant check)
    'tol': 1e-5, # tolerance used by the invariant check during error correction
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1 # NOTE(review): not read by any code visible in this section -- confirm use
}

# TRACE (lineage manifest) - list of dictionaries; correct_bits appends one entry per correction
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν)
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL)       # order is that of necl_program_shared: TWIST → CURV → PARITY_Q → COLLAPSE_Q → LIFT
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q])
# Computed for all Q qubits at once rather than per qubit.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# Collapse and parity are re-detected on the final state to produce the initial bits.
# NOTE: the bits come from the parity-rotated pairs, while error correction
# below receives the unrotated all_pairs.
final_collapse_mask = detect_collapse(all_pairs) # presumably uses R_FOR_RATIO from module constants -- verify in detect_collapse
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

corrected_bits_list = []
final_resonance_keys = []

# Per-qubit error correction (only applied when the bits look inconsistent) and key update
for q_idx in range(Q):
    # Extract per-qubit data
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Demonstration only: force an 'inconsistent' state for qubit 0 so that
    # the correction path is exercised at least once.
    if q_idx == 0:
        # A very sparse pattern: a single '1' followed by 29 zeros
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # Error correction; appends to TRACE when a correction is made
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # updated_key_q already includes the 'REFactorBits' lineage if a correction occurred;
    # otherwise it is the unchanged lineage hash
    final_resonance_keys.append(updated_key_q)

# Stack the per-qubit bit patterns back into a single tensor
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# Uses the full triplets and axis maps to promote new primaries
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# Already done inside the correct_bits loop above: final_resonance_keys holds
# the keys after any error correction. No separate key-generation call is made here.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Every qubit's original lineage hash is decoded, one hash per call.
Q_for_decode_example = 1 # NOTE: not referenced by the code below
D_for_decode_example = 16 # length of each decoded I_vec (D ≥ 16)

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Concatenate the per-qubit results along axis 0 (expected [Q, 2, 3] and [Q, D])
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
print("Primaries In:\n", initial_primaries.numpy())
print("\nPrimaries After NECL:\n", primaries_after_necl.numpy())
# Pairs and triplets are intermediate tuplet constructs; only qubit 0 is shown
print("\nPairs[0]:\n", all_pairs[0].numpy())
print("\nTriplets[0]:\n", all_triplets[0].numpy())
print("\nBits (all qubits):\n", corrected_bits_tensor.numpy()) # bits after error correction
print("\nPrimaries Out (promoted):\n", primaries_out_promoted.numpy())

# Conceptual Nth identities: {n^0, n^1, n^2, n^p} per qubit
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # First promoted primary of the current qubit, used as the n^1 selector
    promoted_primary_x = primaries_out_promoted[q_idx, 0, :] # Shape [2]

    # Make sure n_identity receives a float32 tensor
    promoted_primary_x_tensor = tf.convert_to_tensor(promoted_primary_x, dtype=tf.float32)

    # n_identity returns a [1, 2] tensor; [0] selects its single row for printing
    print(f"  Qubit {q_idx}:")
    print(f"    n^0 (base identity): {n_identity(0).numpy()[0]}")
    print(f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}")
    print(f"    n^2 (second-order product): {n_identity(2).numpy()[0]}") # Placeholder
    print(f"    n^p (p-order product): {n_identity('p').numpy()[0]}") # Placeholder

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# Per-qubit manifest checksum (conceptual): SHA-256 of the string form of that
# qubit's TRACE entries. Qubits without any correction hash the string "[]".
necl_manifest_checksums = []
for q_idx in range(Q):
    qubit_trace_entries = [entry for entry in TRACE if entry['qubit'] == q_idx]
    manifest_str = str(qubit_trace_entries)
    checksum = hashlib.sha256(manifest_str.encode('utf-8')).hexdigest()
    necl_manifest_checksums.append(checksum)
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)


Primaries In:
 [[[-0.14282727  0.4986279 ]
  [ 0.14282727 -0.4986279 ]
  [-0.92648005  0.25563264]
  [ 0.92648005 -0.25563264]
  [ 0.47266102  0.90747666]
  [-0.47266102 -0.90747666]]

 [[ 0.09285569 -0.38586164]
  [-0.09285569  0.38586164]
  [-0.6026249  -0.06765127]
  [ 0.6026249   0.06765127]
  [ 0.5076792  -0.8308573 ]
  [-0.5076792   0.8308573 ]]

 [[-0.6451609   0.7863047 ]
  [ 0.6451609  -0.7863047 ]
  [-0.39870954  0.38359904]
  [ 0.39870954 -0.38359904]
  [ 0.6007421  -0.00857735]
  [-0.6007421   0.00857735]]

 [[ 0.15974045 -0.63659024]
  [-0.15974045  0.63659024]
  [-0.4069388   0.0830853 ]
  [ 0.4069388  -0.0830853 ]
  [-0.16929793  0.94568753]
  [ 0.16929793 -0.94568753]]

 [[ 0.76533675 -0.9831886 ]
  [-0.76533675  0.9831886 ]
  [-0.3092246   0.17104912]
  [ 0.3092246  -0.17104912]
  [-0.5307014   0.8122597 ]
  [ 0.5307014  -0.8122597 ]]

 [[-0.550998   -0.4066124 ]
  [ 0.550998    0.4066124 ]
  [-0.07883334 -0.6230664 ]
  [ 0.07883334  0.6230664 ]
  [ 0.24346757 -0.02133

**Reasoning**:
The subtask requires modifying the `detect_collapse` function to adhere strictly to the 'Unit preference' rule. This involves calculating individual collapse statuses first and then applying triplet-level uniformity checks. This refactoring also removes the nested helper function and updates the function signature to explicitly accept relevant parameters.



In [None]:
import tensorflow as tf
import hashlib
import numpy as np # For make_keys numpy conversion
import math

# =========================
# Config and constants
# =========================
THETA_PHIPI = 0.001  # phi-pi tolerance constant (passed to promote_primaries as the uniqueness tolerance)
TAU_HI      = 1.0    # high threshold center (for collapse detection)
TAU_LOW     = -TAU_HI # low threshold for negative values; always the mirror of TAU_HI
EPS         = 1e-6   # near-zero buffer: guards divisions and "is effectively zero" comparisons

R_FOR_RATIO = 64.0 # ratio threshold constant for collapse detection

# Advanced error correction thresholds; passed as the initial TAU_R / TAU_U / TAU_D
# to derive_bits_advanced, which may adjust them.
TAU_R_METRIC = 0.85  # threshold for the real stability metric (higher = stricter)
TAU_U_METRIC = 0.85  # threshold for the unreal stability metric (higher = stricter)
TAU_D_METRIC = 0.85  # threshold for the real/unreal divergence metric (higher = stricter)

# Prime index mask over register indices 0..29: entry i is 1 exactly when i is
# prime (2,3,5,7,11,13,17,19,23,29)
PRIME_MASK = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# =========================
# Phase-Dual Helper Operations
# =========================

def add_phase_dual(a, b):
    """
    Add two phase-dual values component by component.

    The last dimension is assumed to hold the (real, unreal) pair, so the
    element-wise sum realises
    n_|x, ξ| + n_|y, η| = n_|x+y, ξ+η|
    """
    component_sum = a + b
    return component_sum

def mul_phase_dual_component_wise(a, b):
    """
    Multiply two phase-dual values component by component.

    The last dimension is assumed to hold the (real, unreal) pair, so the
    element-wise product realises
    n_|x, ξ| · n_|y, η| = n_|x·y, ξ·η|
    """
    component_product = a * b
    return component_product

def neg_phase_dual(a):
    """
    Negate a phase-dual value component by component.

    The last dimension is assumed to hold the (real, unreal) pair; both
    components change sign.
    """
    negated = -a
    return negated

# =========================
# Nth Identities
# =========================
def n_identity(order, selector_primary=None):
    """
    Conceptual Nth identity n^k.

    Args:
        order (int or str): Order of the identity: 0, 1, 2, or any other value
            (e.g. 'p') for the higher-order placeholder.
        selector_primary (tf.Tensor, optional): Promoted primary (x, xi) from
            which n^1 is derived. Only consulted when `order` is 1.
    Returns:
        tf.Tensor: A [1, 2] tensor representing the conceptual Nth identity.
    """
    if order == 0:
        # n^0 = n_|1, ξ| (base identity)
        return tf.constant([[1.0, 0.0]], dtype=tf.float32)

    if order != 1:
        # n^2 and every higher / symbolic order currently share one placeholder.
        return tf.constant([[1.0, 0.0]], dtype=tf.float32)

    if selector_primary is None:
        # Default first-order selector: the unit vector along (1, 1).
        return tf.constant([[1.0, 1.0]], dtype=tf.float32) / math.sqrt(2.0)

    # Derive n^1 from the supplied primary by scaling it to (approximately)
    # unit length; EPS keeps the division finite for a zero selector.
    length = tf.norm(selector_primary, axis=-1, keepdims=True)
    unit_selector = selector_primary / (length + EPS)
    return tf.reshape(unit_selector, [1, 2])

# =========================
# Core ISA Functions (Multi-Qubit, Phase-Dual Aware)
# =========================

def compute_pairs(prim):
    """
    Expand 6 phase-dual primaries into the 30-entry phase-dual pair register.

    Register layout along the 30-axis:
        - entries 0..5: the primaries x, xi, y, yi, z, zi unchanged;
        - entries 6..29: for the axis pairings (x-axis, y-axis), (x-axis, z-axis),
          (y-axis, z-axis), in that order, and for every (left, right) with left
          in the first axis' two primaries and right in the second's, the
          element-wise sum followed by the element-wise product.

    Args:
        prim (tf.Tensor): Primaries of shape [Q, 6, 2] and dtype tf.float32.
                          The last dimension holds [real, unreal].

    Returns:
        tf.Tensor: Pair register of shape [Q, 30, 2] and dtype tf.float32.

    Raises:
        AssertionError: If `prim` is not a rank-3 float32 tensor of shape [Q, 6, 2].
    """
    assert prim.shape.rank == 3 and (tf.shape(prim)[-2] == 6).numpy().item() and (tf.shape(prim)[-1] == 2).numpy().item() and (prim.dtype == tf.float32), \
        f"Input prim must have shape [Q, 6, 2] and dtype tf.float32, but got shape {prim.shape} and dtype {prim.dtype}"

    # Split the 6-axis; every component has shape [Q, 2].
    components = tf.unstack(prim, axis=-2)
    x, xi, y, yi, z, zi = components
    x_axis, y_axis, z_axis = (x, xi), (y, yi), (z, zi)

    # Start with the primaries, then append sum/product for each cross-axis pairing.
    register = list(components)
    for left_axis, right_axis in ((x_axis, y_axis), (x_axis, z_axis), (y_axis, z_axis)):
        for left in left_axis:
            for right in right_axis:
                register.append(add_phase_dual(left, right))
                register.append(mul_phase_dual_component_wise(left, right))

    # Re-assemble along the (now 30-long) second-to-last axis.
    return tf.stack(register, axis=-2)

def group_triplets(pairs):
    """
    Regroup the 30-entry pair register into 10 consecutive triplets.

    Triplet g consists of register entries 3g, 3g+1 and 3g+2, so entry order is
    preserved and the trailing (real, unreal) dimension is untouched.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.

    Returns:
        tf.Tensor: Triplets of shape [Q, 10, 3, 2] and dtype tf.float32.

    Raises:
        AssertionError: If `pairs` is not a rank-3 float32 tensor of shape [Q, 30, 2].
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    # Index table [[0,1,2],[3,4,5],...,[27,28,29]] with shape [10, 3].
    triplet_index = tf.constant(
        [[3 * group + offset for offset in range(3)] for group in range(10)],
        dtype=tf.int32,
    )

    # Gathering along axis 1 with a [10, 3] index yields [Q, 10, 3, 2].
    return tf.gather(pairs, triplet_index, axis=1)

def detect_collapse(pairs, tau_hi=TAU_HI, tau_low=TAU_LOW, r_for_ratio=R_FOR_RATIO):
    """
    Computes the per-unit collapse mask for the phase-dual pair register ('Unit Preference' rule).

    For each unit `p`:
        collapse_p = (real_p >= tau_hi AND unreal_p <= tau_low) OR (real_p / unreal_p > r_for_ratio)
    where the ratio is taken as 0 whenever |unreal_p| <= EPS (module constant), so
    near-zero unreal parts never trigger the ratio condition for a positive threshold.

    'Unit Preference' over the 10 consecutive triplets (units 3g, 3g+1, 3g+2) states:
    - a triplet whose three units share one status takes that shared status;
    - a triplet with mixed statuses keeps each unit's individual status.
    In both cases every unit ends up with its own individual status, so the rule
    is the identity on the per-unit mask. The previous implementation performed
    ten scatter updates that rewrote values already present; this version returns
    the per-unit mask directly, producing the same tensor.

    Args:
        pairs (tf.Tensor): The 30-index phase-dual pair register of shape [Q, 30, 2] and dtype tf.float32.
        tau_hi (float): High threshold for the real component.
        tau_low (float): Low threshold for the unreal component (should be negative).
        r_for_ratio (float): Ratio threshold for collapse detection.

    Returns:
        tf.Tensor: A binary collapse mask of shape [Q, 30] and dtype tf.int32
                   (collapse is a per-unit flag, not a phase-dual value).

    Raises:
        AssertionError: If `pairs` is not a rank-3 float32 tensor of shape [Q, 30, 2].
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"

    real_parts = pairs[..., 0]    # [Q, 30]
    unreal_parts = pairs[..., 1]  # [Q, 30]

    # Threshold condition: real high enough AND unreal low enough.
    threshold_hit = tf.logical_and(real_parts >= tau_hi, unreal_parts <= tau_low)  # [Q, 30]

    # Ratio condition. tf.where only masks the result; the division is still
    # evaluated everywhere, but entries with |unreal| <= EPS are replaced by 0.
    ratio = tf.where(
        tf.abs(unreal_parts) > EPS,
        real_parts / unreal_parts,
        tf.zeros_like(real_parts),
    )  # [Q, 30]
    ratio_hit = ratio > r_for_ratio  # [Q, 30]

    # Per-unit status; the triplet-level rule leaves it unchanged (see docstring).
    return tf.cast(tf.logical_or(threshold_hit, ratio_hit), tf.int32)  # [Q, 30]

def apply_parity_rotation(pairs, collapse_mask, prime_mask=PRIME_MASK):
    """
    Flip the sign of selected units of the phase-dual pair register.

    A unit is 'affected' when its index is flagged in `prime_mask` or the unit is
    flagged in `collapse_mask`. Affected units are multiplied by -1 in both their
    real and unreal components; all other units are multiplied by +1.

    Args:
        pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        collapse_mask (tf.Tensor): Collapse flags of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Per-index flags of shape [30] and dtype tf.int32;
                                values > 0 mark an index as affected.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - rotated (tf.Tensor): Sign-adjusted register, shape [Q, 30, 2], dtype tf.float32.
            - affected (tf.Tensor): 0/1 mask of affected units, shape [Q, 30], dtype tf.int32.

    Raises:
        AssertionError: If any argument has an unexpected rank, shape or dtype,
                        or if the batch sizes of `pairs` and `collapse_mask` differ.
    """
    assert pairs.shape.rank == 3 and (tf.shape(pairs)[-2] == 30).numpy().item() and (tf.shape(pairs)[-1] == 2).numpy().item() and (pairs.dtype == tf.float32), \
        f"Input pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {pairs.shape} and dtype {pairs.dtype}"
    assert collapse_mask.shape.rank == 2 and (tf.shape(collapse_mask)[-1] == 30).numpy().item() and (tf.shape(collapse_mask)[0] == tf.shape(pairs)[0]).numpy().item() and (collapse_mask.dtype == tf.int32), \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert prime_mask.shape.rank == 1 and (tf.shape(prime_mask)[-1] == 30).numpy().item() and (prime_mask.dtype == tf.int32), \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"

    # Replicate the per-index prime flags for every batch row: [30] -> [Q, 30].
    prime_per_row = tf.broadcast_to(prime_mask, tf.shape(collapse_mask))

    # Affected = prime index OR collapsed unit.
    is_affected = tf.logical_or(prime_per_row > 0, collapse_mask > 0)  # [Q, 30] bool
    affected = tf.cast(is_affected, tf.int32)  # [Q, 30]

    # -1.0 where affected, +1.0 elsewhere; the trailing axis lets one factor
    # apply to both the real and the unreal component.
    minus_one = tf.constant(-1.0, dtype=tf.float32)
    plus_one = tf.constant(1.0, dtype=tf.float32)
    sign_factor = tf.expand_dims(tf.where(is_affected, minus_one, plus_one), axis=-1)  # [Q, 30, 1]

    return pairs * sign_factor, affected

def bitmap(rotated_pairs, eps=EPS):
    """
    Reduce the phase-dual pair register to one bit per unit.

    Only the real (first) component is inspected: the bit is 1 when it is
    strictly greater than `eps`, and 0 otherwise (negative values and values
    within the near-zero buffer both map to 0). The unreal component is ignored.

    Args:
        rotated_pairs (tf.Tensor): Pair register of shape [Q, 30, 2] and dtype tf.float32.
        eps (float): Near-zero buffer the real component must exceed.

    Returns:
        tf.Tensor: Bitmap of shape [Q, 30] and dtype tf.int32.

    Raises:
        AssertionError: If `rotated_pairs` is not a rank-3 float32 tensor of shape [Q, 30, 2].
    """
    assert rotated_pairs.shape.rank == 3 and (tf.shape(rotated_pairs)[-2] == 30).numpy().item() and (tf.shape(rotated_pairs)[-1] == 2).numpy().item() and (rotated_pairs.dtype == tf.float32), \
        f"Input rotated_pairs must have shape [Q, 30, 2] and dtype tf.float32, but got shape {rotated_pairs.shape} and dtype {rotated_pairs.dtype}"

    leading_values = rotated_pairs[..., 0]  # real component, [Q, 30]
    exceeds_buffer = leading_values > eps   # strict comparison: ties go to 0
    return tf.cast(exceeds_buffer, tf.int32)

def _value_unique_axis_phase_dual(vals, axis_vals, theta=THETA_PHIPI):
    """
    Flag phase-dual candidates that differ from every observed value on an axis.

    A candidate counts as unique when the Euclidean distance between it and each
    of the K observed phase-dual values in the same batch row is strictly greater
    than `theta`. With K == 0 there is nothing to collide with, so every
    candidate is reported unique.

    Args:
        vals (tf.Tensor): Candidates, float32, shape [Q, 2] (one per row) or
                          [Q, 10, 2] (ten per row).
        axis_vals (tf.Tensor): Observed values, float32, shape [Q, K, 2].
        theta (float): Distance tolerance.

    Returns:
        tf.Tensor: int32 0/1 flags of shape [Q] (for [Q, 2] input) or [Q, 10]
                   (for [Q, 10, 2] input).

    Raises:
        AssertionError: On dtype, shape or batch-size mismatch.
        ValueError: If `vals` is neither rank 2 nor rank 3.
    """
    assert vals.dtype == tf.float32, f"Input vals must have dtype tf.float32, got {vals.dtype}"
    assert axis_vals.dtype == tf.float32, f"Input axis_vals must have dtype tf.float32, got {axis_vals.dtype}"
    assert axis_vals.shape.rank == 3 and (tf.shape(axis_vals)[-1] == 2).numpy().item(), f"Input axis_vals must have shape [Q, K, 2], got {axis_vals.shape}"
    assert (tf.shape(vals)[0] == tf.shape(axis_vals)[0]).numpy().item(), f"Batch dimension of vals ({tf.shape(vals)[0]}) and axis_vals ({tf.shape(axis_vals)[0]}) must match."

    candidate_rank = vals.shape.rank
    if candidate_rank not in (2, 3):
        raise ValueError(f"Input vals must be rank 2 or 3 (representing phase-duals), but got rank {tf.rank(vals)}")

    # Insert a K-axis just before the (real, unreal) axis of the candidates:
    # [Q, 2] -> [Q, 1, 2]   or   [Q, 10, 2] -> [Q, 10, 1, 2].
    candidates = tf.expand_dims(vals, axis=-2)

    if candidate_rank == 2:
        observed = axis_vals                          # [Q, K, 2]
    else:
        observed = tf.expand_dims(axis_vals, axis=1)  # [Q, 1, K, 2]

    # Broadcasted component differences: [Q, K, 2] or [Q, 10, K, 2].
    offsets = tf.abs(candidates - observed)

    # Distance between each candidate and each observed value.
    distances = tf.norm(offsets, axis=-1)  # [Q, K] or [Q, 10, K]

    # Unique only when every observed value is farther away than theta.
    is_unique = tf.reduce_all(distances > theta, axis=-1)
    return tf.cast(is_unique, tf.int32)

def _first_unique_selection_phase_dual(cand_bool, vals):
    """
    Pick, per batch row, the first candidate whose flag is the row maximum.

    The position comes from an argmax over the ten flags, so for 0/1 flags this
    is the first flagged candidate. When no candidate in a row is flagged the
    argmax is 0 and candidate 0 is returned; callers cannot distinguish that
    case from "candidate 0 was flagged".

    Args:
        cand_bool (tf.Tensor): int32 flags of shape [Q, 10].
        vals (tf.Tensor): float32 phase-dual candidates of shape [Q, 10, 2].

    Returns:
        tf.Tensor: Selected phase-dual values of shape [Q, 2].

    Raises:
        AssertionError: On rank, shape, dtype or batch-size mismatch.
    """
    assert cand_bool.shape.rank == 2 and (tf.shape(cand_bool)[-1] == 10).numpy().item() and (cand_bool.dtype == tf.int32), \
        f"Input cand_bool must have shape [Q, 10] and dtype tf.int32, but got shape {cand_bool.shape} and dtype {cand_bool.dtype}"
    assert vals.shape.rank == 3 and (tf.shape(vals)[-2] == 10).numpy().item() and (tf.shape(vals)[-1] == 2).numpy().item() and (vals.dtype == tf.float32), \
        f"Input vals must have shape [Q, 10, 2] and dtype tf.float32, but got shape {vals.shape} and dtype {vals.dtype}"
    assert (tf.shape(cand_bool)[0] == tf.shape(vals)[0]).numpy().item(), f"Batch dimension of cand_bool ({tf.shape(cand_bool)[0]}) and vals ({tf.shape(vals)[0]}) must match."

    # Column of the chosen candidate in every row (0 when nothing is flagged).
    chosen_column = tf.cast(tf.argmax(cand_bool, axis=1), tf.int64)  # [Q]

    # Matching row numbers, same integer type so the two can be stacked.
    row_numbers = tf.range(tf.shape(vals)[0], dtype=tf.int64)  # [Q]

    # (row, column) coordinates into the [Q, 10, 2] candidates; gathering with
    # two coordinates leaves the trailing (real, unreal) axis intact.
    coordinates = tf.stack([row_numbers, chosen_column], axis=1)  # [Q, 2]
    return tf.gather_nd(vals, coordinates)  # [Q, 2]

def promote_primaries(triplets, axis_maps, theta=THETA_PHIPI):
    """
    Promote new primaries from the triplets, preferring the final triplet.

    Two candidate primary sets are built per batch row:
        - triplet-first: the three units of the last triplet (index -1), used as
          the x, y, z primaries;
        - axis-fallback: for each axis, the first of the ten triplet entries at
          that position that is unique against `axis_maps[axis]` (entry 0 when
          none is unique).
    Each selected unit is followed by its negation, giving the layout
    [x, -x, y, -y, z, -z]. The triplet-first set is used for a row only when all
    three of its units are unique against their axis maps; otherwise the
    axis-fallback set is used.

    Args:
        triplets (tf.Tensor): Triplets of shape [Q, 10, 3, 2] and dtype tf.float32.
        axis_maps (dict): Keys 'x', 'y', 'z'; each value a float32 tf.Tensor of
                          shape [Q, K, 2] with the observed values for that axis.
        theta (float): Tolerance passed to the uniqueness check.

    Returns:
        tf.Tensor: Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.

    Raises:
        AssertionError: On shape/dtype violations or a batch-size mismatch
                        between `triplets` and `axis_maps['x']`.
    """
    assert triplets.shape.rank == 4 and (tf.shape(triplets)[-3] == 10).numpy().item() and (tf.shape(triplets)[-2] == 3).numpy().item() and (tf.shape(triplets)[-1] == 2).numpy().item(), \
        f"Input triplets must have shape [Q, 10, 3, 2] and dtype tf.float32, but got shape {triplets.shape}"
    assert triplets.dtype == tf.float32, \
        f"Input triplets must have dtype tf.float32, but got {triplets.dtype}"
    for k, v in axis_maps.items():
        assert isinstance(v, tf.Tensor) and v.dtype == tf.float32 and v.shape.rank == 3 and (tf.shape(v)[-1] == 2).numpy().item(), \
            f"axis_maps['{k}'] must be tf.Tensor of shape [Q, K, 2] and dtype tf.float32, but got shape {v.shape} and dtype {v.dtype}"
    assert (tf.shape(triplets)[0] == tf.shape(axis_maps['x'])[0]).numpy().item(), f"Batch dimension of triplets ({tf.shape(triplets)[0]}) and axis_maps ({tf.shape(axis_maps['x'])[0]}) must match."

    axis_names = ('x', 'y', 'z')  # position within a triplet -> axis map key

    def _with_conjugates(units):
        # Interleave every [Q, 2] unit with its negation and stack to [Q, 6, 2].
        interleaved = []
        for unit in units:
            interleaved.append(unit)
            interleaved.append(neg_phase_dual(unit))
        return tf.stack(interleaved, axis=1)

    # --- Triplet-first candidate: the last triplet of every row ---
    last_triplet = triplets[:, -1, :, :]  # [Q, 3, 2]
    final_units = [last_triplet[:, position, :] for position in range(3)]  # each [Q, 2]
    final_flags = [
        _value_unique_axis_phase_dual(unit, axis_maps[name], theta)  # [Q]
        for name, unit in zip(axis_names, final_units)
    ]
    # The triplet qualifies only if x, y and z are all unique on their axes.
    triplet_is_unique = tf.logical_and(
        tf.logical_and(final_flags[0] > 0, final_flags[1] > 0),
        final_flags[2] > 0,
    )  # [Q] bool
    prim_trip = _with_conjugates(final_units)  # [Q, 6, 2]

    # --- Axis-fallback candidate: first unique entry per axis across all triplets ---
    fallback_units = []
    for position, name in enumerate(axis_names):
        axis_candidates = triplets[:, :, position, :]  # [Q, 10, 2]
        candidate_flags = _value_unique_axis_phase_dual(axis_candidates, axis_maps[name], theta)  # [Q, 10]
        fallback_units.append(_first_unique_selection_phase_dual(candidate_flags, axis_candidates))  # [Q, 2]
    prim_axis = _with_conjugates(fallback_units)  # [Q, 6, 2]

    # Row-wise choice; [Q, 1, 1] broadcasts against the [Q, 6, 2] candidates.
    use_triplet = triplet_is_unique[:, None, None]
    return tf.where(use_triplet, prim_trip, prim_axis)

def make_keys(bits, prime_mask, collapse_mask, parity_mask, lineage_list=None):
    """
    Produce one SHA256 hex digest per batch row from the bitmap and its masks.

    For every row a text manifest of the form
    ``bits:[...]|prime:[...]|collapse:[...]|parity:[...]`` is assembled from the
    row's values (as Python lists). If `lineage_list` holds a non-empty entry
    for the row, ``|path:<entry>`` is appended to the manifest before hashing,
    so the lineage influences the digest rather than being attached to it.
    Tensors are converted to NumPy first; hashing itself is plain Python.

    Args:
        bits (tf.Tensor): Bitmap of shape [Q, 30] and dtype tf.int32.
        prime_mask (tf.Tensor): Prime index mask of shape [30] and dtype tf.int32;
                                the same mask is used for every row.
        collapse_mask (tf.Tensor): Collapse mask of shape [Q, 30] and dtype tf.int32.
        parity_mask (tf.Tensor): Parity mask of shape [Q, 30] and dtype tf.int32.
        lineage_list (list[str], optional): One lineage string per row. Defaults to None.

    Returns:
        list[str]: SHA256 hex digests, one per batch row, in row order.

    Raises:
        AssertionError: On rank/shape/dtype violations, mismatched batch sizes,
                        or a `lineage_list` that is not a list of length Q.
    """
    assert bits.shape.rank == 2 and (tf.shape(bits)[-1] == 30).numpy().item() and (bits.dtype == tf.int32), \
        f"Input bits must have shape [Q, 30] and dtype tf.int32, but got shape {bits.shape} and dtype {bits.dtype}"
    assert prime_mask.shape.rank == 1 and (tf.shape(prime_mask)[-1] == 30).numpy().item() and (prime_mask.dtype == tf.int32), \
        f"Input prime_mask must have shape [30] and dtype tf.int32, but got shape {prime_mask.shape} and dtype {prime_mask.dtype}"
    assert collapse_mask.shape.rank == 2 and (tf.shape(collapse_mask)[-1] == 30).numpy().item() and (tf.shape(collapse_mask)[0] == tf.shape(bits)[0]).numpy().item() and (collapse_mask.dtype == tf.int32), \
        f"Input collapse_mask must have shape [Q, 30] and dtype tf.int32, but got shape {collapse_mask.shape} and dtype {collapse_mask.dtype}"
    assert parity_mask.shape.rank == 2 and (tf.shape(parity_mask)[-1] == 30).numpy().item() and (tf.shape(parity_mask)[0] == tf.shape(bits)[0]).numpy().item() and (parity_mask.dtype == tf.int32), \
        f"Input parity_mask must have shape [Q, 30] and dtype tf.int32, but got shape {parity_mask.shape} and dtype {parity_mask.dtype}"
    assert (tf.shape(bits)[0].numpy().item() == tf.shape(collapse_mask)[0].numpy().item()) and (tf.shape(bits)[0].numpy().item() == tf.shape(parity_mask)[0].numpy().item()), \
        f"Batch dimensions of bits ({tf.shape(bits)[0].numpy().item()}), collapse_mask ({tf.shape(collapse_mask)[0].numpy().item()}), and parity_mask ({tf.shape(parity_mask)[0].numpy().item()}) must match."
    if lineage_list is not None:
        assert isinstance(lineage_list, list) and len(lineage_list) == tf.shape(bits)[0].numpy().item(), \
            f"If provided, lineage_list must be a list of strings with length matching batch size ({tf.shape(bits)[0].numpy().item()})"

    Q = tf.shape(bits)[0].numpy().item()  # number of batch rows (qubits)

    # Materialize everything once so the loop below is pure Python/NumPy.
    bits_np = bits.numpy()
    collapse_np = collapse_mask.numpy()
    parity_np = parity_mask.numpy()
    # The global [30] prime mask is repeated for each row: [Q, 30].
    prime_mask_broadcasted = np.broadcast_to(prime_mask.numpy(), (Q, 30))

    def _row_digest(q_idx):
        # Text manifest for one row; list reprs keep the exact 0/1 layout.
        lineage_manifest = f"bits:{bits_np[q_idx].tolist()}|prime:{prime_mask_broadcasted[q_idx].tolist()}|collapse:{collapse_np[q_idx].tolist()}|parity:{parity_np[q_idx].tolist()}"
        # Lineage is optional both globally (None/empty list) and per row (empty entry).
        if lineage_list and lineage_list[q_idx]:
            lineage_manifest += f"|path:{lineage_list[q_idx]}"
        return hashlib.sha256(lineage_manifest.encode("utf-8")).hexdigest()

    return [_row_digest(q_idx) for q_idx in range(Q)]

def compute_info_energy(primaries_out, k_values, a_U_constant):
    """
    Compute the per-row info-energy E_info = (k + 1) * I * a_U.

    I is the sum, over the six primaries of a row, of each primary's Euclidean
    norm across its (real, unreal) components.

    Args:
        primaries_out (tf.Tensor): Promoted primaries of shape [Q, 6, 2] and dtype tf.float32.
        k_values (tf.Tensor): Per-row 'k' values, dtype tf.float32, shape [Q, 1] or [Q].
        a_U_constant (tf.Tensor): Scalar (rank-0) tf.float32 multiplier.

    Returns:
        tf.Tensor: Info-energy for each row, shape [Q] and dtype tf.float32.

    Raises:
        AssertionError: On rank, shape or dtype violations of any argument.
    """
    assert primaries_out.shape.rank == 3 and (tf.shape(primaries_out)[-1] == 2).numpy().item(), \
        f"Input primaries_out must have shape [Q, 6, 2] and rank 3, but got shape {primaries_out.shape} and rank {primaries_out.shape.rank}"
    assert (primaries_out.dtype == tf.float32), f"primaries_out must have dtype tf.float32, but got {primaries_out.dtype}"
    assert (tf.shape(primaries_out)[-2] == 6).numpy().item(), f"primaries_out must have shape [Q, 6, 2], but got {primaries_out.shape}"
    assert (k_values.dtype == tf.float32), f"k_values must have dtype tf.float32, but got {k_values.dtype}"
    assert ( (tf.rank(k_values) == 2).numpy().item() and (tf.shape(k_values)[-1] == 1).numpy().item() ) or \
           ( (tf.rank(k_values) == 1).numpy().item() and (tf.shape(k_values)[0] == tf.shape(primaries_out)[0]).numpy().item() ), \
           f"k_values must have shape [Q, 1] or [Q], but got {k_values.shape}"
    assert (a_U_constant.dtype == tf.float32), f"a_U_constant must have dtype tf.float32, but got {a_U_constant.dtype}"
    assert (tf.rank(a_U_constant) == 0).numpy().item(), f"a_U_constant must be a scalar, but got rank {tf.rank(a_U_constant)}"

    # Bring k to column form [Q, 1] regardless of which accepted shape was given.
    k_is_flat = (tf.rank(k_values) == 1).numpy().item()
    k_column = tf.expand_dims(k_values, axis=-1) if k_is_flat else k_values

    # I: per-primary norms [Q, 6], summed over the six primaries and kept as a
    # column [Q, 1] so it lines up with k_column.
    primary_norms = tf.norm(primaries_out, axis=-1)
    I_column = tf.reduce_sum(primary_norms, axis=1, keepdims=True)

    # (k + 1) * I * a_U, still [Q, 1]; drop the column axis for the [Q] result.
    energy_column = (k_column + 1.0) * I_column * a_U_constant
    return tf.squeeze(energy_column, axis=1)

# =========================
# NECL v0.1 Operations
# =========================

def CURV(primaries, params_kappa):
    """
    NECL function: curvilinear transformation X <- X / (1 + |kappa| * |X|).

    |X| is the Euclidean norm of each unit over the last axis, so both
    components of a unit are divided by the same factor. The denominator is
    always >= 1, hence no unit grows in magnitude.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_kappa (tf.Tensor): Kappa parameter; scalar or any value that
                                  broadcasts against [Q, 6, 1]. It is cast to
                                  the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries with the shape of `primaries`.
    """
    kappa_abs = tf.abs(tf.cast(params_kappa, primaries.dtype))
    # Per-unit magnitude, kept as a trailing axis of size 1 for broadcasting.
    unit_norm = tf.norm(primaries, axis=-1, keepdims=True)  # [Q, 6, 1]
    damping = 1.0 + kappa_abs * unit_norm
    return primaries / damping

def GEOD(primaries, params_t):
    """
    NECL function: geodesic transformation X <- X + t * sign(X).

    Every component is shifted by `t` in the direction of its own sign;
    components that are exactly zero have sign 0 and stay unchanged.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_t (tf.Tensor): 't' parameter; scalar or broadcastable against
                              `primaries`. It is cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    step = tf.cast(params_t, primaries.dtype)
    direction = tf.sign(primaries)  # -1, 0 or +1 per component
    displacement = step * direction
    return primaries + displacement

def TWIST(primaries, params_theta):
    """
    NECL function: twist transformation X[..., 1] <- X[..., 1] * cos(theta).

    Only the unreal (second) component is scaled; the real (first) component
    is passed through unchanged.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_theta (tf.Tensor): 'theta' angle, cast to the dtype of
                                  `primaries`. It multiplies the [Q, 6] slice of
                                  unreal components, so a non-scalar value must
                                  broadcast against that slice.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    angle = tf.cast(params_theta, primaries.dtype)
    real_component = primaries[..., 0]
    twisted_unreal = primaries[..., 1] * tf.cos(angle)
    # Re-pair the untouched real part with the scaled unreal part.
    return tf.stack([real_component, twisted_unreal], axis=-1)

def LIFT(primaries, params_d):
    """
    Conceptual NECL function: uniform scaling X <- X * (1 + 0.1 * d).

    This is a simplified stand-in for a projection to higher coordinates: every
    component is multiplied by the same factor, which grows linearly with `d`.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_d (tf.Tensor): 'd' parameter; cast to the dtype of `primaries`
                              before use.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    dimension = tf.cast(params_d, primaries.dtype)
    # 10% extra scale per unit of d; placeholder for an invariant-preserving lift.
    scale = 1.0 + dimension * 0.1
    return primaries * scale

def GLUE(primaries, params_sigma):
    """
    Conceptual NECL function: 'gluing' X <- X + sigma * roll(X, +1, axis=1).

    Each primary receives `sigma` times the primary that precedes it along
    axis 1; the roll is cyclic, so the first primary receives the last one.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_sigma (tf.Tensor): Gluing strength; cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 6, 2].
    """
    strength = tf.cast(params_sigma, primaries.dtype)
    # Shift by one position along the selector axis (axis 1), wrapping around.
    neighbours = tf.roll(primaries, shift=1, axis=1)
    return primaries + strength * neighbours

def SPLIT(primaries, params_tau):
    """
    Conceptual NECL function: split X <- concat(X * (1 - tau), X * tau).

    The two scaled copies are concatenated along axis 1, so that axis doubles
    in length; the (1 - tau) copy comes first.

    Args:
        primaries (tf.Tensor): Input primaries of shape [Q, 6, 2].
        params_tau (tf.Tensor): Split ratio; cast to the dtype of `primaries`.
    Returns:
        tf.Tensor: Transformed primaries of shape [Q, 12, 2] for [Q, 6, 2] input.
    """
    ratio = tf.cast(params_tau, primaries.dtype)
    retained_part = primaries * (1.0 - ratio)
    split_off_part = primaries * ratio
    # Output is twice as long along axis 1 as the input.
    return tf.concat([retained_part, split_off_part], axis=1)

# =========================
# Hash->State Mapping Function
# =========================

def decode_lineage_hash(hex_hash_str, q_idx, D, num_qubits, invariants):
    """
    Deterministically derive a conceptual spin vector and internal state vector
    for one qubit from its lineage hash.

    The hash is not parsed byte-by-byte: it is combined with `q_idx`, re-hashed,
    and used to seed a private pseudo-random generator from which all values are
    drawn. The generator is local to the call, so NumPy's global random state is
    left untouched (the drawn values match what seeding the global legacy
    generator with the same seed would produce).

    Args:
        hex_hash_str (str): 64-character SHA-256 hex digest for one qubit.
        q_idx (int): Index of the qubit; mixed into the seed.
        D (int): Length of the internal state vector; must be >= 16.
        num_qubits (int): Accepted for interface compatibility; not used here.
        invariants (dict): Accepted for interface compatibility; not used here.

    Returns:
        tuple[tf.Tensor, tf.Tensor]:
            - spin_vec: float32 tensor of shape [1, 2, 3]; row 0 is the real spin
              (sin t cos p, sin t sin p, cos t), row 1 is the same vector with its
              (x, y) part rotated about z by the twist angle.
            - i_vec: float32 tensor of shape [1, D], scaled to unit Euclidean norm
              unless its norm is <= EPS.

    Raises:
        AssertionError: If the hash is not a 64-character string or D < 16.
    """
    assert isinstance(hex_hash_str, str) and len(hex_hash_str) == 64, f"Hex hash string must be 64 characters, got {len(hex_hash_str)}"
    assert D >= 16, f"D for I_vec must be at least 16, got {D}"

    # Per-qubit seed: first 64 bits of sha256("<hash>-<q_idx>"), folded into a
    # range the legacy NumPy generator accepts.
    seed_value = int(hashlib.sha256(f"{hex_hash_str}-{q_idx}".encode('utf-8')).hexdigest()[:16], 16)
    rng = np.random.RandomState(seed_value % (2**32 - 1))

    # Three angles in [0, 2*pi): theta, phi and the twist. Draw order matters for
    # determinism: the angles come first, then the D state components.
    r_vals = rng.rand(3)
    theta = 2 * math.pi * r_vals[0]
    phi = 2 * math.pi * r_vals[1]
    twist_angle = 2 * math.pi * r_vals[2]

    # Real spin on the unit sphere.
    real_spin_x = math.sin(theta) * math.cos(phi)
    real_spin_y = math.sin(theta) * math.sin(phi)
    real_spin_z = math.cos(theta)

    # Unreal spin: 2D rotation of (x, y) about the z axis; z is unchanged.
    cos_twist = math.cos(twist_angle)
    sin_twist = math.sin(twist_angle)
    unreal_spin_x = real_spin_x * cos_twist - real_spin_y * sin_twist
    unreal_spin_y = real_spin_x * sin_twist + real_spin_y * cos_twist
    unreal_spin_z = real_spin_z

    spin_vec_data = np.array([
        [real_spin_x, real_spin_y, real_spin_z],        # real components
        [unreal_spin_x, unreal_spin_y, unreal_spin_z],  # unreal components
    ], dtype=np.float32)
    spin_vec = tf.reshape(tf.constant(spin_vec_data), (1, 2, 3))

    # Internal state vector: D uniform draws, normalised to unit length when the
    # norm is large enough to divide by safely.
    i_vec_data = rng.rand(D).astype(np.float32)
    i_vec_norm = np.linalg.norm(i_vec_data)
    if i_vec_norm > EPS:
        i_vec_data = i_vec_data / i_vec_norm
    i_vec = tf.reshape(tf.constant(i_vec_data), (1, D))

    return spin_vec, i_vec

# =========================
# Multi-Qubit Ops Wrappers (ISA instructions for multi-qubit)
# =========================

def NORMALIZE_Q(primaries, invariants):
    """
    NORM(X, nu): rescale each qubit's primaries by the summed magnitude of its units.

    For every qubit the Euclidean norm of each (real, unreal) unit is taken, the
    norms are summed over axis 1, and all of that qubit's components are divided
    by the sum (plus EPS) and multiplied by the 'units' invariant. Qubits whose
    summed magnitude is not above EPS are mapped to zeros.

    Args:
        primaries (tf.Tensor): Primaries (callers pass shape [Q, 6, 2]).
        invariants (dict): Invariant constants; only 'units' is read (default 1.0).
    Returns:
        tf.Tensor: Normalized primaries, same shape as the input.
    """
    target_units = invariants.get('units', 1.0)

    unit_norms = tf.norm(primaries, axis=-1, keepdims=True)           # [Q, 6, 1]
    qubit_totals = tf.reduce_sum(unit_norms, axis=1, keepdims=True)   # [Q, 1, 1]

    # Scale factor is the unit value for live qubits and 0 for (near-)empty ones.
    scale = tf.where(qubit_totals > EPS, tf.cast(target_units, primaries.dtype), 0.0)
    return primaries / (qubit_totals + EPS) * scale

def PARITY_Q(primaries, prime_mask):
    """
    PAR(X, pi): multi-qubit wrapper around apply_parity_rotation.

    Builds the pair register from the primaries, derives its collapse mask,
    applies the parity rotation, and returns the first six rotated entries of
    each qubit as the updated primaries.

    Args:
        primaries (tf.Tensor): Primaries (callers pass shape [Q, 6, 2]).
        prime_mask (tf.Tensor): Global prime mask (length 30 in this file).
    Returns:
        tf.Tensor: Entries 0..5 along axis 1 of the rotated pair register.
    """
    pair_register = compute_pairs(primaries)
    collapse_flags = detect_collapse(pair_register)
    rotated, _parity_mask = apply_parity_rotation(pair_register, collapse_flags, prime_mask)
    # Only the leading six slots are handed back as primaries.
    return rotated[:, :6, :]

def COLLAPSE_Q(primaries):
    """
    COLL(X, chi): zero the primary units that the collapse detector flags.

    The collapse mask is computed on the full pair register; only its first six
    columns are used, and each flagged unit has both of its components set to 0
    while unflagged units pass through unchanged.

    Args:
        primaries (tf.Tensor): Primaries (callers pass shape [Q, 6, 2]).
    Returns:
        tf.Tensor: Primaries with flagged units zeroed, same shape as the input.
    """
    collapse_mask = detect_collapse(compute_pairs(primaries))

    # Keep the six primary columns and add a trailing axis so the flag
    # broadcasts over the (real, unreal) components.
    unit_flags = tf.cast(collapse_mask[:, :6, tf.newaxis], tf.float32)
    is_collapsed = unit_flags > 0

    return tf.where(is_collapsed, tf.zeros_like(primaries), primaries)

def ASSOC_Q(triplets, axis_maps, theta_phipi):
    """
    ASSOC(A, B, alpha): multi-qubit entry point that delegates to promote_primaries.

    Args:
        triplets (tf.Tensor): Triplets (callers pass shape [Q, 10, 3, 2]).
        axis_maps (dict): Per-axis maps forwarded unchanged.
        theta_phipi (float): Tolerance forwarded unchanged.
    Returns:
        tf.Tensor: Whatever promote_primaries returns (documented by callers as [Q, 6, 2]).
    """
    promoted = promote_primaries(triplets, axis_maps, theta_phipi)
    return promoted

def APPLY_NECL(primaries, necl_program_list, params_dict, prime_mask, conceptual_target_state=None):
    """
    Apply a sequence of NECL operations to multi-qubit primaries.

    Operations are applied in list order. Parametrised operations look their
    parameter up in `params_dict` under the operation name and fall back to a
    built-in default. Parameters may be scalar tensors or plain Python numbers.

    A manifest string is built by concatenating each operation name (including
    unknown or skipped ones) followed, for parametrised operations, by
    "(<value>)"; its SHA-256 hex digest is returned as the program checksum.

    Args:
        primaries (tf.Tensor): Input primaries (callers pass shape [Q, 6, 2]).
        necl_program_list (list[str]): Operation names to apply, in order.
        params_dict (dict): Maps operation names to scalar parameters.
        prime_mask (tf.Tensor): Global prime mask, forwarded to PARITY_Q.
        conceptual_target_state (tf.Tensor, optional): Accepted for interface
            compatibility; it is not forwarded to any operation here.

    Returns:
        tuple[tf.Tensor, str]: Final primaries and the manifest checksum.
    """
    def _manifest_value(op_params):
        # Works for tensors and for plain Python numbers alike; a bare
        # `.numpy()` would raise AttributeError on the latter.
        return tf.convert_to_tensor(op_params).numpy().item()

    current_primaries = primaries
    Q = tf.shape(primaries)[0].numpy().item()

    # Collected pieces of the manifest; joined once at the end.
    manifest_parts = []

    for op_name in necl_program_list:
        manifest_parts.append(op_name)

        if op_name == 'CURV':
            op_params = params_dict.get('CURV', tf.constant(0.01, dtype=tf.float32))
            current_primaries = CURV(current_primaries, op_params)
            manifest_parts.append(f"({_manifest_value(op_params)})")
        elif op_name == 'GEOD':
            op_params = params_dict.get('GEOD', tf.constant(0.05, dtype=tf.float32))
            # GEOD is called without a target state in this simplified pipeline.
            current_primaries = GEOD(current_primaries, op_params)
            manifest_parts.append(f"({_manifest_value(op_params)})")
        elif op_name == 'TWIST':
            op_params = params_dict.get('TWIST', tf.constant(math.pi/4, dtype=tf.float32))  # radians
            current_primaries = TWIST(current_primaries, op_params)
            manifest_parts.append(f"({_manifest_value(op_params)})")
        elif op_name == 'LIFT':
            op_params = params_dict.get('LIFT', tf.constant(0.5, dtype=tf.float32))  # 'd' factor
            current_primaries = LIFT(current_primaries, op_params)
            manifest_parts.append(f"({_manifest_value(op_params)})")
        elif op_name == 'GLUE':
            op_params = params_dict.get('GLUE', tf.constant(0.1, dtype=tf.float32))  # sigma
            if Q % 2 != 0:
                print(f"Warning: GLUE operation skipped for odd Q ({Q})")
            else:
                # NOTE(review): the second argument is a full tensor (primaries
                # rolled along the qubit axis, times sigma), whereas GLUE
                # documents a scalar strength. Kept as-is to preserve results;
                # confirm this cross-qubit coupling is intended.
                current_primaries = GLUE(current_primaries, tf.roll(current_primaries, shift=1, axis=0) * op_params)
            manifest_parts.append(f"({_manifest_value(op_params)})")
        elif op_name == 'SPLIT':
            op_params = params_dict.get('SPLIT', tf.constant(0.5, dtype=tf.float32))  # tau
            # Deliberately a no-op on the data: a real SPLIT would double the
            # selector axis, and this pipeline keeps it constant. The operation
            # is still recorded in the manifest.
            manifest_parts.append(f"({_manifest_value(op_params)})")
        elif op_name == 'PARITY_Q':
            current_primaries = PARITY_Q(current_primaries, prime_mask)
        elif op_name == 'COLLAPSE_Q':
            current_primaries = COLLAPSE_Q(current_primaries)
        else:
            print(f"Warning: Unknown NECL operation: {op_name}")

    program_manifest = "".join(manifest_parts)
    necl_checksum = hashlib.sha256(program_manifest.encode('utf-8')).hexdigest()
    return current_primaries, necl_checksum

# =========================
# Error Correction (New) - Advanced
# =========================

def r_metric(real_parts):
    """
    Quantify real stability as 1 - variance / (range + EPS).

    Higher values mean the real parts are more tightly clustered. When the
    spread (max - min) is below EPS the inputs are treated as identical and the
    metric is exactly 1.

    Args:
        real_parts (tf.Tensor): Floating-point real components.
    Returns:
        tf.Tensor: Scalar tensor in both branches, so callers can always use
        tensor methods such as `.numpy()` on the result.
    """
    spread = tf.reduce_max(real_parts) - tf.reduce_min(real_parts)
    if spread < EPS:
        # No measurable variation: maximum stability, returned as a tensor so the
        # return type matches the general branch.
        return tf.ones([], dtype=spread.dtype)

    return 1.0 - (tf.math.reduce_variance(real_parts) / (spread + EPS))

def u_metric(unreal_parts):
    """
    Quantify unreal stability as 1 - variance / (range + EPS).

    Higher values mean the unreal parts are more tightly clustered. When the
    spread (max - min) is below EPS the inputs are treated as identical and the
    metric is exactly 1.

    Args:
        unreal_parts (tf.Tensor): Floating-point unreal components.
    Returns:
        tf.Tensor: Scalar tensor in both branches, so callers can always use
        tensor methods such as `.numpy()` on the result.
    """
    spread = tf.reduce_max(unreal_parts) - tf.reduce_min(unreal_parts)
    if spread < EPS:
        # No measurable variation: maximum stability, returned as a tensor so the
        # return type matches the general branch.
        return tf.ones([], dtype=spread.dtype)

    return 1.0 - (tf.math.reduce_variance(unreal_parts) / (spread + EPS))

def dv_metric(pairs_q):
    """
    Quantify real/unreal consistency as 1 - mean(|real - unreal| / magnitude).

    For each pair the absolute difference between its two components is divided
    by the pair's Euclidean norm; pairs whose norm is not above EPS contribute 0.
    Higher return values mean lower divergence.

    Args:
        pairs_q (tf.Tensor): Phase-dual pairs whose last axis holds (real, unreal).
    Returns:
        tf.Tensor: Scalar consistency score.
    """
    real_parts, unreal_parts = pairs_q[..., 0], pairs_q[..., 1]
    magnitudes = tf.norm(pairs_q, axis=-1)

    relative_gap = tf.abs(real_parts - unreal_parts) / (magnitudes + EPS)
    # Near-zero pairs are counted as having no divergence.
    divergence = tf.where(magnitudes > EPS, relative_gap, tf.zeros_like(magnitudes))

    return 1.0 - tf.reduce_mean(divergence)

def invariant_check_conceptual(pairs_q, triplets_q, invariants):
    """
    Check the conceptual magnitude invariant for one qubit.

    The invariant holds when the summed Euclidean norms of the first six pairs
    (the primaries) differ from invariants['units'] (default 1.0) by less than
    invariants['tol'] (default EPS). `triplets_q` is accepted but not used.

    Returns:
        tf.Tensor: Scalar boolean tensor, True when the invariant holds.
    """
    expected_units = invariants.get('units', 1.0)
    tolerance = invariants.get('tol', EPS)

    primary_norms = tf.norm(pairs_q[:6, :], axis=-1)
    total_norm = tf.reduce_sum(primary_norms)

    return tf.abs(total_norm - expected_units) < tolerance

def degenerate_check(primaries_q):
    """
    Report whether a set of primaries is degenerate (all units near zero).

    Returns:
        tf.Tensor: Scalar boolean tensor, True when every unit's Euclidean norm
        over the last axis is below EPS.
    """
    unit_norms = tf.norm(primaries_q, axis=-1)
    is_near_zero = unit_norms < EPS
    return tf.reduce_all(is_near_zero)

def derive_bits_advanced(pairs_q, triplets_q, invariants, initial_TAU_R, initial_TAU_U, initial_TAU_D):
    """
    Derive a corrected 30-bit pattern for one qubit from a per-index rule plus guards.

    Rule: b_i = 1 iff r_i > TAU_R and u_i > TAU_U and dv_i > TAU_D and the
    triplet containing i has mixed signs and the global invariant holds and the
    primaries are not degenerate; otherwise 0.

    Guard 1: if the binary entropy of the bit pattern is not above 0.3, the R and
    U thresholds are multiplied by 1.2, the D threshold becomes
    max(0.9 * TAU_D, 0.25), and the rule is re-applied.
    Guard 2: if the pattern is all ones or all zeros, it is replaced by
    (|real_i| > EPS) and triplet-mix.

    NOTE(review): r_i = |real|/mag and u_i = |unreal|/mag satisfy
    r_i^2 + u_i^2 = 1 wherever mag > EPS, so two thresholds that both exceed
    about 0.7071 can never be met at the same index. With such thresholds the
    rule yields all zeros and the guard-2 fallback decides the result.

    Args:
        pairs_q (tf.Tensor): Pair register for one qubit, shape [30, 2].
        triplets_q (tf.Tensor): Triplets for the qubit; only forwarded to the
            invariant check.
        invariants (dict): Invariant constants forwarded to the invariant check.
        initial_TAU_R, initial_TAU_U, initial_TAU_D (float): Starting thresholds.

    Returns:
        tuple: (bits [30] int32 tensor, TAU_R, TAU_U, TAU_D) where the thresholds
        are the possibly adjusted values after guard 1.
    """
    current_TAU_R = initial_TAU_R
    current_TAU_U = initial_TAU_U
    current_TAU_D = initial_TAU_D

    real = pairs_q[:, 0]              # [30]
    unreal = pairs_q[:, 1]            # [30]
    mag = tf.norm(pairs_q, axis=-1)   # [30]

    # Per-index ratios; indices with negligible magnitude score 0 on all three.
    r_i = tf.where(mag > EPS, tf.abs(real) / mag, tf.zeros_like(mag))
    u_i = tf.where(mag > EPS, tf.abs(unreal) / mag, tf.zeros_like(mag))
    dv_i = tf.where(mag > EPS, tf.abs(real - unreal) / mag, tf.zeros_like(mag))

    # Triplet diversity: indices are grouped as [0,1,2], [3,4,5], ..., [27,28,29].
    # A triplet is "mixed" when any member's real-part sign differs from the
    # first member's. Computed for all ten triplets at once, then broadcast back
    # to the three indices of each triplet.
    triplet_signs = tf.reshape(tf.sign(real)[:30], [10, 3])
    triplet_has_mix = tf.reduce_any(
        tf.not_equal(triplet_signs, triplet_signs[:, :1]), axis=1
    )                                                                  # [10]
    trip_mix = tf.reshape(
        tf.tile(tf.cast(triplet_has_mix, tf.int32)[:, tf.newaxis], [1, 3]), [-1]
    )                                                                  # [30]

    # Global checks, evaluated once and shared by both rule applications.
    invariant_ok = invariant_check_conceptual(pairs_q, triplets_q, invariants)
    not_degenerate = tf.logical_not(degenerate_check(pairs_q[:6, :]))

    def _apply_rule(tau_r, tau_u, tau_d):
        return tf.cast(
            (r_i > tau_r) & (u_i > tau_u) & (dv_i > tau_d)
            & (trip_mix > 0) & invariant_ok & not_degenerate,
            tf.int32,
        )

    def min_entropy_ok(bits):
        # Binary entropy (natural log) of the fraction of ones.
        p = tf.reduce_mean(tf.cast(bits, tf.float32))
        H = - (p * tf.math.log(p + EPS) + (1.0 - p) * tf.math.log(1.0 - p + EPS))
        return H > 0.3

    b = _apply_rule(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 1: low-entropy pattern -> adjust thresholds and re-derive.
    if not min_entropy_ok(b):
        current_TAU_R *= 1.2
        current_TAU_U *= 1.2
        current_TAU_D = max(current_TAU_D * 0.9, 0.25)
        b = _apply_rule(current_TAU_R, current_TAU_U, current_TAU_D)

    # Guard 2: never return an all-ones or all-zeros decision.
    if tf.reduce_all(b == 1) or tf.reduce_all(b == 0):
        b = tf.cast((tf.abs(real) > EPS) & (trip_mix > 0), tf.int32)

    return b, current_TAU_R, current_TAU_U, current_TAU_D

def correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, resonance_key_q, TRACE, invariants):
    """
    Error-correction hook for a single qubit.

    The incoming bit pattern is considered inconsistent when it is all ones, all
    zeros, has fewer than 5 ones, or has more than 25 ones. In that case the bits
    are re-derived with derive_bits_advanced (using only this qubit's pairs and
    triplets), a new resonance key is computed as
    sha256(old_key + "REFactorBits" + str(bit_list)), and one entry describing
    the correction is appended to `TRACE`. Otherwise the inputs are returned
    unchanged and `TRACE` is not touched.

    Args:
        q_idx (int): Index of the qubit being processed.
        pairs_q (tf.Tensor): Pair register for the qubit, shape [30, 2].
        triplets_q (tf.Tensor): Triplets for the qubit, shape [10, 3, 2].
        current_bits_q (tf.Tensor): Initially derived bit pattern, shape [30].
        resonance_key_q (str): Current resonance key of the qubit.
        TRACE (list): Lineage log; mutated in place when a correction occurs.
        invariants (dict): Invariant constants.

    Returns:
        tuple[tf.Tensor, str]: The (possibly corrected) bits and the (possibly
        updated) resonance key.
    """
    num_ones = tf.reduce_sum(current_bits_q)
    inconsistency_flags = tf.stack([
        tf.reduce_all(tf.equal(current_bits_q, 1)),  # all ones
        tf.reduce_all(tf.equal(current_bits_q, 0)),  # all zeros
        num_ones < 5,                                # too sparse
        num_ones > 25,                               # too dense
    ])
    is_inconsistent = bool(tf.reduce_any(inconsistency_flags))

    if not is_inconsistent:
        return current_bits_q, resonance_key_q

    # Re-derive the bits and capture the thresholds that were finally used.
    new_bits_q, adjusted_TAU_R, adjusted_TAU_U, adjusted_TAU_D = derive_bits_advanced(
        pairs_q, triplets_q, invariants, TAU_R_METRIC, TAU_U_METRIC, TAU_D_METRIC
    )
    corrected_bit_list = new_bits_q.numpy().tolist()

    # The new key binds the correction lineage to the previous key.
    updated_resonance_key_q = hashlib.sha256(
        (resonance_key_q + "REFactorBits" + str(corrected_bit_list)).encode("utf-8")
    ).hexdigest()

    # float()/bool() accept both scalar tensors and plain Python values, so the
    # log entry is built even if a metric returns a Python number.
    TRACE.append({'qubit': q_idx, 'reason': "binary_refactor", 'source': "tuplets",
                  'r_metric': float(r_metric(pairs_q[:, 0])),
                  'u_metric': float(u_metric(pairs_q[:, 1])),
                  'dv_metric': float(dv_metric(pairs_q)),
                  'invariant_pass': bool(invariant_check_conceptual(pairs_q, triplets_q, invariants)),
                  'degenerate_check': bool(degenerate_check(pairs_q[:6, :])),
                  'correction_threshold_r': adjusted_TAU_R,
                  'correction_threshold_u': adjusted_TAU_U,
                  'correction_threshold_d': adjusted_TAU_D,
                  'corrected_bits': corrected_bit_list,
                  'old_key': resonance_key_q, 'new_key': updated_resonance_key_q})
    return new_bits_q, updated_resonance_key_q

# =========================
# Reproducible Example (Multi-Qubit)
# =========================

# Number of virtual qubits simulated by this example.
Q = 64 # leading dimension of every per-qubit tensor built below

# Build the input primaries.
# Each primary is a phase-dual [real, unreal] pair; three base primaries
# (x, y, z) are drawn per qubit and their negations are interleaved with them.
# NOTE: no random seed is set in this section, so the drawn values are not
# repeatable from these lines alone.

# Base (x, y, z) primaries, uniform in [-1, 1).
# Shape [Q, 3, 2]
base_primaries_xyz = tf.random.uniform(shape=[Q, 3, 2], minval=-1.0, maxval=1.0, dtype=tf.float32)

# initial_primaries = [x, -x, y, -y, z, -z] along axis 1.
# Each slice is given a length-1 axis 1 so the six pieces concatenate to [Q, 6, 2].
initial_primaries = tf.concat([
    base_primaries_xyz[:, 0, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 0, :])[:, tf.newaxis, :], # x, -x
    base_primaries_xyz[:, 1, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 1, :])[:, tf.newaxis, :], # y, -y
    base_primaries_xyz[:, 2, :][:, tf.newaxis, :], neg_phase_dual(base_primaries_xyz[:, 2, :])[:, tf.newaxis, :], # z, -z
], axis=1) # Shape [Q, 6, 2]

# Build the axis maps.
# For each axis ('x', 'y', 'z') the final tensor has shape [Q, K_max, 2],
# where K_max is the largest K drawn for any qubit on any axis.

list_of_axis_maps_x = []
list_of_axis_maps_y = []
list_of_axis_maps_z = []

max_k_dynamic = 0
min_k_val = 3 # inclusive lower bound for the per-map K
max_k_val = 11 # exclusive upper bound for np.random.randint, so K is at most 10

for q_idx in range(Q):
    # Independent K per qubit and per axis.
    k_x = np.random.randint(min_k_val, max_k_val)
    k_y = np.random.randint(min_k_val, max_k_val)
    k_z = np.random.randint(min_k_val, max_k_val)

    list_of_axis_maps_x.append(tf.random.uniform(shape=[k_x, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_y.append(tf.random.uniform(shape=[k_y, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))
    list_of_axis_maps_z.append(tf.random.uniform(shape=[k_z, 2], minval=-1.0, maxval=1.0, dtype=tf.float32))

    max_k_dynamic = max(max_k_dynamic, k_x, k_y, k_z)

# Zero-pad every map at the end of its first axis up to max_k_dynamic rows, then stack per axis.
axis_maps = {
    'x': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_x]),
    'y': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_y]),
    'z': tf.stack([tf.pad(t, [[0, max_k_dynamic - tf.shape(t)[0]], [0, 0]], "CONSTANT", constant_values=0.0) for t in list_of_axis_maps_z]),
}

# Per-qubit k values: shape [Q, 1], float32, uniform in [0, 1).
k_values = tf.random.uniform(shape=[Q, 1], minval=0.0, maxval=1.0, dtype=tf.float32)

# a_U constant (described in the original notes as coming from NGFT).
a_U_constant = tf.constant(10.0, dtype=tf.float32) # Scalar

# One lineage hash per qubit: SHA-256 of "Q<idx>_PathDynamic_<r>" with r drawn from 0..999.
lineage_hashes = []
for q_idx in range(Q):
    lineage_hashes.append(hashlib.sha256(f"Q{q_idx}_PathDynamic_{np.random.randint(0, 1000)}".encode('utf-8')).hexdigest())

# NECL program as a list of operation names, applied in order.
# In this example every qubit shares the same program.
necl_program_shared = ['TWIST', 'CURV', 'PARITY_Q', 'COLLAPSE_Q', 'LIFT']

# Parameters for the NECL operations, keyed by operation name.
necl_params = {
    'CURV': tf.constant(0.01, dtype=tf.float32), # kappa
    'GEOD': tf.constant(0.05, dtype=tf.float32), # t
    'TWIST': tf.constant(math.pi/4, dtype=tf.float32),  # theta (radians)
    'LIFT': tf.constant(0.5, dtype=tf.float32),   # d (e.g., a scaling factor based on d)
    'GLUE': tf.constant(0.1, dtype=tf.float32),   # sigma
    'SPLIT': tf.constant(0.5, dtype=tf.float32),  # tau
}

# Invariants nu: {units, tol, ordering, correction_threshold}
invariants = {
    'units': 1.0,
    'tol': 1e-5, # tolerance read by invariant_check_conceptual
    'ordering': 'real_unreal_first',
    'correction_threshold': 0.1 # NOTE(review): not read by the code in this section - confirm it is used elsewhere
}

# TRACE (lineage manifest): list of dicts; correct_bits appends one entry per correction.
TRACE = []

# =========================
# Main Cycle (per run)
# =========================

# 1) X ← NORM(X, ν)
primaries_normalized = NORMALIZE_Q(initial_primaries, invariants)

# 2) X ← APPLY_NECL(X, NECL)       # runs necl_program_shared: TWIST → CURV → PARITY_Q → COLLAPSE_Q → LIFT
primaries_after_necl, necl_program_checksum = APPLY_NECL(primaries_normalized, necl_program_shared, necl_params, PRIME_MASK)

# 3) Pairs[q], Triplets[q] ← compute_tuplets(X[q])
# Computed for all Q qubits at once rather than per qubit.
all_pairs = compute_pairs(primaries_after_necl) # [Q, 30, 2]
all_triplets = group_triplets(all_pairs) # [Q, 10, 3, 2]

# 4) Bits[q] ← bitmap(X[q].real)  # binary collapse map (phase-dual aware)
# Collapse and parity are re-detected on the final pairs; the rotated pairs feed the initial bitmap.
final_collapse_mask = detect_collapse(all_pairs) # NOTE(review): presumably uses module-level thresholds - confirm in detect_collapse
final_rotated_pairs, final_parity_mask = apply_parity_rotation(all_pairs, final_collapse_mask, PRIME_MASK)
initial_bits = bitmap(final_rotated_pairs) # [Q, 30]

corrected_bits_list = []
final_resonance_keys = []

# Per-qubit error correction and resonance-key update.
for q_idx in range(Q):
    # Slice out this qubit's data.
    pairs_q = all_pairs[q_idx] # [30, 2]
    triplets_q = all_triplets[q_idx] # [10, 3, 2]
    current_bits_q = initial_bits[q_idx] # [30]
    current_lineage_hash = lineage_hashes[q_idx]

    # Demonstration only: force qubit 0 into an 'inconsistent' state so the correction path runs.
    if q_idx == 0:
        # A single 1 followed by 29 zeros: fewer than 5 ones, which correct_bits treats as sparse.
        sparse_bits_for_q0 = tf.concat([tf.ones([1], dtype=tf.int32), tf.zeros([29], dtype=tf.int32)], axis=0)
        current_bits_q = sparse_bits_for_q0

    # Returns the inputs unchanged when the bits are consistent; otherwise corrected bits and a new key.
    corrected_bits_q, updated_key_q = correct_bits(q_idx, pairs_q, triplets_q, current_bits_q, current_lineage_hash, TRACE, invariants)
    corrected_bits_list.append(corrected_bits_q)
    # When a correction occurred, updated_key_q is the re-hashed key that includes the 'REFactorBits' lineage.
    final_resonance_keys.append(updated_key_q)

# Stack the per-qubit bit patterns back into one tensor.
corrected_bits_tensor = tf.stack(corrected_bits_list)

# 5) PrimariesOut[q] ← promote_primaries(Pairs[q], Triplets[q])
# Promotion uses the full triplets together with the axis maps and the THETA_PHIPI tolerance.
primaries_out_promoted = ASSOC_Q(all_triplets, axis_maps, THETA_PHIPI)

# 6) InfoEnergy[q] ← (k+1)·a_U·I   # I from tuplet entropy
info_energy_output = compute_info_energy(primaries_out_promoted, k_values, a_U_constant)

# 7) ResonanceKey[q] ← hash(lineage_manifest)
# Already done inside the correction loop above:
# final_resonance_keys holds the per-qubit keys after any error correction.

# 8) Spin[q], I_vec[q] ← decode_hash(H[q])
# Every qubit is decoded, one hash per call, from the original lineage_hashes (not the updated keys).
Q_for_decode_example = 1 # NOTE(review): not read anywhere in this section
D_for_decode_example = 16 # decode_lineage_hash requires D >= 16

all_spin_vecs_decoded = []
all_i_vecs_decoded = []
for q_idx in range(Q):
    spin_vec_decoded, i_vec_decoded = decode_lineage_hash(lineage_hashes[q_idx], q_idx, D=D_for_decode_example, num_qubits=Q, invariants=invariants)
    all_spin_vecs_decoded.append(spin_vec_decoded)
    all_i_vecs_decoded.append(i_vec_decoded)

# Each call returns a leading axis of length 1; concatenating gives [Q, 2, 3] and [Q, D].
spin_vecs_decoded_tensor = tf.concat(all_spin_vecs_decoded, axis=0)
i_vecs_decoded_tensor = tf.concat(all_i_vecs_decoded, axis=0)

# =========================
# --- Print Results ---
# =========================
print("Primaries In:\n", initial_primaries.numpy())
print("\nPrimaries After NECL:\n", primaries_after_necl.numpy())
# Pairs and triplets are intermediate constructs, so only qubit 0 is shown.
print("\nPairs[0]:\n", all_pairs[0].numpy())
print("\nTriplets[0]:\n", all_triplets[0].numpy())
print("\nBits (all qubits):\n", corrected_bits_tensor.numpy()) # bits after error correction
print("\nPrimaries Out (promoted):\n", primaries_out_promoted.numpy())

# Conceptual Nth identities {n^0, n^1, n^2, n^p}, printed once per qubit.
print("\nNth Identities (Conceptual, per qubit):\n")
for q_idx in range(Q):
    # First promoted primary of this qubit; used as the selector for n^1.
    promoted_primary_x = primaries_out_promoted[q_idx, 0, :] # Shape [2]

    # Explicit float32 tensor conversion before handing the selector to n_identity.
    promoted_primary_x_tensor = tf.convert_to_tensor(promoted_primary_x, dtype=tf.float32)

    print(f"  Qubit {q_idx}:")
    print(f"    n^0 (base identity): {n_identity(0).numpy()[0]}")
    print(f"    n^1 (first-order selector): {n_identity(1, selector_primary=promoted_primary_x_tensor).numpy()[0]}")
    print(f"    n^2 (second-order product): {n_identity(2).numpy()[0]}") # Placeholder
    print(f"    n^p (p-order product): {n_identity('p').numpy()[0]}") # Placeholder

print("\nInfo-energy Output (all qubits):\n", info_energy_output.numpy())
print("\nResonance Keys (all qubits):\n", final_resonance_keys)
print("\nSpin (all qubits, conceptual):\n", spin_vecs_decoded_tensor.numpy())
print("\nI_vec (all qubits, conceptual):\n", i_vecs_decoded_tensor.numpy())

# Per-qubit manifest checksum: SHA-256 of str() of that qubit's TRACE entries.
# Qubits without any TRACE entry all hash the same string, "[]".
necl_manifest_checksums = []
for q_idx in range(Q):
    qubit_trace_entries = [entry for entry in TRACE if entry['qubit'] == q_idx]
    manifest_str = str(qubit_trace_entries)
    checksum = hashlib.sha256(manifest_str.encode('utf-8')).hexdigest()
    necl_manifest_checksums.append(checksum)
print("\nNECL Manifest Checksums (per qubit, conceptual):\n", necl_manifest_checksums)
print("\nTRACE Log (Conceptual - detailed lineage for error correction):\n", TRACE)


Primaries In:
 [[[ 0.38381505  0.4721625 ]
  [-0.38381505 -0.4721625 ]
  [-0.9002569   0.30928206]
  [ 0.9002569  -0.30928206]
  [-0.77344847 -0.25261378]
  [ 0.77344847  0.25261378]]

 [[ 0.9345577   0.7938018 ]
  [-0.9345577  -0.7938018 ]
  [-0.20542765  0.41429496]
  [ 0.20542765 -0.41429496]
  [ 0.6797228  -0.41083193]
  [-0.6797228   0.41083193]]

 [[-0.16832232 -0.9434247 ]
  [ 0.16832232  0.9434247 ]
  [ 0.06129098 -0.34418797]
  [-0.06129098  0.34418797]
  [-0.38914895 -0.63503885]
  [ 0.38914895  0.63503885]]

 [[-0.98245907  0.5274403 ]
  [ 0.98245907 -0.5274403 ]
  [-0.42973685  0.9610157 ]
  [ 0.42973685 -0.9610157 ]
  [ 0.27415085  0.25171733]
  [-0.27415085 -0.25171733]]

 [[ 0.71996427  0.6754923 ]
  [-0.71996427 -0.6754923 ]
  [-0.46750283  0.04121709]
  [ 0.46750283 -0.04121709]
  [-0.69546294 -0.17058277]
  [ 0.69546294  0.17058277]]

 [[ 0.41989803 -0.79322433]
  [-0.41989803  0.79322433]
  [ 0.22988105  0.07526851]
  [-0.22988105 -0.07526851]
  [ 0.72737     0.22108

In [None]:
# Colab-ready single-cell test harness for an Nth/NGFT-inspired "ISA step"
# Keeps: triplet loops, per-triplet scatter updates, and adds explicit cross-qubit state arbitration.
# Goal: 64 qubits -> 30-bit word each (1920 bits total), plus canonical shared-state grouping and unique per-qubit addressing IDs.

import tensorflow as tf
import numpy as np
import hashlib
import math
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

# -----------------------------
# Config / constants (spec knobs)
# -----------------------------
# NOTE(review): THETA_PHIPI and the three TAU_*_METRIC values are defined here but
# are not referenced by the functions visible in this cell - confirm before removing.
THETA_PHIPI = 0.001
TAU_HI      = 1.0      # default `tau_hi` of detect_collapse_triplet_scatter (real >= tau_hi)
TAU_LOW     = -TAU_HI  # default `tau_low` of detect_collapse_triplet_scatter (unreal <= tau_low)
EPS         = 1e-6     # near-zero guard: ratio denominator, bitmap threshold, i_vec norm check
R_FOR_RATIO = 64.0     # default real/unreal ratio above which a unit counts as collapsed

TAU_R_METRIC = 0.85
TAU_U_METRIC = 0.85
TAU_D_METRIC = 0.85

# 30-entry 0/1 mask; the ones sit at indices 2,3,5,7,11,13,17,19,23,29 (the primes below 30).
PRIME_MASK = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# Index table splitting the 30 registers into 10 consecutive groups of 3.
TRIPLET_IDX = tf.constant(
    [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12,13,14],
     [15,16,17],[18,19,20],[21,22,23],[24,25,26],[27,28,29]],
    dtype=tf.int32
)

# -----------------------------
# Helpers: phase-dual operations (float32; last dim=2 -> [real, unreal])
# -----------------------------
def add_pd(a, b):
    """Return the element-wise sum ``a + b`` of two phase-dual values."""
    total = a + b
    return total
def mul_pd(a, b):
    """Return the element-wise product ``a * b`` of two phase-dual values."""
    product = a * b
    return product
def neg_pd(a):
    """Return the element-wise negation ``-a`` of a phase-dual value."""
    negated = -a
    return negated

# -----------------------------
# Helpers: packing and bit-slicing
# -----------------------------
def pack30_to_u32_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """
    Pack each row of 30 bits into one unsigned 32-bit word.

    bits30_i32: [Q,30] int32 {0,1}
    Returns: [Q] uint32, where bit i of word q is bits30_i32[q, i].
    """
    bit_positions = tf.cast(tf.range(30), tf.uint32)            # [30]
    as_u32 = tf.cast(bits30_i32, tf.uint32)                     # [Q,30]
    # Move every bit to its own position; with 0/1 inputs the shifted values
    # never overlap, so summing along the row assembles the word.
    positioned = tf.bitwise.left_shift(as_u32, bit_positions)   # [Q,30]
    return tf.reduce_sum(positioned, axis=1)                    # [Q]

def bitslice_30_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """
    Transpose a [Q,30] bit matrix into 30 lane masks.

    bits30_i32: [Q,30] int32 {0,1}, Q<=64
    Returns: [30] uint64; bit q of mask i is bits30_i32[q, i].
    """
    lane_count = tf.shape(bits30_i32)[0]
    one = tf.constant(1, tf.uint64)
    lane_weight = tf.bitwise.left_shift(one, tf.cast(tf.range(lane_count), tf.uint64))  # [Q], 1<<q
    bits_as_u64 = tf.cast(bits30_i32, tf.uint64)                                        # [Q,30]
    # mask[i] = sum over q of bits[q,i] * (1<<q): scale each row by its lane
    # weight and add the rows together.
    weighted_rows = bits_as_u64 * lane_weight[:, tf.newaxis]                            # [Q,30]
    return tf.reduce_sum(weighted_rows, axis=0)                                         # [30]

# -----------------------------
# Core ISA: pairs and triplets
# -----------------------------
def compute_pairs(prim: tf.Tensor) -> tf.Tensor:
    """
    Expand the 6 primaries of every qubit into the 30-register layout.

    prim: [Q,6,2] float32 -> pairs: [Q,30,2]

    Register order:
      0..5   the primaries x, xi, y, yi, z, zi
      6..13  (x|xi) with (y|yi): add, mul for each of the 4 combinations
      14..21 (x|xi) with (z|zi): same pattern
      22..29 (y|yi) with (z|zi): same pattern
    """
    tf.debugging.assert_shapes([(prim, ("Q", 6, 2))])
    tf.debugging.assert_type(prim, tf.float32)

    x, xi, y, yi, z, zi = tf.unstack(prim, axis=1)  # each [Q,2]

    registers = [x, xi, y, yi, z, zi]
    axis_pairings = (
        ((x, xi), (y, yi)),
        ((x, xi), (z, zi)),
        ((y, yi), (z, zi)),
    )
    for left_axis, right_axis in axis_pairings:
        for lhs in left_axis:
            for rhs in right_axis:
                # Each combination contributes its sum followed by its product.
                registers.append(add_pd(lhs, rhs))
                registers.append(mul_pd(lhs, rhs))

    return tf.stack(registers, axis=1)

def group_triplets(pairs: tf.Tensor) -> tf.Tensor:
    """
    Regroup the 30 registers into 10 triplets using TRIPLET_IDX.

    pairs: [Q,30,2] -> triplets: [Q,10,3,2]
    """
    tf.debugging.assert_shapes([(pairs, ("Q", 30, 2))])
    triplets = tf.gather(pairs, indices=TRIPLET_IDX, axis=1)
    return triplets

# -----------------------------
# Collapse detection: triplet loop + scatter update (preserves your structure)
# -----------------------------
def detect_collapse_triplet_scatter(pairs: tf.Tensor,
                                   tau_hi: float = TAU_HI,
                                   tau_low: float = TAU_LOW,
                                   r_for_ratio: float = R_FOR_RATIO) -> tf.Tensor:
    """
    Flag collapsed registers, then re-write the flags triplet by triplet.

    pairs: [Q,30,2] -> collapse_mask: [Q,30] int32

    A register is individually collapsed when either
      * real >= tau_hi and unreal <= tau_low, or
      * real / unreal > r_for_ratio (the ratio is taken as 0 where |unreal| <= EPS).

    Each of the 10 triplets is then visited: where all three flags agree the
    shared value is written back, otherwise the individual flags are written
    back, via tf.tensor_scatter_nd_update.

    Note: in a uniform triplet the shared value is identical to each member's
    own flag, so every write stores the value the mask was initialised with.
    The loop is retained deliberately as structural scaffolding.
    """
    tf.debugging.assert_shapes([(pairs, ("Q", 30, 2))])
    re_part = pairs[..., 0]   # [Q,30]
    un_part = pairs[..., 1]   # [Q,30]
    n_qubits = tf.shape(pairs)[0]

    threshold_hit = tf.logical_and(re_part >= tau_hi, un_part <= tau_low)
    guarded_ratio = tf.where(tf.abs(un_part) > EPS, re_part / un_part, tf.zeros_like(re_part))
    per_unit = tf.logical_or(threshold_hit, guarded_ratio > r_for_ratio)  # [Q,30] bool

    mask = tf.cast(per_unit, tf.int32)

    # Row part of the scatter indices is the same for every triplet: q repeated 3 times.
    row_ids = tf.repeat(tf.range(n_qubits), repeats=3)  # [Q*3]

    for t in tf.range(10):
        cols = TRIPLET_IDX[t]                              # [3]
        members = tf.gather(per_unit, cols, axis=1)        # [Q,3] bool
        lead = members[:, 0:1]                             # [Q,1] bool

        # A triplet is uniform for a qubit when all members equal the first one.
        all_same = tf.reduce_all(tf.equal(members, lead), axis=1, keepdims=True)  # [Q,1]

        triplet_values = tf.where(
            all_same,
            tf.tile(tf.cast(lead, tf.int32), [1, 3]),
            tf.cast(members, tf.int32)
        )  # [Q,3] int32

        col_ids = tf.tile(cols, multiples=[n_qubits])      # [Q*3]
        targets = tf.stack([row_ids, col_ids], axis=1)     # [Q*3,2] -> mask[q, cols[j]]

        mask = tf.tensor_scatter_nd_update(mask, targets, tf.reshape(triplet_values, [-1]))

    return mask

# -----------------------------
# Parity rotation + bitmap
# -----------------------------
def apply_parity_rotation(pairs: tf.Tensor, collapse_mask: tf.Tensor, prime_mask: tf.Tensor = PRIME_MASK):
    """
    Negate every register that is prime-indexed or flagged as collapsed.

    pairs: [Q,30,2], collapse_mask: [Q,30] int32
    Returns (rotated_pairs [Q,30,2], affected [Q,30] int32), where `affected`
    is 1 for each register whose sign was flipped and 0 otherwise.
    """
    tf.debugging.assert_shapes([(pairs, ("Q", 30, 2)), (collapse_mask, ("Q", 30))])

    n_qubits = tf.shape(pairs)[0]
    prime_rows = tf.broadcast_to(tf.cast(prime_mask, tf.int32)[tf.newaxis, :], [n_qubits, 30])

    flip = tf.logical_or(prime_rows > 0, collapse_mask > 0)   # [Q,30] bool
    affected = tf.cast(flip, tf.int32)

    minus_one = tf.constant(-1.0, tf.float32)
    plus_one = tf.constant(1.0, tf.float32)
    factor = tf.where(flip, minus_one, plus_one)              # [Q,30]

    # Both the real and unreal channel of a register share the same factor.
    rotated = pairs * factor[..., tf.newaxis]                 # [Q,30,2]
    return rotated, affected

def bitmap(rotated_pairs: tf.Tensor, eps: float = EPS) -> tf.Tensor:
    """
    Threshold the real channel into bits.

    rotated_pairs: [Q,30,2] -> bits: [Q,30] int32; a bit is 1 where the real
    channel is strictly greater than `eps`, else 0. The unreal channel is not read.
    """
    tf.debugging.assert_shapes([(rotated_pairs, ("Q", 30, 2))])
    real_channel = rotated_pairs[..., 0]
    is_set = tf.greater(real_channel, eps)
    return tf.cast(is_set, tf.int32)

# -----------------------------
# Canonical commitment and decode (portable serialization; uses Python hashlib for test)
# -----------------------------
def commit_qubit_u32(packed_bits_u32: int,
                     packed_collapse_u32: int,
                     packed_parity_u32: int,
                     packed_prime_u32: int,
                     domain_sep: bytes = b"NTHISA0") -> bytes:
    """
    Return the 32-byte BLAKE2s commitment for one qubit.

    The hashed message is `domain_sep` followed by four unsigned 4-byte
    little-endian words in this order: prime, bits, collapse, parity.
    Each word must fit in 32 bits (int.to_bytes raises OverflowError otherwise).
    """
    hasher = hashlib.blake2s(digest_size=32)
    hasher.update(domain_sep)
    # Serialization order differs from the parameter order: prime comes first.
    for word in (packed_prime_u32, packed_bits_u32, packed_collapse_u32, packed_parity_u32):
        hasher.update(word.to_bytes(4, "little", signed=False))
    return hasher.digest()

def prf_expand(key32: bytes, personalization: bytes, out_len: int) -> bytes:
    """
    Expand `key32` into `out_len` pseudo-random bytes using BLAKE2s in counter mode.

    Block i is blake2s(personalization + key32 + i as 4-byte little-endian),
    32 bytes each; blocks are concatenated and truncated to `out_len`.
    A non-positive `out_len` yields b"".
    """
    blocks = []
    produced = 0
    counter = 0
    while produced < out_len:
        block = hashlib.blake2s(
            personalization + key32 + counter.to_bytes(4, "little"),
            digest_size=32,
        ).digest()
        blocks.append(block)
        produced += len(block)
        counter += 1
    return b"".join(blocks)[:out_len]

def decode_commitment_to_spin_I(commitment32: bytes, D: int = 16) -> Tuple[np.ndarray, np.ndarray]:
    """
    Deterministically derive a spin pair and an I-vector from a commitment.

    The commitment is expanded with prf_expand (personalization b"SPINIVEC")
    into 12 + 4*D bytes, read as little-endian 32-bit words scaled to [0, 1):
      * words 0..2 give the angles theta, phi and twist (each times 2*pi);
      * the remaining D words give the raw I-vector entries.

    Returns:
      spin: [2,3] float32. Row 0 is (sin t cos p, sin t sin p, cos t); row 1 is
            row 0 with its first two components rotated by `twist`.
      ivec: [D] float32, divided by its Euclidean norm when that norm exceeds EPS.
    """
    stream = prf_expand(commitment32, b"SPINIVEC", 12 + 4*D)

    def unit_fraction(byte_offset):
        # One little-endian u32 mapped to [0, 1).
        return int.from_bytes(stream[byte_offset:byte_offset + 4], "little") / 2**32

    theta = 2.0 * math.pi * unit_fraction(0)
    phi   = 2.0 * math.pi * unit_fraction(4)
    twist = 2.0 * math.pi * unit_fraction(8)

    sin_theta = math.sin(theta)
    real_spin = np.array(
        [sin_theta * math.cos(phi), sin_theta * math.sin(phi), math.cos(theta)],
        dtype=np.float32,
    )

    # Rotate the (x, y) part of the already-rounded float32 real spin by `twist`;
    # the third component is carried over unchanged.
    rx, ry, rz = real_spin
    cos_twist = math.cos(twist)
    sin_twist = math.sin(twist)
    unreal_spin = np.array(
        [rx * cos_twist - ry * sin_twist, rx * sin_twist + ry * cos_twist, rz],
        dtype=np.float32,
    )

    spin = np.stack([real_spin, unreal_spin], axis=0)  # [2,3]

    ivec = np.empty((D,), dtype=np.float32)
    ivec[:] = [unit_fraction(12 + 4*i) for i in range(D)]
    length = float(np.linalg.norm(ivec))
    if length > EPS:
        ivec /= length
    return spin, ivec

# -----------------------------
# Global state arbitration (cross-qubit): collision groups + unique per-qubit address IDs
# -----------------------------
@dataclass
class ArbitrationResult:
    """Result bundle returned by arbitrate_states.

    Holds the per-qubit canonical state words, the per-qubit hashed address
    ids, the grouping of qubit indices by canonical state, and the number of
    qubits that share a state already held by an earlier qubit.
    """
    canonical_state_u32: np.ndarray      # [Q] uint32 (30-bit canonical state)
    unique_state_id_u64: np.ndarray      # [Q] uint64 (unique address tag per qubit)
    groups: Dict[int, List[int]]         # {canonical_u32: [q indices]}
    collision_count: int                 # total number of qubits involved in collisions (beyond first in each group)

def arbitrate_states(packed_state_u32: np.ndarray,
                     instruction_counter: int,
                     necl_checksum32: bytes,
                     mode: str = "unique_address_keep_shared_state") -> ArbitrationResult:
    """
    Group qubits by canonical state and derive one 64-bit address id per qubit.

    Qubits are allowed to share a canonical 30-bit state; what must differ per
    qubit is the address id used to refer to it.

    Steps:
      1) bucket qubit indices by their packed canonical state;
      2) for each qubit, hash
           b"NTHADDR0" || state (u32 LE) || q (u16 LE) || instruction_counter (u32 LE) || necl_checksum32
         with BLAKE2s (8-byte digest) and read the digest as a little-endian u64.

    Args:
      packed_state_u32: [Q] array of packed canonical states.
      instruction_counter: step counter mixed into every address.
      necl_checksum32: program checksum bytes mixed into every address.
      mode: accepted for forward compatibility; the current body does not read it.

    Returns:
      ArbitrationResult whose collision_count adds (group size - 1) over all groups.

    Note: the qubit index is serialized in 2 bytes, so indices above 65535
    make int.to_bytes raise OverflowError.
    """
    n_qubits = packed_state_u32.shape[0]

    groups: Dict[int, List[int]] = {}
    for q in range(n_qubits):
        groups.setdefault(int(packed_state_u32[q]), []).append(q)

    collision_count = sum(len(members) - 1 for members in groups.values())

    def address_of(state: int, q: int) -> int:
        # Deterministic address: hash(canonical_state || q || instr_counter || necl_checksum)
        payload = b"".join((
            b"NTHADDR0",
            int(state).to_bytes(4, "little", signed=False),
            int(q).to_bytes(2, "little", signed=False),
            int(instruction_counter).to_bytes(4, "little", signed=False),
            necl_checksum32,
        ))
        digest = hashlib.blake2s(payload, digest_size=8).digest()  # 64-bit id
        return int.from_bytes(digest, "little", signed=False)

    unique_ids = np.zeros((n_qubits,), dtype=np.uint64)
    for state, members in groups.items():
        for q in members:
            unique_ids[q] = address_of(state, q)

    return ArbitrationResult(
        canonical_state_u32=packed_state_u32.astype(np.uint32),
        unique_state_id_u64=unique_ids,
        groups=groups,
        collision_count=collision_count
    )

# -----------------------------
# Minimal NECL checksum placeholder for the step (portable program identity)
# -----------------------------
def necl_program_checksum(necl_program_list: List[str], params_dict: Dict[str, float]) -> bytes:
    """
    Return a 32-byte BLAKE2s checksum identifying a NECL program.

    Manifest layout: b"NECL0", then for every op in order
      * the op name encoded as ASCII followed by a NUL byte,
      * the op's parameter as a little-endian IEEE float32 (0.0 when the op
        has no entry in `params_dict`).

    Args:
      necl_program_list: op names in execution order (must be ASCII;
        str.encode raises UnicodeEncodeError otherwise).
      params_dict: optional per-op scalar parameter.

    Returns:
      32-byte digest of the manifest.
    """
    manifest_parts = [b"NECL0"]
    for op in necl_program_list:
        manifest_parts.append(op.encode("ascii") + b"\x00")
        value = np.float32(params_dict.get(op, 0.0))
        # Pin the byte order: a bare np.float32(...).tobytes() emits native-order
        # bytes, which would change the checksum on big-endian hosts even though
        # the rest of the serialization in this file is explicitly little-endian.
        manifest_parts.append(np.asarray(value, dtype="<f4").tobytes())
    # 32 bytes checksum
    return hashlib.blake2s(b"".join(manifest_parts), digest_size=32).digest()

# -----------------------------
# One ISA "instruction step" (test): primaries -> pairs -> collapse -> parity -> bits -> commitments -> arbitration
# -----------------------------
@dataclass
class StepOutputs:
    """Everything isa_step produces for one instruction step.

    The tensor fields are the intermediate and final results of the pipeline
    (primaries -> pairs -> collapse -> parity rotation -> bits); the remaining
    fields are host-side (Python/NumPy) products derived from the packed bits.
    """
    primaries: tf.Tensor                 # [Q,6,2]
    pairs: tf.Tensor                     # [Q,30,2]
    triplets: tf.Tensor                  # [Q,10,3,2]
    collapse_mask: tf.Tensor             # [Q,30] int32
    rotated_pairs: tf.Tensor             # [Q,30,2]
    parity_mask: tf.Tensor               # [Q,30] int32
    bits: tf.Tensor                      # [Q,30] int32
    bitmasks_30x_u64: tf.Tensor          # [30] uint64
    commitments: List[bytes]             # len Q
    arbitration: ArbitrationResult       # global uniqueness + shared-state groups
    spin_vecs: np.ndarray                # [Q,2,3] float32
    i_vecs: np.ndarray                   # [Q,D] float32

def isa_step(primaries: tf.Tensor,
             necl_program: List[str],
             necl_params: Dict[str, float],
             instruction_counter: int = 0,
             D_decode: int = 16) -> StepOutputs:
    """
    Run one ISA instruction step and collect every intermediate result.

    Pipeline: primaries -> pairs -> triplets / collapse mask -> parity rotation
    -> bits -> bit-sliced masks -> per-qubit commitments -> cross-qubit
    arbitration -> decoded spin / I vectors.

    Args:
      primaries: [Q,6,2] float32 tensor (validated by compute_pairs).
      necl_program: op names; only used to derive the program checksum.
      necl_params: per-op parameters; only used to derive the program checksum.
      instruction_counter: mixed into the per-qubit address ids.
      D_decode: length of each decoded I-vector.

    Returns:
      StepOutputs holding all tensors and host-side results.

    Note: calls .numpy() on tensors, so it has to run eagerly.
    """
    Q = primaries.shape[0]
    if Q is None:
        # Static shape unknown: fall back to the runtime shape.
        Q = int(tf.shape(primaries)[0].numpy())

    # Program identity (used in arbitration/addressing)
    necl_chk = necl_program_checksum(necl_program, necl_params)

    # Core pipeline (no removal of triplet loops / scatter updates)
    pairs = compute_pairs(primaries)
    triplets = group_triplets(pairs)
    collapse_mask = detect_collapse_triplet_scatter(pairs)  # triplet loop + scatter update retained
    rotated_pairs, parity_mask = apply_parity_rotation(pairs, collapse_mask, PRIME_MASK)
    bits = bitmap(rotated_pairs)

    # Bit-sliced 30×u64 = 1920 bits at Q=64
    bitmasks_30x_u64 = bitslice_30_tf(bits)

    # Pack for commitments / arbitration (materialize once; Q small)
    packed_bits = pack30_to_u32_tf(bits).numpy().astype(np.uint32)
    packed_collapse = pack30_to_u32_tf(collapse_mask).numpy().astype(np.uint32)
    packed_parity = pack30_to_u32_tf(parity_mask).numpy().astype(np.uint32)
    # PRIME_MASK is the same for every qubit, so pack a single [1,30] row rather
    # than broadcasting to [Q,30], packing Q identical rows and reading row 0
    # (that indexing also fails when Q == 0).
    packed_prime = int(pack30_to_u32_tf(PRIME_MASK[tf.newaxis, :])[0].numpy())

    # Commit per qubit
    commitments = [
        commit_qubit_u32(int(bits_word), int(collapse_word), int(parity_word), packed_prime)
        for bits_word, collapse_word, parity_word in zip(packed_bits, packed_collapse, packed_parity)
    ]

    # Cross-qubit arbitration: shared canonical grouping + unique address id per qubit
    arbitration = arbitrate_states(
        packed_state_u32=packed_bits,
        instruction_counter=instruction_counter,
        necl_checksum32=necl_chk,
        mode="unique_address_keep_shared_state"
    )

    # Decode commitments into conceptual spin + I vectors (portable PRF)
    spin_vecs = np.zeros((Q, 2, 3), dtype=np.float32)
    i_vecs = np.zeros((Q, D_decode), dtype=np.float32)
    for q, commitment in enumerate(commitments):
        spin_vecs[q], i_vecs[q] = decode_commitment_to_spin_I(commitment, D=D_decode)

    return StepOutputs(
        primaries=primaries,
        pairs=pairs,
        triplets=triplets,
        collapse_mask=collapse_mask,
        rotated_pairs=rotated_pairs,
        parity_mask=parity_mask,
        bits=bits,
        bitmasks_30x_u64=bitmasks_30x_u64,
        commitments=commitments,
        arbitration=arbitration,
        spin_vecs=spin_vecs,
        i_vecs=i_vecs
    )

# -----------------------------
# Demo / test run (Colab)
# -----------------------------
# Seed both RNGs before any random draw so the demo is repeatable.
tf.random.set_seed(7)
np.random.seed(7)

Q = 64  # fixed for the 1920-bit per instruction demonstration

# Start state: 3D point (x,y,z) each phase-dual -> expand to [x,-x,y,-y,z,-z]
base_xyz = tf.random.uniform([Q,3,2], minval=-1.0, maxval=1.0, dtype=tf.float32)
x = base_xyz[:,0,:]; y = base_xyz[:,1,:]; z = base_xyz[:,2,:]
primaries0 = tf.stack([x, -x, y, -y, z, -z], axis=1)

# NECL program identity (kept as identity here; your real NECL would transform primaries first)
# Inside isa_step these two values only feed the program checksum.
necl_program = ["TWIST","CURV","PARITY_Q","COLLAPSE_Q","LIFT"]
necl_params = {"TWIST": float(math.pi/4), "CURV": 0.01, "LIFT": 0.5}

out = isa_step(primaries0, necl_program, necl_params, instruction_counter=0, D_decode=16)

# Report shapes / dtypes of the main step outputs.
print("=== ISA STEP OUTPUTS ===")
print("primaries:", out.primaries.shape)
print("pairs:", out.pairs.shape)
print("triplets:", out.triplets.shape)
print("collapse_mask:", out.collapse_mask.shape, out.collapse_mask.dtype)
print("parity_mask:", out.parity_mask.shape, out.parity_mask.dtype)
print("bits:", out.bits.shape, out.bits.dtype)
print("bitmasks_30x_u64:", out.bitmasks_30x_u64.shape, out.bitmasks_30x_u64.dtype)
print("Total bits per instruction (Q*30):", Q*30)

# Show 30 masks (each a 64-bit lane mask)
masks_np = out.bitmasks_30x_u64.numpy()
print("Mask[0] u64:", int(masks_np[0]))
print("Mask[1] u64:", int(masks_np[1]))
print("Mask[29] u64:", int(masks_np[29]))

# Commitments and decoded states
print("\nCommitment[0] (hex):", out.commitments[0].hex())
print("spin_vec[0]:\n", out.spin_vecs[0])
print("i_vec[0] (norm):", float(np.linalg.norm(out.i_vecs[0])))

# Global novelty / uniqueness
arb = out.arbitration
print("\n=== GLOBAL STATE ARBITRATION ===")
print("Unique address IDs per qubit (first 5):", arb.unique_state_id_u64[:5].tolist())
print("Number of canonical state groups:", len(arb.groups))
print("Collision qubits beyond first per group:", arb.collision_count)

# Show the largest collision group (if any)
# `largest` is a (canonical_state, [qubit indices]) item of arb.groups.
largest = max(arb.groups.items(), key=lambda kv: len(kv[1]))
print("Largest canonical group size:", len(largest[1]))
if len(largest[1]) > 1:
    print("Canonical packed state (u32):", largest[0], "qubits:", largest[1][:16], "...")
else:
    print("No collisions (all canonical states unique in this run).")

# Optional: demonstrate that canonical states can be shared while addresses are unique
# Pick a collision group if present
coll_groups = [qs for qs in arb.groups.values() if len(qs) > 1]
if coll_groups:
    qs = coll_groups[0]
    print("\nExample shared canonical state group:", qs)
    print("Canonical u32:", int(arb.canonical_state_u32[qs[0]]))
    print("Unique IDs:", [int(arb.unique_state_id_u64[q]) for q in qs])
else:
    print("\nNo shared canonical state groups found in this random sample (this can happen).")

=== ISA STEP OUTPUTS ===
primaries: (64, 6, 2)
pairs: (64, 30, 2)
triplets: (64, 10, 3, 2)
collapse_mask: (64, 30) <dtype: 'int32'>
parity_mask: (64, 30) <dtype: 'int32'>
bits: (64, 30) <dtype: 'int32'>
bitmasks_30x_u64: (30,) <dtype: 'uint64'>
Total bits per instruction (Q*30): 1920
Mask[0] u64: 14272570256839023417
Mask[1] u64: 4174173816870528198
Mask[29] u64: 2064223497469565654

Commitment[0] (hex): 7c215633473d302941487448075d792c120afef8194b5537a836ec3a1a099245
spin_vec[0]:
 [[-0.19449411  0.3380893   0.9207973 ]
 [-0.33723992  0.19596317  0.9207973 ]]
i_vec[0] (norm): 1.0

=== GLOBAL STATE ARBITRATION ===
Unique address IDs per qubit (first 5): [1538999814548737703, 5393819066989283255, 7360347812077258044, 15690857822044680976, 3367412987148423094]
Number of canonical state groups: 46
Collision qubits beyond first per group: 18
Largest canonical group size: 3
Canonical packed state (u32): 735120457 qubits: [4, 20, 32] ...

Example shared canonical state group: [4, 20, 32]
Cano

In [None]:
# Colab-ready single-cell prototype that implements:
# 1) NEC primaries with explicit real/unreal channels: (x,xi,y,yi,z,zi) where x,y,z >=0 and xi,yi,zi <=0.
# 2) Base NEC sign pattern controls whether each axis pair is ordered as (real,unreal) or (unreal,real).
#    This matches your example: x,-y,z -> (x,xi, yi,y, z,zi) -> 100110 under sign->bit mapping.
# 3) Initiator selection per qubit based on *relative position* (frequency of shared axis magnitudes across the array).
# 4) Initiator-dependent permutation of the 6 primaries before expansion to pairs/triplets.
# 5) Keeps triplet loops + scatter updates (as in your original intent).
# 6) Produces:
#    - bits_selected: [Q,30] for the chosen initiator per qubit
#    - bits_views: [Q,3,30] for initiator x/y/z (simultaneous informational vectors)
#    - bit-sliced masks: [3,30] uint64 for Q=64 (3*1920 bits per instruction “view family”)

import tensorflow as tf
import numpy as np
import hashlib
import math

# -----------------------------
# Config / constants
# -----------------------------
TAU_HI      = 1.0    # default `tau_hi` of detect_collapse_triplet_scatter (real >= tau_hi)
TAU_LOW     = -1.0   # default `tau_low` of detect_collapse_triplet_scatter (unreal <= tau_low)
EPS         = 1e-6   # near-zero guard: ratio denominator and bitmap threshold
R_FOR_RATIO = 64.0   # default real/unreal ratio above which a unit counts as collapsed

# 30-entry 0/1 mask; the ones sit at indices 2,3,5,7,11,13,17,19,23,29 (the primes below 30).
PRIME_MASK = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# Index table splitting the 30 registers into 10 consecutive groups of 3.
TRIPLET_IDX = tf.constant(
    [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12,13,14],
     [15,16,17],[18,19,20],[21,22,23],[24,25,26],[27,28,29]],
    dtype=tf.int32
)

# -----------------------------
# Phase-dual helper ops
# -----------------------------
def add_pd(a, b):
    """Return the element-wise sum ``a + b`` of two phase-dual values."""
    total = a + b
    return total
def mul_pd(a, b):
    """Return the element-wise product ``a * b`` of two phase-dual values."""
    product = a * b
    return product

# -----------------------------
# Bit-slicing for Q<=64
# -----------------------------
def bitslice_30_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """
    Transpose a [Q,30] 0/1 matrix into 30 uint64 lane masks.

    Bit q of output mask i equals bits30_i32[q, i]; callers keep Q <= 64 so
    every lane fits in the 64-bit word.
    """
    lane_count = tf.shape(bits30_i32)[0]
    lane_ids = tf.cast(tf.range(lane_count), tf.uint64)
    lane_weight = tf.bitwise.left_shift(tf.constant(1, tf.uint64), lane_ids)  # [Q], 1<<q
    bits_as_u64 = tf.cast(bits30_i32, tf.uint64)                              # [Q,30]
    # Scale row q by (1<<q) and add the rows: mask[i] = sum_q bits[q,i] * (1<<q).
    return tf.reduce_sum(bits_as_u64 * lane_weight[:, tf.newaxis], axis=0)    # [30]

# -----------------------------
# Core ISA: pairs and triplets (same as your original reduced 30-register)
# -----------------------------
def compute_pairs(prim: tf.Tensor) -> tf.Tensor:
    """
    Expand 6 primaries per qubit into 30 registers.

    prim: [Q,6,2] float32 -> pairs: [Q,30,2]

    Registers 0..5 are the primaries themselves; they are followed by three
    groups of 8 built from the axis pairings (X,Y), (X,Z), (Y,Z). Within a
    group, every (left, right) combination of the two slots of each axis
    contributes its sum and then its product.
    """
    x, xi, y, yi, z, zi = tf.unstack(prim, axis=1)

    axis_x, axis_y, axis_z = (x, xi), (y, yi), (z, zi)
    registers = [x, xi, y, yi, z, zi]
    for left_axis, right_axis in ((axis_x, axis_y), (axis_x, axis_z), (axis_y, axis_z)):
        for lhs in left_axis:
            for rhs in right_axis:
                registers.append(add_pd(lhs, rhs))
                registers.append(mul_pd(lhs, rhs))

    return tf.stack(registers, axis=1)

def group_triplets(pairs: tf.Tensor) -> tf.Tensor:
    """Gather the register axis (axis 1) of `pairs` through TRIPLET_IDX, adding a [10,3] triplet layout."""
    triplets = tf.gather(pairs, indices=TRIPLET_IDX, axis=1)
    return triplets

# -----------------------------
# Collapse: triplet loops + scatter update (kept)
# -----------------------------
def detect_collapse_triplet_scatter(pairs: tf.Tensor,
                                   tau_hi: float = TAU_HI,
                                   tau_low: float = TAU_LOW,
                                   r_for_ratio: float = R_FOR_RATIO) -> tf.Tensor:
    """
    Compute the int32 collapse mask for every register, triplet by triplet.

    A register is flagged when (real >= tau_hi and unreal <= tau_low) or when
    real / unreal > r_for_ratio, the ratio being treated as 0 wherever
    |unreal| <= EPS. The mask starts as those individual flags; each of the 10
    triplets is then re-written through tf.tensor_scatter_nd_update with either
    the shared flag (all three agree) or the individual flags.

    Note: a uniform triplet's shared flag equals each member's own flag, so the
    values written equal the ones already stored; the loop is kept on purpose.
    """
    real_ch = pairs[..., 0]      # [Q,30]
    unreal_ch = pairs[..., 1]    # [Q,30]
    n_rows = tf.shape(pairs)[0]

    by_threshold = tf.logical_and(real_ch >= tau_hi, unreal_ch <= tau_low)
    guarded_ratio = tf.where(tf.abs(unreal_ch) > EPS, real_ch / unreal_ch, tf.zeros_like(real_ch))
    flags = tf.logical_or(by_threshold, guarded_ratio > r_for_ratio)  # [Q,30] bool

    mask = tf.cast(flags, tf.int32)

    # The row half of the scatter indices does not depend on the triplet.
    row_ids = tf.repeat(tf.range(n_rows), repeats=3)  # [Q*3]

    for t in tf.range(10):
        cols = TRIPLET_IDX[t]                          # [3]
        trio = tf.gather(flags, cols, axis=1)          # [Q,3]
        head = trio[:, 0:1]                            # [Q,1]

        agrees = tf.reduce_all(tf.equal(trio, head), axis=1, keepdims=True)  # [Q,1]
        values = tf.where(
            agrees,
            tf.tile(tf.cast(head, tf.int32), [1, 3]),
            tf.cast(trio, tf.int32)
        )  # [Q,3]

        col_ids = tf.tile(cols, multiples=[n_rows])    # [Q*3]
        where_to = tf.stack([row_ids, col_ids], axis=1)

        mask = tf.tensor_scatter_nd_update(mask, where_to, tf.reshape(values, [-1]))

    return mask

def apply_parity_rotation(pairs: tf.Tensor, collapse_mask: tf.Tensor, prime_mask: tf.Tensor = PRIME_MASK):
    """
    Flip the sign of every register that is prime-indexed or collapsed.

    Returns (rotated, affected): `rotated` has the shape of `pairs` with both
    channels of each selected register negated; `affected` is the [Q,30] int32
    0/1 mask of the registers that were selected.
    """
    n_rows = tf.shape(pairs)[0]
    prime_rows = tf.broadcast_to(tf.cast(prime_mask, tf.int32)[tf.newaxis, :], [n_rows, 30])

    flip = tf.logical_or(prime_rows > 0, collapse_mask > 0)
    affected = tf.cast(flip, tf.int32)

    factor = tf.where(flip, tf.constant(-1.0, tf.float32), tf.constant(1.0, tf.float32))
    rotated = pairs * factor[..., tf.newaxis]
    return rotated, affected

def bitmap(rotated_pairs: tf.Tensor, eps: float = EPS) -> tf.Tensor:
    """Return int32 bits: 1 where channel 0 (real) of `rotated_pairs` is strictly above `eps`, else 0."""
    real_channel = rotated_pairs[..., 0]
    is_set = tf.greater(real_channel, eps)
    return tf.cast(is_set, tf.int32)

# -----------------------------
# NEW: NEC construction and initiator logic from your spec
# -----------------------------
def build_primaries_from_nec(mag_xyz_i32: tf.Tensor, base_sign_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Build the [Q,6,2] primaries tensor from NEC magnitudes and base signs.

    mag_xyz_i32:       [Q,3] int32 magnitudes, >=0
    base_sign_xyz_i32: [Q,3] int32 in {-1,+1}, the base NEC sign per axis

    For each axis the "real" value is +mag and the "unreal" value is -mag.
    The base sign decides which of the two comes first in the axis pair:
      sign >= 0 -> (real, unreal)
      sign <  0 -> (unreal, real)
    The six scalars are laid out as x, x', y, y', z, z' (primed = second slot
    of the pair, which is not necessarily the negative one). The last
    dimension duplicates the scalar into both phase-dual channels as a
    placeholder.
    """
    mag = tf.cast(mag_xyz_i32, tf.float32)          # [Q,3]
    swapped = base_sign_xyz_i32 < 0                 # [Q,3] bool, True -> unreal first

    lead_slot = tf.where(swapped, -mag, mag)        # [Q,3] first element of each pair
    dual_slot = tf.where(swapped, mag, -mag)        # [Q,3] second element of each pair

    # Interleave per axis: [Q,3,2] -> [Q,6] gives x0,x1,y0,y1,z0,z1.
    interleaved = tf.reshape(tf.stack([lead_slot, dual_slot], axis=2), [-1, 6])

    # phase-dual placeholder: both channels carry the same scalar for now
    return tf.stack([interleaved, interleaved], axis=2)  # [Q,6,2] float32

def initiator_from_shared_counts(mag_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Choose the initiator axis (0=x, 1=y, 2=z) for every qubit.

    mag_xyz_i32: [Q,3] int32 magnitudes.

    For each qubit and axis, count how many qubits in the array (itself
    included) have the same magnitude on that axis; an axis is "shared" when
    that count exceeds 1. Then:
      - no axis shared:       axis with the largest magnitude (ties: x, then y, then z)
      - exactly one shared:   that axis (it is the arg-max of the counts)
      - exactly two shared:   the remaining, unshared axis
      - all three shared:     axis with the largest count (ties: lowest index)
    Returns an int32 tensor of shape [Q].
    """
    mag = mag_xyz_i32  # [Q,3]

    # same[q, k, a] is True when qubits q and k agree on axis a (QxQx3; fine for Q=64).
    same = tf.equal(mag[:, tf.newaxis, :], mag[tf.newaxis, :, :])
    counts = tf.reduce_sum(tf.cast(same, tf.int32), axis=1)                # [Q,3]
    n_shared = tf.reduce_sum(tf.cast(counts > 1, tf.int32), axis=1)        # [Q]

    # One shared axis and three shared axes resolve the same way: arg-max of counts.
    by_count = tf.argmax(counts, axis=1, output_type=tf.int32)

    # With two shared axes exactly one axis has count == 1; pick it.
    odd_one_out = tf.argmax(tf.cast(tf.equal(counts, 1), tf.int32), axis=1, output_type=tf.int32)

    # Integer tie-break: scale magnitudes by 1000 and add 2/1/0 for x/y/z so that
    # equal magnitudes resolve towards the lower axis index.
    tie_break = tf.constant([2, 1, 0], tf.int32)
    by_magnitude = tf.argmax(mag * 1000 + tie_break, axis=1, output_type=tf.int32)

    initiator = tf.where(
        tf.equal(n_shared, 0), by_magnitude,
        tf.where(tf.equal(n_shared, 2), odd_one_out, by_count)
    )
    return initiator  # [Q]

def permute_primaries_by_initiator(primaries_6: tf.Tensor, initiator_axis: tf.Tensor) -> tf.Tensor:
    """
    Cyclically reorder the three axis pairs so the initiator's pair comes first.

    primaries_6: [Q,6,2] with axis pair order already set by base NEC sign
    initiator_axis: [Q] in {0,1,2} choosing initiator x/y/z

    Permutation is cyclic by axis pairs (2-wide):
      init x: [x,xi, y,yi, z,zi]  (pairs: X,Y,Z)
      init y: [y,yi, z,zi, x,xi]  (pairs: Y,Z,X)
      init z: [z,zi, x,xi, y,yi]  (pairs: Z,X,Y)
    Any initiator value other than 0 or 1 falls through to the z ordering.

    Returns the permuted primaries, [Q,6,2].
    """
    Q = tf.shape(primaries_6)[0]

    # One index row per possible initiator.
    idx_x = tf.constant([0,1,2,3,4,5], tf.int32)
    idx_y = tf.constant([2,3,4,5,0,1], tf.int32)
    idx_z = tf.constant([4,5,0,1,2,3], tf.int32)

    # Select the index row per qubit: [Q,6]
    idx = tf.where(tf.expand_dims(tf.equal(initiator_axis, 0), 1), tf.broadcast_to(idx_x, [Q,6]),
          tf.where(tf.expand_dims(tf.equal(initiator_axis, 1), 1), tf.broadcast_to(idx_y, [Q,6]),
                                                                   tf.broadcast_to(idx_z, [Q,6])))

    # batch_dims=1 pairs row q of `idx` with row q of `primaries_6`, so no
    # explicit batch-index tensor is needed.
    return tf.gather(primaries_6, idx, axis=1, batch_dims=1)

# -----------------------------
# Compute bits for a given primaries arrangement
# -----------------------------
def bits_from_primaries(prim: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    Run one primaries arrangement through the full bit pipeline.

    Chains compute_pairs -> group_triplets -> detect_collapse_triplet_scatter
    -> apply_parity_rotation -> bitmap, all defined elsewhere in the notebook.

    Returns (bits, pairs, triplets, collapse, parity) in that order.
    """
    pair_reg = compute_pairs(prim)
    triplet_view = group_triplets(pair_reg)
    collapse_mask = detect_collapse_triplet_scatter(pair_reg)
    rotated_reg, parity_mask = apply_parity_rotation(pair_reg, collapse_mask)
    return bitmap(rotated_reg), pair_reg, triplet_view, collapse_mask, parity_mask

# -----------------------------
# Demo setup
# -----------------------------
# Fix both RNG seeds so the demo is repeatable. Only tf.random draws are made
# in this cell; no NumPy random call is visible here.
tf.random.set_seed(11)
np.random.seed(11)

Q = 64  # target for 64-lane masks
# bitslice_30_tf packs one qubit per bit of a uint64, hence the 64 ceiling.
assert Q <= 64

# Build magnitudes so sharing actually happens (small range encourages collisions)
# Integer uniform sampling excludes maxval, so values fall in 0..63.
mag_xyz = tf.random.uniform([Q,3], minval=0, maxval=64, dtype=tf.int32)

# Base NEC sign pattern per axis in {-1,+1}
# -1 => unreal phase initiated for that axis (pair order swaps)
# sign_bits is drawn from {0,1}; 1 maps to +1 and 0 maps to -1.
sign_bits = tf.random.uniform([Q,3], minval=0, maxval=2, dtype=tf.int32)
base_sign = tf.where(sign_bits>0, tf.constant(1, tf.int32), tf.constant(-1, tf.int32))

# Build [Q,6,2] primaries from NEC magnitudes + sign-defined pair ordering
prim0 = build_primaries_from_nec(mag_xyz, base_sign)  # [Q,6,2]

# Determine per-qubit initiator axis from shared magnitude counts across array
initiator = initiator_from_shared_counts(mag_xyz)  # [Q], values 0/1/2

# Produce three "view" primaries (init=x, init=y, init=z) simultaneously
# Each view forces the same initiator axis on every qubit.
prim_view_x = permute_primaries_by_initiator(prim0, tf.zeros([Q], tf.int32))
prim_view_y = permute_primaries_by_initiator(prim0, tf.ones([Q], tf.int32))
prim_view_z = permute_primaries_by_initiator(prim0, tf.fill([Q], tf.constant(2, tf.int32)))

# Produce selected primaries (per qubit chosen initiator)
prim_sel = permute_primaries_by_initiator(prim0, initiator)

# Compute 30-bit outputs for each view and selected
# For the three forced views only the bits are kept; the other four outputs are discarded.
bits_x, *_ = bits_from_primaries(prim_view_x)
bits_y, *_ = bits_from_primaries(prim_view_y)
bits_z, *_ = bits_from_primaries(prim_view_z)
bits_sel, pairs_sel, trip_sel, coll_sel, par_sel = bits_from_primaries(prim_sel)

# Stack views: [Q,3,30]
bits_views = tf.stack([bits_x, bits_y, bits_z], axis=1)

# Bit-sliced masks per view: [3,30] uint64
masks_views = tf.stack([bitslice_30_tf(bits_x), bitslice_30_tf(bits_y), bitslice_30_tf(bits_z)], axis=0)
masks_sel = bitslice_30_tf(bits_sel)  # [30] uint64 for selected initiator per qubit

# -----------------------------
# Print / inspect
# -----------------------------
# Summarise the prototype run: tensor shapes, initiator histogram, a few lane
# masks, and the first sample qubits.
print("=== NEC / Initiator Prototype ===")
print("mag_xyz shape:", mag_xyz.shape, "base_sign shape:", base_sign.shape)
print("prim0 shape:", prim0.shape)
print("initiator axis counts:", np.bincount(initiator.numpy(), minlength=3), " (0=x,1=y,2=z)")

print("\n=== Bit outputs ===")
print("bits_sel:", bits_sel.shape, "bits_views:", bits_views.shape)
print("Total bits per view (Q*30):", Q*30, "Total bits across 3 initiators:", 3*Q*30)

print("\nSelected initiator masks (30×u64):")
ms = masks_sel.numpy()
print(" mask[0]:", int(ms[0]))
print(" mask[1]:", int(ms[1]))
print(" mask[29]:", int(ms[29]))

print("\nView masks shape:", masks_views.shape, "dtype:", masks_views.dtype)
mv = masks_views.numpy()  # convert once instead of once per printed view
print("View X mask[0]:", int(mv[0,0]))
print("View Y mask[0]:", int(mv[1,0]))
print("View Z mask[0]:", int(mv[2,0]))

# Show a few qubits for sanity: NEC, initiator, and 6-bit primary shadow implied by pair ordering
# Primary shadow here is simply sign-based bits of the first 6 primaries (positive->1, negative->0)
prim6_scalar = prim_sel[...,0]  # [Q,6], channel 0 of every primary
primary_bits6 = (prim6_scalar > 0).numpy().astype(np.int32)

print("\nSample qubits (q, NEC mag, NEC sign, initiator, primary6bits):")
mag_np = mag_xyz.numpy()
sign_np = base_sign.numpy()
init_np = initiator.numpy()
# Cap the sample at Q rows so a smaller Q does not index past the arrays.
for q in range(min(8, Q)):
    print(q, mag_np[q].tolist(), sign_np[q].tolist(), int(init_np[q]), primary_bits6[q].tolist())

=== NEC / Initiator Prototype ===
mag_xyz shape: (64, 3) base_sign shape: (64, 3)
prim0 shape: (64, 6, 2)
initiator axis counts: [24 22 18]  (0=x,1=y,2=z)

=== Bit outputs ===
bits_sel: (64, 30) bits_views: (64, 3, 30)
Total bits per view (Q*30): 1920 Total bits across 3 initiators: 5760

Selected initiator masks (30×u64):
 mask[0]: 3067302693352695933
 mask[1]: 6156069068624172930
 mask[29]: 12857919008426346258

View masks shape: (3, 30) dtype: <dtype: 'uint64'>
View X mask[0]: 3155114210651005029
View Y mask[0]: 14884463583173914205
View Z mask[0]: 9059472057987653975

Sample qubits (q, NEC mag, NEC sign, initiator, primary6bits):
0 [56, 38, 47] [1, 1, 1] 2 [1, 0, 1, 0, 1, 0]
1 [44, 2, 2] [-1, -1, 1] 1 [0, 1, 1, 0, 0, 1]
2 [8, 57, 40] [1, 1, 1] 0 [1, 0, 1, 0, 1, 0]
3 [4, 59, 56] [-1, 1, -1] 1 [1, 0, 0, 1, 0, 1]
4 [36, 51, 43] [-1, 1, 1] 1 [1, 0, 1, 0, 0, 1]
5 [46, 56, 18] [1, -1, -1] 0 [1, 0, 0, 1, 0, 1]
6 [49, 54, 54] [1, 1, 1] 2 [1, 0, 1, 0, 1, 0]
7 [46, 28, 62] [-1, -1, -1] 2 [0,

In [None]:
import tensorflow as tf
import numpy as np
import hashlib
import math
from dataclasses import dataclass
from typing import Tuple, Dict, List

# -----------------------------
# Constants / knobs
# -----------------------------
Q = 64  # number of qubits (rows) in the demo array; bitslice_30_tf assumes Q <= 64
TAU_HI      = 1.0   # default tau_hi for detect_collapse_triplet_scatter (real >= TAU_HI)
TAU_LOW     = -1.0  # default tau_low for detect_collapse_triplet_scatter (unreal <= TAU_LOW)
EPS         = 1e-6  # near-zero guard: division guards in div_pd / collapse ratio, threshold in bitmap
R_FOR_RATIO = 64.0  # default r_for_ratio: real/unreal ratio above this counts as collapsed

# 1 at the prime slot indices below 30 (2,3,5,7,11,13,17,19,23,29), 0 elsewhere.
# apply_parity_rotation negates every slot flagged here.
PRIME_MASK_30 = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# 10 triplets over 30 slots: [0..2],[3..5],...,[27..29]
# Shape [10,3]; row t holds the three slot indices of triplet t.
TRIPLET_IDX = tf.constant([[3*t,3*t+1,3*t+2] for t in range(10)], dtype=tf.int32)

# -----------------------------
# Utilities: pack/bitslice
# -----------------------------
def pack30_to_u32_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """
    Pack 30 bit flags per row into one uint32 word per row.

    bits30_i32: [Q,30] int32 holding 0/1 values; column k becomes bit k.
    Returns: [Q] uint32.
    """
    bit_positions = tf.cast(tf.range(30), tf.uint32)
    # Shift each flag to its own bit position; summing the disjoint bits ORs them together.
    placed = tf.bitwise.left_shift(tf.cast(bits30_i32, tf.uint32), bit_positions)
    return tf.reduce_sum(placed, axis=1)

def bitslice_30_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """
    Bit-slice a [Q,30] flag matrix into 30 lane masks.

    Row q contributes bit q of every mask, so output element k is the uint64
    whose bit q equals bits30_i32[q, k]. Intended for Q <= 64.
    Returns: [30] uint64.
    """
    num_rows = tf.shape(bits30_i32)[0]
    one = tf.constant(1, tf.uint64)
    lane_weight = tf.bitwise.left_shift(one, tf.cast(tf.range(num_rows), tf.uint64))  # [Q]
    weighted = tf.cast(bits30_i32, tf.uint64) * lane_weight[:, tf.newaxis]            # [Q,30]
    return tf.reduce_sum(weighted, axis=0)

def pack90_from_3x30_u32(packed3_u32: np.ndarray) -> np.ndarray:
    """
    Pack three 30-bit words per row into two uint64 words per row.

    packed3_u32: [Q,3] uint32, each holds 30 bits (higher bits are masked off)
    Returns: [Q,2] uint64 packing 90 bits:
      low64  = bits0[0..29] | bits1[0..29]<<30 | bits2[0..3]<<60
      high64 = bits2[4..29] in low bits (26 bits)

    The whole array is processed with vectorized NumPy operations instead of a
    per-row Python loop; results are identical.
    """
    mask30 = np.uint64((1 << 30) - 1)
    words = np.asarray(packed3_u32).astype(np.uint64) & mask30  # [Q,>=3], 30 bits each
    b0, b1, b2 = words[:, 0], words[:, 1], words[:, 2]

    out = np.empty((words.shape[0], 2), dtype=np.uint64)
    # The low 4 bits of the third word top up the first uint64 (30 + 30 + 4 = 64).
    out[:, 0] = b0 | (b1 << np.uint64(30)) | ((b2 & np.uint64(0xF)) << np.uint64(60))
    out[:, 1] = b2 >> np.uint64(4)  # remaining 26 bits
    return out

# -----------------------------
# NEC selectors (true phase-dual) + initiator logic
# -----------------------------
def sel_real(m: tf.Tensor) -> tf.Tensor:
    """Real selector: turn magnitudes m [Q] into pairs [+m, -m] of shape [Q,2]."""
    negated = tf.negative(m)
    return tf.stack([m, negated], axis=1)

def sel_unreal(m: tf.Tensor) -> tf.Tensor:
    """Unreal selector: turn magnitudes m [Q] into pairs [-m, +m] of shape [Q,2]."""
    negated = tf.negative(m)
    return tf.stack([negated, m], axis=1)

def build_primaries_from_nec(mag_xyz_i32: tf.Tensor, base_sign_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Expand per-axis NEC magnitudes and signs into six phase-dual primaries.

    mag_xyz_i32: [Q,3] int32 magnitudes, cast to float32 here.
    base_sign_xyz_i32: [Q,3] int32; a negative entry means the axis starts
      with the unreal selector, otherwise it starts with the real selector.

    For each axis with magnitude m the two selectors are
      real   = [+m, -m]
      unreal = [-m, +m]
    and the axis contributes two consecutive slots: (real, unreal) for a
    non-negative sign, (unreal, real) for a negative one.

    Returns: [Q,6,2] float32 ordered [X0, X1, Y0, Y1, Z0, Z1].
    """
    magnitudes = tf.cast(mag_xyz_i32, tf.float32)
    starts_unreal = base_sign_xyz_i32 < 0  # [Q,3] bool

    slots = []
    for axis in range(3):
        m = magnitudes[:, axis]
        real_sel = tf.stack([m, -m], axis=1)
        unreal_sel = tf.stack([-m, m], axis=1)
        flip = starts_unreal[:, axis:axis + 1]  # [Q,1], broadcasts over the pair
        slots.append(tf.where(flip, unreal_sel, real_sel))
        slots.append(tf.where(flip, real_sel, unreal_sel))

    return tf.stack(slots, axis=1)

def initiator_from_shared_counts(mag_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Choose an initiator axis (0=x, 1=y, 2=z) for every qubit.

    For each qubit and axis, count how many qubits in the array (itself
    included) hold the same magnitude on that axis; a count above 1 means the
    axis value is shared. The choice then depends on how many axes are shared:
      1 shared  -> the axis with the highest count
      2 shared  -> the axis whose count is exactly 1 (the unshared one)
      3 shared  -> the axis with the highest count
      0 shared  -> the axis with the largest magnitude, ties biased x > y > z

    mag_xyz_i32: [Q,3] int32. Returns: [Q] int32.
    """
    # same_value[i, j, a]: qubits i and j agree on axis a.
    same_value = tf.equal(mag_xyz_i32[:, tf.newaxis, :], mag_xyz_i32[tf.newaxis, :, :])
    counts = tf.reduce_sum(tf.cast(same_value, tf.int32), axis=1)                     # [Q,3]
    n_shared = tf.reduce_sum(tf.cast(tf.greater(counts, 1), tf.int32), axis=1)        # [Q]

    most_shared_axis = tf.argmax(counts, axis=1, output_type=tf.int32)
    lone_axis = tf.argmax(tf.cast(tf.equal(counts, 1), tf.int32), axis=1, output_type=tf.int32)

    # Scale magnitudes so the small per-axis bias only decides exact ties.
    tie_bias = tf.constant([2,1,0], tf.int32)
    largest_axis = tf.argmax(mag_xyz_i32 * 1000 + tie_bias, axis=1, output_type=tf.int32)

    # The n_shared cases are mutually exclusive, so they can be layered in any order.
    chosen = largest_axis
    chosen = tf.where(tf.equal(n_shared, 3), most_shared_axis, chosen)
    chosen = tf.where(tf.equal(n_shared, 2), lone_axis, chosen)
    chosen = tf.where(tf.equal(n_shared, 1), most_shared_axis, chosen)
    return chosen  # [Q]

def permute_primaries_by_initiator(prim6: tf.Tensor, initiator_axis: tf.Tensor) -> tf.Tensor:
    """
    Cyclically rotate the three axis pairs so the initiator pair leads.

    prim6: [Q,6,2] ordered as [X0,X1,Y0,Y1,Z0,Z1]
    initiator_axis: [Q]; 0 keeps X,Y,Z, 1 gives Y,Z,X, anything else gives Z,X,Y.

    Returns: [Q,6,2] with slots reordered per qubit.
    """
    n_rows = tf.shape(prim6)[0]

    def rows_of(slot_order):
        # One copy of the slot order per qubit: [Q,6].
        return tf.tile(tf.constant([slot_order], tf.int32), [n_rows, 1])

    picks_x = tf.equal(initiator_axis, 0)[:, tf.newaxis]  # [Q,1]
    picks_y = tf.equal(initiator_axis, 1)[:, tf.newaxis]  # [Q,1]

    y_or_z_order = tf.where(picks_y, rows_of([2,3,4,5,0,1]), rows_of([4,5,0,1,2,3]))
    slot_order = tf.where(picks_x, rows_of([0,1,2,3,4,5]), y_or_z_order)

    # batch_dims=1: row q of slot_order indexes row q of prim6.
    return tf.gather(prim6, slot_order, axis=1, batch_dims=1)

# -----------------------------
# 30-register with (+,-,*,/) semantics and add/sub-only reorder (canonicalization)
# -----------------------------
def div_pd(a: tf.Tensor, b: tf.Tensor) -> tf.Tensor:
    """
    Component-wise phase-dual division that yields 0 where the divisor is tiny.

    Positions where |b| <= EPS return 0; all others return a / b.
    """
    divisor_ok = tf.greater(tf.abs(b), EPS)
    quotient = a / b
    return tf.where(divisor_ok, quotient, tf.zeros_like(a))

def build_register30(prim6: tf.Tensor, canonicalize_addsub: bool = True) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Build the 30-slot phase-dual register from six primaries.

    prim6: [Q,6,2] in initiator-permuted order; slots (0,1), (2,3), (4,5) are
      treated as axis pairs A, B, C of the current frame.

    Layout (10 triplets of 3 slots):
      triplets 0-1: the six primaries unchanged
      triplets 2-9: for each of 8 fixed operand pairs (u, v):
                    [u + v, u - v, third], third = u*v for the first four
                    pairs and safe u/v for the last four.

    canonicalize_addsub: when True, the sum and difference slots are swapped
      wherever the difference has the strictly larger real component, so the
      larger-real value sits first. The third slot never moves.

    Relies on add_pd, mul_pd and div_pd being defined in the session.

    Returns:
      reg: [Q,30,2]
      swap_flags: [Q,8] int32, 1 where the sum/difference pair was swapped.
    """
    A0, A1, B0, B1, C0, C1 = tf.unstack(prim6, axis=1)  # each [Q,2]

    # (u, v, third slot is a product?) for interaction triplets 2..9.
    operand_plan = [
        (A0, B0, True),
        (B0, C0, True),
        (A0, C0, True),
        (A1, B1, True),
        (A0, B1, False),
        (B0, C1, False),
        (A1, C0, False),
        (B1, C1, False),
    ]

    slots = [A0, A1, B0, B1, C0, C1]
    flags = []
    for u, v, third_is_product in operand_plan:
        total = add_pd(u, v)
        diff = u - v
        third = mul_pd(u, v) if third_is_product else div_pd(u, v)

        if canonicalize_addsub:
            swapped = tf.cast(diff[:, 0] > total[:, 0], tf.int32)  # [Q]
            do_swap = swapped[:, None] > 0
            first = tf.where(do_swap, diff, total)
            second = tf.where(do_swap, total, diff)
        else:
            swapped = tf.zeros([tf.shape(u)[0]], tf.int32)
            first, second = total, diff

        slots += [first, second, third]
        flags.append(swapped)

    return tf.stack(slots, axis=1), tf.stack(flags, axis=1)

# -----------------------------
# Collapse/parity/bitmap with triplet loop + scatter update retained
# -----------------------------
def detect_collapse_triplet_scatter(pairs: tf.Tensor,
                                   tau_hi: float = TAU_HI,
                                   tau_low: float = TAU_LOW,
                                   r_for_ratio: float = R_FOR_RATIO) -> tf.Tensor:
    """
    Flag "collapsed" slots of a phase-dual register as an int32 0/1 mask.

    pairs: read as [..., 0] = real and [..., 1] = unreal; callers in this cell
      pass the [Q,30,2] register from build_register30.
    tau_hi, tau_low: a slot collapses when real >= tau_hi AND unreal <= tau_low.
    r_for_ratio: a slot also collapses when real/unreal > r_for_ratio; the
      ratio is taken as 0 wherever |unreal| <= EPS.

    Returns: [Q,30] int32 mask, 1 where either condition holds.

    NOTE(review): the triplet loop rewrites each triplet's three mask entries
    with values derived from `individual`. In the uniform case the tiled value
    equals every entry of the triplet, and in the non-uniform case the entries
    are copied as-is, so the scatter writes back what final_mask already holds.
    As written the result equals tf.cast(individual, tf.int32); confirm whether
    a triplet-wide rule was intended.
    """
    real = pairs[..., 0]    # [Q,30]
    unreal = pairs[..., 1]  # [Q,30]
    Qn = tf.shape(pairs)[0]

    # Condition 1: strongly positive real together with strongly negative unreal.
    cond1 = tf.logical_and(real >= tau_hi, unreal <= tau_low)
    # Condition 2: real/unreal ratio above the threshold, guarded against tiny divisors.
    ratio = tf.where(tf.abs(unreal) > EPS, real / unreal, tf.zeros_like(real))
    cond2 = ratio > r_for_ratio
    individual = tf.logical_or(cond1, cond2)  # [Q,30] bool

    final_mask = tf.cast(individual, tf.int32)

    # Visit the 10 triplets one at a time and scatter each triplet's values back.
    for t in tf.range(10):
        idx3 = TRIPLET_IDX[t]  # [3]
        trip_ind = tf.gather(individual, idx3, axis=1)  # [Q,3]
        # A triplet is uniform when all three slots carry the same flag.
        is_uniform = tf.reduce_all(tf.equal(trip_ind, trip_ind[:, 0:1]), axis=1)  # [Q]
        uniform_val = tf.cast(trip_ind[:, 0], tf.int32)  # [Q]

        updates_for_triplet = tf.where(
            tf.expand_dims(is_uniform, axis=1),
            tf.tile(tf.expand_dims(uniform_val, axis=1), [1,3]),
            tf.cast(trip_ind, tf.int32)
        )  # [Q,3]

        # Build (row, slot) index pairs in row-major order to match the flattened updates.
        q_idx = tf.repeat(tf.range(Qn), repeats=3)
        p_idx = tf.tile(idx3, multiples=[Qn])
        scatter_idx = tf.stack([q_idx, p_idx], axis=1)
        final_mask = tf.tensor_scatter_nd_update(final_mask, scatter_idx, tf.reshape(updates_for_triplet, [-1]))

    return final_mask

def apply_parity_rotation(pairs: tf.Tensor, collapse_mask: tf.Tensor):
    """
    Negate every register slot that is prime-indexed or flagged as collapsed.

    pairs: [Q,30,2] float32 register.
    collapse_mask: [Q,30] mask; values > 0 mark collapsed slots.

    Returns (rotated, affected): the register with affected slots multiplied
    by -1 on both components, and the [Q,30] int32 mask of affected slots.
    """
    n_rows = tf.shape(pairs)[0]
    prime_rows = tf.tile(PRIME_MASK_30[tf.newaxis, :], [n_rows, 1])  # [Q,30]
    flip = tf.logical_or(tf.greater(prime_rows, 0), tf.greater(collapse_mask, 0))
    affected = tf.cast(flip, tf.int32)
    sign = tf.where(flip, tf.constant(-1.0, tf.float32), tf.constant(1.0, tf.float32))
    rotated = pairs * sign[..., tf.newaxis]
    return rotated, affected

def bitmap(rotated_pairs: tf.Tensor) -> tf.Tensor:
    """Map each slot to 1 when its real component (channel 0) exceeds EPS, else 0 (int32)."""
    real_part = rotated_pairs[..., 0]
    return tf.cast(tf.greater(real_part, EPS), tf.int32)

# -----------------------------
# One view compute: prim6 -> reg30 -> collapse/parity -> bits30
# -----------------------------
def compute_bits_for_view(prim6: tf.Tensor,
                          canonicalize_addsub: bool = True) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    Run one primaries view through register build, collapse detection,
    parity rotation and bit mapping.

    Returns (bits, swap_flags, collapse, parity): the [Q,30] bit flags, the
    [Q,8] add/sub swap flags, the collapse mask and the affected-slot mask.
    """
    register, swaps = build_register30(prim6, canonicalize_addsub=canonicalize_addsub)
    collapse_mask = detect_collapse_triplet_scatter(register)
    flipped, parity_mask = apply_parity_rotation(register, collapse_mask)
    return bitmap(flipped), swaps, collapse_mask, parity_mask

# -----------------------------
# Commitments keyed off full 3-view payload
# -----------------------------
def commit_full90(packed_u32_xyz: np.ndarray,
                  initiator_axis: np.ndarray,
                  swapcount_xyz: np.ndarray,
                  domain_sep: bytes = b"NTHISA90") -> List[bytes]:
    """
    Hash each qubit's three packed views plus metadata into a 32-byte commitment.

    packed_u32_xyz: [Q,3] unsigned values that fit in 4 bytes (x/y/z packed views)
    initiator_axis: [Q] unsigned values that fit in 1 byte
    swapcount_xyz:  [Q,3] unsigned values that fit in 1 byte
    domain_sep:     prefix placed before every message

    Message layout, little-endian:
      domain_sep | x:4 | y:4 | z:4 | initiator:1 | swap_x:1 | swap_y:1 | swap_z:1

    Returns one BLAKE2s-256 digest per row, in row order.
    """
    def le_bytes(value, width):
        return int(value).to_bytes(width, "little", signed=False)

    digests = []
    for row in range(packed_u32_xyz.shape[0]):
        fields = [domain_sep]
        fields.extend(le_bytes(packed_u32_xyz[row, col], 4) for col in range(3))
        fields.append(le_bytes(initiator_axis[row], 1))
        fields.extend(le_bytes(swapcount_xyz[row, col], 1) for col in range(3))
        digests.append(hashlib.blake2s(b"".join(fields), digest_size=32).digest())
    return digests

# -----------------------------
# Demo run
# -----------------------------
# Fix both RNG seeds so the demo is repeatable. Only tf.random draws are made
# in this cell; no NumPy random call is visible here.
tf.random.set_seed(11)
np.random.seed(11)

# NEC magnitudes (small range encourages shared values)
# Integer uniform sampling excludes maxval, so values fall in 0..63.
mag_xyz = tf.random.uniform([Q,3], minval=0, maxval=64, dtype=tf.int32)

# NEC base sign pattern in {-1,+1}
# sign_bits is drawn from {0,1}; 1 maps to +1 and 0 maps to -1.
sign_bits = tf.random.uniform([Q,3], minval=0, maxval=2, dtype=tf.int32)
base_sign = tf.where(sign_bits>0, tf.constant(1, tf.int32), tf.constant(-1, tf.int32))

prim0 = build_primaries_from_nec(mag_xyz, base_sign)  # [Q,6,2] canonical pair order X,Y,Z
initiator = initiator_from_shared_counts(mag_xyz)     # [Q] axis 0/1/2

# Three global views (init=x/y/z) and per-qubit selected init
# Each global view forces the same initiator axis on every qubit.
prim_x = permute_primaries_by_initiator(prim0, tf.zeros([Q], tf.int32))
prim_y = permute_primaries_by_initiator(prim0, tf.ones([Q], tf.int32))
prim_z = permute_primaries_by_initiator(prim0, tf.fill([Q], tf.constant(2, tf.int32)))
prim_sel = permute_primaries_by_initiator(prim0, initiator)

# Compute bits for each view with add/sub canonicalization enabled
bits_x, swaps_x, coll_x, par_x = compute_bits_for_view(prim_x, canonicalize_addsub=True)
bits_y, swaps_y, coll_y, par_y = compute_bits_for_view(prim_y, canonicalize_addsub=True)
bits_z, swaps_z, coll_z, par_z = compute_bits_for_view(prim_z, canonicalize_addsub=True)
bits_sel, swaps_sel, coll_sel, par_sel = compute_bits_for_view(prim_sel, canonicalize_addsub=True)

# Stack into [Q,3,30]
bits_views = tf.stack([bits_x, bits_y, bits_z], axis=1)

# Pack per view: [Q,3] uint32
packed_x = pack30_to_u32_tf(bits_x)
packed_y = pack30_to_u32_tf(bits_y)
packed_z = pack30_to_u32_tf(bits_z)
packed_views_u32 = tf.stack([packed_x, packed_y, packed_z], axis=1)

# NumPy copy used by the pure-Python packing and hashing helpers below.
packed_views_np = packed_views_u32.numpy().astype(np.uint32)

# Pack full 90-bit payload into 2x u64
packed90_u64 = pack90_from_3x30_u32(packed_views_np)  # [Q,2] uint64

# Swap-counts per view (number of add/sub swaps across the 8 interaction triplets)
swapcount_x = tf.reduce_sum(swaps_x, axis=1)  # [Q]
swapcount_y = tf.reduce_sum(swaps_y, axis=1)
swapcount_z = tf.reduce_sum(swaps_z, axis=1)
# Each count is at most 8, so it fits the single byte commit_full90 writes per view.
swapcount_xyz = tf.stack([swapcount_x, swapcount_y, swapcount_z], axis=1).numpy().astype(np.uint8)

# Commitments based on full 90-bit payload + initiator selection + swap stats
commits = commit_full90(
    packed_u32_xyz=packed_views_np,
    initiator_axis=initiator.numpy().astype(np.uint8),
    swapcount_xyz=swapcount_xyz
)

# Bit-sliced masks per view: [3,30] uint64
masks_views = tf.stack([bitslice_30_tf(bits_x), bitslice_30_tf(bits_y), bitslice_30_tf(bits_z)], axis=0)

# Selected initiator masks (30×u64)
masks_sel = bitslice_30_tf(bits_sel)

# -----------------------------
# Display summary
# -----------------------------
# Summarise the v0.3 run: shapes, payload sizes, one commitment, and the first
# sample qubits. Sizes are derived from Q so the summary stays correct if the
# Q knob is changed.
print("=== UniversalISA v0.3 Summary ===")
print("mag_xyz:", mag_xyz.shape, "base_sign:", base_sign.shape)
print("prim0:", prim0.shape, "initiator:", initiator.shape)
print("initiator axis counts:", np.bincount(initiator.numpy(), minlength=3), "(0=x,1=y,2=z)")

print("\nPayload shapes:")
print("bits_views:", bits_views.shape, " (Q,3,30) => 90 bits/qubit")
print("packed_views_u32:", packed_views_u32.shape, packed_views_u32.dtype, " (3×u32 per qubit)")
print("packed90_u64:", packed90_u64.shape, packed90_u64.dtype, " (2×u64 per qubit)")
print(f"Total bits per {Q}-qubit array:", Q*90)

print("\nMasks:")
print("masks_views:", masks_views.shape, masks_views.dtype, " (3 views × 30 lane-masks)")
print("selected masks:", masks_sel.shape, masks_sel.dtype)

print("\nCommitment[0] (hex):", commits[0].hex())
print("packed_views_u32[0]:", packed_views_np[0].tolist())
print("packed90_u64[0]:", packed90_u64[0].tolist(), "(low64, high64)")
print("swapcounts per view (q0):", swapcount_xyz[0].tolist(), " initiator(q0):", int(initiator.numpy()[0]))

# Show a few qubits: NEC, sign, initiator, primary 6-bit shadow for selected initiator view
# Primary bits: 1 if selector.real>0 else 0 for the first 6 slots in the selected primaries
prim6_real = prim_sel[...,0].numpy()  # [Q,6]
primary6bits = (prim6_real > 0).astype(np.int32)

print("\nSample qubits (q, NEC mag, NEC sign, initiator, primary6bits, packedXYZ):")
mag_np = mag_xyz.numpy()
sign_np = base_sign.numpy()
init_np = initiator.numpy()
# Cap the sample at Q rows so a smaller Q does not index past the arrays.
for q in range(min(8, Q)):
    print(q, mag_np[q].tolist(), sign_np[q].tolist(), int(init_np[q]), primary6bits[q].tolist(), packed_views_np[q].tolist())

=== UniversalISA v0.3 Summary ===
mag_xyz: (64, 3) base_sign: (64, 3)
prim0: (64, 6, 2) initiator: (64,)
initiator axis counts: [24 22 18] (0=x,1=y,2=z)

Payload shapes:
bits_views: (64, 3, 30)  (Q,3,30) => 90 bits/qubit
packed_views_u32: (64, 3) <dtype: 'uint32'>  (3×u32 per qubit)
packed90_u64: (64, 2) uint64  (2×u64 per qubit)
Total bits per 64-qubit array: 5760

Masks:
masks_views: (3, 30) <dtype: 'uint64'>  (3 views × 30 lane-masks)
selected masks: (30,) <dtype: 'uint64'>

Commitment[0] (hex): 2e161a780261e4e75b64f349284517be142f41fa53a7fe930423e2d929d45760
packed_views_u32[0]: [8405288, 8937896, 8929704]
packed90_u64[0]: [9232969029616943400, 558106] (low64, high64)
swapcounts per view (q0): [4, 4, 4]  initiator(q0): 2

Sample qubits (q, NEC mag, NEC sign, initiator, primary6bits, packedXYZ):
0 [56, 38, 47] [1, 1, 1] 2 [1, 0, 1, 0, 1, 0] [8405288, 8937896, 8929704]
1 [44, 2, 2] [-1, -1, 1] 1 [0, 1, 1, 0, 0, 1] [604514724, 538601608, 77201540]
2 [8, 57, 40] [1, 1, 1] 0 [1, 0, 1, 0

In [None]:
# UniversalISA prototype v0.4 (Colab single-cell)
# Adds to v0.3:
# - Batch-level DEDUP/ALIAS (state sharing detection) keyed off FULL 90-bit commitment
# - Winner selection + alias map + freed qubits list
# - Feed-forward state registers (tag_u64, initiator, swap counts)
# - Second instruction step that executes only ACTIVE (winner) qubits and fills aliases deterministically
# - Mode switch:
#     MODE="FREE"      => aliases are reassigned new NEC work states for next instruction
#     MODE="PAIR_HUNT" => aliases stay tied to winner state (useful for paired-state hunting)
#
# Notes:
# - This is still a prototype; float32 math is not guaranteed bit-identical across all accelerators.
# - The structure (triplet loops + scatter updates) is preserved.
# - × and ÷ slots are fixed; only (ADD,SUB) can reorder via canonicalization.

import tensorflow as tf
import numpy as np
import hashlib
import math
from dataclasses import dataclass
from typing import List, Dict, Tuple

# -----------------------------
# Global knobs
# -----------------------------
Q = 64                 # number of qubits (rows) per array; bitslice_30_tf packs one qubit per uint64 bit
MODE = "FREE"          # "FREE" or "PAIR_HUNT"
INSTR_STEPS = 2        # demonstrate 2 instruction steps

TAU_HI      = 1.0   # default tau_hi for detect_collapse_triplet_scatter (real >= TAU_HI)
TAU_LOW     = -1.0  # default tau_low for detect_collapse_triplet_scatter (unreal <= TAU_LOW)
EPS         = 1e-6  # near-zero guard: division guards in div_pd / collapse ratio, threshold in bitmap
R_FOR_RATIO = 64.0  # default r_for_ratio: real/unreal ratio above this counts as collapsed

# 1 at the prime slot indices below 30 (2,3,5,7,11,13,17,19,23,29), 0 elsewhere.
# apply_parity_rotation negates every slot flagged here.
PRIME_MASK_30 = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# Shape [10,3]; row t holds the slot indices [3t, 3t+1, 3t+2] of triplet t.
TRIPLET_IDX = tf.constant([[3*t,3*t+1,3*t+2] for t in range(10)], dtype=tf.int32)

# -----------------------------
# Pack/bitslice utilities
# -----------------------------
def pack30_to_u32_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """
    Pack 30 bit flags per row into one uint32 word per row.

    bits30_i32: [Q,30] holding 0/1 values; column k becomes bit k.
    Returns: [Q] uint32.
    """
    bit_positions = tf.cast(tf.range(30), tf.uint32)
    # Shift each flag to its own bit position; summing the disjoint bits ORs them together.
    placed = tf.bitwise.left_shift(tf.cast(bits30_i32, tf.uint32), bit_positions)
    return tf.reduce_sum(placed, axis=1)

def bitslice_30_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """
    Bit-slice a [Q,30] flag matrix into 30 lane masks.

    Row q contributes bit q of every mask, so output element k is the uint64
    whose bit q equals bits30_i32[q, k].
    Returns: [30] uint64.
    """
    num_rows = tf.shape(bits30_i32)[0]
    one = tf.constant(1, tf.uint64)
    lane_weight = tf.bitwise.left_shift(one, tf.cast(tf.range(num_rows), tf.uint64))  # [Q]
    weighted = tf.cast(bits30_i32, tf.uint64) * lane_weight[:, tf.newaxis]            # [Q,30]
    return tf.reduce_sum(weighted, axis=0)

def pack90_from_3x30_u32(packed3_u32: np.ndarray) -> np.ndarray:
    """
    Pack three 30-bit words per row into two uint64 words per row.

    packed3_u32: [Q,3] uint32; only the low 30 bits of each word are used.
    Returns: [Q,2] uint64 where
      column 0 = word0 | word1 << 30 | (low 4 bits of word2) << 60
      column 1 = word2 >> 4 (the remaining 26 bits)

    The whole array is processed with vectorized NumPy operations instead of a
    per-row Python loop; results are identical.
    """
    mask30 = np.uint64((1 << 30) - 1)
    words = np.asarray(packed3_u32).astype(np.uint64) & mask30  # [Q,>=3], 30 bits each
    b0, b1, b2 = words[:, 0], words[:, 1], words[:, 2]

    out = np.empty((words.shape[0], 2), dtype=np.uint64)
    # The low 4 bits of the third word top up the first uint64 (30 + 30 + 4 = 64).
    out[:, 0] = b0 | (b1 << np.uint64(30)) | ((b2 & np.uint64(0xF)) << np.uint64(60))
    out[:, 1] = b2 >> np.uint64(4)
    return out

# -----------------------------
# Phase-dual core ops
# -----------------------------
def add_pd(a, b):
    """Phase-dual addition: the element-wise sum of a and b."""
    total = a + b
    return total
def mul_pd(a, b):
    """Phase-dual multiplication: the element-wise product of a and b."""
    product = a * b
    return product
def div_pd(a, b):
    """
    Phase-dual division that yields 0 where the divisor is tiny.

    Positions where |b| <= EPS return 0; all others return a / b.
    """
    divisor_ok = tf.greater(tf.abs(b), EPS)
    quotient = a / b
    return tf.where(divisor_ok, quotient, tf.zeros_like(a))

# -----------------------------
# NEC selectors (true phase-dual) + initiator logic
# -----------------------------
def sel_real(m: tf.Tensor) -> tf.Tensor:
    """Real selector: stack m with its negation along axis 1, giving [+m, -m]."""
    negated = tf.negative(m)
    return tf.stack([m, negated], axis=1)

def sel_unreal(m: tf.Tensor) -> tf.Tensor:
    """Unreal selector: stack the negation of m with m along axis 1, giving [-m, +m]."""
    negated = tf.negative(m)
    return tf.stack([negated, m], axis=1)

def build_primaries_from_nec(mag_xyz_i32: tf.Tensor, base_sign_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Expand per-axis NEC magnitudes and signs into six phase-dual primaries.

    mag_xyz_i32: [Q,3] magnitudes, cast to float32 here.
    base_sign_xyz_i32: [Q,3]; a negative entry means the axis starts with the
      unreal selector [-m, +m], otherwise with the real selector [+m, -m].

    Each axis contributes two consecutive slots holding both selectors in the
    order chosen by its sign.

    Returns: [Q,6,2] float32 ordered [X0, X1, Y0, Y1, Z0, Z1].
    """
    magnitudes = tf.cast(mag_xyz_i32, tf.float32)
    starts_unreal = base_sign_xyz_i32 < 0  # [Q,3] bool

    slots = []
    for axis in range(3):
        m = magnitudes[:, axis]
        real_sel = tf.stack([m, -m], axis=1)
        unreal_sel = tf.stack([-m, m], axis=1)
        flip = starts_unreal[:, axis:axis + 1]  # [Q,1], broadcasts over the pair
        slots.append(tf.where(flip, unreal_sel, real_sel))
        slots.append(tf.where(flip, real_sel, unreal_sel))

    return tf.stack(slots, axis=1)  # [Q,6,2]

def initiator_from_shared_counts(mag_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Choose an initiator axis (0=x, 1=y, 2=z) for every qubit.

    For each qubit and axis, count how many qubits in the array (itself
    included) hold the same magnitude on that axis; a count above 1 means the
    axis value is shared. The choice then depends on how many axes are shared:
      1 shared  -> the axis with the highest count
      2 shared  -> the axis whose count is exactly 1 (the unshared one)
      3 shared  -> the axis with the highest count
      0 shared  -> the axis with the largest magnitude, ties biased x > y > z

    mag_xyz_i32: [Q,3] int32. Returns: [Q] int32.
    """
    # same_value[i, j, a]: qubits i and j agree on axis a.
    same_value = tf.equal(mag_xyz_i32[:, tf.newaxis, :], mag_xyz_i32[tf.newaxis, :, :])
    counts = tf.reduce_sum(tf.cast(same_value, tf.int32), axis=1)                     # [Q,3]
    n_shared = tf.reduce_sum(tf.cast(tf.greater(counts, 1), tf.int32), axis=1)        # [Q]

    most_shared_axis = tf.argmax(counts, axis=1, output_type=tf.int32)
    lone_axis = tf.argmax(tf.cast(tf.equal(counts, 1), tf.int32), axis=1, output_type=tf.int32)

    # Scale magnitudes so the small per-axis bias only decides exact ties.
    tie_bias = tf.constant([2,1,0], tf.int32)
    largest_axis = tf.argmax(mag_xyz_i32 * 1000 + tie_bias, axis=1, output_type=tf.int32)

    # The n_shared cases are mutually exclusive, so they can be layered in any order.
    chosen = largest_axis
    chosen = tf.where(tf.equal(n_shared, 3), most_shared_axis, chosen)
    chosen = tf.where(tf.equal(n_shared, 2), lone_axis, chosen)
    chosen = tf.where(tf.equal(n_shared, 1), most_shared_axis, chosen)
    return chosen

def permute_primaries_by_initiator(prim6: tf.Tensor, initiator_axis: tf.Tensor) -> tf.Tensor:
    """
    Cyclically rotate the three 2-wide axis pairs so the initiator pair leads.

    prim6: [Q,6,2] ordered as three consecutive axis pairs.
    initiator_axis: [Q]; 0 keeps the order, 1 moves the second pair to the
      front, anything else moves the third pair to the front.

    Returns: [Q,6,2] with slots reordered per qubit.
    """
    n_rows = tf.shape(prim6)[0]

    def rows_of(slot_order):
        # One copy of the slot order per qubit: [Q,6].
        return tf.tile(tf.constant([slot_order], tf.int32), [n_rows, 1])

    picks_x = tf.equal(initiator_axis, 0)[:, tf.newaxis]  # [Q,1]
    picks_y = tf.equal(initiator_axis, 1)[:, tf.newaxis]  # [Q,1]

    y_or_z_order = tf.where(picks_y, rows_of([2,3,4,5,0,1]), rows_of([4,5,0,1,2,3]))
    slot_order = tf.where(picks_x, rows_of([0,1,2,3,4,5]), y_or_z_order)

    # batch_dims=1: row q of slot_order indexes row q of prim6.
    return tf.gather(prim6, slot_order, axis=1, batch_dims=1)

# -----------------------------
# 30-register builder: 10 triplets, add/sub reorderable, mul/div fixed
# -----------------------------
def build_register30(prim6: tf.Tensor, canonicalize_addsub: bool = True) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Build the 30-slot phase-dual register from six primaries.

    prim6: [Q,6,2]; slots (0,1), (2,3), (4,5) are treated as axis pairs A, B, C.

    Layout (10 triplets of 3 slots):
      triplets 0-1: the six primaries unchanged
      triplets 2-9: for each of 8 fixed operand pairs (u, v):
                    [u + v, u - v, third], third = u*v for the first four
                    pairs and safe u/v for the last four.

    canonicalize_addsub: when True, the sum and difference slots are swapped
      wherever the difference has the strictly larger real component. The
      third slot never moves.

    Returns:
      reg: [Q,30,2]
      swap_flags: [Q,8] int32, 1 where the sum/difference pair was swapped.
    """
    A0, A1, B0, B1, C0, C1 = tf.unstack(prim6, axis=1)

    # (u, v, third slot is a product?) for interaction triplets 2..9.
    operand_plan = [
        (A0, B0, True),
        (B0, C0, True),
        (A0, C0, True),
        (A1, B1, True),
        (A0, B1, False),
        (B0, C1, False),
        (A1, C0, False),
        (B1, C1, False),
    ]

    slots = [A0, A1, B0, B1, C0, C1]
    flags = []
    for u, v, third_is_product in operand_plan:
        total = add_pd(u, v)
        diff = u - v
        third = mul_pd(u, v) if third_is_product else div_pd(u, v)

        if canonicalize_addsub:
            swapped = tf.cast(diff[:, 0] > total[:, 0], tf.int32)  # [Q]
            do_swap = swapped[:, None] > 0
            first = tf.where(do_swap, diff, total)
            second = tf.where(do_swap, total, diff)
        else:
            swapped = tf.zeros([tf.shape(u)[0]], tf.int32)
            first, second = total, diff

        slots += [first, second, third]
        flags.append(swapped)

    return tf.stack(slots, axis=1), tf.stack(flags, axis=1)

# -----------------------------
# Collapse / parity / bitmap (triplet loops + scatter update retained)
# -----------------------------
def detect_collapse_triplet_scatter(pairs: tf.Tensor,
                                   tau_hi: float = TAU_HI,
                                   tau_low: float = TAU_LOW,
                                   r_for_ratio: float = R_FOR_RATIO) -> tf.Tensor:
    """
    Flag "collapsed" slots of a phase-dual register as an int32 0/1 mask.

    pairs: read as [..., 0] = real and [..., 1] = unreal; compute_bits_for_view
      passes the [Q,30,2] register from build_register30.
    tau_hi, tau_low: a slot collapses when real >= tau_hi AND unreal <= tau_low.
    r_for_ratio: a slot also collapses when real/unreal > r_for_ratio; the
      ratio is taken as 0 wherever |unreal| <= EPS.

    Returns: [Q,30] int32 mask, 1 where either condition holds.

    NOTE(review): the triplet loop rewrites each triplet's three mask entries
    with values derived from `individual`. In the uniform case the tiled value
    equals every entry of the triplet, and in the non-uniform case the entries
    are copied as-is, so the scatter writes back what final_mask already holds.
    As written the result equals tf.cast(individual, tf.int32); confirm whether
    a triplet-wide rule was intended.
    """
    real = pairs[..., 0]
    unreal = pairs[..., 1]
    Qn = tf.shape(pairs)[0]

    # Condition 1: strongly positive real together with strongly negative unreal.
    cond1 = tf.logical_and(real >= tau_hi, unreal <= tau_low)
    # Condition 2: real/unreal ratio above the threshold, guarded against tiny divisors.
    ratio = tf.where(tf.abs(unreal) > EPS, real / unreal, tf.zeros_like(real))
    cond2 = ratio > r_for_ratio
    individual = tf.logical_or(cond1, cond2)

    final_mask = tf.cast(individual, tf.int32)

    # Visit the 10 triplets one at a time and scatter each triplet's values back.
    for t in tf.range(10):
        idx3 = TRIPLET_IDX[t]
        trip_ind = tf.gather(individual, idx3, axis=1)
        # A triplet is uniform when all three slots carry the same flag.
        is_uniform = tf.reduce_all(tf.equal(trip_ind, trip_ind[:,0:1]), axis=1)
        uniform_val = tf.cast(trip_ind[:,0], tf.int32)

        updates = tf.where(
            tf.expand_dims(is_uniform, axis=1),
            tf.tile(tf.expand_dims(uniform_val, axis=1), [1,3]),
            tf.cast(trip_ind, tf.int32)
        )

        # Build (row, slot) index pairs in row-major order to match the flattened updates.
        q_idx = tf.repeat(tf.range(Qn), repeats=3)
        p_idx = tf.tile(idx3, multiples=[Qn])
        scatter_idx = tf.stack([q_idx, p_idx], axis=1)
        final_mask = tf.tensor_scatter_nd_update(final_mask, scatter_idx, tf.reshape(updates, [-1]))

    return final_mask

def apply_parity_rotation(pairs: tf.Tensor, collapse_mask: tf.Tensor):
    """
    Negate every register slot that is prime-indexed or flagged as collapsed.

    pairs: [Q,30,2] float32 register.
    collapse_mask: [Q,30] mask; values > 0 mark collapsed slots.

    Returns (rotated, affected): the register with affected slots multiplied
    by -1 on both components, and the [Q,30] int32 mask of affected slots.
    """
    n_rows = tf.shape(pairs)[0]
    prime_rows = tf.tile(PRIME_MASK_30[tf.newaxis, :], [n_rows, 1])  # [Q,30]
    flip = tf.logical_or(tf.greater(prime_rows, 0), tf.greater(collapse_mask, 0))
    affected = tf.cast(flip, tf.int32)
    sign = tf.where(flip, tf.constant(-1.0, tf.float32), tf.constant(1.0, tf.float32))
    rotated = pairs * sign[..., tf.newaxis]
    return rotated, affected

def bitmap(rotated_pairs: tf.Tensor) -> tf.Tensor:
    """Return 1 (int32) where the real component exceeds EPS, else 0."""
    real_component = rotated_pairs[..., 0]
    is_positive = real_component > EPS
    return tf.cast(is_positive, tf.int32)

# -----------------------------
# One view: prim6 -> bits30 + swap flags + collapse/parity
# -----------------------------
def compute_bits_for_view(prim6: tf.Tensor, canonicalize_addsub: bool = True):
    """Run one view through register build, collapse detection, parity and bitmap.

    Args:
        prim6: primaries for the view, passed straight to ``build_register30``.
        canonicalize_addsub: forwarded to ``build_register30``.

    Returns:
        (bits, swap_flags, collapse_mask, parity_mask) in that order.
    """
    register, swap_flags = build_register30(prim6, canonicalize_addsub=canonicalize_addsub)
    collapse_mask = detect_collapse_triplet_scatter(register)
    rotated_register, parity_mask = apply_parity_rotation(register, collapse_mask)
    return bitmap(rotated_register), swap_flags, collapse_mask, parity_mask

# -----------------------------
# Commitment and tag
# -----------------------------
def commit_full90(packed_u32_xyz: np.ndarray,
                  initiator_axis: np.ndarray,
                  swapcount_xyz: np.ndarray,
                  instr_counter: int,
                  domain_sep: bytes = b"NTHISA90") -> List[bytes]:
    """Hash each row's packed views and metadata into a 32-byte BLAKE2s digest.

    Message layout per row (all integers little-endian, unsigned):
        domain_sep | instr_counter (4 bytes) | packed X, Y, Z (4 bytes each) |
        initiator axis (1 byte) | swap counts X, Y, Z (1 byte each)

    Args:
        packed_u32_xyz: 2-D array; columns 0..2 of each row are hashed.
        initiator_axis: 1-D array indexed by row.
        swapcount_xyz: 2-D array; columns 0..2 of each row are hashed.
        instr_counter: instruction counter mixed into every message.
        domain_sep: prefix separating this hash domain from others.

    Returns:
        One digest per row of ``packed_u32_xyz``.
    """
    def le(value, width):
        # Unsigned little-endian encoding; raises OverflowError if it won't fit.
        return int(value).to_bytes(width, "little", signed=False)

    digests = []
    for q in range(packed_u32_xyz.shape[0]):
        pieces = [domain_sep, le(instr_counter, 4)]
        pieces.extend(le(packed_u32_xyz[q, axis], 4) for axis in range(3))
        pieces.append(le(initiator_axis[q], 1))
        pieces.extend(le(swapcount_xyz[q, axis], 1) for axis in range(3))
        digests.append(hashlib.blake2s(b"".join(pieces), digest_size=32).digest())
    return digests

def tag_u64_from_commit(commit32: bytes, q_idx: int, instr_counter: int) -> np.uint64:
    """Derive a 64-bit tag from a commitment, a qubit index and the counter.

    The tag is the 8-byte BLAKE2s digest of
    ``b"NTH_TAG0" | commit32 | q_idx (2 bytes LE) | instr_counter (4 bytes LE)``
    read as a little-endian unsigned integer.
    """
    index_bytes = int(q_idx).to_bytes(2, "little", signed=False)
    counter_bytes = int(instr_counter).to_bytes(4, "little", signed=False)
    payload = b"NTH_TAG0" + commit32 + index_bytes + counter_bytes
    digest = hashlib.blake2s(payload, digest_size=8).digest()
    as_int = int.from_bytes(digest, "little", signed=False)
    return np.uint64(as_int)

# -----------------------------
# Dedup/Alias (batch-level)
# -----------------------------
@dataclass
class DedupResult:
    """Outcome of grouping qubits by identical commitment (see dedup_by_commit)."""
    winner_of: np.ndarray       # [Q] int32: winner index for each qubit
    active_mask: np.ndarray     # [Q] bool: True for winners
    groups: Dict[bytes, List[int]]  # commitment bytes -> qubit indices sharing that commitment
    freed: List[int]            # indices of qubits freed (aliases) when MODE="FREE"
    collision_qubits: int       # number of non-winner qubits, summed over all groups

def dedup_by_commit(commits: List[bytes],
                    efficiency_score: np.ndarray,
                    mode: str = None) -> DedupResult:
    """Group qubits with identical commitments and elect one winner per group.

    Within a group the winner is the qubit with the smallest
    ``efficiency_score``; ties go to the smallest index. Every other member
    becomes an alias: it is marked inactive and, in FREE mode, listed in
    ``freed``.

    Args:
        commits: one commitment per qubit; equal bytes mean "same group".
        efficiency_score: per-qubit score, indexable by qubit index.
        mode: ``"FREE"`` to record aliases in ``freed``; any other value
            leaves ``freed`` empty. When omitted (``None``) the module-level
            ``MODE`` is used, which is the behaviour callers had before this
            parameter existed.

    Returns:
        A ``DedupResult`` whose ``groups`` preserves first-seen order of
        commitments and whose ``collision_qubits`` counts all aliases.
    """
    # Group by commit bytes
    groups: Dict[bytes, List[int]] = {}
    for q, c in enumerate(commits):
        groups.setdefault(c, []).append(q)

    winner_of = np.arange(len(commits), dtype=np.int32)
    active_mask = np.ones(len(commits), dtype=bool)
    freed = []
    collision_qubits = 0

    for qs in groups.values():
        collision_qubits += len(qs) - 1

        # Pick winner: minimal efficiency_score, then minimal q_idx
        w = min(qs, key=lambda q: (int(efficiency_score[q]), q))
        for q in qs:
            winner_of[q] = w
            if q == w:
                continue
            active_mask[q] = False
            # The global is only consulted when no explicit mode was given and
            # an alias actually exists, exactly as in the original lookup.
            if (MODE if mode is None else mode) == "FREE":
                freed.append(q)

    return DedupResult(winner_of=winner_of, active_mask=active_mask, groups=groups,
                      freed=freed, collision_qubits=collision_qubits)

# -----------------------------
# Deterministic reassignment of freed qubits to new NEC work states (MODE="FREE")
# -----------------------------
def derive_new_nec_for_freed(commit32: bytes, q_idx: int, instr_counter: int) -> Tuple[np.ndarray, np.ndarray]:
    """
    Deterministically derive a fresh NEC state for a freed qubit.

    A 16-byte BLAKE2s digest is taken over
    ``b"NTH_FREE0" | commit32 | q_idx (2 bytes LE) | instr_counter (4 bytes LE)``.
    Only its first four bytes are consumed: bytes 0..2 give the magnitudes
    (mod 64) and the low three bits of byte 3 give the signs.

    Output:
      mag_xyz: [3] int32 in [0,63]
      sign_xyz: [3] int32 in {-1,+1}  (bit set -> +1, bit clear -> -1)
    """
    seed = b"".join((
        b"NTH_FREE0",
        commit32,
        int(q_idx).to_bytes(2, "little", signed=False),
        int(instr_counter).to_bytes(4, "little", signed=False),
    ))
    digest = hashlib.blake2s(seed, digest_size=16).digest()

    mags = np.array([byte % 64 for byte in digest[:3]], dtype=np.int32)
    sign_byte = digest[3]
    signs = np.array([1 if sign_byte & (1 << k) else -1 for k in range(3)], dtype=np.int32)
    return mags, signs

# -----------------------------
# ISA step execution
# - Can execute on all qubits, or only on a subset of active indices (for dedup demonstration)
# -----------------------------
@dataclass
class StepOut:
    """Everything produced by one execute_step call, one entry per qubit unless noted."""
    instr_counter: int                  # counter value the step was executed with
    mag_xyz: np.ndarray                 # [Q,3] int32
    base_sign: np.ndarray               # [Q,3] int32
    initiator: np.ndarray               # [Q] int32
    bits_views: np.ndarray              # [Q,3,30] int32
    packed_views_u32: np.ndarray        # [Q,3] uint32
    packed90_u64: np.ndarray            # [Q,2] uint64
    swapcount_xyz: np.ndarray           # [Q,3] uint8
    commits: List[bytes]                # len Q
    tags_u64: np.ndarray                # [Q] uint64
    masks_views: np.ndarray             # [3,30] uint64 (lane masks)
    masks_selected: np.ndarray          # [30] uint64 (selected initiator)
    dedup: DedupResult                  # grouping of qubits by identical commitment

def _compute_step_views(mag_tf, sign_tf, n_rows):
    """Run the view pipeline shared by both paths of ``execute_step``.

    Builds primaries from the NEC state, evaluates the fixed X/Y/Z views and
    the initiator-selected view, and converts the results to NumPy.

    Args:
        mag_tf: int32 magnitudes tensor with ``n_rows`` rows.
        sign_tf: int32 base-sign tensor with ``n_rows`` rows.
        n_rows: number of rows, used to build the constant initiator vectors.

    Returns:
        (initiator, packed_xyz, swapcount_xyz, bits_views, masks_views, masks_sel)
        where ``initiator`` is still a TF tensor and the rest are NumPy arrays
        (uint32, uint8, int32, uint64, uint64 respectively).
    """
    prim0 = build_primaries_from_nec(mag_tf, sign_tf)
    # NOTE: shared counts are taken over exactly the rows passed in, so on the
    # active-only path the initiator is computed on the active subset only.
    initiator = initiator_from_shared_counts(mag_tf)

    fixed_axes = (
        tf.zeros([n_rows], tf.int32),
        tf.ones([n_rows], tf.int32),
        tf.fill([n_rows], tf.constant(2, tf.int32)),
    )
    bits_xyz, swaps_xyz = [], []
    for axis in fixed_axes:
        bits, swaps, _, _ = compute_bits_for_view(permute_primaries_by_initiator(prim0, axis), True)
        bits_xyz.append(bits)
        swaps_xyz.append(swaps)
    bits_sel, _, _, _ = compute_bits_for_view(permute_primaries_by_initiator(prim0, initiator), True)

    packed_xyz = tf.stack([pack30_to_u32_tf(b) for b in bits_xyz], axis=1).numpy().astype(np.uint32)
    swapcount_xyz = np.stack(
        [tf.reduce_sum(s, axis=1).numpy().astype(np.uint8) for s in swaps_xyz], axis=1
    ).astype(np.uint8)
    bits_views = np.stack([b.numpy().astype(np.int32) for b in bits_xyz], axis=1)
    masks_views = np.stack([bitslice_30_tf(b).numpy() for b in bits_xyz], axis=0).astype(np.uint64)
    masks_sel = bitslice_30_tf(bits_sel).numpy().astype(np.uint64)
    return initiator, packed_xyz, swapcount_xyz, bits_views, masks_views, masks_sel


def execute_step(mag_xyz: np.ndarray,
                 base_sign: np.ndarray,
                 prev_tags: np.ndarray,
                 instr_counter: int,
                 active_indices: np.ndarray = None) -> StepOut:
    """
    Execute one ISA step over the NEC state.

    Two call shapes exist:

    * ``active_indices is None`` (full path): the pipeline runs on every
      qubit, commitments/tags are derived, dedup is performed, and a fully
      populated ``StepOut`` is returned.
    * ``active_indices`` given (active-only path): the heavy pipeline runs
      only on those rows. The return value is then a 2-tuple
      ``(shell, partial)`` -- NOT a bare ``StepOut`` as the annotation
      suggests. ``shell`` is a ``StepOut`` filled with placeholders of size Q
      and ``partial`` is
      ``(active_indices, packed_xyz, swapcount_xyz, bits_views, masks_views,
      masks_sel, commits, tags, initiator)`` with one row per active index;
      the caller merges these and fills alias qubits itself.

    In the v0.4 demonstration, step 0 uses the full path and step 1 uses the
    active-only path.

    Args:
        mag_xyz: [Q,3] magnitudes.
        base_sign: [Q,3] base signs.
        prev_tags: accepted for interface compatibility; not read here.
        instr_counter: counter mixed into commitments and tags.
        active_indices: optional indices of the qubits to compute.

    Raises:
        ValueError: if ``mag_xyz`` does not have exactly Q rows.
    """
    Qn = mag_xyz.shape[0]
    if Qn != Q:
        raise ValueError(f"mag_xyz must have {Q} rows, got {Qn}")

    # Convert to TF tensors
    mag_tf = tf.constant(mag_xyz, dtype=tf.int32)
    sign_tf = tf.constant(base_sign, dtype=tf.int32)

    # If active subset requested, gather active state and compute only that
    if active_indices is not None:
        ai_tf = tf.constant(active_indices, dtype=tf.int32)
        mag_tf_a = tf.gather(mag_tf, ai_tf, axis=0)
        sign_tf_a = tf.gather(sign_tf, ai_tf, axis=0)
        Qa = int(active_indices.shape[0])

        (initiator_a, packed_xyz, swapcount_xyz,
         bits_views, masks_views, masks_sel) = _compute_step_views(mag_tf_a, sign_tf_a, Qa)

        # Commitments for active subset only (caller will expand). Tags use the
        # original qubit index, not the position within the active subset.
        commits = commit_full90(packed_xyz, initiator_a.numpy().astype(np.uint8), swapcount_xyz, instr_counter)
        tags = np.array([tag_u64_from_commit(commits[i], int(active_indices[i]), instr_counter)
                         for i in range(len(active_indices))], dtype=np.uint64)

        # Placeholder StepOut for the full Q; the caller merges `partial` into it.
        shell = StepOut(
            instr_counter=instr_counter,
            mag_xyz=mag_xyz,
            base_sign=base_sign,
            initiator=np.full((Q,), -1, dtype=np.int32),
            bits_views=np.zeros((Q,3,30), dtype=np.int32),
            packed_views_u32=np.zeros((Q,3), dtype=np.uint32),
            packed90_u64=np.zeros((Q,2), dtype=np.uint64),
            swapcount_xyz=np.zeros((Q,3), dtype=np.uint8),
            commits=[b""]*Q,
            tags_u64=np.zeros((Q,), dtype=np.uint64),
            masks_views=np.zeros((3,30), dtype=np.uint64),
            masks_selected=np.zeros((30,), dtype=np.uint64),
            dedup=DedupResult(np.arange(Q,dtype=np.int32), np.ones(Q,dtype=bool), {}, [], 0)
        )
        partial = (active_indices, packed_xyz, swapcount_xyz, bits_views, masks_views,
                   masks_sel, commits, tags, initiator_a.numpy().astype(np.int32))
        return shell, partial

    # Full execution path (no active subset)
    (initiator, packed_views_u32, swapcount_xyz,
     bits_views, masks_views, masks_selected) = _compute_step_views(mag_tf, sign_tf, Qn)

    packed90_u64 = pack90_from_3x30_u32(packed_views_u32)

    commits = commit_full90(packed_views_u32, initiator.numpy().astype(np.uint8), swapcount_xyz, instr_counter)
    tags_u64 = np.array([tag_u64_from_commit(commits[q], q, instr_counter) for q in range(Q)], dtype=np.uint64)

    # Efficiency score for winner selection (prototype): fewer add/sub swaps = "cheaper"
    efficiency_score = swapcount_xyz.sum(axis=1).astype(np.int32)
    dedup = dedup_by_commit(commits, efficiency_score)

    return StepOut(
        instr_counter=instr_counter,
        mag_xyz=mag_xyz,
        base_sign=base_sign,
        initiator=initiator.numpy().astype(np.int32),
        bits_views=bits_views,
        packed_views_u32=packed_views_u32,
        packed90_u64=packed90_u64,
        swapcount_xyz=swapcount_xyz,
        commits=commits,
        tags_u64=tags_u64,
        masks_views=masks_views,
        masks_selected=masks_selected,
        dedup=dedup
    )

# -----------------------------
# Run v0.4 demonstration: Step 0 full compute -> dedup -> produce next state -> Step 1 active-only compute
# -----------------------------
# Demo driver: seed the RNGs, build a random NEC state, run step 0 on all
# qubits, derive the step-1 state from the dedup result, run step 1 on the
# winners only, then merge and report.
tf.random.set_seed(11)
np.random.seed(11)

# Initial NEC state (encourage shared magnitudes)
# The two randint draws below consume the global NumPy RNG in this order.
mag_xyz0 = np.random.randint(0, 64, size=(Q,3), dtype=np.int32)
sign_bits = np.random.randint(0, 2, size=(Q,3), dtype=np.int32)
base_sign0 = np.where(sign_bits>0, 1, -1).astype(np.int32)

tags0 = np.zeros((Q,), dtype=np.uint64)

print("=== UniversalISA v0.4 ===")
print("MODE:", MODE)

# Step 0: full compute
step0 = execute_step(mag_xyz0, base_sign0, tags0, instr_counter=0)
print("\n--- STEP 0 ---")
print("bits_views:", step0.bits_views.shape, "packed_views_u32:", step0.packed_views_u32.shape, "packed90_u64:", step0.packed90_u64.shape)
print("initiator axis counts:", np.bincount(step0.initiator, minlength=3))
print("collision_qubits:", step0.dedup.collision_qubits, "num_groups:", len(step0.dedup.groups), "freed:", len(step0.dedup.freed))

# Build next-state (Step 1 state) using dedup policy
mag_xyz1 = step0.mag_xyz.copy()
base_sign1 = step0.base_sign.copy()
tags1 = step0.tags_u64.copy()

# Winners stay active; every other member of a commitment group is an alias.
winner_of = step0.dedup.winner_of
active_mask = step0.dedup.active_mask
active_indices = np.where(active_mask)[0].astype(np.int32)
alias_indices = np.where(~active_mask)[0].astype(np.int32)

if MODE == "PAIR_HUNT":
    # aliases inherit winner NEC directly (stay in shared state)
    for q in alias_indices:
        w = winner_of[q]
        mag_xyz1[q] = mag_xyz1[w]
        base_sign1[q] = base_sign1[w]
else:
    # MODE == "FREE": aliases get deterministic new NEC states derived from commitment
    for q in alias_indices:
        mags, signs = derive_new_nec_for_freed(step0.commits[q], q, instr_counter=1)
        mag_xyz1[q] = mags
        base_sign1[q] = signs
        # tag also changes when they take new work:
        tags1[q] = tag_u64_from_commit(step0.commits[q], q, instr_counter=1)

print("active indices:", len(active_indices), "alias indices:", len(alias_indices))

# Step 1: compute only for ACTIVE (winners), then fill aliases
# With active_indices given, execute_step returns (placeholder StepOut, partial tuple).
# partial_shell, masks_views_a and masks_sel_a are not used again below.
partial_shell, partial = execute_step(mag_xyz1, base_sign1, tags1, instr_counter=1, active_indices=active_indices)
(ai, packed_xyz_a, swapcount_xyz_a, bits_views_a, masks_views_a, masks_sel_a, commits_a, tags_a, initiator_a) = partial

# Build full Step1 outputs by scattering active results, then filling aliases
packed_views_u32_1 = np.zeros((Q,3), dtype=np.uint32)
swapcount_xyz_1 = np.zeros((Q,3), dtype=np.uint8)
bits_views_1 = np.zeros((Q,3,30), dtype=np.int32)
commits1 = [b""]*Q
tags_u64_1 = np.zeros((Q,), dtype=np.uint64)
initiator1 = np.full((Q,), -1, dtype=np.int32)

# Row i of each partial array belongs to qubit ai[i].
for i,q in enumerate(ai):
    packed_views_u32_1[q] = packed_xyz_a[i]
    swapcount_xyz_1[q] = swapcount_xyz_a[i]
    bits_views_1[q] = bits_views_a[i]
    commits1[q] = commits_a[i]
    tags_u64_1[q] = tags_a[i]
    initiator1[q] = initiator_a[i]

# Fill alias qubits depending on mode:
if MODE == "PAIR_HUNT":
    # Copy winner's computed payload/commit/tag for aliases
    for q in alias_indices:
        w = winner_of[q]
        packed_views_u32_1[q] = packed_views_u32_1[w]
        swapcount_xyz_1[q] = swapcount_xyz_1[w]
        bits_views_1[q] = bits_views_1[w]
        commits1[q] = commits1[w]
        # keep unique per-qubit tag even in pair-hunt (addressable):
        tags_u64_1[q] = tag_u64_from_commit(commits1[w], int(q), 1)
        initiator1[q] = initiator1[w]
else:
    # MODE FREE: aliases computed nothing here (freed); for demo we still need a payload object.
    # We set their payload to zeros and mark commitment as empty.
    for q in alias_indices:
        packed_views_u32_1[q] = np.array([0,0,0], dtype=np.uint32)
        swapcount_xyz_1[q] = np.array([0,0,0], dtype=np.uint8)
        bits_views_1[q] = np.zeros((3,30), dtype=np.int32)
        commits1[q] = b""
        # NOTE(review): the comment below says the tag was already set, but the
        # reassignment above wrote tags1, while tags_u64_1[q] is still zero here.
        # tag already set by deterministic reassignment above
        initiator1[q] = -1

packed90_u64_1 = pack90_from_3x30_u32(packed_views_u32_1)

# Dedup for step1 (only meaningful for active qubits; if MODE FREE, freed are intentionally different jobs)
# We compute dedup on non-empty commitments only:
efficiency_score_1 = swapcount_xyz_1.sum(axis=1).astype(np.int32)
valid = np.array([len(c)==32 for c in commits1], dtype=bool)
# Empty commitments are replaced by a 34-byte placeholder (32 zero bytes + the
# qubit index), so each one forms its own group instead of colliding.
commits1_valid = [commits1[q] if valid[q] else (b"\x00"*32 + q.to_bytes(2,"little")) for q in range(Q)]
dedup1 = dedup_by_commit(commits1_valid, efficiency_score_1)

print("\n--- STEP 1 (active-only compute) ---")
print("computed winners:", len(active_indices), "aliases:", len(alias_indices), "MODE:", MODE)
print("step1 collision_qubits:", dedup1.collision_qubits, "num_groups:", len(dedup1.groups))
print("packed90_u64[0]:", packed90_u64_1[0].tolist(), "tag[0]:", int(tags_u64_1[0]))

# Show a few qubits
print("\nSample qubits (q, mag, sign, winner, active, packedXYZ_step0, packedXYZ_step1):")
for q in range(8):
    print(q,
          mag_xyz0[q].tolist(),
          base_sign0[q].tolist(),
          int(winner_of[q]),
          bool(active_mask[q]),
          step0.packed_views_u32[q].tolist(),
          packed_views_u32_1[q].tolist())

# Summarize reclaimed compute
print("\n=== Reclaimed compute estimate ===")
print("Step0 executed on:", Q, "qubits")
print("Step1 executed on:", len(active_indices), "qubits (winners only)")
print("Saved executions:", Q - len(active_indices))
print("If MODE=FREE, those saved qubits are available for new work in the next scheduling epoch.")

=== UniversalISA v0.4 ===
MODE: FREE

--- STEP 0 ---
bits_views: (64, 3, 30) packed_views_u32: (64, 3) packed90_u64: (64, 2)
initiator axis counts: [32 17 15]
collision_qubits: 11 num_groups: 53 freed: 11
active indices: 53 alias indices: 11

--- STEP 1 (active-only compute) ---
computed winners: 53 aliases: 11 MODE: FREE
step1 collision_qubits: 3 num_groups: 61
packed90_u64[0]: [5190005092015155620, 4825608] tag[0]: 6328507439259414915

Sample qubits (q, mag, sign, winner, active, packedXYZ_step0, packedXYZ_step1):
0 [25, 63, 16] [-1, -1, 1] 0 True [604514724, 538601608, 77209732] [604514724, 538601608, 77209732]
1 [27, 17, 55] [1, -1, -1] 1 True [76685316, 604514724, 538601608] [76685316, 604514724, 538601608]
2 [13, 12, 33] [1, 1, -1] 2 True [603990280, 538601636, 77209768] [603990280, 538601636, 77209768]
3 [7, 18, 24] [-1, 1, 1] 3 True [77209768, 604506504, 538069028] [77209768, 604506504, 538069028]
4 [45, 28, 48] [-1, -1, 1] 4 True [604514724, 538601608, 76677124] [604514724, 53

In [None]:
# UniversalISA v0.5 (Colab single-cell)
# - Defines a minimal IR/opcode format for one ISA cycle.
# - Implements a small interpreter that executes an instruction stream.
# - Preserves triplet loops + scatter updates.
# - Supports DEDUP + FREE/ALIAS and an "EXEC_ACTIVE_ONLY" optimization that skips redundant qubits.

import tensorflow as tf
import numpy as np
import hashlib
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

# -----------------------------
# Global constants / fixed geometry
# -----------------------------
Q = 64                # number of qubits (rows) in every per-qubit array
TAU_HI = 1.0          # collapse test: real part must reach this value
TAU_LOW = -1.0        # collapse test: unreal part must be at or below this value
EPS = 1e-6            # magnitudes at or below this are treated as zero
R_FOR_RATIO = 64.0    # collapse test: real/unreal ratio threshold

# 1 at the prime slot indices below 30 (2,3,5,7,11,13,17,19,23,29), 0 elsewhere.
PRIME_MASK_30 = tf.constant(
    [int(slot in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)) for slot in range(30)],
    dtype=tf.int32
)

# 10 triplets over 30 slots: [0..2],[3..5],...,[27..29]
TRIPLET_IDX = tf.constant(
    [[first, first + 1, first + 2] for first in range(0, 30, 3)],
    dtype=tf.int32
)

# -----------------------------
# IR / Opcode model
# -----------------------------
@dataclass
class Instr:
    """One instruction of the IR: an opcode name plus its keyword arguments."""
    op: str  # opcode name
    args: Dict[str, Any] = field(default_factory=dict)  # per-instance dict; empty when omitted

# -----------------------------
# Utilities: pack/bitslice
# -----------------------------
def pack30_to_u32_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """Pack each row of 30 bits into one uint32, slot k landing on bit k.

    Input is [Q,30]; the result is [Q] uint32.
    """
    bit_positions = tf.cast(tf.range(30), tf.uint32)        # [30]
    as_u32 = tf.cast(bits30_i32, tf.uint32)                 # [Q,30]
    shifted = tf.bitwise.left_shift(as_u32, bit_positions)  # [Q,30]
    return tf.reduce_sum(shifted, axis=1)                   # [Q] u32

def bitslice_30_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """Transpose a [Q,30] bit matrix into 30 uint64 lane masks (Q<=64).

    Each row q is weighted by ``1 << q`` and the weighted values are summed
    per slot, so for 0/1 inputs lane s carries row q's bit at position q.
    """
    n_rows = tf.shape(bits30_i32)[0]
    row_ids = tf.cast(tf.range(n_rows), tf.uint64)
    row_weight = tf.bitwise.left_shift(tf.constant(1, tf.uint64), row_ids)

    per_slot = tf.transpose(tf.cast(bits30_i32, tf.uint64), [1, 0])
    weighted = per_slot * tf.expand_dims(row_weight, axis=0)
    return tf.reduce_sum(weighted, axis=1)

def pack90_from_3x30_u32(packed3_u32: np.ndarray) -> np.ndarray:
    """Pack three 30-bit views per row into two uint64 words (90 bits).

    packed3_u32: [Q,3] uint32 -> [Q,2] uint64. Only the low 30 bits of each
    input word are used. Word 0 holds view 0 in bits 0..29, view 1 in bits
    30..59 and the low 4 bits of view 2 in bits 60..63; word 1 holds the
    remaining 26 bits of view 2.
    """
    low30 = np.uint64((1 << 30) - 1)
    lanes = packed3_u32.astype(np.uint64) & low30
    view0, view1, view2 = lanes[:, 0], lanes[:, 1], lanes[:, 2]

    words = np.zeros((packed3_u32.shape[0], 2), dtype=np.uint64)
    words[:, 0] = view0 | (view1 << np.uint64(30)) | ((view2 & np.uint64(0xF)) << np.uint64(60))
    words[:, 1] = view2 >> np.uint64(4)
    return words

# -----------------------------
# Phase-dual core ops
# -----------------------------
def add_pd(a, b):
    """Component-wise sum of two phase-dual values."""
    total = a + b
    return total


def mul_pd(a, b):
    """Component-wise product of two phase-dual values."""
    product = a * b
    return product


def div_pd(a, b):
    """Component-wise quotient a / b, replaced by 0 wherever |b| <= EPS."""
    divisor_ok = tf.abs(b) > EPS
    quotient = a / b
    return tf.where(divisor_ok, quotient, tf.zeros_like(a))

# -----------------------------
# NEC selectors (true phase-dual) + initiator logic
# -----------------------------
def sel_real(m: tf.Tensor) -> tf.Tensor:
    """Pair each magnitude with its negation as [+m, -m] along a new axis 1."""
    positive = m
    negative = -m
    return tf.stack([positive, negative], axis=1)

def sel_unreal(m: tf.Tensor) -> tf.Tensor:
    """Pair each magnitude with its negation as [-m, +m] along a new axis 1."""
    negative = -m
    positive = m
    return tf.stack([negative, positive], axis=1)

def build_primaries_from_nec(mag_xyz_i32: tf.Tensor, base_sign_xyz_i32: tf.Tensor) -> tf.Tensor:
    """Expand per-axis magnitudes and signs into six phase-dual primaries.

    For each axis (X, Y, Z) two primaries are produced. With a non-negative
    base sign they are (real selector, unreal selector); with a negative base
    sign the two are swapped. The result is ordered X0,X1,Y0,Y1,Z0,Z1 -> [Q,6,2].
    """
    magnitudes = tf.cast(mag_xyz_i32, tf.float32)
    sign_is_negative = base_sign_xyz_i32 < 0  # [Q,3]

    primaries = []
    for axis in range(3):
        m = magnitudes[:, axis]
        as_real, as_unreal = sel_real(m), sel_unreal(m)
        # Keep a trailing axis of size 1 so the condition broadcasts over the pair.
        flip = sign_is_negative[:, axis:axis + 1]
        primaries.append(tf.where(flip, as_unreal, as_real))
        primaries.append(tf.where(flip, as_real, as_unreal))

    return tf.stack(primaries, axis=1)  # [Q,6,2]

def initiator_from_shared_counts(mag_xyz_i32: tf.Tensor) -> tf.Tensor:
    """Choose the initiator axis (0=X, 1=Y, 2=Z) of every row.

    For each axis, a row's count is the number of rows (itself included) that
    carry the same magnitude on that axis; an axis is "shared" when that count
    exceeds 1. The choice depends on how many axes are shared:

      * 1 or 3 shared: the axis with the largest count (first on ties);
      * 2 shared: the first axis whose count is exactly 1;
      * 0 shared: the axis with the largest magnitude, ties resolved X, Y, Z
        through the small bias added after scaling by 1000.
    """
    def matches_per_row(column):
        # Pairwise equality of one column against itself, summed per row.
        same = tf.equal(tf.expand_dims(column, 1), tf.expand_dims(column, 0))
        return tf.reduce_sum(tf.cast(same, tf.int32), axis=1)

    counts = tf.stack(
        [matches_per_row(mag_xyz_i32[:, axis]) for axis in range(3)], axis=1
    )  # [Q,3]
    n_shared = tf.reduce_sum(tf.cast(counts > 1, tf.int32), axis=1)

    most_shared = tf.argmax(counts, axis=1, output_type=tf.int32)
    lone_axis = tf.argmax(tf.cast(tf.equal(counts, 1), tf.int32), axis=1, output_type=tf.int32)

    tie_break = tf.constant([2, 1, 0], tf.int32)
    largest_mag = tf.argmax(mag_xyz_i32 * 1000 + tie_break, axis=1, output_type=tf.int32)

    # Resolve from the innermost case outwards: 3 shared, then 2, then 1.
    choice = tf.where(n_shared == 3, most_shared, largest_mag)
    choice = tf.where(n_shared == 2, lone_axis, choice)
    choice = tf.where(n_shared == 1, most_shared, choice)
    return choice

def permute_primaries_by_initiator(prim6: tf.Tensor, initiator_axis: tf.Tensor) -> tf.Tensor:
    """Rotate each row's six primaries so the initiator axis pair comes first.

    Initiator 0 keeps the order, initiator 1 uses (Y,Z,X), and any other value
    uses (Z,X,Y).
    """
    n_rows = tf.shape(prim6)[0]

    def per_row(order):
        return tf.broadcast_to(tf.constant(order, tf.int32), [n_rows, 6])

    order_x = per_row([0, 1, 2, 3, 4, 5])
    order_y = per_row([2, 3, 4, 5, 0, 1])
    order_z = per_row([4, 5, 0, 1, 2, 3])

    starts_with_x = tf.expand_dims(initiator_axis == 0, 1)
    starts_with_y = tf.expand_dims(initiator_axis == 1, 1)

    y_or_z = tf.where(starts_with_y, order_y, order_z)
    order = tf.where(starts_with_x, order_x, y_or_z)
    return tf.gather(prim6, order, axis=1, batch_dims=1)

# -----------------------------
# REG30 build: 10 triplets, (+,-) reorderable; (*,/) fixed
# -----------------------------
def build_register30(prim6: tf.Tensor, canonicalize_addsub: bool = True) -> Tuple[tf.Tensor, tf.Tensor]:
    """Expand six primaries into the 30-slot register plus per-pairing swap flags.

    Slots 0..5 are the primaries themselves. Each of the eight pairings then
    contributes a triplet (sum, difference, third op), where the third op is a
    product for the first four pairings and a guarded quotient for the last
    four. With ``canonicalize_addsub`` the sum and difference trade places
    whenever the difference's first component is the larger one, and the swap
    is recorded as 1 in the flags.

    Returns:
        (reg, swap_flags): [Q,30,2] register and [Q,8] int32 flags.
    """
    a0, a1, b0, b1, c0, c1 = tf.unstack(prim6, axis=1)

    pairings = (
        (a0, b0, mul_pd),
        (b0, c0, mul_pd),
        (a0, c0, mul_pd),
        (a1, b1, mul_pd),
        (a0, b1, div_pd),
        (b0, c1, div_pd),
        (a1, c0, div_pd),
        (b1, c1, div_pd),
    )

    slots = [a0, a1, b0, b1, c0, c1]
    flags = []
    for lhs, rhs, third_op in pairings:
        total = add_pd(lhs, rhs)
        diff = lhs - rhs
        third = third_op(lhs, rhs)

        if canonicalize_addsub:
            flag = tf.cast(diff[:, 0] > total[:, 0], tf.int32)  # [Q]
            swap_here = flag[:, None] > 0
            # Both right-hand sides are evaluated before either name is rebound.
            total, diff = tf.where(swap_here, diff, total), tf.where(swap_here, total, diff)
        else:
            flag = tf.zeros([tf.shape(lhs)[0]], tf.int32)

        slots += [total, diff, third]
        flags.append(flag)

    return tf.stack(slots, axis=1), tf.stack(flags, axis=1)  # [Q,30,2], [Q,8]

# -----------------------------
# COLLAPSE (triplet loops + scatter updates preserved)
# -----------------------------
def detect_collapse_triplet_scatter(pairs: tf.Tensor,
                                   tau_hi: float = TAU_HI,
                                   tau_low: float = TAU_LOW,
                                   r_for_ratio: float = R_FOR_RATIO) -> tf.Tensor:
    """Compute the int32 0/1 collapse mask of a phase-dual register.

    Per slot, collapse is flagged when real >= ``tau_hi`` together with
    unreal <= ``tau_low``, or when real / unreal > ``r_for_ratio`` (with the
    ratio forced to 0 where ``|unreal| <= EPS``). Afterwards the ten triplets
    listed in ``TRIPLET_IDX`` are visited in turn and their flags scattered
    back into the mask.

    NOTE(review): a uniform triplet is rewritten with its own first flag and a
    mixed triplet with its own flags, so the scatter appears to store the
    values already present. The loop is kept because the section header
    documents it as intentionally preserved.

    Args:
        pairs: [Q,30,2] register, last axis (real, unreal).
        tau_hi: threshold the real part must reach.
        tau_low: threshold the unreal part must not exceed.
        r_for_ratio: ratio threshold.

    Returns:
        [Q,30] int32 mask.
    """
    real_part, unreal_part = pairs[..., 0], pairs[..., 1]
    row_count = tf.shape(pairs)[0]

    by_threshold = tf.logical_and(real_part >= tau_hi, unreal_part <= tau_low)
    guarded_ratio = tf.where(tf.abs(unreal_part) > EPS,
                             real_part / unreal_part,
                             tf.zeros_like(real_part))
    by_ratio = guarded_ratio > r_for_ratio
    per_slot = tf.logical_or(by_threshold, by_ratio)

    result = tf.cast(per_slot, tf.int32)

    # Each row index repeated three times: one entry per slot of a triplet.
    rows_x3 = tf.repeat(tf.range(row_count), repeats=3)

    for t in tf.range(10):
        triplet_slots = TRIPLET_IDX[t]
        triplet_flags = tf.gather(per_slot, triplet_slots, axis=1)
        leader = triplet_flags[:, 0]
        uniform = tf.reduce_all(tf.equal(triplet_flags, triplet_flags[:, 0:1]), axis=1)

        replacement = tf.where(
            tf.expand_dims(uniform, axis=1),
            tf.tile(tf.expand_dims(tf.cast(leader, tf.int32), axis=1), [1, 3]),
            tf.cast(triplet_flags, tf.int32),
        )

        cols = tf.tile(triplet_slots, multiples=[row_count])
        coords = tf.stack([rows_x3, cols], axis=1)
        result = tf.tensor_scatter_nd_update(result, coords, tf.reshape(replacement, [-1]))

    return result

# -----------------------------
# PARITY + BITMAP
# -----------------------------
def apply_parity_rotation(pairs: tf.Tensor, collapse_mask: tf.Tensor):
    """Flip the sign of slots that sit on a prime index or are marked collapsed.

    Returns:
        (rotated, affected): the register with affected slots multiplied by
        -1 (both components), and the int32 0/1 mask of affected slots.
    """
    row_count = tf.shape(pairs)[0]
    prime_per_row = tf.broadcast_to(tf.expand_dims(PRIME_MASK_30, 0), [row_count, 30])

    affected = tf.cast(tf.logical_or(prime_per_row > 0, collapse_mask > 0), tf.int32)

    flip = tf.constant(-1.0, tf.float32)
    keep = tf.constant(1.0, tf.float32)
    factor = tf.where(affected > 0, flip, keep)

    return pairs * tf.expand_dims(factor, -1), affected

def bitmap(rotated_pairs: tf.Tensor) -> tf.Tensor:
    """Map each slot to 1 when its real component is above EPS, otherwise 0 (int32)."""
    reals = rotated_pairs[..., 0]
    return tf.cast(reals > EPS, tf.int32)

# -----------------------------
# Commit/tag/dedup/free
# -----------------------------
def commit_full90(packed_u32_xyz: np.ndarray,
                  initiator_axis: np.ndarray,
                  swapcount_xyz: np.ndarray,
                  instr_counter: int,
                  domain_sep: bytes = b"NTHISA90") -> List[bytes]:
    """Produce one 32-byte BLAKE2s commitment per row.

    The hashed message is, in order: ``domain_sep``, the instruction counter
    (4 bytes), the three packed views (4 bytes each), the initiator axis
    (1 byte) and the three swap counts (1 byte each). All integers are encoded
    unsigned little-endian, so a value that does not fit raises OverflowError.
    """
    def message_for(q):
        # Field widths in bytes follow the layout described in the docstring.
        fields = (
            (instr_counter, 4),
            (packed_u32_xyz[q, 0], 4),
            (packed_u32_xyz[q, 1], 4),
            (packed_u32_xyz[q, 2], 4),
            (initiator_axis[q], 1),
            (swapcount_xyz[q, 0], 1),
            (swapcount_xyz[q, 1], 1),
            (swapcount_xyz[q, 2], 1),
        )
        message = domain_sep
        for value, width in fields:
            message = message + int(value).to_bytes(width, "little", signed=False)
        return message

    return [
        hashlib.blake2s(message_for(q), digest_size=32).digest()
        for q in range(packed_u32_xyz.shape[0])
    ]

def tag_u64_from_commit(commit32: bytes, q_idx: int, instr_counter: int) -> np.uint64:
    """Hash a commitment, qubit index and counter down to a uint64 tag.

    Message: ``b"NTH_TAG0"``, the commitment, the index (2 bytes LE) and the
    counter (4 bytes LE). The 8-byte BLAKE2s digest is interpreted as a
    little-endian unsigned integer.
    """
    parts = (
        b"NTH_TAG0",
        commit32,
        int(q_idx).to_bytes(2, "little", signed=False),
        int(instr_counter).to_bytes(4, "little", signed=False),
    )
    message = parts[0] + parts[1] + parts[2] + parts[3]
    digest8 = hashlib.blake2s(message, digest_size=8).digest()
    return np.uint64(int.from_bytes(digest8, "little", signed=False))

@dataclass
class DedupResult:
    """Outcome of grouping qubits by identical commitment (see dedup_by_commit)."""
    winner_of: np.ndarray           # per qubit: index of the winner of its group (int32)
    active_mask: np.ndarray         # per qubit: True for group winners, False for aliases
    groups: Dict[bytes, List[int]]  # commitment bytes -> qubit indices sharing it
    freed: List[int]                # alias indices, recorded only when mode == "FREE"
    collision_qubits: int           # total number of aliases across all groups

def dedup_by_commit(commits: List[bytes], efficiency_score: np.ndarray, mode: str) -> DedupResult:
    """Bucket qubits by commitment and pick one winner per bucket.

    The winner of a bucket is the member with the lowest ``efficiency_score``;
    equal scores are resolved in favour of the lowest index. Every other
    member is marked inactive and, when ``mode == "FREE"``, appended to
    ``freed``.

    Returns:
        ``DedupResult`` with per-qubit winners, the active mask, the buckets
        (in first-seen order), the freed list and the total alias count.
    """
    total = len(commits)

    buckets: Dict[bytes, List[int]] = {}
    for idx, digest in enumerate(commits):
        buckets.setdefault(digest, []).append(idx)

    winner_of = np.arange(total, dtype=np.int32)
    active_mask = np.ones(total, dtype=bool)
    freed: List[int] = []
    aliases = 0
    record_freed = (mode == "FREE")

    for members in buckets.values():
        # A bucket of size n contributes n - 1 aliases (0 for singletons).
        aliases += len(members) - 1

        champion = min(members, key=lambda i: (int(efficiency_score[i]), i))
        for i in members:
            winner_of[i] = champion
            if i == champion:
                continue
            active_mask[i] = False
            if record_freed:
                freed.append(i)

    return DedupResult(winner_of, active_mask, buckets, freed, aliases)

def derive_new_nec_for_freed(commit32: bytes, q_idx: int, instr_counter: int) -> Tuple[np.ndarray, np.ndarray]:
    """Deterministically derive new magnitudes and signs for a freed qubit.

    Hashes ``b"NTH_FREE0" | commit32 | q_idx (2 bytes LE) | instr_counter
    (4 bytes LE)`` with BLAKE2s (16-byte digest). Digest bytes 0..2 modulo 64
    become the three magnitudes; bits 0..2 of digest byte 3 become the signs
    (set -> +1, clear -> -1).

    Returns:
        (mags, signs): two int32 arrays of length 3.
    """
    header = b"NTH_FREE0" + commit32
    trailer = (int(q_idx).to_bytes(2, "little", signed=False)
               + int(instr_counter).to_bytes(4, "little", signed=False))
    digest = hashlib.blake2s(header + trailer, digest_size=16).digest()

    mags = np.array([digest[k] % 64 for k in range(3)], dtype=np.int32)

    sign_bits = digest[3]
    signs = np.array([1 if sign_bits & bit else -1 for bit in (1, 2, 4)], dtype=np.int32)
    return mags, signs

# -----------------------------
# Interpreter state
# -----------------------------
@dataclass
class ISAState:
    """Mutable interpreter state; every array default is freshly allocated per instance."""
    instr_counter: int = 0
    mode: str = "FREE"                  # FREE or PAIR_HUNT
    exec_active_only: bool = False      # if True, heavy ops run only on active winners

    # NEC state
    # Defaults: all-zero magnitudes and all +1 signs, both [Q,3] int32.
    mag_xyz: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.int32))
    base_sign: np.ndarray = field(default_factory=lambda: np.ones((Q,3), dtype=np.int32))

    # Derived registers
    # All start zeroed (commits start as empty byte strings).
    initiator: np.ndarray = field(default_factory=lambda: np.zeros((Q,), dtype=np.int32))
    packed_views_u32: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.uint32))
    packed90_u64: np.ndarray = field(default_factory=lambda: np.zeros((Q,2), dtype=np.uint64))
    bits_views: np.ndarray = field(default_factory=lambda: np.zeros((Q,3,30), dtype=np.int32))
    swapcount_xyz: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.uint8))
    commits: List[bytes] = field(default_factory=lambda: [b""]*Q)
    tags_u64: np.ndarray = field(default_factory=lambda: np.zeros((Q,), dtype=np.uint64))

    # Dedup scheduling
    # Initially no dedup result, every qubit active and its own winner.
    dedup: Optional[DedupResult] = None
    active_mask: np.ndarray = field(default_factory=lambda: np.ones((Q,), dtype=bool))
    winner_of: np.ndarray = field(default_factory=lambda: np.arange(Q, dtype=np.int32))

# -----------------------------
# Interpreter core: ops
# -----------------------------
def op_NEC_LOAD(st: ISAState, seed: int = 11, mag_range: int = 64):
    """
    Fill the NEC magnitudes and base signs with pseudo-random values.

    The generator is seeded with seed + st.instr_counter, so the same seed
    yields different (but reproducible) loads at different instruction counts.
    Magnitudes are drawn from [0, mag_range); signs are -1 or +1.
    """
    generator = np.random.default_rng(seed + st.instr_counter)
    shape = (Q, 3)
    # Draw order is part of the reproducible stream: magnitudes, then sign bits.
    magnitudes = generator.integers(0, mag_range, size=shape, dtype=np.int32)
    coin = generator.integers(0, 2, size=shape, dtype=np.int32)
    st.mag_xyz = magnitudes
    st.base_sign = np.where(coin > 0, 1, -1).astype(np.int32)

def op_INIT_SELECT(st: ISAState):
    """Recompute st.initiator (int32, one axis index per qubit) from st.mag_xyz."""
    magnitudes = tf.constant(st.mag_xyz, dtype=tf.int32)
    axis_per_qubit = initiator_from_shared_counts(magnitudes)
    st.initiator = axis_per_qubit.numpy().astype(np.int32)

def _compute_views_full(st: ISAState):
    """
    Run the fused register pipeline for all Q qubits and store the results on st.

    For each of the three views (primaries permuted with a constant initiator
    axis 0, 1, 2) the pipeline builds the 30-slot register, detects collapse,
    applies the parity rotation, thresholds to bits and packs them.

    Writes (replacing the previous arrays):
      st.bits_views        int32  [Q,3,30]
      st.packed_views_u32  uint32 [Q,3]
      st.swapcount_xyz     uint8  [Q,3]
      st.packed90_u64      uint64 [Q,2]

    st.initiator is not read here: every view uses a constant axis.
    """
    mag_tf = tf.constant(st.mag_xyz, dtype=tf.int32)
    sign_tf = tf.constant(st.base_sign, dtype=tf.int32)
    prim0 = build_primaries_from_nec(mag_tf, sign_tf)  # [Q,6,2]

    def eval_view(axis):
        # One view: permute, build the register, rotate, threshold, pack.
        prim6 = permute_primaries_by_initiator(prim0, tf.fill([Q], tf.constant(axis, tf.int32)))
        reg30, swaps = build_register30(prim6, canonicalize_addsub=True)
        collapse = detect_collapse_triplet_scatter(reg30)
        rotated, _ = apply_parity_rotation(reg30, collapse)
        bits = bitmap(rotated)
        return bits, pack30_to_u32_tf(bits), tf.reduce_sum(swaps, axis=1)

    bits_per_view, packed_per_view, swaps_per_view = [], [], []
    for axis in range(3):
        bits, packed, swapcount = eval_view(axis)
        bits_per_view.append(bits.numpy().astype(np.int32))
        packed_per_view.append(packed)
        swaps_per_view.append(swapcount.numpy().astype(np.uint8))

    st.bits_views = np.stack(bits_per_view, axis=1)
    st.packed_views_u32 = tf.stack(packed_per_view, axis=1).numpy().astype(np.uint32)
    st.swapcount_xyz = np.stack(swaps_per_view, axis=1)  # already uint8
    st.packed90_u64 = pack90_from_3x30_u32(st.packed_views_u32)

def _compute_views_active_only(st: ISAState):
    """
    Run the fused register pipeline for active (winner) qubits only.

    The rows of st.bits_views, st.packed_views_u32 and st.swapcount_xyz are
    zeroed in place, filled for the active qubits, and then:
      PAIR_HUNT : every alias row is copied from its winner's row
      otherwise : alias rows stay zero (not computed this cycle)
    st.packed90_u64 is rebuilt from the packed views at the end.

    When no qubit is active, all outputs (commits and tags included) are
    cleared and nothing is computed.
    """
    active_idx = np.where(st.active_mask)[0].astype(np.int32)
    alias_idx = np.where(~st.active_mask)[0].astype(np.int32)
    Qa = int(active_idx.shape[0])

    if Qa == 0:
        # nothing active; clear outputs
        st.bits_views[:] = 0
        st.packed_views_u32[:] = 0
        st.swapcount_xyz[:] = 0
        st.packed90_u64[:] = 0
        st.commits = [b""]*Q
        st.tags_u64[:] = 0
        return

    mag_a = tf.constant(st.mag_xyz[active_idx], dtype=tf.int32)
    sign_a = tf.constant(st.base_sign[active_idx], dtype=tf.int32)
    prim0_a = build_primaries_from_nec(mag_a, sign_a)
    # The initiator is not recomputed here: each view below uses a constant
    # axis, so none of the outputs depend on it.

    def eval_view(axis):
        # One view over the active subset: permute, build, rotate, threshold, pack.
        prim6 = permute_primaries_by_initiator(prim0_a, tf.fill([Qa], tf.constant(axis, tf.int32)))
        reg30, swaps = build_register30(prim6, canonicalize_addsub=True)
        collapse = detect_collapse_triplet_scatter(reg30)
        rotated, _ = apply_parity_rotation(reg30, collapse)
        bits = bitmap(rotated)
        return bits, pack30_to_u32_tf(bits), tf.reduce_sum(swaps, axis=1)

    bits_per_view, packed_per_view, swaps_per_view = [], [], []
    for axis in range(3):
        bits, packed, swapcount = eval_view(axis)
        bits_per_view.append(bits.numpy().astype(np.int32))
        packed_per_view.append(packed)
        swaps_per_view.append(swapcount.numpy().astype(np.uint8))

    bits_views_a = np.stack(bits_per_view, axis=1)
    packed_a = tf.stack(packed_per_view, axis=1).numpy().astype(np.uint32)
    swapcount_a = np.stack(swaps_per_view, axis=1)

    # Scatter active results into the full arrays (active_idx has no repeats).
    st.bits_views[:] = 0
    st.packed_views_u32[:] = 0
    st.swapcount_xyz[:] = 0
    st.bits_views[active_idx] = bits_views_a
    st.packed_views_u32[active_idx] = packed_a
    st.swapcount_xyz[active_idx] = swapcount_a

    # Fill aliases. In FREE mode freed qubits may be doing new work later;
    # for now their payload stays 0 (uncomputed this cycle).
    if st.mode == "PAIR_HUNT":
        # aliases copy winner payload
        for q in alias_idx:
            w = st.winner_of[q]
            st.bits_views[q] = st.bits_views[w]
            st.packed_views_u32[q] = st.packed_views_u32[w]
            st.swapcount_xyz[q] = st.swapcount_xyz[w]

    st.packed90_u64 = pack90_from_3x30_u32(st.packed_views_u32)

def op_REG30_BUILD_BITMAP_PACK(st: ISAState):
    """
    Fused execution of REG30_BUILD + COLLAPSE + PARITY + BITMAP + PACK.

    The IR still names the individual primitives; this handler performs all of
    them in one step. The active-only path is taken only when it has been
    enabled and a dedup result exists; otherwise every qubit is computed.
    """
    subset_only = st.exec_active_only and st.dedup is not None
    compute = _compute_views_active_only if subset_only else _compute_views_full
    compute(st)

def op_COMMIT(st: ISAState):
    """
    Commit the current packed views and derive one 64-bit tag per qubit.

    st.commits receives the digests from commit_full90; st.tags_u64 is then
    rebuilt from each digest, the qubit index and the instruction counter.
    """
    counter = st.instr_counter
    initiator_u8 = st.initiator.astype(np.uint8)
    swaps_u8 = st.swapcount_xyz.astype(np.uint8)
    st.commits = commit_full90(st.packed_views_u32, initiator_u8, swaps_u8, counter)

    tags = np.zeros((Q,), dtype=np.uint64)
    for q in range(Q):
        tags[q] = tag_u64_from_commit(st.commits[q], q, counter)
    st.tags_u64 = tags

def op_DEDUP(st: ISAState):
    """
    Group qubits by commitment and record winners and aliases on st.

    Efficiency score prototype: the summed swap counts of a qubit, where
    fewer swaps means cheaper. st.active_mask and st.winner_of receive
    copies of the arrays held by the stored dedup result.
    """
    swaps_total = np.sum(st.swapcount_xyz, axis=1).astype(np.int32)
    result = dedup_by_commit(st.commits, swaps_total, st.mode)
    st.dedup = result
    st.active_mask = result.active_mask.copy()
    st.winner_of = result.winner_of.copy()

def op_FREE_ALIAS(st: ISAState):
    """
    Give every alias (inactive) qubit its NEC state for the next instruction.

    Does nothing until a dedup result exists.
      PAIR_HUNT : the alias inherits its winner's magnitudes and signs
      otherwise : the alias gets a new state derived deterministically from
                  its own commit, its index and the next instruction counter
    """
    if st.dedup is None:
        return

    aliases = np.nonzero(~st.active_mask)[0].astype(np.int32)

    if st.mode != "PAIR_HUNT":
        upcoming = st.instr_counter + 1
        for q in aliases:
            new_mags, new_signs = derive_new_nec_for_freed(st.commits[q], q, upcoming)
            st.mag_xyz[q] = new_mags
            st.base_sign[q] = new_signs
        return

    # PAIR_HUNT: aliases remain paired with their winner.
    for q in aliases:
        source = st.winner_of[q]
        st.mag_xyz[q] = st.mag_xyz[source]
        st.base_sign[q] = st.base_sign[source]

def op_EXEC_ACTIVE_ONLY(st: ISAState, enabled: bool = True):
    """Set st.exec_active_only to the truth value of enabled."""
    flag = bool(enabled)
    st.exec_active_only = flag

def op_NEXT(st: ISAState):
    """Advance the instruction counter by one."""
    st.instr_counter = st.instr_counter + 1

# -----------------------------
# Interpreter runner
# -----------------------------
def _fused_noop(st, **k):
    """Handler for named primitives whose work is already done inside REG30_BUILD."""
    return None


# Opcode name -> handler, as looked up by run_program.
OP_TABLE = {
    "NEC_LOAD": op_NEC_LOAD,
    "INIT_SELECT": op_INIT_SELECT,
    "REG30_BUILD": op_REG30_BUILD_BITMAP_PACK,    # fused for now
    "COLLAPSE": _fused_noop,                      # named primitive; fused in REG30_BUILD
    "PARITY": _fused_noop,                        # named primitive; fused in REG30_BUILD
    "BITMAP": _fused_noop,                        # named primitive; fused in REG30_BUILD
    "PACK": _fused_noop,                          # named primitive; fused in REG30_BUILD
    "COMMIT": op_COMMIT,
    "DEDUP": op_DEDUP,
    "FREE_ALIAS": op_FREE_ALIAS,
    "EXEC_ACTIVE_ONLY": op_EXEC_ACTIVE_ONLY,
    "NEXT": op_NEXT,
}

def run_program(st: ISAState, program: List[Instr], verbose: bool = True):
    """
    Execute each instruction of program against st, in order.

    Every instruction's op is looked up in OP_TABLE and called with st plus
    the instruction's args as keyword arguments. With verbose=True a one-line
    trace is printed after REG30_BUILD, COMMIT, DEDUP, FREE_ALIAS and NEXT.

    Raises:
        ValueError: if an instruction names an opcode missing from OP_TABLE.
    """
    def _trace(op):
        # Print the progress line for the ops that have one; others are silent.
        if op == "REG30_BUILD":
            print(f"[t={st.instr_counter}] REG30_BUILD done. packed90_u64[0]={st.packed90_u64[0].tolist()}")
            return
        if op == "COMMIT":
            print(f"[t={st.instr_counter}] COMMIT done. commit0={st.commits[0].hex()[:16]}...")
            return
        if op == "DEDUP":
            print(f"[t={st.instr_counter}] DEDUP done. collisions={st.dedup.collision_qubits} groups={len(st.dedup.groups)} active={int(st.active_mask.sum())}")
            return
        if op == "FREE_ALIAS":
            print(f"[t={st.instr_counter}] FREE/ALIAS applied. mode={st.mode} aliases={Q-int(st.active_mask.sum())}")
            return
        if op == "NEXT":
            print(f"--- NEXT instr_counter={st.instr_counter} ---")

    for ins in program:
        handler = OP_TABLE.get(ins.op)
        if handler is None:
            raise ValueError(f"Unknown opcode: {ins.op}")
        handler(st, **ins.args)
        if verbose:
            _trace(ins.op)

# -----------------------------
# Demo microprogram: two instruction cycles with DEDUP + FREE/ALIAS + EXEC_ACTIVE_ONLY
# -----------------------------
# Fresh interpreter state in FREE mode, starting at instruction 0.
st = ISAState(mode="FREE", instr_counter=0)
# NOTE(review): op_NEC_LOAD draws from its own np.random.default_rng(seed + instr_counter),
# so these global seeds do not influence the loaded NEC state.
np.random.seed(11)
tf.random.set_seed(11)

# Two instruction cycles. The first computes every qubit; EXEC_ACTIVE_ONLY is
# switched on just before the first NEXT, so the second REG30_BUILD takes the
# active-only path (a dedup result exists by then).
program = [
    Instr("NEC_LOAD", {"seed": 11}),
    Instr("INIT_SELECT"),
    Instr("REG30_BUILD"),
    Instr("COMMIT"),
    Instr("DEDUP"),
    Instr("FREE_ALIAS"),
    Instr("EXEC_ACTIVE_ONLY", {"enabled": True}),
    Instr("NEXT"),

    # second cycle (will compute heavy ops only for active winners if exec_active_only=True)
    Instr("INIT_SELECT"),
    Instr("REG30_BUILD"),
    Instr("COMMIT"),
    Instr("DEDUP"),
    Instr("FREE_ALIAS"),
    Instr("NEXT"),
]

print("=== UniversalISA v0.5 Interpreter Demo ===")
print("MODE:", st.mode, "EXEC_ACTIVE_ONLY initially:", st.exec_active_only)
run_program(st, program, verbose=True)

# Summary of the state left behind by the program.
print("\n=== Final State Snapshot ===")
print("instr_counter:", st.instr_counter)
print("initiator axis counts:", np.bincount(st.initiator, minlength=3).tolist(), "(0=x,1=y,2=z)")
print("packed_views_u32[0]:", st.packed_views_u32[0].tolist())
print("packed90_u64[0]:", st.packed90_u64[0].tolist())
print("active qubits:", int(st.active_mask.sum()), "aliases:", Q-int(st.active_mask.sum()))
print("tag[0]:", int(st.tags_u64[0]))
print("Sample qubits (q, mag, sign, winner, active):")
# Shows the first 8 qubits only; this indexing requires Q >= 8.
for q in range(8):
    print(q, st.mag_xyz[q].tolist(), st.base_sign[q].tolist(), int(st.winner_of[q]), bool(st.active_mask[q]))

=== UniversalISA v0.5 Interpreter Demo ===
MODE: FREE EXEC_ACTIVE_ONLY initially: False
[t=0] REG30_BUILD done. packed90_u64[0]=[9232960233523929384, 525330]
[t=0] COMMIT done. commit0=7ee6f4f8306e0b3f...
[t=0] DEDUP done. collisions=15 groups=49 active=49
[t=0] FREE/ALIAS applied. mode=FREE aliases=15
--- NEXT instr_counter=1 ---
[t=1] REG30_BUILD done. packed90_u64[0]=[9232960233523929384, 525330]
[t=1] COMMIT done. commit0=c1596df8c9ec1a9c...
[t=1] DEDUP done. collisions=15 groups=49 active=49
[t=1] FREE/ALIAS applied. mode=FREE aliases=15
--- NEXT instr_counter=2 ---

=== Final State Snapshot ===
instr_counter: 2
initiator axis counts: [21, 18, 25] (0=x,1=y,2=z)
packed_views_u32[0]: [8413480, 8929704, 8405288]
packed90_u64[0]: [9232960233523929384, 525330]
active qubits: 49 aliases: 15
tag[0]: 15683781815185925009
Sample qubits (q, mag, sign, winner, active):
0 [8, 8, 51] [1, 1, 1] 0 True
1 [31, 37, 38] [1, -1, -1] 1 True
2 [45, 1, 31] [1, 1, -1] 2 True
3 [9, 25, 59] [-1, 1, -1] 3 

V0.5.1 Patch

In [None]:
# UniversalISA prototype v0.5.1 (Colab single-cell)
# Patch: commit_full90 now appends Q (2 bytes, little-endian) to its domain separator to address a commit collision
# Adds to v0.3:
# - Batch-level DEDUP/ALIAS (state sharing detection) keyed off FULL 90-bit commitment
# - Winner selection + alias map + freed qubits list
# - Feed-forward state registers (tag_u64, initiator, swap counts)
# - Second instruction step that executes only ACTIVE (winner) qubits and fills aliases deterministically
# - Mode switch:
#     MODE="FREE"      => aliases are reassigned new NEC work states for next instruction
#     MODE="PAIR_HUNT" => aliases stay tied to winner state (useful for paired-state hunting)

import tensorflow as tf
import numpy as np
import hashlib
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

# -----------------------------
# Global constants / fixed geometry
# -----------------------------
# Number of qubits (rows) in every per-qubit array.
Q = 64
# Thresholds used by detect_collapse_triplet_scatter: a slot qualifies when
# real >= TAU_HI and unreal <= TAU_LOW, or when real/unreal exceeds R_FOR_RATIO.
TAU_HI      = 1.0
TAU_LOW     = -1.0
# Near-zero guard for divisions and for the bitmap threshold.
EPS         = 1e-6
R_FOR_RATIO = 64.0

# 1 at the prime slot indices within 0..29 (2,3,5,7,11,13,17,19,23,29), 0 elsewhere.
PRIME_MASK_30 = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# 10 triplets over 30 slots: [0..2],[3..5],...,[27..29]
TRIPLET_IDX = tf.constant([[3*t,3*t+1,3*t+2] for t in range(10)], dtype=tf.int32)

# -----------------------------
# Pack/bitslice utilities
# -----------------------------
def pack30_to_u32_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """
    Pack each row of 30 bit values into one uint32 word.

    The value in column k is shifted left by k and the shifted values are
    summed along axis 1, so column 0 ends up in the least significant bit.
    """
    bit_positions = tf.cast(tf.range(30), tf.uint32)
    as_u32 = tf.cast(bits30_i32, tf.uint32)
    weighted = tf.bitwise.left_shift(as_u32, bit_positions)
    return tf.reduce_sum(weighted, axis=1)  # [Q] u32

def bitslice_30_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """
    Transpose a bit matrix into one uint64 word per column.

    Row q is weighted by 1 << q and every column is summed over its rows, so
    bit q of output word k carries the value found at row q, column k.
    NOTE(review): the weights are uint64, so this presumably expects at most
    64 rows of 0/1 values - confirm against callers.
    """
    n_rows = tf.shape(bits30_i32)[0]
    row_weight = tf.bitwise.left_shift(tf.constant(1, tf.uint64), tf.cast(tf.range(n_rows), tf.uint64))
    by_column = tf.transpose(tf.cast(bits30_i32, tf.uint64), [1, 0])
    return tf.reduce_sum(by_column * tf.expand_dims(row_weight, axis=0), axis=1)

def pack90_from_3x30_u32(packed3_u32: np.ndarray) -> np.ndarray:
    """
    Combine three 30-bit words per row into a 90-bit value held in two uint64.

    packed3_u32: [Q,3] uint32; only the low 30 bits of each word are used.
    Returns:     [Q,2] uint64
      column 0 : word 0 in bits 0..29, word 1 in bits 30..59,
                 the low 4 bits of word 2 in bits 60..63
      column 1 : the remaining 26 bits of word 2
    """
    low30 = np.uint32((1 << 30) - 1)
    lanes = (packed3_u32 & low30).astype(np.uint64)
    first, second, third = lanes[:, 0], lanes[:, 1], lanes[:, 2]

    packed = np.zeros((packed3_u32.shape[0], 2), dtype=np.uint64)
    packed[:, 0] = first | (second << np.uint64(30)) | ((third & np.uint64(0xF)) << np.uint64(60))
    packed[:, 1] = third >> np.uint64(4)
    return packed

# -----------------------------
# Phase-dual core ops
# -----------------------------
def add_pd(a, b):
    """Component-wise sum of two phase-dual values."""
    total = a + b
    return total
def mul_pd(a, b):
    """Component-wise product of two phase-dual values."""
    product = a * b
    return product
def div_pd(a, b):
    """Component-wise quotient a/b, replaced by 0 wherever |b| does not exceed EPS."""
    divisor_ok = tf.abs(b) > EPS
    quotient = a / b
    return tf.where(divisor_ok, quotient, tf.zeros_like(a))

# -----------------------------
# NEC selectors (true phase-dual) + initiator logic
# -----------------------------
def sel_real(m: tf.Tensor) -> tf.Tensor:
    """Real selector: pair each magnitude m with its negation as [+m, -m] along axis 1."""
    positive, negative = m, tf.negative(m)
    return tf.stack([positive, negative], axis=1)

def sel_unreal(m: tf.Tensor) -> tf.Tensor:
    """Unreal selector: pair each magnitude m with its negation as [-m, +m] along axis 1."""
    positive, negative = m, tf.negative(m)
    return tf.stack([negative, positive], axis=1)

def build_primaries_from_nec(mag_xyz_i32: tf.Tensor, base_sign_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Build the six primaries of every qubit from its NEC magnitudes and signs.

    mag_xyz_i32:       [Q,3] int32 magnitudes >= 0 (NEC absolute coordinate)
    base_sign_xyz_i32: [Q,3] int32 in {-1,+1}, the base NEC sign per axis:
      +1 => axis initiated real   => pair order (RealSelector, UnrealSelector)
      -1 => axis initiated unreal => pair order (UnrealSelector, RealSelector)

    Returns primaries [Q,6,2] float32 in canonical pair sequence
    [X0, X1, Y0, Y1, Z0, Z1], each entry being a 2-vector [real, unreal].
    """
    magnitudes = tf.cast(mag_xyz_i32, tf.float32)
    unreal_first = base_sign_xyz_i32 < 0  # [Q,3]

    primaries = []
    for axis in range(3):
        m = magnitudes[:, axis]
        real_sel, unreal_sel = sel_real(m), sel_unreal(m)
        flip = unreal_first[:, axis:axis + 1]  # kept 2-D so it broadcasts over the pair
        primaries.append(tf.where(flip, unreal_sel, real_sel))
        primaries.append(tf.where(flip, real_sel, unreal_sel))

    return tf.stack(primaries, axis=1)  # [Q,6,2]

def initiator_from_shared_counts(mag_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Choose an initiator axis (0=x, 1=y, 2=z) for every row.

    For each axis, a row's count is the number of rows (itself included) that
    hold the same magnitude on that axis; an axis is "shared" when that count
    exceeds 1. The choice depends on how many axes are shared:
      0 shared : axis with the largest magnitude (bias favours x, then y)
      1 shared : axis with the largest count
      2 shared : the first axis whose count is exactly 1
      3 shared : axis with the largest count

    Returns an int32 tensor with one axis index per row.
    """
    per_axis = []
    for axis in range(3):
        column = mag_xyz_i32[:, axis]
        same = tf.equal(tf.expand_dims(column, 1), tf.expand_dims(column, 0))
        per_axis.append(tf.reduce_sum(tf.cast(same, tf.int32), axis=1))
    counts = tf.stack(per_axis, axis=1)  # [Q,3]

    n_shared = tf.reduce_sum(tf.cast(counts > 1, tf.int32), axis=1)  # [Q]

    by_count = tf.argmax(counts, axis=1, output_type=tf.int32)
    lone_axis = tf.argmax(tf.cast(tf.equal(counts, 1), tf.int32), axis=1, output_type=tf.int32)
    tie_bias = tf.constant([2, 1, 0], tf.int32)
    by_magnitude = tf.argmax(mag_xyz_i32 * 1000 + tie_bias, axis=1, output_type=tf.int32)

    # The cases are mutually exclusive, so they can be layered in any order.
    chosen = by_magnitude
    chosen = tf.where(n_shared == 3, by_count, chosen)
    chosen = tf.where(n_shared == 2, lone_axis, chosen)
    chosen = tf.where(n_shared == 1, by_count, chosen)
    return chosen  # [Q]

def permute_primaries_by_initiator(prim6: tf.Tensor, initiator_axis: tf.Tensor) -> tf.Tensor:
    """
    Rotate the six primaries of each row so the initiator's pair comes first.

    Axis 0 keeps the order, axis 1 yields [2,3,4,5,0,1], and any other value
    yields [4,5,0,1,2,3]. The reordering is applied per row along axis 1.
    """
    rows = tf.shape(prim6)[0]

    def _tiled(order):
        # Same slot order repeated for every row.
        return tf.broadcast_to(tf.constant(order, tf.int32), [rows, 6])

    lead_x = _tiled([0, 1, 2, 3, 4, 5])
    lead_y = _tiled([2, 3, 4, 5, 0, 1])
    lead_z = _tiled([4, 5, 0, 1, 2, 3])

    pick_x = tf.expand_dims(initiator_axis == 0, 1)
    pick_y = tf.expand_dims(initiator_axis == 1, 1)
    order = tf.where(pick_x, lead_x, tf.where(pick_y, lead_y, lead_z))

    return tf.gather(prim6, order, axis=1, batch_dims=1)

# -----------------------------
# 30-register with (+,-,*,/) semantics and add/sub-only reorder (canonicalization)
# -----------------------------
def build_register30(prim6: tf.Tensor,
                     canonicalize_addsub: bool = True) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Expand six primaries into the 30-slot register.

    Layout of the register along axis 1:
      slots 0..5  : the six primaries, unchanged
      slots 6..29 : eight (add, sub, op3) triplets, one per interaction pair;
                    op3 is a product for the first four pairs and a guarded
                    quotient (div_pd) for the last four

    With canonicalize_addsub=True the add and sub entries of a triplet trade
    places wherever the first component of the difference exceeds the first
    component of the sum, and the swap flag records where that happened.
    Otherwise all flags are zero.

    Returns (register [Q,30,2], swap_flags [Q,8] int32).
    """
    a0, a1, b0, b1, c0, c1 = tf.unstack(prim6, axis=1)

    # (left operand, right operand, third entry is a product?)
    interactions = (
        (a0, b0, True),
        (b0, c0, True),
        (a0, c0, True),
        (a1, b1, True),
        (a0, b1, False),
        (b0, c1, False),
        (a1, c0, False),
        (b1, c1, False),
    )

    slots = [a0, a1, b0, b1, c0, c1]
    flags = []
    for lhs, rhs, is_product in interactions:
        total = add_pd(lhs, rhs)
        diff = lhs - rhs
        third = mul_pd(lhs, rhs) if is_product else div_pd(lhs, rhs)

        if canonicalize_addsub:
            flag = tf.cast(diff[:, 0] > total[:, 0], tf.int32)  # [Q]
            swapped = flag[:, None] > 0
            total, diff = tf.where(swapped, diff, total), tf.where(swapped, total, diff)
        else:
            flag = tf.zeros([tf.shape(lhs)[0]], tf.int32)

        # The flag is recorded for every triplet, swapped or not.
        flags.append(flag)
        slots.extend([total, diff, third])

    return tf.stack(slots, axis=1), tf.stack(flags, axis=1)

# -----------------------------
# Collapse/parity/bitmap with triplet loop + scatter update retained
# -----------------------------
def detect_collapse_triplet_scatter(pairs: tf.Tensor,
                                   tau_hi: float = TAU_HI,
                                   tau_low: float = TAU_LOW,
                                   r_for_ratio: float = R_FOR_RATIO) -> tf.Tensor:
    """
    Flag collapsed slots of a register and return the flags as an int32 mask.

    pairs holds [real, unreal] in its last dimension. A slot is flagged when
      real >= tau_hi and unreal <= tau_low, or
      real / unreal > r_for_ratio (the ratio counts as 0 where |unreal| <= EPS).

    The mask has the shape of pairs without its last dimension. The triplet
    loop then visits the 10 triplets listed in TRIPLET_IDX and scatters a
    per-triplet update back into the mask.

    NOTE(review): in both branches of the tf.where below the update equals the
    flags gathered for that triplet (a uniform triplet tiled from its first
    entry reproduces itself), so the scatter looks like it writes back values
    the mask already holds - confirm whether a different rule for uniform
    triplets was intended.
    """
    real = pairs[..., 0]
    unreal = pairs[..., 1]
    Qn = tf.shape(pairs)[0]

    cond1 = tf.logical_and(real >= tau_hi, unreal <= tau_low)
    # Guarded division: slots with a near-zero unreal part get ratio 0.
    ratio = tf.where(tf.abs(unreal) > EPS, real / unreal, tf.zeros_like(real))
    cond2 = ratio > r_for_ratio
    individual = tf.logical_or(cond1, cond2)

    final_mask = tf.cast(individual, tf.int32)

    for t in tf.range(10):
        idx3 = TRIPLET_IDX[t]
        trip_ind = tf.gather(individual, idx3, axis=1)
        # True where all three flags of the triplet agree.
        is_uniform = tf.reduce_all(tf.equal(trip_ind, trip_ind[:, 0:1]), axis=1)
        uniform_val = tf.cast(trip_ind[:, 0], tf.int32)

        updates = tf.where(
            tf.expand_dims(is_uniform, axis=1),
            tf.tile(tf.expand_dims(uniform_val, axis=1), [1,3]),
            tf.cast(trip_ind, tf.int32)
        )

        # (row, slot) index pairs covering this triplet in every row.
        q_idx = tf.repeat(tf.range(Qn), repeats=3)
        p_idx = tf.tile(idx3, multiples=[Qn])
        scatter_idx = tf.stack([q_idx, p_idx], axis=1)
        final_mask = tf.tensor_scatter_nd_update(final_mask, scatter_idx, tf.reshape(updates, [-1]))

    return final_mask

def apply_parity_rotation(pairs: tf.Tensor, collapse_mask: tf.Tensor):
    """
    Negate both components of every affected slot.

    A slot is affected when its index is marked in PRIME_MASK_30 or when
    collapse_mask is positive there.

    Returns (rotated pairs, affected mask as int32).
    """
    n_rows = tf.shape(pairs)[0]
    prime_slots = tf.broadcast_to(tf.expand_dims(PRIME_MASK_30, 0), [n_rows, 30]) > 0
    flip = tf.logical_or(prime_slots, collapse_mask > 0)
    affected = tf.cast(flip, tf.int32)
    sign = tf.where(flip, tf.constant(-1.0, tf.float32), tf.constant(1.0, tf.float32))
    return pairs * tf.expand_dims(sign, -1), affected

def bitmap(rotated_pairs: tf.Tensor) -> tf.Tensor:
    """Return 1 (int32) where the first component of a pair exceeds EPS, else 0."""
    first_component = rotated_pairs[..., 0]
    return tf.cast(tf.greater(first_component, EPS), tf.int32)

# -----------------------------
# One view compute: prim6 -> reg30 -> collapse/parity -> bits30
# -----------------------------
def compute_bits_for_view(prim6: tf.Tensor,
                          canonicalize_addsub: bool = True) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    Evaluate one view: primaries -> register -> collapse/parity -> bits.

    Returns (bits, swap_flags, collapse_mask, parity_mask), in that order.
    """
    register, swap_flags = build_register30(prim6, canonicalize_addsub=canonicalize_addsub)
    collapse_mask = detect_collapse_triplet_scatter(register)
    rotated, parity_mask = apply_parity_rotation(register, collapse_mask)
    return bitmap(rotated), swap_flags, collapse_mask, parity_mask

# -----------------------------
# Commitments keyed off full 3-view payload
# -----------------------------
def commit_full90(packed_u32_xyz: np.ndarray,
                  initiator_axis: np.ndarray,
                  swapcount_xyz: np.ndarray,
                  instr_counter: int,
                  domain_sep: bytes = b"NTHISA90") -> List[bytes]:
    """
    Produce one 32-byte BLAKE2s commitment per row.

    packed_u32_xyz: [Q,3] uint32 (x/y/z view packed 30-bit)
    initiator_axis: [Q] uint8
    swapcount_xyz:  [Q,3] uint8 (#swaps in 8 interaction triplets per view; 0..8)

    Hashed message, all integers little-endian and unsigned:
      domain_sep | Q (2 bytes) | instr_counter (4 bytes) |
      three packed views (4 bytes each) | initiator (1 byte) |
      three swap counts (1 byte each)
    """
    def _le(value, width):
        # Fixed-width little-endian encoding of a non-negative integer.
        return int(value).to_bytes(width, "little", signed=False)

    # Patch v0.5.1: Q is part of the domain separator.
    prefix = domain_sep + Q.to_bytes(2, "little", signed=False)

    digests = []
    for q in range(packed_u32_xyz.shape[0]):
        fields = [prefix, _le(instr_counter, 4)]
        fields.extend(_le(packed_u32_xyz[q, view], 4) for view in range(3))
        fields.append(_le(initiator_axis[q], 1))
        fields.extend(_le(swapcount_xyz[q, view], 1) for view in range(3))
        digests.append(hashlib.blake2s(b"".join(fields), digest_size=32).digest())
    return digests

def tag_u64_from_commit(commit32: bytes, q_idx: int, instr_counter: int) -> np.uint64:
    """
    Derive a 64-bit tag from a commitment, a qubit index and the instruction counter.

    An 8-byte BLAKE2s digest is taken over b"NTH_TAG0", the commit bytes, the
    index (2 bytes, little-endian) and the counter (4 bytes, little-endian);
    the digest is read back as a little-endian unsigned integer.
    """
    index_bytes = int(q_idx).to_bytes(2, "little", signed=False)
    counter_bytes = int(instr_counter).to_bytes(4, "little", signed=False)

    hasher = hashlib.blake2s(digest_size=8)
    for part in (b"NTH_TAG0", commit32, index_bytes, counter_bytes):
        hasher.update(part)

    return np.uint64(int.from_bytes(hasher.digest(), "little", signed=False))

@dataclass
class DedupResult:
    """
    Outcome of grouping qubits by commitment (see dedup_by_commit).

    Fields are kept in their original order so positional construction
    DedupResult(winner_of, active_mask, groups, freed, collision_qubits)
    keeps working.
    """
    # winner_of[q]: index of the qubit that won q's group
    winner_of: np.ndarray
    # active_mask[q]: False for every non-winning member of a group
    active_mask: np.ndarray
    # groups: commitment digest -> member qubit indices
    groups: Dict[bytes, List[int]] = field(default_factory=dict)
    # freed: non-winning qubits collected in FREE mode
    freed: List[int] = field(default_factory=list)
    # collision_qubits: number of non-winning members across all groups.
    # It must carry a default: a dataclass field without one may not follow
    # fields that have defaults (TypeError when the class is created).
    collision_qubits: int = 0

def dedup_by_commit(commits: List[bytes], efficiency_score: np.ndarray, mode: str) -> DedupResult:
    """
    Group qubits that share a commitment and elect one winner per group.

    The winner of a group is the member with the lowest efficiency score,
    ties going to the lowest qubit index. Every other member is marked
    inactive, counted as a collision and, when mode == "FREE", appended to
    the freed list.
    """
    groups: Dict[bytes, List[int]] = {}
    for q, digest in enumerate(commits):
        if digest in groups:
            groups[digest].append(q)
        else:
            groups[digest] = [q]

    total = len(commits)
    winner_of = np.arange(total, dtype=np.int32)
    active_mask = np.ones(total, dtype=bool)
    freed: List[int] = []
    collision_qubits = 0
    release = (mode == "FREE")

    for members in groups.values():
        # A singleton group contributes zero collisions.
        collision_qubits += len(members) - 1

        # winner = min efficiency_score, tie -> min q index
        winner = min(members, key=lambda q: (int(efficiency_score[q]), q))
        for q in members:
            winner_of[q] = winner
            if q == winner:
                continue
            active_mask[q] = False
            if release:
                freed.append(q)

    return DedupResult(winner_of, active_mask, groups, freed, collision_qubits)

def derive_new_nec_for_freed(commit32: bytes, q_idx: int, instr_counter: int) -> Tuple[np.ndarray, np.ndarray]:
    """
    Derive a replacement NEC state for a freed qubit from its commitment.

    A 16-byte BLAKE2s digest is taken over the tag b"NTH_FREE0", the commit
    bytes, the qubit index (2 bytes, little-endian) and the instruction
    counter (4 bytes, little-endian).

    Returns (mags, signs), both int32 arrays of length 3:
      mags  : digest bytes 0..2, each reduced modulo 64
      signs : +1 where bit 0/1/2 of digest byte 3 is set, otherwise -1
    """
    payload = b"".join((
        b"NTH_FREE0",
        commit32,
        int(q_idx).to_bytes(2, "little", signed=False),
        int(instr_counter).to_bytes(4, "little", signed=False),
    ))
    digest = hashlib.blake2s(payload, digest_size=16).digest()

    mags = np.array([byte % 64 for byte in digest[:3]], dtype=np.int32)
    sign_byte = digest[3]
    signs = np.array([1 if (sign_byte >> bit) & 1 else -1 for bit in range(3)], dtype=np.int32)
    return mags, signs

# -----------------------------
# Interpreter state
# -----------------------------
@dataclass
class ISAState:
    """
    Mutable interpreter state that the op_* handlers read and update.

    Every array field is sized by the module-level Q (one row per qubit) and
    is allocated fresh for each instance through a default factory.
    """
    instr_counter: int = 0
    mode: str = "FREE"                  # FREE or PAIR_HUNT
    exec_active_only: bool = False      # if True, heavy ops run only on active winners

    # NEC state
    # mag_xyz: per-axis magnitudes; base_sign: per-axis signs (defaults to all +1)
    mag_xyz: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.int32))
    base_sign: np.ndarray = field(default_factory=lambda: np.ones((Q,3), dtype=np.int32))

    # Derived registers
    # initiator: axis index per qubit (0=x, 1=y, 2=z), written by op_INIT_SELECT
    initiator: np.ndarray = field(default_factory=lambda: np.zeros((Q,), dtype=np.int32))
    # packed_views_u32: one packed 30-bit word per view (x, y, z) per qubit
    packed_views_u32: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.uint32))
    # packed90_u64: the three packed views combined into two uint64 words per qubit
    packed90_u64: np.ndarray = field(default_factory=lambda: np.zeros((Q,2), dtype=np.uint64))
    # bits_views: 30 bits for each of the three views
    bits_views: np.ndarray = field(default_factory=lambda: np.zeros((Q,3,30), dtype=np.int32))
    # swapcount_xyz: summed swap flags per view
    swapcount_xyz: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.uint8))
    # commits: one digest per qubit; empty byte strings until op_COMMIT runs
    commits: List[bytes] = field(default_factory=lambda: [b""]*Q)
    # tags_u64: one 64-bit tag per qubit, written by op_COMMIT
    tags_u64: np.ndarray = field(default_factory=lambda: np.zeros((Q,), dtype=np.uint64))

    # Dedup scheduling
    # dedup stays None until op_DEDUP has run; active_mask / winner_of start as
    # "everyone active, everyone its own winner".
    dedup: Optional[DedupResult] = None
    active_mask: np.ndarray = field(default_factory=lambda: np.ones((Q,), dtype=bool))
    winner_of: np.ndarray = field(default_factory=lambda: np.arange(Q, dtype=np.int32))

# -----------------------------
# Interpreter core: ops
# -----------------------------
def op_NEC_LOAD(st: ISAState, seed: int = 11, mag_range: int = 64):
    """
    Fill the NEC magnitudes and base signs with pseudo-random values.

    The generator is seeded with seed + st.instr_counter, so the same seed
    yields different (but reproducible) loads at different instruction counts.
    Magnitudes are drawn from [0, mag_range); signs are -1 or +1.
    """
    generator = np.random.default_rng(seed + st.instr_counter)
    shape = (Q, 3)
    # Draw order is part of the reproducible stream: magnitudes, then sign bits.
    magnitudes = generator.integers(0, mag_range, size=shape, dtype=np.int32)
    coin = generator.integers(0, 2, size=shape, dtype=np.int32)
    st.mag_xyz = magnitudes
    st.base_sign = np.where(coin > 0, 1, -1).astype(np.int32)

def op_INIT_SELECT(st: ISAState):
    """Recompute st.initiator (int32, one axis index per qubit) from st.mag_xyz."""
    magnitudes = tf.constant(st.mag_xyz, dtype=tf.int32)
    axis_per_qubit = initiator_from_shared_counts(magnitudes)
    st.initiator = axis_per_qubit.numpy().astype(np.int32)

def _compute_views_full(st: ISAState):
    """
    Run the fused register pipeline for all Q qubits and store the results on st.

    For each of the three views (primaries permuted with a constant initiator
    axis 0, 1, 2) the pipeline builds the 30-slot register, detects collapse,
    applies the parity rotation, thresholds to bits and packs them.

    Writes (replacing the previous arrays):
      st.bits_views        int32  [Q,3,30]
      st.packed_views_u32  uint32 [Q,3]
      st.swapcount_xyz     uint8  [Q,3]
      st.packed90_u64      uint64 [Q,2]

    st.initiator is not used in the full compute.
    """
    magnitudes = tf.constant(st.mag_xyz, dtype=tf.int32)
    signs = tf.constant(st.base_sign, dtype=tf.int32)
    primaries = build_primaries_from_nec(magnitudes, signs)  # [Q,6,2]

    def _view(axis):
        # One view: permute, build the register, rotate, threshold, pack.
        prim6 = permute_primaries_by_initiator(primaries, tf.fill([Q], tf.constant(axis, tf.int32)))
        register, swap_flags = build_register30(prim6, canonicalize_addsub=True)
        collapse_mask = detect_collapse_triplet_scatter(register)
        rotated, _ = apply_parity_rotation(register, collapse_mask)
        view_bits = bitmap(rotated)
        return view_bits, pack30_to_u32_tf(view_bits), tf.reduce_sum(swap_flags, axis=1)

    per_view = [_view(axis) for axis in range(3)]

    st.bits_views = np.stack([bits.numpy().astype(np.int32) for bits, _, _ in per_view], axis=1)
    st.packed_views_u32 = tf.stack([packed for _, packed, _ in per_view], axis=1).numpy().astype(np.uint32)
    st.swapcount_xyz = np.stack(
        [swaps.numpy().astype(np.uint8) for _, _, swaps in per_view], axis=1
    ).astype(np.uint8)
    st.packed90_u64 = pack90_from_3x30_u32(st.packed_views_u32)

def _compute_views_active_only(st: ISAState):
    """
    Run the fused register pipeline for active (winner) qubits only.

    The rows of st.bits_views, st.packed_views_u32 and st.swapcount_xyz are
    zeroed in place, filled for the active qubits, and then:
      PAIR_HUNT : every alias row is copied from its winner's row
      otherwise : alias rows stay zero (not computed this cycle)
    st.packed90_u64 is rebuilt from the packed views at the end.

    When no qubit is active, all outputs (commits and tags included) are
    cleared and nothing is computed.
    """
    active_idx = np.where(st.active_mask)[0].astype(np.int32)
    alias_idx = np.where(~st.active_mask)[0].astype(np.int32)
    Qa = int(active_idx.shape[0])

    if Qa == 0:
        # nothing active; clear outputs
        st.bits_views[:] = 0
        st.packed_views_u32[:] = 0
        st.swapcount_xyz[:] = 0
        st.packed90_u64[:] = 0
        st.commits = [b""]*Q
        st.tags_u64[:] = 0
        return

    mag_a = tf.constant(st.mag_xyz[active_idx], dtype=tf.int32)
    sign_a = tf.constant(st.base_sign[active_idx], dtype=tf.int32)
    prim0_a = build_primaries_from_nec(mag_a, sign_a)
    # The initiator is not recomputed here: each view below uses a constant
    # axis, so none of the outputs depend on it.

    def eval_view(axis):
        # One view over the active subset: permute, build, rotate, threshold, pack.
        prim6 = permute_primaries_by_initiator(prim0_a, tf.fill([Qa], tf.constant(axis, tf.int32)))
        reg30, swaps = build_register30(prim6, canonicalize_addsub=True)
        collapse = detect_collapse_triplet_scatter(reg30)
        rotated, _ = apply_parity_rotation(reg30, collapse)
        bits = bitmap(rotated)
        return bits, pack30_to_u32_tf(bits), tf.reduce_sum(swaps, axis=1)

    bits_per_view, packed_per_view, swaps_per_view = [], [], []
    for axis in range(3):
        bits, packed, swapcount = eval_view(axis)
        bits_per_view.append(bits.numpy().astype(np.int32))
        packed_per_view.append(packed)
        swaps_per_view.append(swapcount.numpy().astype(np.uint8))

    bits_views_a = np.stack(bits_per_view, axis=1)
    packed_a = tf.stack(packed_per_view, axis=1).numpy().astype(np.uint32)
    swapcount_a = np.stack(swaps_per_view, axis=1)

    # Scatter active results into the full arrays (active_idx has no repeats).
    st.bits_views[:] = 0
    st.packed_views_u32[:] = 0
    st.swapcount_xyz[:] = 0
    st.bits_views[active_idx] = bits_views_a
    st.packed_views_u32[active_idx] = packed_a
    st.swapcount_xyz[active_idx] = swapcount_a

    # Fill aliases. In FREE mode freed qubits may be doing new work later;
    # for now their payload stays 0 (uncomputed this cycle).
    if st.mode == "PAIR_HUNT":
        # aliases copy winner payload
        for q in alias_idx:
            w = st.winner_of[q]
            st.bits_views[q] = st.bits_views[w]
            st.packed_views_u32[q] = st.packed_views_u32[w]
            st.swapcount_xyz[q] = st.swapcount_xyz[w]

    st.packed90_u64 = pack90_from_3x30_u32(st.packed_views_u32)

def op_REG30_BUILD_BITMAP_PACK(st: ISAState):
    """
    Fused execution of REG30_BUILD + COLLAPSE + PARITY + BITMAP + PACK.

    The IR still names the individual primitives; this handler performs all of
    them in one step. The active-only path is taken only when it has been
    enabled and a dedup result exists; otherwise every qubit is computed.
    """
    subset_only = st.exec_active_only and st.dedup is not None
    compute = _compute_views_active_only if subset_only else _compute_views_full
    compute(st)

def op_COMMIT(st: ISAState):
    """Commit every qubit's packed payload and derive its 64-bit tag.

    Writes ``st.commits`` (one digest per qubit, from ``commit_full90``) and
    ``st.tags_u64`` (one ``tag_u64_from_commit`` value per qubit).
    """
    epoch = st.instr_counter
    initiator_u8 = st.initiator.astype(np.uint8)
    swapcount_u8 = st.swapcount_xyz.astype(np.uint8)
    st.commits = commit_full90(st.packed_views_u32, initiator_u8, swapcount_u8, epoch)

    tags = [tag_u64_from_commit(st.commits[q], q, epoch) for q in range(Q)]
    st.tags_u64 = np.array(tags, dtype=np.uint64)

def op_DEDUP(st: ISAState):
    """Group qubits by commitment and record winners / active mask on the state.

    The efficiency score is a prototype: the total swap count over the three
    views, so fewer swaps means cheaper.
    """
    swap_cost = np.sum(st.swapcount_xyz, axis=1).astype(np.int32)
    result = dedup_by_commit(st.commits, swap_cost, st.mode)
    st.dedup = result
    # Copies keep later in-place edits of the state away from the stored result.
    st.active_mask = result.active_mask.copy()
    st.winner_of = result.winner_of.copy()

def op_FREE_ALIAS(st: ISAState):
    """Update the NEC registers of every non-active (alias) qubit.

    Does nothing when no dedup result exists.  In ``PAIR_HUNT`` mode an alias
    inherits its winner's magnitudes and signs (it stays paired).  In any other
    mode (FREE) the alias receives a new NEC state derived deterministically
    from its own commit, its index, and the next instruction counter.
    """
    if st.dedup is None:
        return

    losers = np.nonzero(~st.active_mask)[0].astype(np.int32)
    pair_hunt = st.mode == "PAIR_HUNT"
    next_epoch = st.instr_counter + 1

    for q in losers:
        if pair_hunt:
            winner = st.winner_of[q]
            st.mag_xyz[q] = st.mag_xyz[winner]
            st.base_sign[q] = st.base_sign[winner]
        else:
            st.mag_xyz[q], st.base_sign[q] = derive_new_nec_for_freed(
                st.commits[q], q, next_epoch
            )

def op_EXEC_ACTIVE_ONLY(st: ISAState, enabled: bool = True):
    """Set the ``exec_active_only`` flag on the state, coerced to a plain bool."""
    flag = bool(enabled)
    st.exec_active_only = flag

def op_NEXT(st: ISAState):
    """Advance the instruction counter by one."""
    st.instr_counter = st.instr_counter + 1

# -----------------------------
# Interpreter runner
# -----------------------------
def _fused_into_reg30_build(st, **k):
    """No-op handler for a named primitive whose work is fused into REG30_BUILD."""
    return None


# Opcode name -> handler.  Every handler is called as ``fn(st, **ins.args)``.
OP_TABLE = {
    "NEC_LOAD": op_NEC_LOAD,
    "INIT_SELECT": op_INIT_SELECT,
    "REG30_BUILD": op_REG30_BUILD_BITMAP_PACK,  # fused for now
    "COLLAPSE": _fused_into_reg30_build,
    "PARITY": _fused_into_reg30_build,
    "BITMAP": _fused_into_reg30_build,
    "PACK": _fused_into_reg30_build,
    "COMMIT": op_COMMIT,
    "DEDUP": op_DEDUP,
    "FREE_ALIAS": op_FREE_ALIAS,
    "EXEC_ACTIVE_ONLY": op_EXEC_ACTIVE_ONLY,
    "NEXT": op_NEXT,
}

def run_program(st: ISAState, program: List[Instr], verbose: bool = True):
    """Execute an instruction stream against ``st``, optionally tracing stages.

    Each instruction is looked up in ``OP_TABLE`` and called with the state and
    the instruction's keyword arguments.

    Raises:
        ValueError: if an instruction names an opcode missing from ``OP_TABLE``.
    """
    for ins in program:
        if ins.op not in OP_TABLE:
            raise ValueError(f"Unknown opcode: {ins.op}")
        OP_TABLE[ins.op](st, **ins.args)

        if not verbose:
            continue

        # One trace line per traced stage; other opcodes are silent.
        opcode = ins.op
        if opcode == "REG30_BUILD":
            print(f"[t={st.instr_counter}] REG30_BUILD done. packed90_u64[0]={st.packed90_u64[0].tolist()}")
        elif opcode == "COMMIT":
            print(f"[t={st.instr_counter}] COMMIT done. commit0={st.commits[0].hex()[:16]}...")
        elif opcode == "DEDUP":
            print(f"[t={st.instr_counter}] DEDUP done. collisions={st.dedup.collision_qubits} groups={len(st.dedup.groups)} active={int(st.active_mask.sum())}")
        elif opcode == "FREE_ALIAS":
            print(f"[t={st.instr_counter}] FREE/ALIAS applied. mode={st.mode} aliases={Q-int(st.active_mask.sum())}")
        elif opcode == "NEXT":
            print(f"--- NEXT instr_counter={st.instr_counter} ---")

# -----------------------------
# Demo microprogram: two instruction cycles with DEDUP + FREE/ALIAS + EXEC_ACTIVE_ONLY
# -----------------------------
# Fresh interpreter state in FREE mode, starting at instruction counter 0.
st = ISAState(mode="FREE", instr_counter=0)
# Seed the global NumPy and TensorFlow RNGs so repeated runs are reproducible.
np.random.seed(11)
tf.random.set_seed(11)

# Two instruction cycles.  COLLAPSE/PARITY/BITMAP/PACK are not listed because
# this cell's REG30_BUILD handler is the fused build+collapse+parity+bitmap+pack op.
program = [
    Instr("NEC_LOAD", {"seed": 11}),
    Instr("INIT_SELECT"),
    Instr("REG30_BUILD"),
    Instr("COMMIT"),
    Instr("DEDUP"),
    Instr("FREE_ALIAS"),
    Instr("EXEC_ACTIVE_ONLY", {"enabled": True}),
    Instr("NEXT"),

    # second cycle (will compute heavy ops only for active winners if exec_active_only=True)
    Instr("INIT_SELECT"),
    Instr("REG30_BUILD"),
    Instr("COMMIT"),
    Instr("DEDUP"),
    Instr("FREE_ALIAS"),
    Instr("NEXT"),
]

print("=== UniversalISA v0.5 Interpreter Demo ===")
print("MODE:", st.mode, "EXEC_ACTIVE_ONLY initially:", st.exec_active_only)
run_program(st, program, verbose=True)

# Summary of the state left behind by the last executed instruction.
print("\n=== Final State Snapshot ===")
print("instr_counter:", st.instr_counter)
print("initiator axis counts:", np.bincount(st.initiator, minlength=3).tolist(), "(0=x,1=y,2=z)")
print("packed_views_u32[0]:", st.packed_views_u32[0].tolist())
print("packed90_u64[0]:", st.packed90_u64[0].tolist())
print("active qubits:", int(st.active_mask.sum()), "aliases:", Q-int(st.active_mask.sum()))
print("tag[0]:", int(st.tags_u64[0]))
print("Sample qubits (q, mag, sign, winner, active):")
# Only the first eight qubits are printed.
for q in range(8):
    print(q, st.mag_xyz[q].tolist(), st.base_sign[q].tolist(), int(st.winner_of[q]), bool(st.active_mask[q]))


=== UniversalISA v0.5 Interpreter Demo ===
MODE: FREE EXEC_ACTIVE_ONLY initially: False
[t=0] REG30_BUILD done. packed90_u64[0]=[9232960233523929384, 525330]
[t=0] COMMIT done. commit0=bcf249254eb62024...
[t=0] DEDUP done. collisions=15 groups=49 active=49
[t=0] FREE/ALIAS applied. mode=FREE aliases=15
--- NEXT instr_counter=1 ---
[t=1] REG30_BUILD done. packed90_u64[0]=[9232960233523929384, 525330]
[t=1] COMMIT done. commit0=dc0ec393d083aac7...
[t=1] DEDUP done. collisions=23 groups=41 active=41
[t=1] FREE/ALIAS applied. mode=FREE aliases=23
--- NEXT instr_counter=2 ---

=== Final State Snapshot ===
instr_counter: 2
initiator axis counts: [28, 20, 16] (0=x,1=y,2=z)
packed_views_u32[0]: [8413480, 8929704, 8405288]
packed90_u64[0]: [9232960233523929384, 525330]
active qubits: 41 aliases: 23
tag[0]: 8100722577479211034
Sample qubits (q, mag, sign, winner, active):
0 [8, 8, 51] [1, 1, 1] 0 True
1 [31, 37, 38] [1, -1, -1] 1 True
2 [45, 1, 31] [1, 1, -1] 2 True
3 [9, 25, 59] [-1, 1, -1] 3 T

In [None]:
# UniversalISA v0.6 (Colab single-cell, full corrected code)
# Goals:
#  - Minimal IR/opcode format + interpreter that runs instruction streams
#  - Explicit stage opcodes: NEC_LOAD, INIT_SELECT, REG30_BUILD, COLLAPSE, PARITY, BITMAP, PACK, COMMIT, DEDUP, FREE_ALIAS, EXEC_ACTIVE_ONLY, NEXT
#  - Preserves triplet loops + scatter updates in COLLAPSE
#  - True NEC phase-dual selectors (Real=[+m,-m], Unreal=[-m,+m])
#  - 3 initiator views => 90-bit payload per qubit (3×30-bit)
#  - Add/Sub-only reorder canonicalization; MUL/DIV fixed
#  - Batch-level DEDUP keyed off full 90-bit commitment
#  - Correct FREE semantics: freed qubits are reassigned new NEC and become ACTIVE next epoch
#  - Correct initiator semantics: initiator always computed from FULL 64-qubit array (even if later stages are active-only)

import tensorflow as tf
import numpy as np
import hashlib
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

# -----------------------------
# Global constants / fixed geometry
# -----------------------------
# Number of qubits; the leading dimension of every per-qubit register.
Q = 64
# Collapse detection thresholds: a slot collapses when real >= TAU_HI and
# unreal <= TAU_LOW, or when real/unreal exceeds R_FOR_RATIO.
TAU_HI      = 1.0
TAU_LOW     = -1.0
# Near-zero guard used for safe division and for the bitmap threshold.
EPS         = 1e-6
R_FOR_RATIO = 64.0

# 1 at the prime slot indices 2,3,5,7,11,13,17,19,23,29 of the 30-slot register.
PRIME_MASK_30 = tf.constant(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=tf.int32
)

# 10 triplets over 30 slots: [0..2],[3..5],...,[27..29]
TRIPLET_IDX = tf.constant([[3*t,3*t+1,3*t+2] for t in range(10)], dtype=tf.int32)

# -----------------------------
# IR / Opcode model
# -----------------------------
@dataclass
class Instr:
    """One IR instruction: an opcode name plus keyword arguments for its handler."""
    # Opcode name; the interpreter looks it up in OP_TABLE.
    op: str
    # Keyword arguments passed to the handler as ``fn(st, **args)``; empty by default.
    args: Dict[str, Any] = field(default_factory=dict)

# -----------------------------
# Utilities: pack/bitslice
# -----------------------------
def pack30_to_u32_tf(bits30_i32: tf.Tensor) -> tf.Tensor:
    """Pack each row of 30 bits into one uint32, with slot ``i`` landing on bit ``i``.

    Args:
        bits30_i32: [Q,30] tensor of 0/1 values.

    Returns:
        [Q] uint32 tensor.
    """
    bit_positions = tf.cast(tf.range(30), tf.uint32)             # [30]
    as_u32 = tf.cast(bits30_i32, tf.uint32)                      # [Q,30]
    # Shift every bit to its own position; the row sum then equals the bitwise OR.
    positioned = tf.bitwise.left_shift(as_u32, bit_positions)    # [Q,30]
    return tf.reduce_sum(positioned, axis=1)                     # [Q] u32

def pack90_from_3x30_u32(packed3_u32: np.ndarray) -> np.ndarray:
    """Pack three 30-bit words per row into a 90-bit value held in two uint64s.

    Layout of the result, per row:
      * ``out[:, 0]`` (low 64 bits): word 0 in bits 0..29, word 1 in bits
        30..59, and the low 4 bits of word 2 in bits 60..63.
      * ``out[:, 1]`` (high 26 bits): the remaining upper 26 bits of word 2.

    Args:
        packed3_u32: [Q,3] unsigned integers; only the low 30 bits of each
            word are used, anything above is masked off.

    Returns:
        [Q,2] uint64 array.
    """
    mask30 = np.uint64((1 << 30) - 1)
    # Widen once up front so every shift below happens in uint64 and cannot wrap.
    words = np.asarray(packed3_u32).astype(np.uint64) & mask30
    b0, b1, b2 = words[:, 0], words[:, 1], words[:, 2]

    out = np.empty((words.shape[0], 2), dtype=np.uint64)
    out[:, 0] = b0 | (b1 << np.uint64(30)) | ((b2 & np.uint64(0xF)) << np.uint64(60))
    out[:, 1] = b2 >> np.uint64(4)
    return out

# -----------------------------
# Phase-dual core ops
# -----------------------------
def add_pd(a, b):
    """Element-wise sum of two phase-dual tensors."""
    return a + b


def mul_pd(a, b):
    """Element-wise product of two phase-dual tensors."""
    return a * b


def div_pd(a, b):
    """Element-wise quotient ``a / b``, giving 0 wherever ``|b| <= EPS``."""
    denominator_ok = tf.abs(b) > EPS
    return tf.where(denominator_ok, a / b, tf.zeros_like(a))

# -----------------------------
# NEC selectors (true phase-dual) + initiator logic
# -----------------------------
def sel_real(m: tf.Tensor) -> tf.Tensor:
    """Real selector: stack ``m`` into ``[+m, -m]`` pairs along axis 1."""
    positive, negative = m, -m
    return tf.stack([positive, negative], axis=1)


def sel_unreal(m: tf.Tensor) -> tf.Tensor:
    """Unreal selector: stack ``m`` into ``[-m, +m]`` pairs along axis 1."""
    positive, negative = m, -m
    return tf.stack([negative, positive], axis=1)

def build_primaries_from_nec(mag_xyz_i32: tf.Tensor, base_sign_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Build the six primary phase-dual pairs of every qubit from its NEC state.

    mag_xyz_i32: [Q,3] int32 magnitudes >=0
    base_sign_xyz_i32: [Q,3] int32 in {-1,+1}
      +1 => axis initiated real   => (RealSelector, UnrealSelector)
      -1 => axis initiated unreal => (UnrealSelector, RealSelector)
    Returns: [Q,6,2] float32 in canonical axis-pair order [X0,X1,Y0,Y1,Z0,Z1]
    """
    magnitudes = tf.cast(mag_xyz_i32, tf.float32)
    starts_unreal = base_sign_xyz_i32 < 0  # [Q,3]

    primaries = []
    for axis in range(3):
        m = magnitudes[:, axis]
        real_sel, unreal_sel = sel_real(m), sel_unreal(m)
        # Keep a trailing dim of 1 so the condition broadcasts over the pair.
        flip = starts_unreal[:, axis:axis + 1]
        primaries.append(tf.where(flip, unreal_sel, real_sel))  # slot 0 of the axis
        primaries.append(tf.where(flip, real_sel, unreal_sel))  # slot 1 of the axis

    return tf.stack(primaries, axis=1)

def initiator_from_shared_counts(mag_xyz_i32: tf.Tensor) -> tf.Tensor:
    """
    Pick each qubit's initiator axis from how its magnitudes are shared across
    the full array.

    For every axis, a qubit's count is the number of qubits (itself included)
    with the same magnitude on that axis; an axis is "shared" when that count
    exceeds 1.  The choice then depends on how many axes are shared:
      1 shared  -> the axis with the largest count
      2 shared  -> the first axis whose count is exactly 1
      3 shared  -> the axis with the largest count
      0 shared  -> the axis with the largest magnitude (tie-break: prefer x>y>z)

    Returns: [Q] int32 in {0,1,2} for (x,y,z)
    """
    def _share_count(column):
        same = tf.equal(tf.expand_dims(column, 1), tf.expand_dims(column, 0))  # [Q,Q]
        return tf.reduce_sum(tf.cast(same, tf.int32), axis=1)

    counts = tf.stack([_share_count(mag_xyz_i32[:, axis]) for axis in range(3)], axis=1)  # [Q,3]
    n_shared_axes = tf.reduce_sum(tf.cast(counts > 1, tf.int32), axis=1)

    most_shared = tf.argmax(counts, axis=1, output_type=tf.int32)
    first_unshared = tf.argmax(tf.cast(tf.equal(counts, 1), tf.int32), axis=1, output_type=tf.int32)

    tie_break = tf.constant([2, 1, 0], tf.int32)  # tie-break: prefer x>y>z
    largest_magnitude = tf.argmax(mag_xyz_i32 * 1000 + tie_break, axis=1, output_type=tf.int32)

    # Resolve from the fallback outwards; the shared-count cases are mutually exclusive.
    choice = tf.where(n_shared_axes == 3, most_shared, largest_magnitude)
    choice = tf.where(n_shared_axes == 2, first_unshared, choice)
    return tf.where(n_shared_axes == 1, most_shared, choice)

def permute_primaries_by_initiator(prim6: tf.Tensor, initiator_axis: tf.Tensor) -> tf.Tensor:
    """
    Cyclically rotate each qubit's axis pairs so its initiator axis comes first.

    prim6: [Q,6,2] in [X0,X1,Y0,Y1,Z0,Z1]
    initiator_axis: [Q] in {0,1,2}
    cyclic pairs:
      x: [X0,X1, Y0,Y1, Z0,Z1]
      y: [Y0,Y1, Z0,Z1, X0,X1]
      z: [Z0,Z1, X0,X1, Y0,Y1]
    Any initiator value other than 0 or 1 takes the z ordering.
    """
    n_rows = tf.shape(prim6)[0]

    def _every_row(order):
        return tf.broadcast_to(tf.constant(order, tf.int32), [n_rows, 6])

    picks_x = tf.expand_dims(initiator_axis == 0, 1)
    picks_y = tf.expand_dims(initiator_axis == 1, 1)

    order = tf.where(picks_y, _every_row([2, 3, 4, 5, 0, 1]), _every_row([4, 5, 0, 1, 2, 3]))
    order = tf.where(picks_x, _every_row([0, 1, 2, 3, 4, 5]), order)
    return tf.gather(prim6, order, axis=1, batch_dims=1)

# -----------------------------
# REG30 build: 10 triplets, (+,-) reorderable; (*,/) fixed
# -----------------------------
def build_register30(prim6: tf.Tensor, canonicalize_addsub: bool = True) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Expand six primaries into the 30-slot register.

    Slots 0..5 hold the primaries unchanged; they are followed by eight
    interaction triplets [ADD, SUB, OP3], where OP3 is fixed as MUL or DIV.
    With ``canonicalize_addsub`` the ADD and SUB results of a triplet are
    exchanged whenever SUB's real component exceeds ADD's; OP3 never moves.

    Returns:
      reg30: [Q,30,2]
      swap_flags: [Q,8] int32, swap decisions per interaction triplet (2..9)
    """
    A0, A1, B0, B1, C0, C1 = tf.unstack(prim6, axis=1)

    # (left operand, right operand, fixed third operation)
    interactions = (
        (A0, B0, "MUL"),
        (B0, C0, "MUL"),
        (A0, C0, "MUL"),
        (A1, B1, "MUL"),
        (A0, B1, "DIV"),
        (B0, C1, "DIV"),
        (A1, C0, "DIV"),
        (B1, C1, "DIV"),
    )

    slots = [A0, A1, B0, B1, C0, C1]
    swap_columns = []
    for lhs, rhs, third_kind in interactions:
        total = add_pd(lhs, rhs)
        difference = lhs - rhs
        third = mul_pd(lhs, rhs) if third_kind == "MUL" else div_pd(lhs, rhs)

        if canonicalize_addsub:
            # canonicalize only ADD/SUB: swap if SUB.real > ADD.real
            swap = tf.cast(difference[:, 0] > total[:, 0], tf.int32)  # [Q]
            do_swap = swap[:, None] > 0
            total, difference = (
                tf.where(do_swap, difference, total),
                tf.where(do_swap, total, difference),
            )
        else:
            swap = tf.zeros([tf.shape(lhs)[0]], tf.int32)

        slots += [total, difference, third]
        swap_columns.append(swap)

    reg30 = tf.stack(slots, axis=1)                # [Q,30,2]
    swap_flags = tf.stack(swap_columns, axis=1)    # [Q,8]
    return reg30, swap_flags

# -----------------------------
# COLLAPSE (triplet loops + scatter updates preserved)
# -----------------------------
def detect_collapse_triplet_scatter(pairs: tf.Tensor,
                                   tau_hi: float = TAU_HI,
                                   tau_low: float = TAU_LOW,
                                   r_for_ratio: float = R_FOR_RATIO) -> tf.Tensor:
    """
    Flag collapsed slots of the 30-slot register.

    A slot is flagged when (real >= tau_hi and unreal <= tau_low) or when
    real/unreal > r_for_ratio (the ratio is taken as 0 where |unreal| <= EPS).

    pairs: [Q,30,2] -> collapse_mask: [Q,30] int32
    Keeps per-triplet loop + scatter update semantics.
    """
    real = pairs[..., 0]
    unreal = pairs[..., 1]
    Qn = tf.shape(pairs)[0]

    # Per-slot conditions, evaluated independently for every slot.
    cond1 = tf.logical_and(real >= tau_hi, unreal <= tau_low)
    # Guard the division: slots with a near-zero unreal part get ratio 0.
    ratio = tf.where(tf.abs(unreal) > EPS, real / unreal, tf.zeros_like(real))
    cond2 = ratio > r_for_ratio
    individual = tf.logical_or(cond1, cond2)

    final_mask = tf.cast(individual, tf.int32)

    # Visit the 10 triplets in order and scatter each triplet's flags back.
    for t in tf.range(10):
        idx3 = TRIPLET_IDX[t]
        trip_ind = tf.gather(individual, idx3, axis=1)
        # A triplet is uniform when all three of its flags agree.
        is_uniform = tf.reduce_all(tf.equal(trip_ind, trip_ind[:,0:1]), axis=1)
        uniform_val = tf.cast(trip_ind[:,0], tf.int32)

        # NOTE(review): for a uniform triplet the tiled first flag equals the
        # triplet's own flags, so both branches appear to yield the same values
        # and the scatter below rewrites what final_mask already holds —
        # confirm whether a different uniform rule was intended.
        updates = tf.where(
            tf.expand_dims(is_uniform, axis=1),
            tf.tile(tf.expand_dims(uniform_val, axis=1), [1,3]),
            tf.cast(trip_ind, tf.int32)
        )

        # (row, slot) index pairs in row-major order, matching the flattened updates.
        q_idx = tf.repeat(tf.range(Qn), repeats=3)
        p_idx = tf.tile(idx3, multiples=[Qn])
        scatter_idx = tf.stack([q_idx, p_idx], axis=1)
        final_mask = tf.tensor_scatter_nd_update(final_mask, scatter_idx, tf.reshape(updates, [-1]))

    return final_mask

# -----------------------------
# PARITY + BITMAP
# -----------------------------
def apply_parity_rotation(pairs: tf.Tensor, collapse_mask: tf.Tensor):
    """Negate both components of every slot that is prime-indexed or collapsed.

    Args:
        pairs: [Q,30,2] register.
        collapse_mask: [Q,30] int tensor; values > 0 mark collapsed slots.

    Returns:
        (rotated pairs with the input's shape, [Q,30] int32 mask of the slots
        that were negated).
    """
    n_rows = tf.shape(pairs)[0]
    prime_slots = tf.broadcast_to(PRIME_MASK_30[tf.newaxis, :], [n_rows, 30])
    flip = tf.logical_or(prime_slots > 0, collapse_mask > 0)
    affected = tf.cast(flip, tf.int32)

    minus_one = tf.constant(-1.0, tf.float32)
    plus_one = tf.constant(1.0, tf.float32)
    sign = tf.where(affected > 0, minus_one, plus_one)
    return pairs * sign[..., None], affected

def bitmap(rotated_pairs: tf.Tensor) -> tf.Tensor:
    """Return 1 where a slot's real component exceeds EPS and 0 elsewhere, as int32."""
    real_part = rotated_pairs[..., 0]
    is_set = real_part > EPS
    return tf.cast(is_set, tf.int32)

# -----------------------------
# Commit/tag/dedup/free
# -----------------------------
def commit_full90(packed_u32_xyz: np.ndarray,
                  initiator_axis: np.ndarray,
                  swapcount_xyz: np.ndarray,
                  instr_counter: int,
                  domain_sep: bytes = b"NTHISA90") -> List[bytes]:
    """Compute one 32-byte BLAKE2s commitment per qubit.

    The hashed message is, in order: ``domain_sep``, the instruction counter
    (4 bytes), the three packed view words (4 bytes each), the initiator axis
    (1 byte) and the three per-view swap counts (1 byte each).  All integers
    are unsigned little-endian.

    Args:
        packed_u32_xyz: [Q,3] packed 30-bit view words.
        initiator_axis: [Q] initiator axis per qubit.
        swapcount_xyz: [Q,3] swap counts per view.
        instr_counter: current instruction counter.
        domain_sep: domain-separation prefix.

    Returns:
        List of Q digests, one per row of ``packed_u32_xyz``.
    """
    def _le(value, width: int) -> bytes:
        return int(value).to_bytes(width, "little", signed=False)

    # Identical for every qubit, so build it once.
    header = domain_sep + _le(instr_counter, 4)

    digests = []
    for q in range(packed_u32_xyz.shape[0]):
        words = b"".join(_le(packed_u32_xyz[q, v], 4) for v in range(3))
        swaps = b"".join(_le(swapcount_xyz[q, v], 1) for v in range(3))
        message = header + words + _le(initiator_axis[q], 1) + swaps
        digests.append(hashlib.blake2s(message, digest_size=32).digest())
    return digests

def tag_u64_from_commit(commit32: bytes, q_idx: int, instr_counter: int) -> np.uint64:
    """Derive a 64-bit tag from a commitment, the qubit index and the instruction counter.

    The tag is the 8-byte BLAKE2s digest of ``b"NTH_TAG0"``, the commitment, the
    qubit index (2 bytes) and the instruction counter (4 bytes), read as an
    unsigned little-endian integer.
    """
    index_bytes = int(q_idx).to_bytes(2, "little", signed=False)
    counter_bytes = int(instr_counter).to_bytes(4, "little", signed=False)
    message = b"NTH_TAG0" + commit32 + index_bytes + counter_bytes
    digest = hashlib.blake2s(message, digest_size=8).digest()
    tag = int.from_bytes(digest, "little", signed=False)
    return np.uint64(tag)

@dataclass
class DedupResult:
    """Outcome of one batch-level deduplication pass."""
    # winner_of[q] is the index of the qubit that represents q's commit group.
    winner_of: np.ndarray
    # True for group winners (and for qubits with a unique commit).
    active_mask: np.ndarray
    # commit digest -> indices of all qubits that produced it, in index order.
    groups: Dict[bytes, List[int]]
    # Non-winning qubits, recorded only when the mode is "FREE".
    freed: List[int]
    # Number of qubits that lost to a winner (group size - 1, summed over groups).
    collision_qubits: int


def dedup_by_commit(commits: List[bytes], efficiency_score: np.ndarray, mode: str) -> DedupResult:
    """Group qubits by identical commit and elect one winner per group.

    The winner of a group is the member with the lowest efficiency score; ties
    go to the lowest qubit index.  Every other member is marked inactive and,
    in ``"FREE"`` mode, appended to ``freed``.

    Args:
        commits: one commit digest per qubit.
        efficiency_score: per-qubit cost, indexable by qubit index (lower wins).
        mode: ``"FREE"`` records losers in ``freed``; any other value leaves
            ``freed`` empty.

    Returns:
        A ``DedupResult`` covering all ``len(commits)`` qubits.
    """
    groups: Dict[bytes, List[int]] = {}
    for q, commit in enumerate(commits):
        groups.setdefault(commit, []).append(q)

    n_qubits = len(commits)
    # Defaults already describe a qubit that is alone in its group.
    winner_of = np.arange(n_qubits, dtype=np.int32)
    active_mask = np.ones(n_qubits, dtype=bool)
    freed: List[int] = []
    collision_qubits = 0
    free_mode = mode == "FREE"

    for members in groups.values():
        if len(members) == 1:
            continue
        collision_qubits += len(members) - 1

        # min score, tie min q
        winner = min(members, key=lambda q: (int(efficiency_score[q]), q))
        for q in members:
            winner_of[q] = winner
            if q != winner:
                active_mask[q] = False
                if free_mode:
                    freed.append(q)

    return DedupResult(winner_of, active_mask, groups, freed, collision_qubits)

def derive_new_nec_for_freed(commit32: bytes, q_idx: int, instr_counter: int) -> Tuple[np.ndarray, np.ndarray]:
    """Derive a deterministic replacement NEC state for a freed qubit.

    A 16-byte BLAKE2s digest of ``b"NTH_FREE0"``, the commitment, the qubit
    index (2 bytes, little-endian) and the instruction counter (4 bytes,
    little-endian) supplies the values: bytes 0..2 give the x/y/z magnitudes
    modulo 64, and bits 0..2 of byte 3 give the signs (bit set -> +1, else -1).

    Returns:
        (mags, signs), each an int32 array of length 3.
    """
    index_bytes = int(q_idx).to_bytes(2, "little", signed=False)
    counter_bytes = int(instr_counter).to_bytes(4, "little", signed=False)
    digest = hashlib.blake2s(b"NTH_FREE0" + commit32 + index_bytes + counter_bytes,
                             digest_size=16).digest()

    mags = np.array([byte % 64 for byte in digest[:3]], dtype=np.int32)
    sign_bits = digest[3]
    signs = np.array([1 if sign_bits & (1 << axis) else -1 for axis in range(3)],
                     dtype=np.int32)
    return mags, signs

# -----------------------------
# Interpreter state (register file)
# -----------------------------
@dataclass
class ISAState:
    """Register file of the interpreter: every opcode reads and writes this object.

    The derived registers start as ``None`` and are allocated by the stage
    opcode that produces them.
    """
    # Current instruction (epoch) counter; advanced by NEXT.
    instr_counter: int = 0
    mode: str = "PAIR_HUNT"                  # FREE or PAIR_HUNT
    exec_active_only: bool = False      # if True, stages run only on active winners when dedup exists & PAIR_HUNT

    # NEC state registers
    # [Q,3] int32 magnitudes per axis.
    mag_xyz: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.int32))
    # [Q,3] int32 signs per axis (+1 / -1).
    base_sign: np.ndarray = field(default_factory=lambda: np.ones((Q,3), dtype=np.int32))

    # scheduling registers
    # Result of the most recent DEDUP, or None when no dedup epoch is in effect.
    dedup: Optional[DedupResult] = None
    # [Q] bool; all True until a DEDUP marks losers inactive.
    active_mask: np.ndarray = field(default_factory=lambda: np.ones((Q,), dtype=bool))
    # [Q] int32; identity mapping until a DEDUP assigns winners.
    winner_of: np.ndarray = field(default_factory=lambda: np.arange(Q, dtype=np.int32))

    # derived registers
    initiator: np.ndarray = field(default_factory=lambda: np.zeros((Q,), dtype=np.int32))            # [Q]
    reg30_views: Optional[np.ndarray] = None                                                         # [Q,3,30,2] float32
    swapflags_views: Optional[np.ndarray] = None                                                      # [Q,3,8] int32
    collapse_views: Optional[np.ndarray] = None                                                       # [Q,3,30] int32
    parity_views: Optional[np.ndarray] = None                                                         # [Q,3,30] int32
    rotated_views: Optional[np.ndarray] = None                                                        # [Q,3,30,2] float32
    bits_views: Optional[np.ndarray] = None                                                           # [Q,3,30] int32
    packed_views_u32: Optional[np.ndarray] = None                                                     # [Q,3] uint32
    packed90_u64: Optional[np.ndarray] = None                                                         # [Q,2] uint64
    swapcount_xyz: Optional[np.ndarray] = None                                                        # [Q,3] uint8
    # One digest per qubit; Q empty byte strings until COMMIT has run.
    commits: List[bytes] = field(default_factory=lambda: [b""]*Q)
    # [Q] uint64 tags, written by COMMIT.
    tags_u64: np.ndarray = field(default_factory=lambda: np.zeros((Q,), dtype=np.uint64))

# -----------------------------
# Stage helpers (full vs active-only execution)
# -----------------------------
def _get_active_indices_for_compute(st: ISAState) -> Tuple[np.ndarray, np.ndarray]:
    """Split the qubit indices into (active, alias) int32 arrays using ``st.active_mask``."""
    mask = st.active_mask
    active_idx = np.nonzero(mask)[0].astype(np.int32)
    alias_idx = np.nonzero(~mask)[0].astype(np.int32)
    return active_idx, alias_idx

def _should_active_only(st: ISAState) -> bool:
    """Tell whether the stage opcodes may compute active winners only.

    That is only valid when all of the following hold:
      - ``exec_active_only`` is enabled,
      - a dedup result exists,
      - the mode is PAIR_HUNT (alias payloads can be safely copied from winners).
    """
    has_dedup = st.dedup is not None
    pair_hunt = st.mode == "PAIR_HUNT"
    return st.exec_active_only and has_dedup and pair_hunt

# -----------------------------
# Opcode implementations
# -----------------------------
def op_NEC_LOAD(st: ISAState, seed: int = 11, mag_range: int = 64):
    """Load a fresh pseudo-random NEC state and start a new dedup epoch.

    Args:
        st: interpreter state to overwrite.
        seed: base RNG seed; the current instruction counter is added to it.
        mag_range: exclusive upper bound of the drawn magnitudes.
    """
    rng = np.random.default_rng(seed + st.instr_counter)
    shape = (Q, 3)

    # Draw order matters for reproducibility: magnitudes first, then sign bits.
    st.mag_xyz = rng.integers(0, mag_range, size=shape, dtype=np.int32)
    sign_bits = rng.integers(0, 2, size=shape, dtype=np.int32)
    st.base_sign = np.where(sign_bits > 0, 1, -1).astype(np.int32)

    # fresh epoch: all active
    st.dedup = None
    st.active_mask[:] = True
    st.winner_of[:] = np.arange(Q, dtype=np.int32)

def op_INIT_SELECT(st: ISAState):
    """Select every qubit's initiator axis and store it in ``st.initiator``.

    The selection always uses the FULL magnitude array, never an active-only
    subset, because it depends on how magnitudes are shared across all qubits.
    """
    magnitudes = tf.constant(st.mag_xyz, dtype=tf.int32)
    chosen_axis = initiator_from_shared_counts(magnitudes)
    st.initiator = chosen_axis.numpy().astype(np.int32)

def op_REG30_BUILD(st: ISAState):
    """
    Build reg30 for each of the three initiator views (x/y/z).

    Writes ``st.reg30_views`` [Q,3,30,2], ``st.swapflags_views`` [Q,3,8] and
    ``st.swapcount_xyz`` [Q,3].

    If active-only mode applies (PAIR_HUNT + dedup exists + exec_active_only),
    registers and swap flags are computed only for active winners; alias rows
    of those two buffers stay zero and their bits are filled at the BITMAP
    stage by copying winner results.  The alias rows of ``swapcount_xyz`` are
    copied from the winner here, because COMMIT hashes the swap counts together
    with the packed bits: leaving them at zero would give an alias a different
    commitment than the winner whose payload it carries.
    """
    # Prepare buffers
    st.reg30_views = np.zeros((Q, 3, 30, 2), dtype=np.float32)
    st.swapflags_views = np.zeros((Q, 3, 8), dtype=np.int32)

    prim_full = build_primaries_from_nec(
        tf.constant(st.mag_xyz, dtype=tf.int32),
        tf.constant(st.base_sign, dtype=tf.int32),
    )  # full [Q,6,2]

    active_only = _should_active_only(st)
    if active_only:
        rows, alias_idx = _get_active_indices_for_compute(st)
        prim_rows = tf.gather(prim_full, rows, axis=0)
    else:
        rows = np.arange(Q, dtype=np.int32)
        alias_idx = np.zeros((0,), dtype=np.int32)
        prim_rows = prim_full

    n_rows = int(rows.shape[0])
    for view in range(3):
        # Every computed row uses the same initiator axis within one view.
        view_axis = tf.fill([n_rows], tf.constant(view, tf.int32))
        prim_view = permute_primaries_by_initiator(prim_rows, view_axis)
        reg_view, swaps_view = build_register30(prim_view, canonicalize_addsub=True)
        st.reg30_views[rows, view] = reg_view.numpy()
        st.swapflags_views[rows, view] = swaps_view.numpy()

    # swap counts (used for efficiency score and commitments)
    st.swapcount_xyz = np.sum(st.swapflags_views, axis=2).astype(np.uint8)  # [Q,3]

    if active_only and alias_idx.size:
        # Winners are always active, so their rows were computed above.
        st.swapcount_xyz[alias_idx] = st.swapcount_xyz[st.winner_of[alias_idx]]

def op_COLLAPSE(st: ISAState):
    """Compute the collapse mask of every view into ``st.collapse_views`` [Q,3,30].

    In active-only mode only the rows of active winners are computed; alias
    rows stay zero.
    """
    assert st.reg30_views is not None, "REG30_BUILD must run before COLLAPSE"
    st.collapse_views = np.zeros((Q, 3, 30), dtype=np.int32)

    if _should_active_only(st):
        rows, _ = _get_active_indices_for_compute(st)
    else:
        rows = slice(None)  # every qubit

    for view in range(3):
        reg = tf.constant(st.reg30_views[rows, view], dtype=tf.float32)
        collapsed = detect_collapse_triplet_scatter(reg)
        st.collapse_views[rows, view] = collapsed.numpy().astype(np.int32)

def op_PARITY(st: ISAState):
    """Apply the parity rotation to every view.

    Writes ``st.rotated_views`` [Q,3,30,2] (the rotated register) and
    ``st.parity_views`` [Q,3,30] (which slots were affected).  In active-only
    mode only the rows of active winners are computed; alias rows stay zero.
    """
    assert st.reg30_views is not None and st.collapse_views is not None, "Need REG30_BUILD and COLLAPSE"
    st.rotated_views = np.zeros((Q, 3, 30, 2), dtype=np.float32)
    st.parity_views = np.zeros((Q, 3, 30), dtype=np.int32)

    if _should_active_only(st):
        rows, _ = _get_active_indices_for_compute(st)
    else:
        rows = slice(None)  # every qubit

    for view in range(3):
        reg = tf.constant(st.reg30_views[rows, view], dtype=tf.float32)
        collapsed = tf.constant(st.collapse_views[rows, view], dtype=tf.int32)
        rotated, affected = apply_parity_rotation(reg, collapsed)
        st.rotated_views[rows, view] = rotated.numpy().astype(np.float32)
        st.parity_views[rows, view] = affected.numpy().astype(np.int32)

def op_BITMAP(st: ISAState):
    """Threshold the rotated views into bits, stored in ``st.bits_views`` [Q,3,30].

    In active-only mode the bits are computed for active winners only, and
    every alias then receives a copy of its winner's bits.
    """
    assert st.rotated_views is not None, "Need PARITY before BITMAP"
    st.bits_views = np.zeros((Q, 3, 30), dtype=np.int32)

    if _should_active_only(st):
        rows, alias_idx = _get_active_indices_for_compute(st)
    else:
        rows, alias_idx = slice(None), ()  # every qubit, nothing to copy

    for view in range(3):
        rotated = tf.constant(st.rotated_views[rows, view], dtype=tf.float32)
        st.bits_views[rows, view] = bitmap(rotated).numpy().astype(np.int32)

    # fill aliases by copying winner bits
    for q in alias_idx:
        st.bits_views[q] = st.bits_views[st.winner_of[q]]

def op_PACK(st: ISAState):
    """Pack each view's 30 bits into a uint32, then the three views into 90 bits.

    Writes ``st.packed_views_u32`` [Q,3] and ``st.packed90_u64`` [Q,2].
    """
    assert st.bits_views is not None, "Need BITMAP before PACK"
    packed = np.zeros((Q, 3), dtype=np.uint32)
    for view in range(3):
        view_bits = tf.constant(st.bits_views[:, view], dtype=tf.int32)
        packed[:, view] = pack30_to_u32_tf(view_bits).numpy().astype(np.uint32)
    st.packed_views_u32 = packed
    st.packed90_u64 = pack90_from_3x30_u32(packed)

def op_COMMIT(st: ISAState):
    """Commit every qubit's packed payload and derive its 64-bit tag.

    Writes ``st.commits`` (one digest per qubit, from ``commit_full90``) and
    ``st.tags_u64`` (one ``tag_u64_from_commit`` value per qubit).
    """
    assert st.packed_views_u32 is not None and st.swapcount_xyz is not None, "Need PACK and REG30_BUILD before COMMIT"
    epoch = st.instr_counter
    initiator_u8 = st.initiator.astype(np.uint8)
    swapcount_u8 = st.swapcount_xyz.astype(np.uint8)
    st.commits = commit_full90(st.packed_views_u32, initiator_u8, swapcount_u8, epoch)

    tags = [tag_u64_from_commit(st.commits[q], q, epoch) for q in range(Q)]
    st.tags_u64 = np.array(tags, dtype=np.uint64)

def op_DEDUP(st: ISAState):
    """Group qubits by commitment and record winners / active mask on the state.

    The efficiency score is a prototype: the total swap count over the three
    views, so fewer swaps means cheaper.

    Raises:
        AssertionError: if REG30_BUILD or COMMIT has not run yet.
    """
    assert st.swapcount_xyz is not None, "Need REG30_BUILD before DEDUP"
    # The default commit list already has Q entries (all empty), so a length
    # check alone cannot detect a missing COMMIT; real digests are never empty.
    assert st.commits is not None and len(st.commits) == Q and all(st.commits), "Need COMMIT before DEDUP"
    eff = st.swapcount_xyz.sum(axis=1).astype(np.int32)  # prototype efficiency
    st.dedup = dedup_by_commit(st.commits, eff, st.mode)
    # Copies keep later in-place edits of the state away from the stored result.
    st.active_mask = st.dedup.active_mask.copy()
    st.winner_of = st.dedup.winner_of.copy()

def op_FREE_ALIAS(st: ISAState):
    """
    Resolve the alias (non-winning) qubits of the current dedup epoch.

    Does nothing when no dedup result exists.  Otherwise:
      - PAIR_HUNT: aliases remain aliases (inactive) and inherit winner NEC state
      - FREE: aliases are reassigned new NEC work AND become ACTIVE next epoch (dedup epoch resets)
    """
    if st.dedup is None:
        return

    _, alias_idx = _get_active_indices_for_compute(st)

    if st.mode == "PAIR_HUNT":
        for q in alias_idx:
            winner = st.winner_of[q]
            st.mag_xyz[q] = st.mag_xyz[winner]
            st.base_sign[q] = st.base_sign[winner]
        # aliases remain inactive
        return

    # FREE: deterministically assign new NEC to alias qubits for next instruction
    next_epoch = st.instr_counter + 1
    for q in alias_idx:
        st.mag_xyz[q], st.base_sign[q] = derive_new_nec_for_freed(
            st.commits[q], q, next_epoch
        )

    # reset dedup epoch: everyone active for new work next instruction
    st.active_mask[:] = True
    st.winner_of[:] = np.arange(Q, dtype=np.int32)
    st.dedup = None

def op_EXEC_ACTIVE_ONLY(st: ISAState, enabled: bool = True):
    """Set the ``exec_active_only`` flag on the state, coerced to a plain bool."""
    flag = bool(enabled)
    st.exec_active_only = flag

def op_NEXT(st: ISAState):
    """Advance the instruction counter by one."""
    st.instr_counter = st.instr_counter + 1

# -----------------------------
# Interpreter dispatch
# -----------------------------
# Opcode name -> handler.  run_program looks each instruction up here and calls
# the handler as ``fn(st, **ins.args)``.  In this version every stage opcode
# has its own handler.
OP_TABLE = {
    "NEC_LOAD": op_NEC_LOAD,
    "INIT_SELECT": op_INIT_SELECT,
    "REG30_BUILD": op_REG30_BUILD,
    "COLLAPSE": op_COLLAPSE,
    "PARITY": op_PARITY,
    "BITMAP": op_BITMAP,
    "PACK": op_PACK,
    "COMMIT": op_COMMIT,
    "DEDUP": op_DEDUP,
    "FREE_ALIAS": op_FREE_ALIAS,
    "EXEC_ACTIVE_ONLY": op_EXEC_ACTIVE_ONLY,
    "NEXT": op_NEXT,
}

def run_program(st: ISAState, program: List[Instr], verbose: bool = True):
    """Execute an instruction stream against `st`, optionally tracing it.

    Each instruction's opcode is resolved through OP_TABLE and the handler is
    invoked with the instruction's args as keyword arguments. With `verbose`
    set, a one-line trace is printed after selected opcodes.

    Raises:
        ValueError: if an opcode has no entry in OP_TABLE.
    """
    for ins in program:
        handler = OP_TABLE.get(ins.op)
        if handler is None:
            raise ValueError(f"Unknown opcode: {ins.op}")
        handler(st, **ins.args)

        if not verbose:
            continue

        opcode = ins.op
        stamp = f"[t={st.instr_counter}]"
        if opcode == "REG30_BUILD":
            n_active = int(np.sum(st.active_mask))
            print(f"{stamp} REG30_BUILD done. active={n_active} exec_active_only={st.exec_active_only} mode={st.mode}")
        elif opcode == "PACK":
            print(f"{stamp} PACK done. packed90_u64[0]={st.packed90_u64[0].tolist()}")
        elif opcode == "COMMIT":
            print(f"{stamp} COMMIT done. commit0={st.commits[0].hex()[:16]}...")
        elif opcode == "DEDUP":
            print(f"{stamp} DEDUP done. collisions={st.dedup.collision_qubits} groups={len(st.dedup.groups)} active={int(np.sum(st.active_mask))}")
        elif opcode == "FREE_ALIAS":
            n_aliases = Q - int(np.sum(st.active_mask))
            print(f"{stamp} FREE/ALIAS applied. mode={st.mode} aliases_now={n_aliases} (FREE resets to 0 aliases next epoch)")
        elif opcode == "NEXT":
            print(f"--- NEXT instr_counter={st.instr_counter} ---")

# -----------------------------
# Demo program: 2 cycles
# -----------------------------
# Seed both RNGs so the demo output is repeatable.
np.random.seed(11)
tf.random.set_seed(11)

st = ISAState(mode="PAIR_HUNT", instr_counter=0, exec_active_only=True)  # exec_active_only only takes effect in PAIR_HUNT
# Two identical measurement cycles after a single NEC load; each cycle ends
# with NEXT, which advances the instruction counter.
program = [
    Instr("NEC_LOAD", {"seed": 11}),
    # cycle 0
    Instr("INIT_SELECT"),
    Instr("REG30_BUILD"),
    Instr("COLLAPSE"),
    Instr("PARITY"),
    Instr("BITMAP"),
    Instr("PACK"),
    Instr("COMMIT"),
    Instr("DEDUP"),
    Instr("FREE_ALIAS"),
    Instr("NEXT"),
    # cycle 1
    Instr("INIT_SELECT"),
    Instr("REG30_BUILD"),
    Instr("COLLAPSE"),
    Instr("PARITY"),
    Instr("BITMAP"),
    Instr("PACK"),
    Instr("COMMIT"),
    Instr("DEDUP"),
    Instr("FREE_ALIAS"),
    Instr("NEXT"),
]

print("=== UniversalISA v0.6 Interpreter Demo ===")
print("MODE:", st.mode, "EXEC_ACTIVE_ONLY:", st.exec_active_only, "(only effective in PAIR_HUNT)")
run_program(st, program, verbose=True)

# Dump a summary of the final register state.
print("\n=== Final State Snapshot ===")
print("instr_counter:", st.instr_counter)
print("initiator axis counts:", np.bincount(st.initiator, minlength=3).tolist(), "(0=x,1=y,2=z)")
print("packed_views_u32[0]:", st.packed_views_u32[0].tolist())
print("packed90_u64[0]:", st.packed90_u64[0].tolist())
print("active qubits:", int(np.sum(st.active_mask)), "aliases:", Q-int(np.sum(st.active_mask)))
print("tag[0]:", int(st.tags_u64[0]))
print("Sample qubits (q, mag, sign, winner, active):")
for q in range(8):
    print(q, st.mag_xyz[q].tolist(), st.base_sign[q].tolist(), int(st.winner_of[q]), bool(st.active_mask[q]))

# NOTE(review): this hint tells the reader to switch to 'PAIR_HUNT', but the
# state above is already constructed with mode="PAIR_HUNT" -- the message looks
# like a leftover from a FREE-mode version of the demo; confirm and reword.
print("\n--- If you want to see active-only savings, switch MODE to 'PAIR_HUNT' and rerun. ---")

=== UniversalISA v0.6 Interpreter Demo ===
MODE: PAIR_HUNT EXEC_ACTIVE_ONLY: True (only effective in PAIR_HUNT)
[t=0] REG30_BUILD done. active=64 exec_active_only=True mode=PAIR_HUNT
[t=0] PACK done. packed90_u64[0]=[9232960233523929384, 525330]
[t=0] COMMIT done. commit0=7ee6f4f8306e0b3f...
[t=0] DEDUP done. collisions=15 groups=49 active=49
[t=0] FREE/ALIAS applied. mode=PAIR_HUNT aliases_now=15 (FREE resets to 0 aliases next epoch)
--- NEXT instr_counter=1 ---
[t=1] REG30_BUILD done. active=49 exec_active_only=True mode=PAIR_HUNT
[t=1] PACK done. packed90_u64[0]=[9232960233523929384, 525330]
[t=1] COMMIT done. commit0=c1596df8c9ec1a9c...
[t=1] DEDUP done. collisions=12 groups=52 active=52
[t=1] FREE/ALIAS applied. mode=PAIR_HUNT aliases_now=12 (FREE resets to 0 aliases next epoch)
--- NEXT instr_counter=2 ---

=== Final State Snapshot ===
instr_counter: 2
initiator axis counts: [29, 12, 23] (0=x,1=y,2=z)
packed_views_u32[0]: [8413480, 8929704, 8405288]
packed90_u64[0]: [923296023352

In [None]:
# UniversalISA v0.7 (Colab single-cell)
# Major step toward v1.0 / Rust:
#   - Removes TensorFlow entirely; uses only NumPy + Python stdlib (hashlib).
#   - Keeps fixed shapes and explicit loops that map cleanly to Rust arrays/SIMD kernels.
#   - Defines a minimal IR/opcode format + interpreter running an instruction stream.
#   - Adds NEC-transforming "work" opcodes (NEC_ALU / PHASE_FLIP / ROT_AXES / XOR_SIGN / SHIFT_MAG).
#   - Preserves: 3 initiator views => 90-bit payload per qubit (3×30-bit), commitments, DEDUP, FREE/ALIAS, PAIR_HUNT.
#   - Preserves: triplet loops + scatter-style updates in COLLAPSE.
#   - Preserves: add/sub-only reorderable (canonicalized); mul/div fixed.
#
# NOTE: This is still a prototype. Deterministic cross-CPU/GPU parity will require fixed-point or strict FP mode by v1.0.

import numpy as np
import hashlib
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

# -----------------------------
# Fixed ISA geometry (v1.0 target constants)
# -----------------------------
Q = 64          # number of qubits (leading dimension of every register array)
AXES = 3
VIEWS = 3
SLOTS = 30      # slots per register view (6 primaries + 8 interaction triplets)
TRIPLETS = 10   # SLOTS / 3; triplet t covers slots 3t..3t+2 (see TRIPLET_IDX)
EPS = 1e-6      # near-zero guard used by div_pd, the collapse ratio test, and bitmap

# Collapse thresholds: cond1 is real >= TAU_HI and unreal <= TAU_LOW.
TAU_HI = 1.0
TAU_LOW = -1.0
# Collapse cond2 fires when real/unreal exceeds this ratio.
R_FOR_RATIO = 64.0

# 1 at the prime slot indices 2,3,5,7,11,13,17,19,23,29; 0 elsewhere.
PRIME_MASK_30 = np.array(
    [0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1],
    dtype=np.uint8
)

# Row t holds the three slot indices [3t, 3t+1, 3t+2] of triplet t.
TRIPLET_IDX = np.array([[3*t, 3*t+1, 3*t+2] for t in range(TRIPLETS)], dtype=np.int32)

# -----------------------------
# IR / Instruction model
# -----------------------------
@dataclass
class Instr:
    """One instruction of the interpreter's instruction stream."""
    # Opcode name; run_program resolves it through OP_TABLE.
    op: str
    # Keyword arguments forwarded to the opcode handler (empty dict by default).
    args: Dict[str, Any] = field(default_factory=dict)

# -----------------------------
# Numpy helpers: packing
# -----------------------------
def pack30_to_u32_np(bits30_u8: np.ndarray) -> np.ndarray:
    """Pack each row of 30 bit flags into one 32-bit word (slot k -> bit k).

    Takes a [Q,30] array of 0/1 values and returns a [Q] uint32 array.
    """
    slot_positions = np.arange(30, dtype=np.uint32)
    slot_weights = np.uint32(1) << slot_positions          # [30]: 1, 2, 4, ...
    as_words = bits30_u8.astype(np.uint32)
    weighted = as_words * slot_weights[None, :]            # [Q,30]
    row_totals = weighted.sum(axis=1)
    return row_totals.astype(np.uint32)

def pack90_from_3x30_u32(packed3_u32: np.ndarray) -> np.ndarray:
    """Concatenate three 30-bit words per row into a 90-bit value held in two uint64s.

    Input is [Q,3] uint32; only the low 30 bits of each word are used.
    Output is [Q,2] uint64 where column 0 carries bits 0..63 (word 0, word 1,
    and the low 4 bits of word 2) and column 1 carries the remaining 26 bits
    of word 2.
    """
    low30 = np.uint32((1 << 30) - 1)
    words = (packed3_u32 & low30).astype(np.uint64)        # [Q,3], each < 2**30
    w0, w1, w2 = words[:, 0], words[:, 1], words[:, 2]

    packed = np.zeros((packed3_u32.shape[0], 2), dtype=np.uint64)
    # Word 2 straddles the 64-bit boundary: 4 bits go low, the rest go high.
    packed[:, 0] = w0 | (w1 << np.uint64(30)) | ((w2 & np.uint64(0xF)) << np.uint64(60))
    packed[:, 1] = w2 >> np.uint64(4)
    return packed

def bitslice_30_np(bits30_u8: np.ndarray) -> np.ndarray:
    """Transpose per-qubit bit rows into per-slot lane masks.

    For a [Q,30] bit array (Q <= 64) returns a [30] uint64 array in which bit q
    of entry s is the flag of qubit q at slot s.
    """
    n_lanes = bits30_u8.shape[0]
    lane_bit = np.uint64(1) << np.arange(n_lanes, dtype=np.uint64)   # [Q]
    slot_major = bits30_u8.astype(np.uint64).T                       # [30,Q]
    lane_masks = (slot_major * lane_bit[None, :]).sum(axis=1)
    return lane_masks.astype(np.uint64)

# -----------------------------
# Phase-dual selector ops (component-wise)
# -----------------------------
def add_pd(a, b):
    """Component-wise sum of two phase-dual values."""
    total = a + b
    return total


def sub_pd(a, b):
    """Component-wise difference (a minus b) of two phase-dual values."""
    difference = a - b
    return difference


def mul_pd(a, b):
    """Component-wise product of two phase-dual values."""
    product = a * b
    return product
def div_pd(a, b):
    """Component-wise a / b as float32, yielding 0 wherever |b| <= EPS."""
    divisible = np.abs(b) > EPS
    quotient = np.zeros_like(a, dtype=np.float32)
    # Only the guarded positions are divided; the rest keep their zero fill.
    quotient[divisible] = a[divisible] / b[divisible]
    return quotient

# -----------------------------
# NEC -> primaries (true phase-dual)
# Each axis magnitude m yields:
#   Real selector  = [+m, -m]
#   Unreal selector= [-m, +m]
# base_sign +1 => (Real, Unreal)
# base_sign -1 => (Unreal, Real)
# Output prim6: [Q,6,2] in canonical axis-pair order [X0,X1,Y0,Y1,Z0,Z1]
# -----------------------------
def build_primaries_from_nec(mag_xyz: np.ndarray, base_sign: np.ndarray) -> np.ndarray:
    """Expand NEC (magnitude, sign) per axis into six phase-dual primaries.

    For magnitude m the real selector is [+m, -m] and the unreal selector is
    its negation. A non-negative base_sign orders an axis as (real, unreal),
    a negative one as (unreal, real). The result is [Q,6,2] float32 in the
    order [X0,X1,Y0,Y1,Z0,Z1].
    """
    mag = mag_xyz.astype(np.float32)                               # [Q,3]
    real_sel = np.stack([mag, -mag], axis=2)                       # [Q,3,2]

    # -1 flips real <-> unreal, because the unreal selector is exactly -real.
    orient = np.where(base_sign < 0, np.float32(-1.0), np.float32(1.0))[:, :, None]
    lead = (real_sel * orient)[:, :3]                              # X0, Y0, Z0
    trail = -lead                                                  # X1, Y1, Z1

    # Interleave (lead, trail) per axis: [Q,3,2,2] -> [Q,6,2].
    paired = np.stack([lead, trail], axis=2)
    return paired.reshape(mag.shape[0], 6, 2).astype(np.float32)

# -----------------------------
# Initiator selection from shared magnitude counts (full-array rule)
# Returns initiator axis per qubit in {0,1,2} for x,y,z
# -----------------------------
def initiator_from_shared_counts(mag_xyz: np.ndarray) -> np.ndarray:
    """Choose an initiator axis (0=x, 1=y, 2=z) for every qubit.

    An axis is "shared" for a qubit when at least one other qubit has the same
    magnitude on that axis. The rule depends on how many axes are shared:
      - exactly two shared: the first axis whose count is 1 (the unshared one)
      - one or three shared: the axis with the highest share count
      - none shared: the axis with the largest magnitude, ties going x > y > z
    Returns an int32 array of length Q.
    """
    mag = mag_xyz.astype(np.int32)  # [Q,3]

    # counts[q, ax] = number of qubits (q itself included) matching q on axis ax.
    counts = np.stack(
        [(mag[:, ax][:, None] == mag[:, ax][None, :]).sum(axis=1) for ax in range(3)],
        axis=1,
    ).astype(np.int32)
    n_shared = (counts > 1).sum(axis=1)  # [Q]

    # Magnitudes scaled so the small bias only ever breaks exact ties.
    tie_bias = np.array([2, 1, 0], dtype=np.int32)
    biased_mag = mag * 1000 + tie_bias[None, :]

    pick_most_shared = counts.argmax(axis=1)
    pick_unshared = (counts == 1).astype(np.int32).argmax(axis=1)
    pick_largest = biased_mag.argmax(axis=1)

    initiator = np.where(
        n_shared == 2,
        pick_unshared,
        np.where(n_shared == 0, pick_largest, pick_most_shared),
    )
    return initiator.astype(np.int32)

# -----------------------------
# Primaries permutation by initiator (cyclic pairs)
# prim6: [Q,6,2] in [X0,X1,Y0,Y1,Z0,Z1]
# init x => [X0,X1,Y0,Y1,Z0,Z1]
# init y => [Y0,Y1,Z0,Z1,X0,X1]
# init z => [Z0,Z1,X0,X1,Y0,Y1]
# -----------------------------
def permute_primaries_by_initiator(prim6: np.ndarray, initiator_axis: np.ndarray) -> np.ndarray:
    """Cyclically reorder each qubit's axis pairs so the initiator pair comes first.

    prim6 is [Q,6,2] in [X0,X1,Y0,Y1,Z0,Z1] order. Axis 0 keeps that order,
    axis 1 yields [Y0,Y1,Z0,Z1,X0,X1], and any other value is treated as z and
    yields [Z0,Z1,X0,X1,Y0,Y1]. A new array is returned.
    """
    n_rows = prim6.shape[0]
    pair_orders = np.array(
        [
            [0, 1, 2, 3, 4, 5],   # initiator x
            [2, 3, 4, 5, 0, 1],   # initiator y
            [4, 5, 0, 1, 2, 3],   # initiator z
        ],
        dtype=np.int32,
    )
    axis = np.asarray(initiator_axis)[:n_rows]
    order_row = np.where(axis == 0, 0, np.where(axis == 1, 1, 2))

    # Per-row gather along the primary axis.
    row_index = np.arange(n_rows)[:, None]
    return prim6[row_index, pair_orders[order_row]]

# -----------------------------
# REG30 build (10 triplets):
# Triplet0 = p0,p1,p2
# Triplet1 = p3,p4,p5
# Triplets2..9: [ADD(u,v), SUB(u,v), OP3(u,v)] where OP3 fixed MUL or DIV
# Canonicalize: only ADD/SUB reorder (swap if SUB.real > ADD.real). MUL/DIV fixed.
# Returns reg30 [Q,30,2], swap_flags [Q,8] uint8
# -----------------------------
def build_register30(prim6: np.ndarray, canonicalize_addsub: bool = True) -> Tuple[np.ndarray, np.ndarray]:
    """Build the 30-slot register from six primaries.

    Slots 0..5 are the primaries themselves. Each of the eight interaction
    pairs (u, v) then contributes three slots: ADD(u,v), SUB(u,v) and a fixed
    third op (MUL for the first four pairs, DIV for the last four). With
    `canonicalize_addsub`, the ADD and SUB slots are exchanged whenever SUB's
    real component is greater than ADD's, and that exchange is recorded.

    Returns (reg30 [Q,30,2] float32, swap_flags [Q,8] uint8).
    """
    n_rows = prim6.shape[0]
    A0, A1, B0, B1, C0, C1 = (prim6[:, k] for k in range(6))

    interactions = (
        (A0, B0, "MUL"),
        (B0, C0, "MUL"),
        (A0, C0, "MUL"),
        (A1, B1, "MUL"),
        (A0, B1, "DIV"),
        (B0, C1, "DIV"),
        (A1, C0, "DIV"),
        (B1, C1, "DIV"),
    )

    reg = np.empty((n_rows, 30, 2), dtype=np.float32)
    swap_flags = np.zeros((n_rows, 8), dtype=np.uint8)

    # Slots 0..5: primaries, unchanged.
    reg[:, 0:6, :] = prim6[:, 0:6]

    for k, (u, v, third_op) in enumerate(interactions):
        base = 6 + 3 * k
        total = add_pd(u, v)
        diff = sub_pd(u, v)
        third = mul_pd(u, v) if third_op == "MUL" else div_pd(u, v)

        if canonicalize_addsub:
            # Only ADD/SUB are reorderable; the third op always stays in place.
            swapped = diff[:, 0] > total[:, 0]  # [Q] bool
            swap_flags[:, k] = swapped.astype(np.uint8)
            pick = swapped[:, None]
            total, diff = np.where(pick, diff, total), np.where(pick, total, diff)

        reg[:, base, :] = total
        reg[:, base + 1, :] = diff
        reg[:, base + 2, :] = third

    return reg, swap_flags

# -----------------------------
# COLLAPSE (triplet loop + scatter-style update)
# individual collapse predicate per slot:
#   cond1 = real >= TAU_HI AND unreal <= TAU_LOW
#   cond2 = (real/unreal) > R_FOR_RATIO (guarding near-zero unreal)
# per triplet: if uniform, enforce uniform across that triplet; else keep individual
# Returns collapse_mask [Q,30] uint8
# -----------------------------
def detect_collapse_triplet_scatter(pairs: np.ndarray) -> np.ndarray:
    """Flag collapsed slots, then rewrite the mask triplet by triplet.

    A slot is individually collapsed when
      - real >= TAU_HI and unreal <= TAU_LOW, or
      - real/unreal > R_FOR_RATIO (the ratio counts as 0 where |unreal| <= EPS).
    Every triplet is then scattered back into the mask: a uniform triplet
    writes its common value, a mixed one writes the individual values.

    pairs is [Q,30,2]; returns a [Q,30] uint8 mask.
    """
    real, unreal = pairs[..., 0], pairs[..., 1]   # each [Q,30]
    threshold_hit = (real >= TAU_HI) & (unreal <= TAU_LOW)

    nonzero_unreal = np.abs(unreal) > EPS
    ratio = np.zeros_like(real, dtype=np.float32)
    ratio[nonzero_unreal] = real[nonzero_unreal] / unreal[nonzero_unreal]
    ratio_hit = ratio > R_FOR_RATIO

    individual = threshold_hit | ratio_hit         # [Q,30] bool
    final_mask = individual.astype(np.uint8).copy()

    rows = np.arange(individual.shape[0], dtype=np.int32)[:, None]   # [Q,1]
    for t in range(TRIPLETS):
        cols = TRIPLET_IDX[t].astype(np.int32)     # [3]
        trip = individual[:, cols]                 # [Q,3] bool
        lead = trip[:, 0:1]                        # [Q,1]
        uniform = np.all(trip == lead, axis=1)     # [Q] bool

        # NOTE(review): in a uniform triplet every slot already equals the lead
        # value, so both branches write back the individual values; the scatter
        # keeps the loop shape but does not currently change the mask.
        updates = np.where(uniform[:, None], lead.astype(np.uint8), trip.astype(np.uint8))  # [Q,3]
        final_mask[rows, cols[None, :]] = updates

    return final_mask

# -----------------------------
# PARITY + BITMAP
# affected = prime OR collapse; sign flip on both components
# bitmap bit=1 if rotated.real > EPS else 0
# -----------------------------
def apply_parity_rotation(pairs: np.ndarray, collapse: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Negate both components of every slot that is prime-indexed or collapsed.

    Returns (rotated pairs as float32, affected mask as uint8 with the shape
    of `collapse`).
    """
    prime_hit = np.broadcast_to(PRIME_MASK_30[None, :], collapse.shape).astype(np.uint8) > 0
    collapsed = collapse > 0
    affected = (prime_hit | collapsed).astype(np.uint8)

    flip = np.where(affected > 0, -1.0, 1.0).astype(np.float32)  # [Q,30]
    rotated = (pairs * flip[..., None]).astype(np.float32)
    return rotated, affected.astype(np.uint8)

def bitmap(rotated_pairs: np.ndarray) -> np.ndarray:
    """Return a uint8 bit per slot: 1 where the real component exceeds EPS, else 0."""
    real_part = rotated_pairs[..., 0]
    is_positive = real_part > EPS
    return is_positive.astype(np.uint8)  # [Q,30]

# -----------------------------
# Commit/tag/dedup/free
# Commit includes: instr_counter + 3 view words + initiator + swapcounts
# -----------------------------
def commit_full90(packed_u32_xyz: np.ndarray,
                  initiator_axis: np.ndarray,
                  swapcount_xyz: np.ndarray,
                  instr_counter: int,
                  domain_sep: bytes = b"NTHISA90") -> List[bytes]:
    """Compute one 32-byte BLAKE2s commitment per qubit.

    The hashed message is, in order: the domain separator, the instruction
    counter (4 bytes), the three packed view words (4 bytes each), the
    initiator axis (1 byte) and the three swap counts (1 byte each); all
    integers are unsigned little-endian.
    """
    def le(value, width):
        # Unsigned little-endian encoding of a Python/NumPy integer.
        return int(value).to_bytes(width, "little", signed=False)

    digests = []
    for q in range(packed_u32_xyz.shape[0]):
        parts = [domain_sep, le(instr_counter, 4)]
        parts.extend(le(packed_u32_xyz[q, v], 4) for v in range(3))
        parts.append(le(initiator_axis[q], 1))
        parts.extend(le(swapcount_xyz[q, ax], 1) for ax in range(3))
        digests.append(hashlib.blake2s(b"".join(parts), digest_size=32).digest())
    return digests

def tag_u64_from_commit(commit32: bytes, q_idx: int, instr_counter: int) -> np.uint64:
    """Derive a 64-bit tag by hashing a commitment with the qubit index and counter.

    The message is b"NTH_TAG0", the commitment, the qubit index (2 bytes) and
    the instruction counter (4 bytes), integers unsigned little-endian; the
    8-byte BLAKE2s digest is read back as a little-endian uint64.
    """
    preimage = b"".join((
        b"NTH_TAG0",
        commit32,
        int(q_idx).to_bytes(2, "little", signed=False),
        int(instr_counter).to_bytes(4, "little", signed=False),
    ))
    digest8 = hashlib.blake2s(preimage, digest_size=8).digest()
    return np.uint64(int.from_bytes(digest8, "little", signed=False))

@dataclass
class DedupResult:
    """Outcome of one dedup_by_commit pass."""
    # winner_of[q] is the index of the qubit chosen to represent q's commit group.
    winner_of: np.ndarray
    # True for group winners (including singletons), False for every other member.
    active_mask: np.ndarray
    # Commit bytes -> qubit indices sharing that commit, in enumeration order.
    groups: Dict[bytes, List[int]]
    # Non-winner qubits; only populated when dedup ran with mode == "FREE".
    freed: List[int]
    # Total number of non-winner members across all groups.
    collision_qubits: int

def dedup_by_commit(commits: List[bytes], efficiency_score: np.ndarray, mode: str) -> DedupResult:
    """Group qubits with identical commits and elect one winner per group.

    The winner is the member with the lowest efficiency score, ties broken by
    the lowest qubit index. Every other member is marked inactive and, when
    `mode` is "FREE", also listed as freed.
    """
    n_qubits = len(commits)

    groups: Dict[bytes, List[int]] = {}
    for q, commit in enumerate(commits):
        if commit in groups:
            groups[commit].append(q)
        else:
            groups[commit] = [q]

    winner_of = np.arange(n_qubits, dtype=np.int32)
    active_mask = np.ones(n_qubits, dtype=bool)
    freed: List[int] = []
    collision_qubits = 0
    free_mode = (mode == "FREE")

    for members in groups.values():
        # Singletons contribute zero here.
        collision_qubits += len(members) - 1
        winner = min(members, key=lambda q: (int(efficiency_score[q]), q))
        for q in members:
            winner_of[q] = winner
            if q == winner:
                continue
            active_mask[q] = False
            if free_mode:
                freed.append(q)

    return DedupResult(winner_of, active_mask, groups, freed, collision_qubits)

def derive_new_nec_for_freed(commit32: bytes, q_idx: int, instr_counter: int) -> Tuple[np.ndarray, np.ndarray]:
    """Deterministically derive fresh NEC values for a freed qubit.

    Hashes b"NTH_FREE0", the commitment, the qubit index (2 bytes) and the
    instruction counter (4 bytes) with BLAKE2s. The first three digest bytes
    modulo 64 become the magnitudes; bits 0..2 of the fourth byte become the
    signs (+1 when the bit is set, -1 otherwise). Both arrays are int32 of
    length 3.
    """
    preimage = b"".join((
        b"NTH_FREE0",
        commit32,
        int(q_idx).to_bytes(2, "little", signed=False),
        int(instr_counter).to_bytes(4, "little", signed=False),
    ))
    raw = hashlib.blake2s(preimage, digest_size=16).digest()

    mags = np.array([byte % 64 for byte in raw[:3]], dtype=np.int32)
    sign_bits = raw[3]
    signs = np.array([1 if (sign_bits >> k) & 1 else -1 for k in range(3)], dtype=np.int32)
    return mags, signs

# -----------------------------
# Interpreter state (Rust-friendly register file)
# -----------------------------
@dataclass
class ISAState:
    """Mutable register file that every stage_* / op_* function reads and writes.

    All array fields are allocated per instance with fixed shapes derived
    from Q.
    """
    instr_counter: int = 0             # advanced by NEXT; mixed into commits and tags
    mode: str = "FREE"                 # "FREE" or "PAIR_HUNT"
    exec_active_only: bool = False     # only meaningful in PAIR_HUNT after DEDUP

    # NEC registers
    mag_xyz: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.int32))
    base_sign: np.ndarray = field(default_factory=lambda: np.ones((Q,3), dtype=np.int32))

    # Scheduling registers
    # dedup is None until DEDUP runs, and is cleared again by NEC_LOAD, a barrier
    # epoch, or FREE_ALIAS in FREE mode.
    dedup: Optional[DedupResult] = None
    active_mask: np.ndarray = field(default_factory=lambda: np.ones((Q,), dtype=bool))
    winner_of: np.ndarray = field(default_factory=lambda: np.arange(Q, dtype=np.int32))

    # Derived registers (pipeline)
    initiator: np.ndarray = field(default_factory=lambda: np.zeros((Q,), dtype=np.int32))             # [Q]
    reg30_views: np.ndarray = field(default_factory=lambda: np.zeros((Q,3,30,2), dtype=np.float32))   # [Q,3,30,2]
    swapflags_views: np.ndarray = field(default_factory=lambda: np.zeros((Q,3,8), dtype=np.uint8))    # [Q,3,8]
    collapse_views: np.ndarray = field(default_factory=lambda: np.zeros((Q,3,30), dtype=np.uint8))    # [Q,3,30]
    parity_views: np.ndarray = field(default_factory=lambda: np.zeros((Q,3,30), dtype=np.uint8))      # [Q,3,30]
    rotated_views: np.ndarray = field(default_factory=lambda: np.zeros((Q,3,30,2), dtype=np.float32)) # [Q,3,30,2]
    bits_views: np.ndarray = field(default_factory=lambda: np.zeros((Q,3,30), dtype=np.uint8))        # [Q,3,30]
    packed_views_u32: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.uint32))    # [Q,3]
    packed90_u64: np.ndarray = field(default_factory=lambda: np.zeros((Q,2), dtype=np.uint64))        # [Q,2]
    swapcount_xyz: np.ndarray = field(default_factory=lambda: np.zeros((Q,3), dtype=np.uint8))        # [Q,3]
    # One commitment per qubit; empty byte strings until COMMIT runs.
    commits: List[bytes] = field(default_factory=lambda: [b""]*Q)
    tags_u64: np.ndarray = field(default_factory=lambda: np.zeros((Q,), dtype=np.uint64))

    # Side-effect barrier: if True, forbid DEDUP optimization for this epoch
    # (set by BARRIER, cleared by NEXT).
    barrier: bool = False

# -----------------------------
# Helper: decide active-only compute
# -----------------------------
def should_active_only(st: ISAState) -> bool:
    """Tell whether pipeline stages may skip inactive (alias) qubits.

    Requires the exec_active_only flag, an existing dedup result, PAIR_HUNT
    mode, and no barrier in the current epoch.
    """
    if not st.exec_active_only:
        return st.exec_active_only
    if st.dedup is None:
        return False
    if st.mode != "PAIR_HUNT":
        return False
    return not st.barrier

# -----------------------------
# Work opcodes (NEC-transforming)
# These are the beginning of conventional ISA mapping.
# They operate directly on NEC state (mag_xyz/base_sign), and are deterministic.
# -----------------------------
def nec_alu_mag(mag_xyz: np.ndarray, op: str, dst: int, src: int, imm: Optional[int] = None, mod: int = 64):
    """Apply an integer ALU op to one magnitude column, in place, modulo `mod`.

    The right-hand operand is the immediate `imm` when given, otherwise column
    `src`. Supported ops are "ADD", "SUB", "MUL", "DIV" and "XOR"; DIV is floor
    division with the divisor clamped to at least 1.

    Raises:
        ValueError: for any other op (nothing is written in that case).
    """
    if imm is not None:
        lhs_source = None  # immediate form never reads `src`
        rhs = int(imm)
        lhs = mag_xyz[:, dst].astype(np.int32)
        divisor = max(rhs, 1)
    else:
        lhs = mag_xyz[:, dst].astype(np.int32)
        rhs = mag_xyz[:, src].astype(np.int32)
        divisor = np.maximum(rhs, 1)
    del_unused = None  # placeholder-free: operands are fully prepared above

    if op == "ADD":
        result = lhs + rhs
    elif op == "SUB":
        result = lhs - rhs
    elif op == "MUL":
        result = lhs * rhs
    elif op == "DIV":
        # Divisors below 1 are clamped, so division by zero cannot occur.
        result = lhs // divisor
    elif op == "XOR":
        result = lhs ^ rhs
    else:
        raise ValueError(op)

    mag_xyz[:, dst] = result % mod

def nec_phase_flip(base_sign: np.ndarray, axis: int):
    """Negate the sign column of one axis for every qubit, in place."""
    column = base_sign[:, axis]  # view into base_sign, so the update lands in the caller's array
    column *= -1

def nec_rot_axes(mag_xyz: np.ndarray, base_sign: np.ndarray, direction: str = "R"):
    """Rotate the axis columns of magnitudes and signs together, in place.

    "R" maps (x,y,z) -> (z,x,y); any other value is handled as "L" and maps
    (x,y,z) -> (y,z,x).
    """
    column_order = [2, 0, 1] if direction == "R" else [1, 2, 0]
    # Fancy indexing yields a copy, so writing back through [:] is safe.
    rotated_mag = mag_xyz[:, column_order]
    rotated_sign = base_sign[:, column_order]
    mag_xyz[:] = rotated_mag
    base_sign[:] = rotated_sign

def nec_shift_mag(mag_xyz: np.ndarray, axis: int, sh: int, mod: int = 64):
    """Shift one magnitude column in place and wrap the result modulo `mod`.

    A non-negative `sh` shifts left by `sh` bits; a negative `sh` shifts right
    by `-sh` bits.
    """
    column = mag_xyz[:, axis].astype(np.int32)
    shifted = (column << sh) if sh >= 0 else (column >> (-sh))
    mag_xyz[:, axis] = (shifted % mod).astype(np.int32)

# -----------------------------
# Pipeline stage ops (views + transcode)
# -----------------------------
def stage_INIT_SELECT(st: ISAState):
    """Recompute the per-qubit initiator axis from the current magnitudes."""
    chosen_axes = initiator_from_shared_counts(st.mag_xyz)
    st.initiator = chosen_axes

def stage_REG30_BUILD(st: ISAState):
    """Build the three 30-slot register views (x-, y- and z-initiated) per qubit.

    The initiator register is always refreshed from the full magnitude array
    first. View v is built by permuting the primaries as if every qubit had
    initiator axis v, then running build_register30 with ADD/SUB
    canonicalization. Results go to st.reg30_views and st.swapflags_views, and
    st.swapcount_xyz is recomputed as the per-view sum of swap flags.

    In active-only mode (see should_active_only) only active qubits are
    rebuilt. Each alias then takes over its winner's swap flags, so that
    swapcount_xyz -- which COMMIT hashes -- reflects this epoch for every
    qubit instead of whatever the alias rows held from an earlier epoch.
    """
    # Always compute initiator from full array before building views
    stage_INIT_SELECT(st)

    prim0 = build_primaries_from_nec(st.mag_xyz, st.base_sign)  # [Q,6,2]

    # views always: x/y/z initiators (global)
    view_initiators = [
        np.zeros((Q,), dtype=np.int32),
        np.ones((Q,), dtype=np.int32),
        np.full((Q,), 2, dtype=np.int32),
    ]

    if should_active_only(st):
        active_idx = np.where(st.active_mask)[0].astype(np.int32)
        alias_idx = np.where(~st.active_mask)[0].astype(np.int32)

        # Build only for active indices
        prim0_a = prim0[active_idx]
        for v, initv in enumerate(view_initiators):
            prim_v = permute_primaries_by_initiator(prim0_a, initv[active_idx])
            reg30, swaps = build_register30(prim_v, canonicalize_addsub=True)
            st.reg30_views[active_idx, v] = reg30
            st.swapflags_views[active_idx, v] = swaps

        # Alias rows of reg30_views keep their previous contents; the
        # active-only branches of COLLAPSE/PARITY/BITMAP never read them.
        # Swap flags, however, feed swapcount_xyz below for *all* qubits, so
        # mirror the winner's flags (winners are always active, hence fresh).
        if alias_idx.size:
            st.swapflags_views[alias_idx] = st.swapflags_views[st.winner_of[alias_idx]]
    else:
        for v, initv in enumerate(view_initiators):
            prim_v = permute_primaries_by_initiator(prim0, initv)
            reg30, swaps = build_register30(prim_v, canonicalize_addsub=True)
            st.reg30_views[:, v] = reg30
            st.swapflags_views[:, v] = swaps

    st.swapcount_xyz = st.swapflags_views.sum(axis=2).astype(np.uint8)  # [Q,3]

def stage_COLLAPSE(st: ISAState):
    """Compute the collapse mask of every view; only active qubits in active-only mode."""
    if should_active_only(st):
        rows = np.where(st.active_mask)[0].astype(np.int32)
    else:
        rows = slice(None)  # all qubits

    for v in range(3):
        st.collapse_views[rows, v] = detect_collapse_triplet_scatter(st.reg30_views[rows, v])

def stage_PARITY(st: ISAState):
    """Apply the parity rotation to every view and store rotated pairs plus the affected mask.

    In active-only mode only the rows of active qubits are processed.
    """
    if should_active_only(st):
        rows = np.where(st.active_mask)[0].astype(np.int32)
    else:
        rows = slice(None)  # all qubits

    for v in range(3):
        rotated, affected = apply_parity_rotation(st.reg30_views[rows, v], st.collapse_views[rows, v])
        st.rotated_views[rows, v] = rotated
        st.parity_views[rows, v] = affected

def stage_BITMAP(st: ISAState):
    """Turn rotated pairs into per-slot bits for all three views.

    In active-only mode bits are computed for active qubits only, and each
    alias then receives a copy of its winner's bits for all views.
    """
    if not should_active_only(st):
        for v in range(3):
            st.bits_views[:, v] = bitmap(st.rotated_views[:, v])
        return

    active_rows = np.where(st.active_mask)[0].astype(np.int32)
    alias_rows = np.where(~st.active_mask)[0].astype(np.int32)

    for v in range(3):
        st.bits_views[active_rows, v] = bitmap(st.rotated_views[active_rows, v])

    # copy to aliases (PAIR_HUNT)
    for alias in alias_rows:
        winner = st.winner_of[alias]
        st.bits_views[alias] = st.bits_views[winner]

def stage_PACK(st: ISAState):
    """Pack each view's 30 bits into a uint32 word, then the three words into the 90-bit payload."""
    view_words = [pack30_to_u32_np(st.bits_views[:, v]) for v in range(3)]
    for v, words in enumerate(view_words):
        st.packed_views_u32[:, v] = words
    st.packed90_u64 = pack90_from_3x30_u32(st.packed_views_u32)

def stage_COMMIT(st: ISAState):
    """Compute per-qubit commitments and their 64-bit tags for the current instruction counter."""
    counter = st.instr_counter
    initiator_u8 = st.initiator.astype(np.uint8)
    swapcount_u8 = st.swapcount_xyz.astype(np.uint8)

    st.commits = commit_full90(st.packed_views_u32, initiator_u8, swapcount_u8, counter)

    tags = [tag_u64_from_commit(st.commits[q], q, st.instr_counter) for q in range(Q)]
    st.tags_u64 = np.array(tags, dtype=np.uint64)

def stage_DEDUP(st: ISAState):
    """Deduplicate qubits by commitment, unless a barrier is set for this epoch.

    With the barrier set, the dedup result is dropped and every qubit is made
    active and its own winner. Otherwise dedup_by_commit runs with the row sums
    of swapcount_xyz as efficiency score, and its masks are copied onto the
    state.
    """
    if not st.barrier:
        efficiency = st.swapcount_xyz.sum(axis=1).astype(np.int32)
        st.dedup = dedup_by_commit(st.commits, efficiency, st.mode)
        st.active_mask = st.dedup.active_mask.copy()
        st.winner_of = st.dedup.winner_of.copy()
        return

    # If a side-effect barrier is active, do not dedup this epoch
    st.dedup = None
    st.active_mask[:] = True
    st.winner_of[:] = np.arange(Q, dtype=np.int32)

def stage_FREE_ALIAS(st: ISAState):
    """Resolve alias qubits after DEDUP; does nothing without a dedup result.

    PAIR_HUNT: each inactive qubit copies its winner's NEC and stays inactive.
    Any other mode (FREE): each inactive qubit gets new NEC derived from its
    commitment for the next instruction counter, after which all qubits are
    active again, each is its own winner, and the dedup result is cleared.
    """
    if st.dedup is None:
        return

    aliases = np.where(~st.active_mask)[0].astype(np.int32)

    if st.mode == "PAIR_HUNT":
        # aliases remain inactive and inherit winner NEC (stay paired)
        for alias in aliases:
            winner = st.winner_of[alias]
            st.mag_xyz[alias] = st.mag_xyz[winner]
            st.base_sign[alias] = st.base_sign[winner]
        return

    # FREE: assign new NEC work to aliases and reset epoch (everyone active next instruction)
    for alias in aliases:
        new_mags, new_signs = derive_new_nec_for_freed(st.commits[alias], alias, st.instr_counter + 1)
        st.mag_xyz[alias] = new_mags
        st.base_sign[alias] = new_signs
    st.active_mask[:] = True
    st.winner_of[:] = np.arange(Q, dtype=np.int32)
    st.dedup = None

def stage_NEXT(st: ISAState):
    """Close the epoch: advance the instruction counter and clear the barrier."""
    st.instr_counter = st.instr_counter + 1
    # barrier is per-epoch
    st.barrier = False

# -----------------------------
# Opcode dispatch
# -----------------------------
def op_NEC_LOAD(st: ISAState, seed: int = 11, mag_range: int = 64):
    """Fill the NEC registers with seeded pseudo-random values and reset scheduling.

    The generator is seeded with `seed + st.instr_counter`. Magnitudes are
    drawn from [0, mag_range) and signs are +1 or -1. Afterwards every qubit is
    active, each is its own winner, and any dedup result is dropped.
    """
    rng = np.random.default_rng(seed + st.instr_counter)

    # Draw order (magnitudes, then sign bits) determines the generated values.
    magnitudes = rng.integers(0, mag_range, size=(Q,3), dtype=np.int32)
    sign_bits = rng.integers(0, 2, size=(Q,3), dtype=np.int32)

    st.mag_xyz[:] = magnitudes
    st.base_sign[:] = np.where(sign_bits > 0, 1, -1).astype(np.int32)

    st.active_mask[:] = True
    st.winner_of[:] = np.arange(Q, dtype=np.int32)
    st.dedup = None

def op_NEC_ALU(st: ISAState, op: str, dst: int, src: Optional[int] = None, imm: Optional[int] = None, mod: int = 64):
    """Opcode wrapper around nec_alu_mag operating on st.mag_xyz.

    Raises:
        ValueError: if neither `src` nor `imm` is supplied.
    """
    if src is None and imm is None:
        raise ValueError("NEC_ALU requires src or imm")
    dst_axis = int(dst)
    # With an immediate and no src, axis 0 is passed as a stand-in source.
    src_axis = 0 if src is None else int(src)
    nec_alu_mag(st.mag_xyz, op=op, dst=dst_axis, src=src_axis, imm=imm, mod=mod)

def op_PHASE_FLIP(st: ISAState, axis: int):
    """Opcode wrapper: negate the base sign of one axis for all qubits."""
    target_axis = int(axis)
    nec_phase_flip(st.base_sign, target_axis)

def op_ROT_AXES(st: ISAState, direction: str = "R"):
    """Opcode wrapper: rotate the axis columns of magnitudes and signs."""
    nec_rot_axes(st.mag_xyz, st.base_sign, direction=direction)

def op_SHIFT_MAG(st: ISAState, axis: int, sh: int):
    """Opcode wrapper: shift one magnitude column, wrapping modulo 64."""
    target_axis = int(axis)
    shift_amount = int(sh)
    nec_shift_mag(st.mag_xyz, axis=target_axis, sh=shift_amount, mod=64)

def op_BARRIER(st: ISAState):
    """Set the side-effect barrier, which makes DEDUP skip this epoch."""
    # Side-effect barrier placeholder: forbids DEDUP optimization for this epoch.
    st.barrier = True

# Pipeline ops:
def op_INIT_SELECT(st: ISAState):
    """Opcode INIT_SELECT: run stage_INIT_SELECT."""
    stage_INIT_SELECT(st)

def op_REG30_BUILD(st: ISAState):
    """Opcode REG30_BUILD: run stage_REG30_BUILD."""
    stage_REG30_BUILD(st)

def op_COLLAPSE(st: ISAState):
    """Opcode COLLAPSE: run stage_COLLAPSE."""
    stage_COLLAPSE(st)

def op_PARITY(st: ISAState):
    """Opcode PARITY: run stage_PARITY."""
    stage_PARITY(st)

def op_BITMAP(st: ISAState):
    """Opcode BITMAP: run stage_BITMAP."""
    stage_BITMAP(st)

def op_PACK(st: ISAState):
    """Opcode PACK: run stage_PACK."""
    stage_PACK(st)

def op_COMMIT(st: ISAState):
    """Opcode COMMIT: run stage_COMMIT."""
    stage_COMMIT(st)

def op_DEDUP(st: ISAState):
    """Opcode DEDUP: run stage_DEDUP."""
    stage_DEDUP(st)

def op_FREE_ALIAS(st: ISAState):
    """Opcode FREE_ALIAS: run stage_FREE_ALIAS."""
    stage_FREE_ALIAS(st)

def op_EXEC_ACTIVE_ONLY(st: ISAState, enabled: bool = True):
    """Opcode EXEC_ACTIVE_ONLY: store the active-only flag, coerced to bool."""
    st.exec_active_only = bool(enabled)

def op_NEXT(st: ISAState):
    """Opcode NEXT: run stage_NEXT."""
    stage_NEXT(st)

# Opcode name -> handler. run_program resolves each instruction's `op` here and
# calls the handler as handler(st, **ins.args).
OP_TABLE = {
    # NEC-transforming work opcodes and the barrier
    "NEC_LOAD": op_NEC_LOAD,
    "NEC_ALU": op_NEC_ALU,
    "PHASE_FLIP": op_PHASE_FLIP,
    "ROT_AXES": op_ROT_AXES,
    "SHIFT_MAG": op_SHIFT_MAG,
    "BARRIER": op_BARRIER,

    # Pipeline stage opcodes and epoch control
    "INIT_SELECT": op_INIT_SELECT,
    "REG30_BUILD": op_REG30_BUILD,
    "COLLAPSE": op_COLLAPSE,
    "PARITY": op_PARITY,
    "BITMAP": op_BITMAP,
    "PACK": op_PACK,
    "COMMIT": op_COMMIT,
    "DEDUP": op_DEDUP,
    "FREE_ALIAS": op_FREE_ALIAS,
    "EXEC_ACTIVE_ONLY": op_EXEC_ACTIVE_ONLY,
    "NEXT": op_NEXT,
}

def run_program(st: ISAState, program: List[Instr], verbose: bool = True):
    """Execute an instruction stream against `st`, optionally tracing it.

    Each opcode is resolved through OP_TABLE and its handler is called with
    the instruction's args as keyword arguments. With `verbose` set, a one-line
    trace is printed after work opcodes and selected pipeline opcodes.

    Raises:
        ValueError: if an opcode has no entry in OP_TABLE.
    """
    work_ops = ("NEC_ALU","PHASE_FLIP","ROT_AXES","SHIFT_MAG")

    for ins in program:
        handler = OP_TABLE.get(ins.op)
        if handler is None:
            raise ValueError(f"Unknown opcode: {ins.op}")
        handler(st, **ins.args)

        if not verbose:
            continue

        opcode = ins.op
        stamp = f"[t={st.instr_counter}]"
        if opcode in work_ops:
            print(f"{stamp} {opcode} {ins.args}")
        elif opcode == "PACK":
            print(f"{stamp} PACK packed90[0]={st.packed90_u64[0].tolist()}")
        elif opcode == "COMMIT":
            print(f"{stamp} COMMIT commit0={st.commits[0].hex()[:16]}...")
        elif opcode == "DEDUP":
            n_active = int(st.active_mask.sum())
            if st.dedup is None:
                print(f"{stamp} DEDUP skipped (barrier). active={n_active}")
            else:
                print(f"{stamp} DEDUP collisions={st.dedup.collision_qubits} groups={len(st.dedup.groups)} active={n_active}")
        elif opcode == "FREE_ALIAS":
            n_aliases = Q - int(st.active_mask.sum())
            print(f"{stamp} FREE_ALIAS mode={st.mode} aliases_now={n_aliases}")
        elif opcode == "NEXT":
            print(f"--- NEXT instr_counter={st.instr_counter} ---")

# -----------------------------
# Demo microprograms
# -----------------------------
def make_measurement_block() -> List[Instr]:
    """Return a fresh list of the eight argument-less measurement instructions.

    The order is REG30_BUILD, COLLAPSE, PARITY, BITMAP, PACK, COMMIT, DEDUP,
    FREE_ALIAS; a new list is built on every call.
    """
    stage_opcodes = (
        "REG30_BUILD",
        "COLLAPSE",
        "PARITY",
        "BITMAP",
        "PACK",
        "COMMIT",
        "DEDUP",
        "FREE_ALIAS",
    )
    return [Instr(opcode) for opcode in stage_opcodes]

# Example: "x86-like" ALU microprogram sketch (pure NEC ops)
# We'll do:
#   - LOAD NEC
#   - (pretend) ADD x += y, XOR z ^= x, SHIFT y << 1, PHASE_FLIP x, ROT axes
#   - measurement/transcode + dedup
#   - next epoch, repeat with exec_active_only depending on mode
# Two-epoch demo program: each epoch does some NEC work, runs the measurement
# block, and ends with NEXT.
program = [
    Instr("NEC_LOAD", {"seed": 11}),
    Instr("EXEC_ACTIVE_ONLY", {"enabled": True}),  # only impacts PAIR_HUNT mode after dedup
    # instruction 0 "work":
    Instr("NEC_ALU", {"op":"ADD", "dst":0, "src":1}),     # x = x + y
    Instr("NEC_ALU", {"op":"XOR", "dst":2, "src":0}),     # z = z XOR x
    Instr("SHIFT_MAG", {"axis":1, "sh":1}),               # y <<= 1
    Instr("PHASE_FLIP", {"axis":0}),                      # flip x phase initiation
    Instr("ROT_AXES", {"direction":"R"}),                 # rotate axes (z,x,y)
    # measurement/transcode stages, spliced in from make_measurement_block()
    *make_measurement_block(),
    Instr("NEXT"),

    # instruction 1 "work": include a barrier to demonstrate dedup suppression
    Instr("NEC_ALU", {"op":"MUL", "dst":0, "src":2}),     # x = x * z (mod 64)
    Instr("BARRIER"),                                     # emulate a side-effectful op: skip dedup this epoch
    *make_measurement_block(),
    Instr("NEXT"),
]

# -----------------------------
# Run demo in FREE then in PAIR_HUNT
# -----------------------------
# Run the same program once per scheduling mode, starting from a fresh state
# each time, and print a snapshot of qubit 0 afterwards.
for mode in ("FREE", "PAIR_HUNT"):
    print("\n==============================")
    print("UniversalISA v0.7 DEMO | MODE:", mode)
    st = ISAState(mode=mode, exec_active_only=True, instr_counter=0)
    # NOTE(review): the seed is set after ISAState is constructed; if the
    # constructor draws from np.random, those draws are not covered - confirm.
    np.random.seed(11)
    run_program(st, program, verbose=True)

    print("\n--- Final Snapshot ---")
    print("instr_counter:", st.instr_counter, "mode:", st.mode, "exec_active_only:", st.exec_active_only)
    # Histogram of the initiator axis over all qubits (three bins).
    print("initiator counts:", np.bincount(st.initiator, minlength=3).tolist(), "(0=x,1=y,2=z)")
    # Aliases are reported as the qubits that are not currently active.
    print("active:", int(st.active_mask.sum()), "aliases:", Q-int(st.active_mask.sum()))
    print("packed_views_u32[0]:", st.packed_views_u32[0].tolist())
    print("packed90_u64[0]:", st.packed90_u64[0].tolist())
    print("tag[0]:", int(st.tags_u64[0]))
    # Only the first 32 hex characters of the commit are shown.
    print("commit0:", st.commits[0].hex()[:32], "...")
    print("sample NEC (q0): mag", st.mag_xyz[0].tolist(), "sign", st.base_sign[0].tolist(), "winner", int(st.winner_of[0]))


UniversalISA v0.7 DEMO | MODE: FREE
[t=0] NEC_ALU {'op': 'ADD', 'dst': 0, 'src': 1}
[t=0] NEC_ALU {'op': 'XOR', 'dst': 2, 'src': 0}
[t=0] SHIFT_MAG {'axis': 1, 'sh': 1}
[t=0] PHASE_FLIP {'axis': 0}
[t=0] ROT_AXES {'direction': 'R'}
[t=0] PACK packed90[0]=[9306275394515781668, 37781656]
[t=0] COMMIT commit0=d7dc49e8fdfca611...
[t=0] DEDUP collisions=21 groups=43 active=43
[t=0] FREE_ALIAS mode=FREE aliases_now=0
--- NEXT instr_counter=1 ---
[t=1] NEC_ALU {'op': 'MUL', 'dst': 0, 'src': 2}
[t=1] PACK packed90[0]=[9306275394515781668, 37781656]
[t=1] COMMIT commit0=dfc5fc05d1326fdc...
[t=1] DEDUP skipped (barrier). active=64
[t=1] FREE_ALIAS mode=FREE aliases_now=0
--- NEXT instr_counter=2 ---

--- Final Snapshot ---
instr_counter: 2 mode: FREE exec_active_only: True
initiator counts: [21, 26, 17] (0=x,1=y,2=z)
active: 64 aliases: 0
packed_views_u32[0]: [538069028, 77209768, 604506504]
packed90_u64[0]: [9306275394515781668, 37781656]
tag[0]: 2288642141128177132
commit0: dfc5fc05d1326fdcd3

v0.8 Rust Refactor

In [None]:
%%bash
set -e

# Ensure the Rust toolchain is available; install it through rustup only when
# cargo is not already on PATH.
if ! command -v cargo >/dev/null 2>&1; then
  # --proto/--tlsv1.2 restrict the installer download to HTTPS with TLS >= 1.2
  # (the invocation documented by rustup).
  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
  # Quoted so a HOME containing spaces does not split the path.
  source "$HOME/.cargo/env"
fi

# Start from a clean project directory so stale sources never reach the build.
rm -rf uisa_v08
mkdir -p uisa_v08/src

# Crate manifest: a binary crate whose only dependency is blake2.
cat > uisa_v08/Cargo.toml <<'TOML'
[package]
name = "uisa_v08"
version = "0.8.1"
edition = "2021"

[dependencies]
blake2 = "0.10"
TOML

cat > uisa_v08/src/main.rs <<'RS'
use blake2::{Blake2s256, Digest};
use std::collections::HashMap;

// ===========================
// UniversalISA v0.8.1 Constants
// ===========================
// Number of qubits; every per-qubit array in ISAState has this length.
const Q: usize = 64;
// Slots in one 30-entry register (6 primaries + 8 * 3 derived entries).
const SLOTS: usize = 30;
// Register views kept per qubit (one per initiator axis).
const VIEWS: usize = 3;
// Number of consecutive 3-slot groups in a register (TRIPLETS * 3 == SLOTS).
const TRIPLETS: usize = 10;

// Near-zero threshold used for safe division and for bitmap extraction.
const EPS: f32 = 1e-6;
// Collapse thresholds: real component >= TAU_HI together with unreal <= TAU_LOW.
const TAU_HI: f32 = 1.0;
const TAU_LOW: f32 = -1.0;
// Collapse also triggers when real/unreal exceeds this ratio.
const R_FOR_RATIO: f32 = 64.0;

// 1 at the prime slot indices in 0..29 (2,3,5,7,11,13,17,19,23,29), 0 elsewhere.
const PRIME_MASK_30: [u8; 30] = [
    0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1
];

/// Returns the three consecutive slot indices that form triplet `t`.
fn triplet_idx(t: usize) -> [usize;3] {
    let base = 3 * t;
    [base, base + 1, base + 2]
}

// ===========================
// Typed GPR model: QubitMask
// ===========================
/// 64-bit selection mask: bit `q` set means qubit `q` is selected.
#[derive(Clone, Copy, Debug)]
struct QubitMask(u64);

impl QubitMask {
    /// Number of qubits a mask can address.
    const WIDTH: usize = 64;

    /// Mask selecting every qubit.
    fn all() -> Self { QubitMask(!0u64) }

    /// Mask selecting only qubit `q`; an index outside `0..64` yields the empty mask.
    fn only(q: usize) -> Self {
        // Shifting a u64 by 64 or more panics in debug builds and wraps the shift
        // amount in release builds, so out-of-range indices are handled explicitly.
        if q < Self::WIDTH { QubitMask(1u64 << q) } else { QubitMask(0) }
    }

    /// True when qubit `q` is selected; indices outside `0..64` are never selected.
    fn contains(&self, q: usize) -> bool {
        q < Self::WIDTH && ((self.0 >> q) & 1) != 0
    }
}

// ===========================
// Memory model (simulated + real controller skeleton)
// ===========================
/// Flat, zero-initialised byte buffer used as simulated memory.
struct SimMemory { buf: Vec<u8> }

impl SimMemory {
    /// Creates a buffer of `size` zero bytes.
    fn new(size: usize) -> Self {
        SimMemory { buf: vec![0u8; size] }
    }

    /// Reads a little-endian u32 at byte offset `addr`; panics if the range is out of bounds.
    fn read_u32(&self, addr: usize) -> u32 {
        let mut word = [0u8; 4];
        word.copy_from_slice(&self.buf[addr..addr+4]);
        u32::from_le_bytes(word)
    }

    /// Writes `v` as little-endian bytes at byte offset `addr`; panics if the range is out of bounds.
    fn write_u32(&mut self, addr: usize, v: u32) {
        let window = &mut self.buf[addr..addr+4];
        window.copy_from_slice(&v.to_le_bytes());
    }
}

/// Byte-addressed memory interface for a real controller backend.
trait MemoryController {
    /// Returns `len` bytes starting at `addr`.
    fn read(&self, addr: u64, len: usize) -> Vec<u8>;
    /// Stores `data` starting at `addr`.
    fn write(&mut self, addr: u64, data: &[u8]);
}

/// Stub controller: reads return zeros and writes are discarded.
struct PlaceholderMemController;

impl MemoryController for PlaceholderMemController {
    fn read(&self, _addr: u64, len: usize) -> Vec<u8> {
        let mut zeros = Vec::with_capacity(len);
        zeros.resize(len, 0u8);
        zeros
    }

    fn write(&mut self, _addr: u64, _data: &[u8]) {
        // Intentionally a no-op.
    }
}

// ===========================
// Canonical 16-byte Bytecode
// ===========================
// Instruction opcodes; the discriminant is the value stored in byte 0 of a
// 16-byte instruction word (written by emit, matched by decode16).
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
enum Opcode {
    // Fills NEC magnitudes and signs of the masked qubits from the aux32 seed.
    NecLoad = 0x01,
    NecAlu  = 0x02,     // imm8: ADD=0,SUB=1,MUL=2,DIV=3,XOR=4, ADDI=5 uses imm32
    // Negates the base sign of the destination axis.
    PhaseFlip = 0x03,
    RotAxes   = 0x04,   // imm8: 0=R,1=L
    ShiftMag  = 0x05,   // imm32 shift signed
    // Sets the barrier flag, which makes DEDUP a no-op until NEXT clears it.
    Barrier   = 0x06,

    Pack       = 0x15,  // triggers fused stage_measure_transcode
    // Groups qubits with equal commits and elects one winner per group.
    Dedup      = 0x17,
    // Resolves the non-winning qubits left by DEDUP (see free_alias).
    FreeAlias  = 0x18,
    // Advances instr_counter and clears the barrier flag.
    Next       = 0x19,

    LoadMemU32 = 0x20,  // aux32=addr, XOR into x magnitudes
    StoreMemU32= 0x21,  // aux32=addr, store packed_views_u32[q,0] from first masked qubit
}

// One decoded 16-byte instruction word, as produced by decode16.
#[derive(Clone, Copy)]
struct DecodedInstr {
    // Byte 0: operation selector.
    op: Opcode,
    // Byte 1: flag bits (not read by exec_one).
    flags: u8,
    // Byte 2: index into ISAState::gpr_masks (exec_one clamps it to 7).
    gpr_mask: usize,
    // Byte 3: destination axis (exec_one clamps it to 0..=2).
    dst_axis: u8,
    // Byte 4: source axis (exec_one clamps it to 0..=2).
    src_axis: u8,
    // Byte 5: small immediate (ALU sub-op, rotation direction).
    imm8: u8,
    // Bytes 8..12, little-endian: signed immediate (ADDI operand, shift amount).
    imm32: i32,
    // Bytes 12..16, little-endian: auxiliary word (load seed, memory address).
    aux32: u32,
}

/// Decodes one 16-byte instruction word.
///
/// Layout: byte 0 opcode, 1 flags, 2 mask index, 3 dst axis, 4 src axis, 5 imm8,
/// 8..12 imm32 (LE), 12..16 aux32 (LE); bytes 6 and 7 are not read.
///
/// Panics if `bytes` is not exactly 16 long or the opcode byte is unknown.
fn decode16(bytes: &[u8]) -> DecodedInstr {
    assert!(bytes.len() == 16);

    // Copies the four bytes starting at `at` into a fixed-size array.
    let le_word = |at: usize| -> [u8; 4] {
        [bytes[at], bytes[at + 1], bytes[at + 2], bytes[at + 3]]
    };

    let op = match bytes[0] {
        0x01 => Opcode::NecLoad,
        0x02 => Opcode::NecAlu,
        0x03 => Opcode::PhaseFlip,
        0x04 => Opcode::RotAxes,
        0x05 => Opcode::ShiftMag,
        0x06 => Opcode::Barrier,
        0x15 => Opcode::Pack,
        0x17 => Opcode::Dedup,
        0x18 => Opcode::FreeAlias,
        0x19 => Opcode::Next,
        0x20 => Opcode::LoadMemU32,
        0x21 => Opcode::StoreMemU32,
        other => panic!("unknown opcode {}", other),
    };

    DecodedInstr {
        op,
        flags: bytes[1],
        gpr_mask: usize::from(bytes[2]),
        dst_axis: bytes[3],
        src_axis: bytes[4],
        imm8: bytes[5],
        imm32: i32::from_le_bytes(le_word(8)),
        aux32: u32::from_le_bytes(le_word(12)),
    }
}

/// Encodes one instruction into its 16-byte word (the inverse of `decode16`).
///
/// Bytes 0..6 hold opcode, flags, mask index, dst axis, src axis and imm8; bytes 6
/// and 7 stay zero; `imm32` and `aux32` follow as little-endian words.
fn emit(op: Opcode, flags: u8, gpr: u8, dst: u8, src: u8, imm8: u8, imm32: i32, aux32: u32) -> [u8;16] {
    let mut word = [0u8; 16];
    let header = [op as u8, flags, gpr, dst, src, imm8];
    word[..header.len()].copy_from_slice(&header);
    word[8..12].copy_from_slice(&imm32.to_le_bytes());
    word[12..].copy_from_slice(&aux32.to_le_bytes());
    word
}

// ===========================
// ISA State
// ===========================
#[derive(Clone)]
struct ISAState {
    instr_counter: u32,
    mode_pair_hunt: bool,
    exec_active_only: bool,
    barrier: bool,

    gpr_masks: [QubitMask; 8],

    mag_xyz: [[i32;3]; Q],
    base_sign: [[i32;3]; Q],

    active_mask: [bool; Q],
    winner_of: [usize; Q],

    initiator: [u8; Q],

    reg30_views: [[[[f32;2]; SLOTS]; VIEWS]; Q],
    swapcount_xyz: [[u8;3]; Q],

    collapse_views: [[[u8; SLOTS]; VIEWS]; Q],
    parity_views:  [[[u8; SLOTS]; VIEWS]; Q],
    rotated_views: [[[[f32;2]; SLOTS]; VIEWS]; Q],
    bits_views:    [[[u8; SLOTS]; VIEWS]; Q],

    packed_views_u32: [[u32;3]; Q],
    packed90_u64: [[u64;2]; Q],

    commits: [[u8;32]; Q],
    tags_u64: [u64; Q],
}

impl ISAState {
    fn new() -> Self {
        ISAState {
            instr_counter: 0,
            mode_pair_hunt: false,
            exec_active_only: false,
            barrier: false,
            gpr_masks: [QubitMask(0); 8],
            mag_xyz: [[0;3]; Q],
            base_sign: [[1;3]; Q],
            active_mask: [true; Q],
            winner_of: core::array::from_fn(|i| i),
            initiator: [0u8; Q],
            reg30_views: [[[[0.0;2]; SLOTS]; VIEWS]; Q],
            swapcount_xyz: [[0u8;3]; Q],
            collapse_views: [[[0u8; SLOTS]; VIEWS]; Q],
            parity_views:  [[[0u8; SLOTS]; VIEWS]; Q],
            rotated_views: [[[[0.0;2]; SLOTS]; VIEWS]; Q],
            bits_views:    [[[0u8; SLOTS]; VIEWS]; Q],
            packed_views_u32: [[0u32;3]; Q],
            packed90_u64: [[0u64;2]; Q],
            commits: [[0u8;32]; Q],
            tags_u64: [0u64; Q],
        }
    }
    fn active_only_allowed(&self) -> bool {
        self.exec_active_only && self.mode_pair_hunt && !self.barrier
    }
}

// ===========================
// Core functions (subset of v0.7 in Rust)
// ===========================
/// Chooses an initiator axis (0=x, 1=y, 2=z) for every masked qubit.
///
/// For each axis, the qubit's magnitude is counted among the masked qubits
/// (itself included); an axis is "shared" when that count exceeds one.
/// - one or three shared axes: the axis with the highest count (lowest axis wins ties);
/// - two shared axes: the remaining unshared axis;
/// - no shared axis: the axis with the largest magnitude, ties resolved x > y > z.
/// Unmasked qubits keep initiator 0.
fn initiator_from_shared_counts(mag: &[[i32;3]; Q], mask: QubitMask) -> [u8; Q] {
    // Index of the first strict maximum.
    fn first_argmax(vals: [i32;3]) -> usize {
        let mut top = 0usize;
        for a in 1..3 {
            if vals[a] > vals[top] { top = a; }
        }
        top
    }

    let mut init = [0u8; Q];

    for q in 0..Q {
        if !mask.contains(q) { continue; }

        // How many masked qubits carry the same magnitude on each axis.
        let mut tally = [0i32; 3];
        for axis in 0..3 {
            let mine = mag[q][axis];
            tally[axis] = (0..Q)
                .filter(|&k| mask.contains(k) && mag[k][axis] == mine)
                .count() as i32;
        }

        let shared_axes = tally.iter().filter(|&&c| c > 1).count();
        let chosen = match shared_axes {
            // None shared: the small bias (x=2, y=1, z=0) breaks magnitude ties.
            0 => first_argmax([
                mag[q][0]*1000 + 2,
                mag[q][1]*1000 + 1,
                mag[q][2]*1000,
            ]),
            // Two shared: pick the odd one out.
            2 => tally.iter().position(|&c| c == 1).unwrap_or(0),
            _ => first_argmax(tally),
        };
        init[q] = chosen as u8;
    }

    init
}

/// Expands one qubit's NEC into six primary pairs, two per axis.
///
/// For magnitude `m`, a non-negative sign yields `[m, -m]` then `[-m, m]`;
/// a negative sign yields the same two pairs in the opposite order.
fn build_primaries_from_nec_for_qubit(mag: [i32;3], sign: [i32;3]) -> [[f32;2]; 6] {
    let mut out = [[0.0f32;2]; 6];
    for (axis, pair) in out.chunks_exact_mut(2).enumerate() {
        let m = mag[axis] as f32;
        // Leading component of the first pair; the second pair is its mirror.
        let lead = if sign[axis] < 0 { -m } else { m };
        pair[0] = [lead, -lead];
        pair[1] = [-lead, lead];
    }
    out
}

/// Rotates the six primaries so the initiator axis's pair comes first.
///
/// Axis 0 keeps the order, axis 1 rotates left by two entries, and any other
/// value is treated as axis 2 and rotates left by four.
fn permute_primaries_by_initiator(prim6: [[f32;2];6], init_axis: u8) -> [[f32;2];6] {
    let shift = match init_axis {
        0 => 0usize,
        1 => 2,
        _ => 4,
    };
    core::array::from_fn(|i| prim6[(i + shift) % 6])
}

/// Component-wise sum of two phase-dual pairs.
fn add_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let [a_real, a_unreal] = a;
    let [b_real, b_unreal] = b;
    [a_real + b_real, a_unreal + b_unreal]
}
/// Component-wise difference `a - b` of two phase-dual pairs.
fn sub_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let [a_real, a_unreal] = a;
    let [b_real, b_unreal] = b;
    [a_real - b_real, a_unreal - b_unreal]
}
/// Component-wise product of two phase-dual pairs.
fn mul_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let [a_real, a_unreal] = a;
    let [b_real, b_unreal] = b;
    [a_real * b_real, a_unreal * b_unreal]
}
/// Component-wise quotient `a / b`; a component whose divisor has magnitude at
/// most `EPS` yields 0 instead of dividing.
fn div_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let guarded = |num: f32, den: f32| -> f32 {
        if den.abs() > EPS { num / den } else { 0.0f32 }
    };
    [guarded(a[0], b[0]), guarded(a[1], b[1])]
}

/// Builds the 30-slot register for one view.
///
/// Slots 0..6 are the six primaries. Each of the eight fixed pairings then adds
/// three slots: the sum and difference (ordered so the one with the larger first
/// component comes first) followed by the product (first four pairings) or the
/// guarded quotient (last four). `swap_sum_out` receives how many pairings placed
/// the difference ahead of the sum.
fn build_register30_for_qubit(prim6: [[f32;2];6], swap_sum_out: &mut u8) -> [[f32;2]; SLOTS] {
    let [a0, a1, b0, b1, c0, c1] = prim6;

    // (left operand, right operand, use division instead of multiplication)
    let pairings: [([f32;2], [f32;2], bool); 8] = [
        (a0, b0, false), (b0, c0, false), (a0, c0, false), (a1, b1, false),
        (a0, b1, true),  (b0, c1, true),  (a1, c0, true),  (b1, c1, true),
    ];

    let mut reg = [[0.0f32;2]; SLOTS];
    reg[..6].copy_from_slice(&prim6);

    let mut swaps = 0u8;
    for (n, (lhs, rhs, divide)) in pairings.into_iter().enumerate() {
        let base = 6 + 3 * n;
        let sum = add_pd(lhs, rhs);
        let diff = sub_pd(lhs, rhs);

        if diff[0] > sum[0] {
            swaps += 1;
            reg[base] = diff;
            reg[base + 1] = sum;
        } else {
            reg[base] = sum;
            reg[base + 1] = diff;
        }
        reg[base + 2] = if divide { div_pd(lhs, rhs) } else { mul_pd(lhs, rhs) };
    }

    *swap_sum_out = swaps;
    reg
}

fn detect_collapse_triplet_scatter(pairs: &[[f32;2]; SLOTS]) -> [u8; SLOTS] {
    let mut individual = [0u8; SLOTS];

    for i in 0..SLOTS {
        let real = pairs[i][0];
        let unreal = pairs[i][1];
        let cond1 = (real >= TAU_HI) && (unreal <= TAU_LOW);
        let ratio = if unreal.abs() > EPS { real / unreal } else { 0.0 };
        let cond2 = ratio > R_FOR_RATIO;
        individual[i] = if cond1 || cond2 { 1 } else { 0 };
    }

    let mut final_mask = individual;
    for t in 0..TRIPLETS {
        let idx = triplet_idx(t);
        let a = individual[idx[0]];
        let b = individual[idx[1]];
        let c = individual[idx[2]];
        let uniform = (a==b) && (b==c);
        if uniform {
            final_mask[idx[0]] = a;
            final_mask[idx[1]] = a;
            final_mask[idx[2]] = a;
        } else {
            final_mask[idx[0]] = a;
            final_mask[idx[1]] = b;
            final_mask[idx[2]] = c;
        }
    }
    final_mask
}

/// Negates both components of every slot that sits at a prime index or is flagged
/// as collapsed, and reports which slots were negated.
///
/// Returns `(rotated pairs, parity flags)`, where a parity flag of 1 marks a
/// negated slot.
fn apply_parity_rotation(pairs: &[[f32;2]; SLOTS], collapse: &[u8; SLOTS]) -> ([[f32;2]; SLOTS], [u8; SLOTS]) {
    let parity: [u8; SLOTS] = core::array::from_fn(|i| {
        let flip = (PRIME_MASK_30[i] > 0) || (collapse[i] > 0);
        flip as u8
    });

    let rotated: [[f32;2]; SLOTS] = core::array::from_fn(|i| {
        let factor = if parity[i] == 1 { -1.0f32 } else { 1.0f32 };
        [pairs[i][0] * factor, pairs[i][1] * factor]
    });

    (rotated, parity)
}

/// Extracts one bit per slot: 1 when the slot's first component exceeds `EPS`, else 0.
fn bitmap(rotated: &[[f32;2]; SLOTS]) -> [u8; SLOTS] {
    core::array::from_fn(|i| (rotated[i][0] > EPS) as u8)
}

/// Packs the 30 slot bits into a u32, slot `i` landing at bit position `i`.
fn pack30_to_u32(bits: &[u8; SLOTS]) -> u32 {
    bits.iter()
        .take(30)
        .enumerate()
        .fold(0u32, |word, (i, &bit)| word | ((bit as u32) << i))
}

/// Packs three 30-bit words into 90 bits spread over `(low, high)`.
///
/// `low` holds `b0` in bits 0..30, `b1` in bits 30..60 and the low 4 bits of `b2`
/// in bits 60..64; `high` holds the remaining 26 bits of `b2`. Bits above bit 29
/// of each input are discarded.
fn pack90(b0: u32, b1: u32, b2: u32) -> (u64,u64) {
    const MASK30: u64 = (1u64 << 30) - 1;

    let w0 = u64::from(b0) & MASK30;
    let w1 = u64::from(b1) & MASK30;
    let w2 = u64::from(b2) & MASK30;

    let low = w0 | (w1 << 30) | ((w2 & 0xF) << 60);
    let high = w2 >> 4;
    (low, high)
}

/// Computes the 32-byte Blake2s commit for one qubit.
///
/// The hashed message is the tag `NTHISA90`, the epoch counter and the three
/// packed view words (all little-endian), then the initiator and the three swap
/// counts as single bytes.
fn commit_full90(instr_counter: u32, w0: u32, w1: u32, w2: u32, initiator: u8, scx: u8, scy: u8, scz: u8) -> [u8;32] {
    let mut hasher = Blake2s256::new();
    hasher.update(b"NTHISA90");
    hasher.update(&instr_counter.to_le_bytes());
    for word in [w0, w1, w2] {
        hasher.update(&word.to_le_bytes());
    }
    hasher.update(&[initiator, scx, scy, scz]);

    let digest = hasher.finalize();
    let mut commit = [0u8;32];
    commit.copy_from_slice(&digest[..]);
    commit
}

/// Derives a 64-bit tag from a commit, the qubit index and the epoch counter.
///
/// The tag is the first eight bytes (little-endian) of a Blake2s hash over
/// `NTH_TAG0`, the commit, the qubit index as a u16 and the counter.
fn tag_u64(commit: &[u8;32], q: usize, instr_counter: u32) -> u64 {
    let mut hasher = Blake2s256::new();
    hasher.update(b"NTH_TAG0");
    hasher.update(commit);
    hasher.update(&(q as u16).to_le_bytes());
    hasher.update(&instr_counter.to_le_bytes());
    let digest = hasher.finalize();

    let mut head = [0u8; 8];
    head.copy_from_slice(&digest[..8]);
    u64::from_le_bytes(head)
}

// Summary of one DEDUP pass, as filled in by dedup():
//   collision_qubits - qubits that share a commit with a group winner (group size - 1, summed)
//   groups           - number of distinct commits
//   active           - qubits still active after the pass
//   freed            - non-winning qubits counted only when not in PAIR_HUNT mode
#[derive(Clone, Debug)]
struct DedupInfo { collision_qubits: u32, groups: u32, active: u32, freed: u32 }

/// Groups qubits by commit and elects one winner per group.
///
/// The winner is the member with the lowest `efficiency` value, ties going to the
/// lowest qubit index; every other member is marked inactive and pointed at the
/// winner. When `barrier` is set nothing is grouped: the result is `None`, all
/// qubits active, each its own winner.
///
/// Returns `(summary, active_mask, winner_of)`.
fn dedup(commits: &[[u8;32]; Q], efficiency: &[u32; Q], mode_pair_hunt: bool, barrier: bool)
    -> (Option<DedupInfo>, [bool; Q], [usize; Q])
{
    let identity: [usize; Q] = core::array::from_fn(|i| i);
    if barrier {
        return (None, [true; Q], identity);
    }

    let mut by_commit: HashMap<[u8;32], Vec<usize>> = HashMap::new();
    for (q, commit) in commits.iter().enumerate() {
        by_commit.entry(*commit).or_default().push(q);
    }

    let mut winner_of = identity;
    let mut active_mask = [true; Q];
    let mut losers = 0u32;

    for members in by_commit.values() {
        // Lowest (efficiency, index) wins; indices are unique so the minimum is too.
        let champion = members
            .iter()
            .copied()
            .min_by_key(|&q| (efficiency[q], q))
            .expect("every group has at least one member");

        for &q in members {
            winner_of[q] = champion;
            if q != champion {
                active_mask[q] = false;
                losers += 1;
            }
        }
    }

    let info = DedupInfo {
        collision_qubits: losers,
        groups: by_commit.len() as u32,
        active: active_mask.iter().filter(|&&alive| alive).count() as u32,
        // Losers only count as freed outside PAIR_HUNT mode.
        freed: if mode_pair_hunt { 0 } else { losers },
    };
    (Some(info), active_mask, winner_of)
}

/// Resolves the qubits that lost the last DEDUP; does nothing when no DEDUP
/// result is pending.
///
/// PAIR_HUNT mode: each inactive qubit copies its winner's magnitudes and signs
/// and stays inactive. FREE mode: each inactive qubit receives fresh magnitudes
/// and signs derived from a hash of its commit, then all qubits are reactivated
/// and the DEDUP result is cleared.
fn free_alias(st: &mut ISAState) {
    if st.dedup.is_none() {
        return;
    }

    if st.mode_pair_hunt {
        for q in 0..Q {
            if st.active_mask[q] { continue; }
            let winner = st.winner_of[q];
            st.mag_xyz[q] = st.mag_xyz[winner];
            st.base_sign[q] = st.base_sign[winner];
        }
        return;
    }

    for q in 0..Q {
        if st.active_mask[q] { continue; }

        // Deterministic re-seed: hash of the tag, commit, qubit index and next epoch.
        let mut hasher = Blake2s256::new();
        hasher.update(b"NTH_FREE0");
        hasher.update(&st.commits[q]);
        hasher.update(&(q as u16).to_le_bytes());
        hasher.update(&(st.instr_counter + 1).to_le_bytes());
        let digest = hasher.finalize();

        // Bytes 0..3 give the magnitudes (low 6 bits); byte 3 gives the sign bits.
        let sign_bits = digest[3];
        let sign_for = |bit: u8| -> i32 { if (sign_bits & bit) != 0 { 1 } else { -1 } };

        st.mag_xyz[q] = [
            (digest[0] as i32) & 63,
            (digest[1] as i32) & 63,
            (digest[2] as i32) & 63,
        ];
        st.base_sign[q] = [sign_for(1), sign_for(2), sign_for(4)];
    }

    st.active_mask = [true; Q];
    st.winner_of = core::array::from_fn(|i| i);
    st.dedup = None;
}

// Fused stage triggered by PACK in bytecode
/// Fused measurement stage triggered by PACK.
///
/// Recomputes initiators for the masked qubits, then for each selected qubit
/// builds all three views (register, collapse flags, parity rotation, bits, packed
/// word), packs the 90 bits, and derives the commit and tag. When active-only
/// execution applies and a DEDUP result is pending, inactive qubits are skipped
/// and afterwards filled with their winner's results (with their own tag).
fn stage_measure_transcode(st: &mut ISAState, mask: QubitMask) {
    st.initiator = initiator_from_shared_counts(&st.mag_xyz, mask);

    let skip_aliases = st.active_only_allowed() && st.dedup.is_some();
    let epoch = st.instr_counter;

    for q in 0..Q {
        let selected = mask.contains(q) && (!skip_aliases || st.active_mask[q]);
        if !selected { continue; }

        let prim6 = build_primaries_from_nec_for_qubit(st.mag_xyz[q], st.base_sign[q]);

        for v in 0..3 {
            let mut swaps = 0u8;
            let reg30 = build_register30_for_qubit(
                permute_primaries_by_initiator(prim6, v as u8),
                &mut swaps,
            );
            let collapse = detect_collapse_triplet_scatter(&reg30);
            let (rotated, parity) = apply_parity_rotation(&reg30, &collapse);
            let bits = bitmap(&rotated);
            let word = pack30_to_u32(&bits);

            st.reg30_views[q][v] = reg30;
            st.swapcount_xyz[q][v] = swaps;
            st.collapse_views[q][v] = collapse;
            st.rotated_views[q][v] = rotated;
            st.parity_views[q][v] = parity;
            st.bits_views[q][v] = bits;
            st.packed_views_u32[q][v] = word;
        }

        let [w0, w1, w2] = st.packed_views_u32[q];
        let (lo, hi) = pack90(w0, w1, w2);
        st.packed90_u64[q] = [lo, hi];

        let [sx, sy, sz] = st.swapcount_xyz[q];
        let commit = commit_full90(epoch, w0, w1, w2, st.initiator[q], sx, sy, sz);
        st.commits[q] = commit;
        st.tags_u64[q] = tag_u64(&commit, q, epoch);
    }

    // Fill aliases in PAIR_HUNT: skipped qubits mirror their winner's results.
    if skip_aliases && st.mode_pair_hunt {
        for q in 0..Q {
            if !mask.contains(q) || st.active_mask[q] { continue; }

            let src = st.winner_of[q];
            st.reg30_views[q] = st.reg30_views[src];
            st.swapcount_xyz[q] = st.swapcount_xyz[src];
            st.collapse_views[q] = st.collapse_views[src];
            st.parity_views[q] = st.parity_views[src];
            st.rotated_views[q] = st.rotated_views[src];
            st.bits_views[q] = st.bits_views[src];
            st.packed_views_u32[q] = st.packed_views_u32[src];
            st.packed90_u64[q] = st.packed90_u64[src];
            st.commits[q] = st.commits[src];
            // The tag still uses the alias's own index.
            st.tags_u64[q] = tag_u64(&st.commits[src], q, epoch);
            st.initiator[q] = st.initiator[src];
        }
    }
}

/// Executes one decoded instruction against `st` (and `mem` for the memory opcodes).
///
/// The qubits affected by the NEC ops, PACK and the memory ops are those selected
/// by `st.gpr_masks[ins.gpr_mask]` (index clamped to 7). Axis fields are clamped to
/// `0..=2`. All magnitudes are kept in `0..64` by masking with 63.
///
/// Panics if a memory opcode addresses bytes outside `mem`.
fn exec_one(st: &mut ISAState, mem: &mut SimMemory, ins: DecodedInstr) {
    // One xorshift round with shifts 13 / 7 / 17.
    fn xorshift64(mut x: u64) -> u64 {
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        x
    }

    let mask = st.gpr_masks[ins.gpr_mask.min(7)];
    match ins.op {
        Opcode::NecLoad => {
            // Deterministic load: the generator is seeded from the epoch and aux32.
            let mut x = (st.instr_counter as u64) ^ (ins.aux32 as u64) ^ 0x9E3779B97F4A7C15u64;
            for q in 0..Q {
                if !mask.contains(q) { continue; }
                for axis in 0..3 {
                    x = xorshift64(x);
                    st.mag_xyz[q][axis] = (x as i32) & 63;
                }
                x = xorshift64(x);
                let sb = (x as u8) & 7;
                st.base_sign[q][0] = if (sb & 1) != 0 { 1 } else { -1 };
                st.base_sign[q][1] = if (sb & 2) != 0 { 1 } else { -1 };
                st.base_sign[q][2] = if (sb & 4) != 0 { 1 } else { -1 };
            }
            // A load resets scheduling state regardless of the mask.
            st.active_mask = [true; Q];
            st.winner_of = core::array::from_fn(|i| i);
            st.dedup = None;
            st.barrier = false;
        }
        Opcode::NecAlu => {
            let subop = ins.imm8;
            let dst = (ins.dst_axis as usize).min(2);
            let src = (ins.src_axis as usize).min(2);
            let imm = ins.imm32;
            for q in 0..Q {
                if !mask.contains(q) { continue; }
                let a = st.mag_xyz[q][dst];
                let b = st.mag_xyz[q][src];
                let v = match subop {
                    0 => (a + b) & 63,
                    1 => (a - b) & 63,
                    2 => (a * b) & 63,
                    // Division by zero leaves the destination unchanged.
                    3 => if b == 0 { a } else { (a / b) & 63 },
                    4 => (a ^ b) & 63,
                    // imm is an arbitrary i32, so a plain `+` could overflow; wrapping
                    // keeps the low 6 bits (the result mod 64) exact.
                    5 => a.wrapping_add(imm) & 63,
                    // Unknown sub-ops are no-ops.
                    _ => a,
                };
                st.mag_xyz[q][dst] = v;
            }
        }
        Opcode::PhaseFlip => {
            let axis = (ins.dst_axis as usize).min(2);
            for q in 0..Q {
                if !mask.contains(q) { continue; }
                st.base_sign[q][axis] *= -1;
            }
        }
        Opcode::RotAxes => {
            let dir = ins.imm8;
            for q in 0..Q {
                if !mask.contains(q) { continue; }
                let m = st.mag_xyz[q];
                let s = st.base_sign[q];
                if dir == 0 {
                    // R: (x, y, z) <- (z, x, y)
                    st.mag_xyz[q] = [m[2], m[0], m[1]];
                    st.base_sign[q] = [s[2], s[0], s[1]];
                } else {
                    // L: (x, y, z) <- (y, z, x)
                    st.mag_xyz[q] = [m[1], m[2], m[0]];
                    st.base_sign[q] = [s[1], s[2], s[0]];
                }
            }
        }
        Opcode::ShiftMag => {
            let axis = (ins.dst_axis as usize).min(2);
            let sh = ins.imm32;
            // unsigned_abs avoids the overflow of `-sh` when sh == i32::MIN.
            let amount = sh.unsigned_abs();
            for q in 0..Q {
                if !mask.contains(q) { continue; }
                let a = st.mag_xyz[q][axis];
                // A shift of 32 bits or more is not representable with `<<` / `>>`
                // on i32; treat it as shifting every bit out.
                let shifted = if sh >= 0 {
                    a.checked_shl(amount).unwrap_or(0)
                } else {
                    a.checked_shr(amount).unwrap_or(if a < 0 { -1 } else { 0 })
                };
                st.mag_xyz[q][axis] = shifted & 63;
            }
        }
        Opcode::Barrier => {
            st.barrier = true;
        }
        Opcode::Pack => {
            stage_measure_transcode(st, mask);
        }
        Opcode::Dedup => {
            // Efficiency of a qubit is the total swap count over its three views.
            let mut eff = [0u32; Q];
            for q in 0..Q {
                eff[q] = (st.swapcount_xyz[q][0] as u32)
                       + (st.swapcount_xyz[q][1] as u32)
                       + (st.swapcount_xyz[q][2] as u32);
            }
            let (d, am, wo) = dedup(&st.commits, &eff, st.mode_pair_hunt, st.barrier);
            st.dedup = d;
            st.active_mask = am;
            st.winner_of = wo;
        }
        Opcode::FreeAlias => free_alias(st),
        Opcode::Next => {
            st.instr_counter += 1;
            st.barrier = false;
        }
        Opcode::LoadMemU32 => {
            // XOR the memory word into the x magnitude of every masked qubit.
            let addr = ins.aux32 as usize;
            let v = mem.read_u32(addr) as i32;
            for q in 0..Q {
                if !mask.contains(q) { continue; }
                st.mag_xyz[q][0] = (st.mag_xyz[q][0] ^ v) & 63;
            }
        }
        Opcode::StoreMemU32 => {
            // Store view 0's packed word of the first masked qubit, if any.
            let addr = ins.aux32 as usize;
            if let Some(qi) = (0..Q).find(|&q| mask.contains(q)) {
                mem.write_u32(addr, st.packed_views_u32[qi][0]);
            }
        }
    }
}

/// Decodes and executes `bytecode` as a sequence of 16-byte instruction words.
///
/// Panics if the length is not a multiple of 16 or a word fails to decode.
fn exec_program(st: &mut ISAState, mem: &mut SimMemory, bytecode: &[u8]) {
    assert!(bytecode.len() % 16 == 0);
    for word in bytecode.chunks_exact(16) {
        let ins = decode16(word);
        exec_one(st, mem, ins);
    }
}

fn build_state_map(commits: &[[u8;32]; Q]) -> HashMap<[u8;32], Vec<usize>> {
    let mut m: HashMap<[u8;32], Vec<usize>> = HashMap::new();
    for q in 0..Q {
        m.entry(commits[q]).or_insert_with(Vec::new).push(q);
    }
    m
}

/// Demo entry point: runs a two-epoch bytecode program in FREE mode and prints a
/// snapshot of qubit 0 plus commit-group statistics.
fn main() {
    let mut st = ISAState::new();
    let mut mem = SimMemory::new(1024);

    // Mode: FREE by default
    st.mode_pair_hunt = false;
    st.exec_active_only = true;

    // GPR masks: 0 selects every qubit, 1 selects only qubit 0.
    st.gpr_masks[0] = QubitMask::all();
    st.gpr_masks[1] = QubitMask::only(0);

    // Build bytecode program
    let mut bc: Vec<u8> = Vec::new();
    bc.extend_from_slice(&emit(Opcode::NecLoad, 0, 0, 0,0,0, 0, 11));
    bc.extend_from_slice(&emit(Opcode::NecAlu,  0, 0, 0,1,0, 0, 0));     // x += y
    bc.extend_from_slice(&emit(Opcode::NecAlu,  0, 0, 2,0,4, 0, 0));     // z ^= x
    bc.extend_from_slice(&emit(Opcode::ShiftMag,0, 0, 1,0,0,  1, 0));    // y <<= 1
    bc.extend_from_slice(&emit(Opcode::PhaseFlip,0,0,0,0,0, 0, 0));      // flip x
    bc.extend_from_slice(&emit(Opcode::RotAxes, 0, 0, 0,0,0, 0, 0));     // rotate R
    bc.extend_from_slice(&emit(Opcode::Pack,    0, 0, 0,0,0, 0, 0));     // measure/transcode
    bc.extend_from_slice(&emit(Opcode::Dedup,   0, 0, 0,0,0, 0, 0));
    bc.extend_from_slice(&emit(Opcode::FreeAlias,0,0,0,0,0, 0, 0));
    bc.extend_from_slice(&emit(Opcode::Next,    0, 0, 0,0,0, 0, 0));

    bc.extend_from_slice(&emit(Opcode::NecAlu,  0, 0, 0,2,2, 0, 0));     // x *= z
    bc.extend_from_slice(&emit(Opcode::Barrier, 0, 0, 0,0,0, 0, 0));     // barrier
    bc.extend_from_slice(&emit(Opcode::Pack,    0, 0, 0,0,0, 0, 0));
    bc.extend_from_slice(&emit(Opcode::Dedup,   0, 0, 0,0,0, 0, 0));
    bc.extend_from_slice(&emit(Opcode::Next,    0, 0, 0,0,0, 0, 0));

    exec_program(&mut st, &mut mem, &bc);

    println!("=== UniversalISA v0.8.1 Rust Demo ===");
    println!("instr_counter: {}", st.instr_counter);
    println!("mode_pair_hunt: {} exec_active_only: {} barrier: {}", st.mode_pair_hunt, st.exec_active_only, st.barrier);
    println!("q0 NEC mag={:?} sign={:?}", st.mag_xyz[0], st.base_sign[0]);
    println!("q0 packed_views_u32={:?}", st.packed_views_u32[0]);
    println!("q0 packed90_u64={:?}", st.packed90_u64[0]);
    println!("q0 tag_u64={}", st.tags_u64[0]);

    let sm = build_state_map(&st.commits);
    println!("state_groups: {}", sm.len());

    // HashMap iteration order varies between runs, so equally sized groups are
    // tie-broken by their lowest member to keep the printed sample reproducible.
    let mut best_members: Vec<usize> = vec![];
    for members in sm.values() {
        let larger = members.len() > best_members.len();
        let same_size_lower = members.len() == best_members.len()
            && members.first() < best_members.first();
        if larger || same_size_lower {
            best_members = members.clone();
        }
    }
    let best_len = best_members.len();
    println!("largest_group_size: {} members(sample)={:?}", best_len, &best_members[..best_members.len().min(10)]);

    let active_count = st.active_mask.iter().filter(|&&b| b).count();
    println!("active_qubits: {} aliases: {}", active_count, Q - active_count);
}
RS

# Build and run the demo binary in release mode.
cd uisa_v08
# cargo may already be on PATH from a non-rustup install, in which case the env
# file does not exist; sourcing it unconditionally would abort under `set -e`.
if [ -f "$HOME/.cargo/env" ]; then
  source "$HOME/.cargo/env"
fi
cargo run --release

Two-Cell Python-Wrapped Rust Code for Testing

CELL 1 — Build + install the Rust Python module (uisa_v09)

In [None]:
%%bash
set -e

# ---- 0) toolchain prerequisites ----
apt-get update -y >/dev/null
apt-get install -y build-essential pkg-config >/dev/null

# ---- 1) install rust if missing ----
if ! command -v cargo >/dev/null 2>&1; then
  # --proto/--tlsv1.2 restrict the installer download to HTTPS with TLS >= 1.2
  # (the invocation documented by rustup).
  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
fi
# cargo may come from a non-rustup install, in which case the env file does not
# exist; sourcing it unconditionally would abort under `set -e`.
if [ -f "$HOME/.cargo/env" ]; then
  source "$HOME/.cargo/env"
fi

# ---- 2) install maturin ----
python3 -m pip -q install --upgrade pip
python3 -m pip -q install maturin

# ---- 3) write Rust project ----
rm -rf uisa_v09
mkdir -p uisa_v09/src

# Crate manifest: a cdylib built as a PyO3 extension module, plus blake2.
cat > uisa_v09/Cargo.toml <<'TOML'
[package]
name = "uisa_v09"
version = "0.9.0"
edition = "2021"

[lib]
name = "uisa_v09"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.21", features = ["extension-module"] }
blake2 = "0.10"
TOML

cat > uisa_v09/src/lib.rs <<'RS'
use blake2::{Blake2s256, Digest};
use pyo3::prelude::*;
use std::collections::HashMap;

// Number of qubits; the per-qubit arrays of the ISA struct have this length.
const Q: usize = 64;
// Slots in one register, views per qubit, and 3-slot groups per register
// (TRIPLETS * 3 == SLOTS).
const SLOTS: usize = 30;
const VIEWS: usize = 3;
const TRIPLETS: usize = 10;

// NOTE(review): presumably the same near-zero buffer, collapse thresholds and
// ratio limit as in the v0.8 crate - confirm against their uses further down.
const EPS: f32 = 1e-6;
const TAU_HI: f32 = 1.0;
const TAU_LOW: f32 = -1.0;
const R_FOR_RATIO: f32 = 64.0;

// 1 at the prime slot indices in 0..29 (2,3,5,7,11,13,17,19,23,29), 0 elsewhere.
const PRIME_MASK_30: [u8; 30] = [
    0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1
];

/// Returns the three consecutive slot indices that form triplet `t`.
fn triplet_idx(t: usize) -> [usize;3] {
    let base = 3 * t;
    [base, base + 1, base + 2]
}

// 16-byte instruction decode
// Instruction opcodes; the discriminant is the value decode16 matches against
// byte 0 of a 16-byte instruction word.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
enum Opcode {
    NecLoad = 0x01,
    NecAlu  = 0x02,     // imm8: ADD=0,SUB=1,MUL=2,DIV=3,XOR=4, ADDI=5 uses imm32
    PhaseFlip = 0x03,
    RotAxes   = 0x04,   // imm8: 0=R,1=L
    ShiftMag  = 0x05,   // imm32 signed shift
    Barrier   = 0x06,

    // Measurement / scheduling group.
    Pack       = 0x15,  // fused measure/transcode
    Dedup      = 0x17,
    FreeAlias  = 0x18,
    Next       = 0x19,

    // Memory group (named MemLoad/MemStore here).
    MemLoadU32 = 0x20,  // aux32=addr, XOR into x magnitudes
    MemStoreU32= 0x21,  // aux32=addr, store packed_views_u32[q,0] from first qubit
}

// One decoded 16-byte instruction word, as produced by decode16.
// Byte 1 of the word is not decoded into this struct.
#[derive(Clone, Copy)]
struct DecodedInstr {
    // Byte 0: operation selector.
    op: Opcode,
    gpr_mask: u8,       // reserved for v1.0 masks; v0.9 uses mask=all
    // Bytes 3 and 4: destination and source axis.
    dst_axis: u8,
    src_axis: u8,
    // Byte 5: small immediate.
    imm8: u8,
    // Bytes 8..12, little-endian: signed immediate.
    imm32: i32,
    // Bytes 12..16, little-endian: auxiliary word.
    aux32: u32,
}

/// Decodes a single 16-byte instruction word.
///
/// Layout: byte 0 = opcode, byte 2 = gpr_mask, byte 3 = dst_axis,
/// byte 4 = src_axis, byte 5 = imm8, bytes 8..12 = imm32 (LE, signed),
/// bytes 12..16 = aux32 (LE, unsigned).
///
/// Panics when `bytes` is not exactly 16 bytes long or the opcode byte is
/// not a known `Opcode`.
fn decode16(bytes: &[u8]) -> DecodedInstr {
    assert!(bytes.len() == 16);

    let opcode_byte = bytes[0];
    let op = match opcode_byte {
        0x01 => Opcode::NecLoad,
        0x02 => Opcode::NecAlu,
        0x03 => Opcode::PhaseFlip,
        0x04 => Opcode::RotAxes,
        0x05 => Opcode::ShiftMag,
        0x06 => Opcode::Barrier,
        0x15 => Opcode::Pack,
        0x17 => Opcode::Dedup,
        0x18 => Opcode::FreeAlias,
        0x19 => Opcode::Next,
        0x20 => Opcode::MemLoadU32,
        0x21 => Opcode::MemStoreU32,
        unknown => panic!("unknown opcode {}", unknown),
    };

    // Gathers the four bytes of a little-endian word starting at `offset`.
    let word_at = |offset: usize| -> [u8; 4] {
        [bytes[offset], bytes[offset + 1], bytes[offset + 2], bytes[offset + 3]]
    };

    DecodedInstr {
        op,
        gpr_mask: bytes[2],
        dst_axis: bytes[3],
        src_axis: bytes[4],
        imm8: bytes[5],
        imm32: i32::from_le_bytes(word_at(8)),
        aux32: u32::from_le_bytes(word_at(12)),
    }
}

// ===========================
// SimMemory (used in Colab tests)
// ===========================
/// Flat, zero-initialised byte buffer that stands in for memory.
#[derive(Clone)]
struct SimMemory { buf: Vec<u8> }

impl SimMemory {
    /// Allocates `size` zeroed bytes.
    fn new(size: usize) -> Self {
        let buf = vec![0u8; size];
        Self { buf }
    }

    /// Reads a little-endian u32 at byte offset `addr`.
    /// Panics if `addr..addr+4` is not inside the buffer.
    fn read_u32(&self, addr: usize) -> u32 {
        let mut word = [0u8; 4];
        word.copy_from_slice(&self.buf[addr..addr+4]);
        u32::from_le_bytes(word)
    }

    /// Writes `v` as a little-endian u32 at byte offset `addr`.
    /// Panics if `addr..addr+4` is not inside the buffer.
    fn write_u32(&mut self, addr: usize, v: u32) {
        let target = &mut self.buf[addr..addr+4];
        target.copy_from_slice(&v.to_le_bytes());
    }
}

// ===========================
// ISA core state (Rust-native)
// ===========================
// Complete machine state for one run: control flags, per-qubit NEC state,
// dedup scheduling, and the values produced by the PACK stage.
#[derive(Clone)]
struct ISA {
    instr_counter: u32,      // advanced by Next; mixed into commit hashes
    mode_pair_hunt: bool,    // true for "PAIR_HUNT", false for "FREE"
    exec_active_only: bool,  // request to skip aliased qubits during PACK
    barrier: bool,           // set by Barrier; cleared by Next and NecLoad

    // NEC state
    mag_xyz: [[i32;3]; Q],   // per-qubit magnitude per axis; writers mask with & 63
    base_sign: [[i32;3]; Q], // per-qubit sign per axis, stored as +1 / -1

    // scheduling
    active_mask: [bool; Q],  // false marks a qubit that lost its dedup group
    winner_of: [usize; Q],   // index of the winning qubit of each qubit's group

    // derived
    initiator: [u8; Q],              // axis index chosen by initiator_from_shared_counts
    swapcount_xyz: [[u8;3]; Q],      // add/sub swap count per view
    packed_views_u32: [[u32;3]; Q],  // 30-bit bitmap per view
    packed90_u64: [[u64;2]; Q],      // the three views packed into (low, high) words
    commits: [[u8;32]; Q],           // Blake2s commit per qubit
}

impl ISA {
    /// Builds the initial state: zero magnitudes, all signs +1, every qubit
    /// active and its own winner, and all derived outputs zeroed.
    fn new(mode_pair_hunt: bool, exec_active_only: bool) -> Self {
        let self_winners: [usize; Q] = core::array::from_fn(|q| q);
        Self {
            // control
            mode_pair_hunt,
            exec_active_only,
            instr_counter: 0,
            barrier: false,
            // NEC state
            mag_xyz: [[0;3]; Q],
            base_sign: [[1;3]; Q],
            // scheduling
            active_mask: [true; Q],
            winner_of: self_winners,
            // derived outputs
            initiator: [0u8; Q],
            swapcount_xyz: [[0u8;3]; Q],
            packed_views_u32: [[0u32;3]; Q],
            packed90_u64: [[0u64;2]; Q],
            commits: [[0u8;32]; Q],
        }
    }

    /// True when PACK may skip aliased qubits: active-only execution was
    /// requested, the mode is PAIR_HUNT, and no barrier is raised.
    fn active_only_allowed(&self) -> bool {
        if self.barrier {
            return false;
        }
        self.exec_active_only && self.mode_pair_hunt
    }
}

// ===========================
// Math / Nth pipeline primitives
// ===========================
fn initiator_from_shared_counts(mag: &[[i32;3]; Q]) -> [u8; Q] {
    let mut init = [0u8; Q];
    let mut counts = [[0i32;3]; Q];

    for axis in 0..3 {
        for q in 0..Q {
            let v = mag[q][axis];
            let mut c = 0i32;
            for k in 0..Q {
                if mag[k][axis] == v { c += 1; }
            }
            counts[q][axis] = c;
        }
    }

    for q in 0..Q {
        let cx = counts[q][0];
        let cy = counts[q][1];
        let cz = counts[q][2];
        let shared = [cx>1, cy>1, cz>1];
        let shared_count = shared.iter().filter(|&&b| b).count();

        let argmax_counts = {
            let mut best = 0usize;
            let mut bestv = counts[q][0];
            for a in 1..3 {
                if counts[q][a] > bestv { best=a; bestv=counts[q][a]; }
            }
            best as u8
        };

        if shared_count == 1 {
            init[q] = argmax_counts;
        } else if shared_count == 2 {
            let mut odd = 0usize;
            for a in 0..3 { if counts[q][a] == 1 { odd=a; break; } }
            init[q] = odd as u8;
        } else if shared_count == 3 {
            init[q] = argmax_counts;
        } else {
            let v0 = mag[q][0]*1000 + 2;
            let v1 = mag[q][1]*1000 + 1;
            let v2 = mag[q][2]*1000 + 0;
            let mut best = 0usize;
            let mut bestv = v0;
            if v1 > bestv { best=1; bestv=v1; }
            if v2 > bestv { best=2; bestv=v2; }
            init[q] = best as u8;
        }
    }

    init
}

/// Expands one qubit's three magnitudes into six (real, unreal) primaries,
/// two per axis. For magnitude `m` the pair is `[m, -m]` then `[-m, m]`;
/// a negative sign on the axis swaps the order of the two.
fn build_primaries_from_nec_for_qubit(mag: [i32;3], sign: [i32;3]) -> [[f32;2]; 6] {
    let mut primaries = [[0.0f32;2]; 6];
    for (axis, (&magnitude, &axis_sign)) in mag.iter().zip(sign.iter()).enumerate() {
        let m = magnitude as f32;
        let positive = [ m, -m];
        let negative = [-m,  m];
        let ordered = if axis_sign < 0 {
            [negative, positive]
        } else {
            [positive, negative]
        };
        primaries[2*axis] = ordered[0];
        primaries[2*axis + 1] = ordered[1];
    }
    primaries
}

/// Rotates the six primaries so the chosen axis' pair comes first.
/// Axis 0 keeps the order, axis 1 starts at slot 2, any other value starts
/// at slot 4; the remaining pairs follow cyclically.
fn permute_primaries_by_initiator(prim6: [[f32;2];6], init_axis: u8) -> [[f32;2];6] {
    let start = match init_axis {
        0 => 0usize,
        1 => 2,
        _ => 4,
    };
    core::array::from_fn(|slot| prim6[(slot + start) % 6])
}

/// Component-wise sum of two (real, unreal) pairs.
fn add_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let ([ar, au], [br, bu]) = (a, b);
    [ar + br, au + bu]
}

/// Component-wise difference `a - b` of two (real, unreal) pairs.
fn sub_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let ([ar, au], [br, bu]) = (a, b);
    [ar - br, au - bu]
}

/// Component-wise product of two (real, unreal) pairs.
fn mul_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let ([ar, au], [br, bu]) = (a, b);
    [ar * br, au * bu]
}
/// Component-wise quotient `a / b`; a component whose divisor magnitude is
/// not greater than `EPS` yields 0 instead of dividing.
fn div_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let guarded = |num: f32, den: f32| -> f32 {
        if den.abs() > EPS { num / den } else { 0.0f32 }
    };
    [guarded(a[0], b[0]), guarded(a[1], b[1])]
}

// REG30 build (10 triplets) with add/sub-only canonicalization
/// Builds the 30-slot register for one view of a qubit.
///
/// Slots 0..6 hold the six primaries unchanged. Each of the eight operand
/// pairings then contributes three slots: the sum and difference (ordered so
/// the one with the larger real part comes first, sum winning ties) followed
/// by either the product (first four pairings) or the guarded quotient
/// (last four). `swap_sum_out` receives how many pairings placed the
/// difference first.
fn build_register30_for_qubit(prim6: [[f32;2];6], swap_sum_out: &mut u8) -> [[f32;2]; SLOTS] {
    let [a0, a1, b0, b1, c0, c1] = prim6;

    // (left operand, right operand, third slot uses division)
    let pairings: [([f32;2],[f32;2],bool);8] = [
        (a0,b0,false), (b0,c0,false), (a0,c0,false), (a1,b1,false),
        (a0,b1,true),  (b0,c1,true),  (a1,c0,true),  (b1,c1,true),
    ];

    let mut reg = [[0.0f32;2]; SLOTS];
    reg[..6].copy_from_slice(&prim6);

    let mut swaps = 0u8;
    for (k, (lhs, rhs, divide)) in pairings.into_iter().enumerate() {
        let base = 6 + 3*k;
        let sum = add_pd(lhs, rhs);
        let diff = sub_pd(lhs, rhs);

        if diff[0] > sum[0] {
            reg[base] = diff;
            reg[base + 1] = sum;
            swaps += 1;
        } else {
            reg[base] = sum;
            reg[base + 1] = diff;
        }
        reg[base + 2] = if divide { div_pd(lhs, rhs) } else { mul_pd(lhs, rhs) };
    }

    *swap_sum_out = swaps;
    reg
}

fn detect_collapse_triplet_scatter(pairs: &[[f32;2]; SLOTS]) -> [u8; SLOTS] {
    let mut individual = [0u8; SLOTS];
    for i in 0..SLOTS {
        let real = pairs[i][0];
        let unreal = pairs[i][1];
        let cond1 = (real >= TAU_HI) && (unreal <= TAU_LOW);
        let ratio = if unreal.abs() > EPS { real / unreal } else { 0.0 };
        let cond2 = ratio > R_FOR_RATIO;
        individual[i] = if cond1 || cond2 { 1 } else { 0 };
    }

    let mut final_mask = individual;
    for t in 0..TRIPLETS {
        let idx = triplet_idx(t);
        let a = individual[idx[0]];
        let b = individual[idx[1]];
        let c = individual[idx[2]];
        let uniform = (a==b) && (b==c);
        if uniform {
            final_mask[idx[0]] = a;
            final_mask[idx[1]] = a;
            final_mask[idx[2]] = a;
        } else {
            final_mask[idx[0]] = a;
            final_mask[idx[1]] = b;
            final_mask[idx[2]] = c;
        }
    }
    final_mask
}

/// Negates both components of every slot that sits on a prime index or is
/// flagged as collapsed. Returns the rotated pairs and the 0/1 parity mask
/// of the slots that were negated.
fn apply_parity_rotation(pairs: &[[f32;2]; SLOTS], collapse: &[u8; SLOTS]) -> ([[f32;2]; SLOTS], [u8; SLOTS]) {
    let parity: [u8; SLOTS] = core::array::from_fn(|i| {
        u8::from((PRIME_MASK_30[i] > 0) || (collapse[i] > 0))
    });
    let rotated: [[f32;2]; SLOTS] = core::array::from_fn(|i| {
        let factor = if parity[i] != 0 { -1.0f32 } else { 1.0f32 };
        [pairs[i][0]*factor, pairs[i][1]*factor]
    });
    (rotated, parity)
}

/// Thresholds each slot to one bit: 1 when the real component exceeds `EPS`.
fn bitmap(rotated: &[[f32;2]; SLOTS]) -> [u8; SLOTS] {
    core::array::from_fn(|slot| u8::from(rotated[slot][0] > EPS))
}

/// Packs the first 30 entries of `bits` into a u32, entry `i` landing at
/// bit position `i`.
fn pack30_to_u32(bits: &[u8; SLOTS]) -> u32 {
    bits.iter()
        .take(30)
        .enumerate()
        .fold(0u32, |word, (i, &bit)| word | ((bit as u32) << i))
}

/// Packs three 30-bit words into a 90-bit value returned as `(low, high)`.
///
/// Each input is truncated to its low 30 bits. `low` holds `b0` in bits
/// 0..30, `b1` in bits 30..60 and the low 4 bits of `b2` in bits 60..64;
/// `high` holds the remaining 26 bits of `b2`.
fn pack90(b0: u32, b1: u32, b2: u32) -> (u64,u64) {
    const MASK30: u64 = (1u64 << 30) - 1;
    let w0 = u64::from(b0) & MASK30;
    let w1 = u64::from(b1) & MASK30;
    let w2 = u64::from(b2) & MASK30;

    let low = w0 | (w1 << 30) | ((w2 & 0xF) << 60);
    let high = w2 >> 4;
    (low, high)
}

/// Computes the 32-byte Blake2s commit for one qubit.
///
/// The hash input is, in order: the tag `NTHISA90`, the instruction counter,
/// the three packed view words (all little-endian), then the four bytes
/// `initiator, scx, scy, scz`.
fn commit_full90(instr_counter: u32, w0: u32, w1: u32, w2: u32, initiator: u8, scx: u8, scy: u8, scz: u8) -> [u8;32] {
    let digest = Blake2s256::new()
        .chain_update(b"NTHISA90")
        .chain_update(instr_counter.to_le_bytes())
        .chain_update(w0.to_le_bytes())
        .chain_update(w1.to_le_bytes())
        .chain_update(w2.to_le_bytes())
        .chain_update([initiator, scx, scy, scz])
        .finalize();

    let mut commit = [0u8;32];
    commit.copy_from_slice(&digest[..]);
    commit
}

/// Derives a 64-bit tag from a commit: the first 8 bytes (little-endian) of
/// Blake2s over `NTH_TAG0`, the commit, `q` as a little-endian u16, and the
/// little-endian instruction counter.
fn tag_u64(commit: &[u8;32], q: usize, instr_counter: u32) -> u64 {
    let digest = Blake2s256::new()
        .chain_update(b"NTH_TAG0")
        .chain_update(commit)
        .chain_update((q as u16).to_le_bytes())
        .chain_update(instr_counter.to_le_bytes())
        .finalize();

    let mut head = [0u8; 8];
    head.copy_from_slice(&digest[..8]);
    u64::from_le_bytes(head)
}

/// Summary of one dedup pass.
#[derive(Clone, Debug)]
struct DedupInfo {
    collision_qubits: u32, // qubits beyond the first in each commit group
    groups: u32,           // number of distinct commits
    active: u32,           // qubits left active (one per group)
    freed: u32,            // losing qubits, counted only outside PAIR_HUNT
}

/// Groups qubits by identical commit and elects one winner per group.
///
/// The winner is the member with the lowest efficiency value, the lowest
/// qubit index breaking ties. Every other member is marked inactive and
/// points at the winner via the returned `winner_of` array.
///
/// With `barrier` set, nothing is grouped: the result is `None` together
/// with an all-active mask and the identity `winner_of` mapping.
fn dedup(commits: &[[u8;32]; Q], efficiency: &[u32; Q], mode_pair_hunt: bool, barrier: bool)
    -> (Option<DedupInfo>, [bool; Q], [usize; Q])
{
    let identity: [usize; Q] = core::array::from_fn(|q| q);
    if barrier {
        return (None, [true; Q], identity);
    }

    let mut by_commit: HashMap<[u8;32], Vec<usize>> = HashMap::new();
    for (q, commit) in commits.iter().enumerate() {
        by_commit.entry(*commit).or_default().push(q);
    }

    let mut winner_of = identity;
    let mut active_mask = [true; Q];
    let mut freed = 0u32;
    let mut collisions = 0u32;

    for members in by_commit.values() {
        // Every group holds at least the qubit that created it.
        let losers = (members.len() as u32) - 1;
        collisions += losers;
        if !mode_pair_hunt {
            freed += losers;
        }

        let winner = members
            .iter()
            .copied()
            .min_by_key(|&q| (efficiency[q], q))
            .expect("commit group is never empty");

        for &q in members {
            winner_of[q] = winner;
            if q != winner {
                active_mask[q] = false;
            }
        }
    }

    let active = active_mask.iter().filter(|&&is_active| is_active).count() as u32;
    let info = DedupInfo {
        collision_qubits: collisions,
        groups: by_commit.len() as u32,
        active,
        freed,
    };
    (Some(info), active_mask, winner_of)
}

/// Rewrites the NEC state of every inactive (aliased) qubit.
///
/// * PAIR_HUNT: the alias copies its winner's magnitudes and signs; the
///   schedule is left untouched.
/// * FREE: the alias gets fresh magnitudes (0..=63) and signs derived from a
///   Blake2s hash of `NTH_FREE0`, its commit, its index and the next
///   instruction counter; afterwards all qubits become active again and
///   each is its own winner.
fn free_alias(st: &mut ISA) {
    if st.mode_pair_hunt {
        for q in 0..Q {
            if st.active_mask[q] { continue; }
            let winner = st.winner_of[q];
            st.mag_xyz[q] = st.mag_xyz[winner];
            st.base_sign[q] = st.base_sign[winner];
        }
        return;
    }

    // +1 when the selected bit is set, -1 otherwise.
    let sign_from = |bits: u8, mask: u8| -> i32 { if (bits & mask) != 0 { 1 } else { -1 } };

    for q in 0..Q {
        if st.active_mask[q] { continue; }

        let digest = Blake2s256::new()
            .chain_update(b"NTH_FREE0")
            .chain_update(&st.commits[q])
            .chain_update((q as u16).to_le_bytes())
            .chain_update((st.instr_counter + 1).to_le_bytes())
            .finalize();

        st.mag_xyz[q] = [
            (digest[0] as i32) & 63,
            (digest[1] as i32) & 63,
            (digest[2] as i32) & 63,
        ];
        let sign_bits = digest[3];
        st.base_sign[q] = [
            sign_from(sign_bits, 1),
            sign_from(sign_bits, 2),
            sign_from(sign_bits, 4),
        ];
    }

    // reset epoch to all active
    st.active_mask = [true; Q];
    st.winner_of = core::array::from_fn(|i| i);
}

// Fused measurement/transcode stage (called by PACK)
/// PACK stage: recomputes initiators, then for every processed qubit builds
/// the three view registers, packs them into bitmaps and the 90-bit word,
/// and hashes the commit.
///
/// When active-only execution is allowed, inactive qubits are skipped and
/// afterwards inherit all derived outputs from their winner.
fn stage_measure_transcode(st: &mut ISA) {
    st.initiator = initiator_from_shared_counts(&st.mag_xyz);

    let active_only = st.active_only_allowed();

    for q in 0..Q {
        if active_only && !st.active_mask[q] { continue; }

        let primaries = build_primaries_from_nec_for_qubit(st.mag_xyz[q], st.base_sign[q]);

        for view in 0..3 {
            let permuted = permute_primaries_by_initiator(primaries, view as u8);
            let mut swaps = 0u8;
            let register = build_register30_for_qubit(permuted, &mut swaps);
            st.swapcount_xyz[q][view] = swaps;

            let collapse = detect_collapse_triplet_scatter(&register);
            let (rotated, _parity) = apply_parity_rotation(&register, &collapse);
            st.packed_views_u32[q][view] = pack30_to_u32(&bitmap(&rotated));
        }

        let [w0, w1, w2] = st.packed_views_u32[q];
        let (lo, hi) = pack90(w0, w1, w2);
        st.packed90_u64[q] = [lo, hi];

        let [sx, sy, sz] = st.swapcount_xyz[q];
        st.commits[q] = commit_full90(st.instr_counter, w0, w1, w2, st.initiator[q], sx, sy, sz);
    }

    // Fill aliases in PAIR_HUNT if active-only compute
    if active_only && st.mode_pair_hunt {
        for q in 0..Q {
            if st.active_mask[q] { continue; }
            let winner = st.winner_of[q];
            st.packed_views_u32[q] = st.packed_views_u32[winner];
            st.packed90_u64[q] = st.packed90_u64[winner];
            st.swapcount_xyz[q] = st.swapcount_xyz[winner];
            st.commits[q] = st.commits[winner];
            st.initiator[q] = st.initiator[winner];
        }
    }
}

// ===========================
// Execution
// ===========================
/// Executes one decoded instruction against the machine state and memory.
///
/// Axis operands are clamped to 0..=2 and every magnitude written is masked
/// to 6 bits (`& 63`).
///
/// Panics if a `MemLoadU32` / `MemStoreU32` address does not leave four
/// bytes inside `mem`.
fn exec_one(st: &mut ISA, mem: &mut SimMemory, ins: DecodedInstr) {
    match ins.op {
        Opcode::NecLoad => {
            // deterministic NEC fill based on aux32 + instr_counter
            let mut x = (st.instr_counter as u64) ^ (ins.aux32 as u64) ^ 0x9E3779B97F4A7C15u64;
            for q in 0..Q {
                // xorshift-like
                x ^= x << 13; x ^= x >> 7; x ^= x << 17;
                st.mag_xyz[q][0] = (x as i32) & 63;
                x ^= x << 13; x ^= x >> 7; x ^= x << 17;
                st.mag_xyz[q][1] = (x as i32) & 63;
                x ^= x << 13; x ^= x >> 7; x ^= x << 17;
                st.mag_xyz[q][2] = (x as i32) & 63;
                x ^= x << 13; x ^= x >> 7; x ^= x << 17;
                let sb = (x as u8) & 7;
                st.base_sign[q][0] = if (sb & 1)!=0 { 1 } else { -1 };
                st.base_sign[q][1] = if (sb & 2)!=0 { 1 } else { -1 };
                st.base_sign[q][2] = if (sb & 4)!=0 { 1 } else { -1 };
            }
            // A fresh load starts a new epoch: everyone active, no barrier.
            st.active_mask = [true; Q];
            st.winner_of = core::array::from_fn(|i| i);
            st.barrier = false;
        }
        Opcode::NecAlu => {
            let subop = ins.imm8;
            let dst = (ins.dst_axis as usize).min(2);
            let src = (ins.src_axis as usize).min(2);
            let imm = ins.imm32;
            for q in 0..Q {
                let a = st.mag_xyz[q][dst];
                let b = st.mag_xyz[q][src];
                let v = match subop {
                    0 => (a + b) & 63,
                    1 => (a - b) & 63,
                    2 => (a * b) & 63,
                    // Division by zero leaves the destination unchanged.
                    3 => if b==0 { a } else { (a / b) & 63 },
                    4 => (a ^ b) & 63,
                    // imm is an arbitrary program-supplied i32; wrap instead
                    // of overflowing (the low 6 bits are unaffected).
                    5 => a.wrapping_add(imm) & 63,
                    _ => a,
                };
                st.mag_xyz[q][dst] = v;
            }
        }
        Opcode::PhaseFlip => {
            let axis = (ins.dst_axis as usize).min(2);
            for q in 0..Q { st.base_sign[q][axis] *= -1; }
        }
        Opcode::RotAxes => {
            let dir = ins.imm8;
            for q in 0..Q {
                let m = st.mag_xyz[q];
                let s = st.base_sign[q];
                if dir == 0 {
                    // rotate right: (x, y, z) <- (z, x, y)
                    st.mag_xyz[q] = [m[2], m[0], m[1]];
                    st.base_sign[q] = [s[2], s[0], s[1]];
                } else {
                    // rotate left: (x, y, z) <- (y, z, x)
                    st.mag_xyz[q] = [m[1], m[2], m[0]];
                    st.base_sign[q] = [s[1], s[2], s[0]];
                }
            }
        }
        Opcode::ShiftMag => {
            let axis = (ins.dst_axis as usize).min(2);
            let sh = ins.imm32;
            // unsigned_abs avoids the overflow of `-sh` for i32::MIN.
            let amount = sh.unsigned_abs();
            for q in 0..Q {
                let a = st.mag_xyz[q][axis];
                // A shift by >= 32 bits is an overflow for i32 (panic in
                // debug builds, shift amount wrapped in release builds).
                // Left shifts that large clear the value; right shifts are
                // clamped to 31, which is already the full arithmetic shift.
                let shifted = if sh >= 0 {
                    a.checked_shl(amount).unwrap_or(0)
                } else {
                    a >> amount.min(31)
                };
                st.mag_xyz[q][axis] = shifted & 63;
            }
        }
        Opcode::Barrier => st.barrier = true,
        Opcode::Pack => stage_measure_transcode(st),
        Opcode::Dedup => {
            if st.barrier {
                // Barrier suppresses dedup for this epoch.
                st.active_mask = [true; Q];
                st.winner_of = core::array::from_fn(|i| i);
            } else {
                // Efficiency of a qubit = total swap count over its three views.
                let mut eff = [0u32; Q];
                for q in 0..Q {
                    eff[q] = (st.swapcount_xyz[q][0] as u32) + (st.swapcount_xyz[q][1] as u32) + (st.swapcount_xyz[q][2] as u32);
                }
                let (_info, am, wo) = dedup(&st.commits, &eff, st.mode_pair_hunt, st.barrier);
                st.active_mask = am;
                st.winner_of = wo;
            }
        }
        Opcode::FreeAlias => free_alias(st),
        Opcode::Next => { st.instr_counter += 1; st.barrier = false; }
        Opcode::MemLoadU32 => {
            let addr = ins.aux32 as usize;
            let v = mem.read_u32(addr) as i32;
            for q in 0..Q { st.mag_xyz[q][0] = (st.mag_xyz[q][0] ^ v) & 63; }
        }
        Opcode::MemStoreU32 => {
            let addr = ins.aux32 as usize;
            // Stores view 0 of qubit 0 only.
            mem.write_u32(addr, st.packed_views_u32[0][0]);
        }
    }
}

/// Decodes and executes `bytecode` one 16-byte instruction at a time, in
/// order. Panics if the length is not a multiple of 16.
fn exec_program(st: &mut ISA, mem: &mut SimMemory, bytecode: &[u8]) {
    assert!(bytecode.len() % 16 == 0);
    for word in bytecode.chunks_exact(16) {
        let decoded = decode16(word);
        exec_one(st, mem, decoded);
    }
}

fn state_map(commits: &[[u8;32]; Q]) -> usize {
    let mut m: HashMap<[u8;32], u32> = HashMap::new();
    for q in 0..Q {
        *m.entry(commits[q]).or_insert(0) += 1;
    }
    m.len()
}

// ===========================
// Python-exposed API
// ===========================
/// Runs a bytecode program on a fresh machine and returns its final state
/// as a Python dict.
///
/// * `bytecode` - instruction stream; its length must be a multiple of 16.
/// * `mode` - `"FREE"` or `"PAIR_HUNT"`.
/// * `exec_active_only` - request active-only execution of the PACK stage.
/// * `mem_size` - simulated memory size in bytes (at least 64 are allocated).
///
/// Raises `ValueError` for a bad bytecode length or an unknown mode.
///
/// Returned keys: `instr_counter`, `mode_pair_hunt`, `barrier`,
/// `state_groups`, `packed_views_u32_flat` (3 per qubit),
/// `packed90_u64_flat` (2 per qubit), `initiator`, `active_mask`,
/// `winner_of`, `commits_bytes` (32 per qubit) and `mem_u32_0`.
#[pyfunction]
fn run_uisa(bytecode: Vec<u8>, mode: &str, exec_active_only: bool, mem_size: usize) -> PyResult<PyObject> {
    if bytecode.len() % 16 != 0 {
        return Err(pyo3::exceptions::PyValueError::new_err("bytecode length must be multiple of 16"));
    }
    let mode_pair_hunt = match mode {
        "FREE" => false,
        "PAIR_HUNT" => true,
        _ => return Err(pyo3::exceptions::PyValueError::new_err("mode must be 'FREE' or 'PAIR_HUNT'")),
    };

    let mut st = ISA::new(mode_pair_hunt, exec_active_only);
    let mut mem = SimMemory::new(mem_size.max(64));

    exec_program(&mut st, &mut mem, &bytecode);

    // The reported initiators are recomputed from the final magnitudes
    // (for return stability). This is done once here rather than once per
    // qubit inside the loop below; the result is identical.
    let initiator: Vec<u8> = initiator_from_shared_counts(&st.mag_xyz).to_vec();

    // Flatten outputs into Python-friendly vectors
    let mut packed_views: Vec<u32> = Vec::with_capacity(Q*3);
    let mut packed90: Vec<u64> = Vec::with_capacity(Q*2);
    let mut active: Vec<u8> = Vec::with_capacity(Q);
    let mut winner_of: Vec<u32> = Vec::with_capacity(Q);
    let mut commits: Vec<u8> = Vec::with_capacity(Q*32);

    for q in 0..Q {
        packed_views.extend_from_slice(&st.packed_views_u32[q]);
        packed90.extend_from_slice(&st.packed90_u64[q]);

        active.push(if st.active_mask[q] { 1 } else { 0 });
        winner_of.push(st.winner_of[q] as u32);

        commits.extend_from_slice(&st.commits[q]);
    }

    let groups = state_map(&st.commits);

    Python::with_gil(|py| {
        let dict = pyo3::types::PyDict::new(py);
        dict.set_item("instr_counter", st.instr_counter)?;
        dict.set_item("mode_pair_hunt", st.mode_pair_hunt)?;
        dict.set_item("barrier", st.barrier)?;
        dict.set_item("state_groups", groups)?;

        dict.set_item("packed_views_u32_flat", packed_views)?;
        dict.set_item("packed90_u64_flat", packed90)?;
        dict.set_item("initiator", initiator)?;
        dict.set_item("active_mask", active)?;
        dict.set_item("winner_of", winner_of)?;
        dict.set_item("commits_bytes", commits)?;

        // also return a small memory readback for debugging
        dict.set_item("mem_u32_0", mem.read_u32(0))?;
        Ok(dict.into())
    })
}

// Python module initialiser: registers `run_uisa` as the single function
// exported by the `uisa_v09` extension module.
#[pymodule]
fn uisa_v09(_py: Python, m: &PyModule) -> PyResult<()> {
    m.add_function(wrap_pyfunction!(run_uisa, m)?)?;
    Ok(())
}
RS

# ---- 4) build wheel and install ----
cd uisa_v09
# Reload cargo's PATH additions when the rustup env file is present. The
# guard avoids failing on a missing file and the quotes keep a HOME that
# contains whitespace intact.
if [ -f "$HOME/.cargo/env" ]; then
  source "$HOME/.cargo/env"
fi
maturin build -q --release
python3 -m pip -q install target/wheels/uisa_v09-*.whl
echo "Built and installed uisa_v09."


  stable-x86_64-unknown-linux-gnu installed - rustc 1.92.0 (ded5c06cf 2025-12-08)


Rust is installed now. Great!

To get started you may need to restart your current shell.
This would reload your PATH environment variable to include
Cargo's bin directory ($HOME/.cargo/bin).

To configure your current shell, you need to source
the corresponding env file under $HOME/.cargo.

This is usually done by running one of the following (note the leading DOT):
. "$HOME/.cargo/env"            # For sh/bash/zsh/ash/dash/pdksh
source "$HOME/.cargo/env.fish"  # For fish
source $"($nu.home-path)/.cargo/env.nu"  # For nushell
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 17.2 MB/s eta 0:00:00
Built and installed uisa_v09.


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
info: downloading installer
info: profile set to 'default'
info: default host triple is x86_64-unknown-linux-gnu
info: syncing channel updates for 'stable-x86_64-unknown-linux-gnu'
info: latest update on 2025-12-11, rust version 1.92.0 (ded5c06cf 2025-12-08)
info: downloading component 'cargo'
info: downloading component 'clippy'
info: downloading component 'rust-docs'
info: downloading component 'rust-std'
info: downloading component 'rustc'
info: downloading component 'rustfmt'
info: installing component 'cargo'
info: installing component 'clippy'
info: installing component 'rust-docs'
info: installing component 'rust-std'
info: installing component 'rustc'
info: installing component 'rustfmt'
info: default toolchain set to 'stable-x86_64-unknown-linux-gnu'
    Updating crates.io index
     Locking

CELL 2 — Python harness: assemble bytecode, run FREE and PAIR_HUNT, inspect outputs

In [None]:
import struct
import numpy as np
import uisa_v09

# canonical 16-byte encoding (must match Rust decode16 layout)
def emit(op, flags=0, gpr_mask=0, dst=0, src=0, imm8=0, imm32=0, aux32=0):
    """Encode one instruction as a 16-byte word.

    Bytes 0-5 hold ``op``, ``flags``, ``gpr_mask``, ``dst``, ``src`` and
    ``imm8``; bytes 6-7 stay zero; bytes 8-11 hold ``imm32`` as a
    little-endian signed int and bytes 12-15 hold ``aux32`` as a
    little-endian unsigned int.
    """
    word = bytearray(16)
    word[0:6] = bytes((op, flags, gpr_mask, dst, src, imm8))
    struct.pack_into("<iI", word, 8, imm32, aux32)
    return bytes(word)

# opcodes (match Rust enum values)
# Each value is written to byte 0 of an instruction word by emit().
OP_NEC_LOAD   = 0x01
OP_NEC_ALU    = 0x02
OP_PHASE_FLIP = 0x03
OP_ROT_AXES   = 0x04
OP_SHIFT_MAG  = 0x05
OP_BARRIER    = 0x06
OP_PACK       = 0x15
OP_DEDUP      = 0x17
OP_FREE_ALIAS = 0x18
OP_NEXT       = 0x19

# Memory opcodes; the address is passed in aux32.
OP_MEM_LOAD_U32  = 0x20
OP_MEM_STORE_U32 = 0x21

# subops for NEC_ALU (imm8)
# ALU_ADDI takes its operand from imm32 instead of the source axis.
ALU_ADD = 0
ALU_SUB = 1
ALU_MUL = 2
ALU_DIV = 3
ALU_XOR = 4
ALU_ADDI= 5

# Build a demo bytecode program similar to v0.7
# Two epochs, each closed by OP_NEXT: the first packs, dedups and frees
# aliases; the second raises a barrier before packing.
bc = b"".join([
    emit(OP_NEC_LOAD, aux32=11),                    # seed the NEC state (aux32 is the seed)

    emit(OP_NEC_ALU, dst=0, src=1, imm8=ALU_ADD),   # x += y
    emit(OP_NEC_ALU, dst=2, src=0, imm8=ALU_XOR),   # z ^= x
    emit(OP_SHIFT_MAG, dst=1, imm32=1),             # y <<= 1
    emit(OP_PHASE_FLIP, dst=0),                     # flip x
    emit(OP_ROT_AXES, imm8=0),                      # rotate R

    emit(OP_PACK),                                  # measure/transcode -> views + commits
    emit(OP_DEDUP),                                 # group equal commits, pick winners
    emit(OP_FREE_ALIAS),                            # rewrite the losing qubits
    emit(OP_NEXT),                                  # advance counter, clear barrier

    emit(OP_NEC_ALU, dst=0, src=2, imm8=ALU_MUL),   # x *= z
    emit(OP_BARRIER),                               # suppress dedup this epoch
    emit(OP_PACK),
    emit(OP_DEDUP),
    emit(OP_NEXT),
])

def reshape_outputs(out):
    """Convert the flat lists returned by ``run_uisa`` into per-qubit data.

    Returns ``(packed_views, packed90, initiator, active, winner, commits)``:
    a (64, 3) uint32 array, a (64, 2) uint64 array, three length-64 arrays
    (uint8, uint8, uint32) and a list of 64 commit ``bytes`` of 32 bytes each.
    """
    def as_array(key, dtype):
        return np.array(out[key], dtype=dtype)

    packed_views = as_array("packed_views_u32_flat", np.uint32).reshape(64, 3)
    packed90 = as_array("packed90_u64_flat", np.uint64).reshape(64, 2)
    initiator = as_array("initiator", np.uint8)
    active = as_array("active_mask", np.uint8)
    winner = as_array("winner_of", np.uint32)

    raw_commits = bytes(out["commits_bytes"])
    commits = [raw_commits[start:start + 32] for start in range(0, 64 * 32, 32)]
    return packed_views, packed90, initiator, active, winner, commits

# Run the same program in both dedup modes and print a short summary of each.
for mode in ("FREE", "PAIR_HUNT"):
    out = uisa_v09.run_uisa(
        bytecode=list(bc),
        mode=mode,
        exec_active_only=True,
        mem_size=1024,
    )
    packed_views, packed90, initiator, active, winner, commits = reshape_outputs(out)
    active_total = int(active.sum())

    print("\n=== v0.9 Rust module run ===")
    print(
        "mode:", mode,
        "instr_counter:", out["instr_counter"],
        "barrier:", out["barrier"],
        "state_groups:", out["state_groups"],
    )
    print("packed_views_u32[0]:", packed_views[0].tolist())
    print("packed90_u64[0]:", packed90[0].tolist())
    print("initiator counts:", np.bincount(initiator, minlength=3).tolist())
    print("active count:", active_total, "aliases:", 64 - active_total)
    print("winner_of[0..7]:", winner[:8].tolist())
    print("commit0 hex:", commits[0].hex()[:32], "...")


=== v0.9 Rust module run ===
mode: FREE instr_counter: 2 barrier: False state_groups: 50
packed_views_u32[0]: [604514724, 538601608, 76677124]
packed90_u64[0]: [5190005092015155620, 4792320]
initiator counts: [27, 16, 21]
active count: 64 aliases: 0
winner_of[0..7]: [0, 1, 2, 3, 4, 5, 6, 7]
commit0 hex: 4701ea597a1aaeab36b00f0627b396b1 ...

=== v0.9 Rust module run ===
mode: PAIR_HUNT instr_counter: 2 barrier: False state_groups: 44
packed_views_u32[0]: [604514724, 538601608, 76677124]
packed90_u64[0]: [5190005092015155620, 4792320]
initiator counts: [17, 20, 27]
active count: 64 aliases: 0
winner_of[0..7]: [0, 1, 2, 3, 4, 5, 6, 7]
commit0 hex: 4701ea597a1aaeab36b00f0627b396b1 ...


v0.10 split-cell build and run.

Cell 1: Build

In [None]:
%%bash
# Abort the cell on the first failing command.
set -e

# Build prerequisites for the native crate (output silenced).
apt-get update -y >/dev/null
apt-get install -y build-essential pkg-config >/dev/null

# Rust exists already; just load env
# The env file is sourced only if present, so this step cannot fail.
if [ -f "$HOME/.cargo/env" ]; then
  source "$HOME/.cargo/env"
fi

# maturin builds the wheel for the crate.
python3 -m pip -q install --upgrade pip
python3 -m pip -q install maturin

# Recreate the project directory from scratch.
rm -rf uisa_v10
mkdir -p uisa_v10/src

cat > uisa_v10/Cargo.toml <<'TOML'
[package]
name = "uisa_v10"
version = "0.10.1"
edition = "2021"

[lib]
name = "uisa_v10"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.21", features = ["extension-module"] }
blake2 = "0.10"
TOML

cat > uisa_v10/src/lib.rs <<'RS'
use blake2::{Blake2s256, Digest};
use pyo3::prelude::*;
use pyo3::types::PyDict;
use std::collections::HashMap;

const Q: usize = 64;
const SLOTS: usize = 30;
const TRIPLETS: usize = 10;

const EPS: f32 = 1e-6;
const TAU_HI: f32 = 1.0;
const TAU_LOW: f32 = -1.0;
const R_FOR_RATIO: f32 = 64.0;

const PRIME_MASK_30: [u8; 30] = [
    0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1
];

/// Returns the three consecutive slot indices that make up triplet `t`.
fn triplet_idx(t: usize) -> [usize; 3] {
    let first = 3 * t;
    [first, first + 1, first + 2]
}

// Instruction opcodes. The discriminant is the value of byte 0 of a 16-byte
// instruction word; `decode16` panics on any byte value not listed here.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
enum Opcode {
    NecLoad = 0x01,
    NecAlu  = 0x02,
    PhaseFlip = 0x03,
    RotAxes   = 0x04,
    ShiftMag  = 0x05,
    // Two barrier opcodes; NOTE(review): presumably they set the
    // `barrier_dedup` / `barrier_all` flags of `ISA` - confirm in the executor.
    BarrierDedup = 0x06,
    BarrierAll   = 0x07,

    Pack       = 0x15,
    Dedup      = 0x17,
    FreeAlias  = 0x18,
    Next       = 0x19,
}

// One decoded 16-byte instruction. Byte offsets below are the ones read by
// `decode16`; bytes 1, 2, 6 and 7 of the word are not decoded.
#[derive(Clone, Copy)]
struct DecodedInstr {
    op: Opcode,     // byte 0
    dst_axis: u8,   // byte 3
    src_axis: u8,   // byte 4
    imm8: u8,       // byte 5
    imm32: i32,     // bytes 8..12, little-endian signed immediate
    aux32: u32,     // bytes 12..16, little-endian auxiliary word
}

/// Decodes a single 16-byte instruction word.
///
/// Layout: byte 0 = opcode, byte 3 = dst_axis, byte 4 = src_axis,
/// byte 5 = imm8, bytes 8..12 = imm32 (LE, signed),
/// bytes 12..16 = aux32 (LE, unsigned).
///
/// Panics when `bytes` is not exactly 16 bytes long or the opcode byte is
/// not a known `Opcode`.
fn decode16(bytes: &[u8]) -> DecodedInstr {
    assert!(bytes.len() == 16);

    let opcode_byte = bytes[0];
    let op = match opcode_byte {
        0x01 => Opcode::NecLoad,
        0x02 => Opcode::NecAlu,
        0x03 => Opcode::PhaseFlip,
        0x04 => Opcode::RotAxes,
        0x05 => Opcode::ShiftMag,
        0x06 => Opcode::BarrierDedup,
        0x07 => Opcode::BarrierAll,
        0x15 => Opcode::Pack,
        0x17 => Opcode::Dedup,
        0x18 => Opcode::FreeAlias,
        0x19 => Opcode::Next,
        unknown => panic!("unknown opcode {}", unknown),
    };

    // Gathers the four bytes of a little-endian word starting at `offset`.
    let word_at = |offset: usize| -> [u8; 4] {
        [bytes[offset], bytes[offset + 1], bytes[offset + 2], bytes[offset + 3]]
    };

    DecodedInstr {
        op,
        dst_axis: bytes[3],
        src_axis: bytes[4],
        imm8: bytes[5],
        imm32: i32::from_le_bytes(word_at(8)),
        aux32: u32::from_le_bytes(word_at(12)),
    }
}

// Complete machine state for one run. Every field is initialised by
// `ISA::new`; the code that updates them lies outside this excerpt, so the
// notes below are limited to what the declarations and `new` show.
#[derive(Clone)]
struct ISA {
    instr_counter: u32,      // starts at 0
    mode_pair_hunt: bool,    // constructor argument
    exec_active_only: bool,  // constructor argument

    // Both barrier flags start cleared; either one disables active-only
    // execution (see `active_only_allowed`).
    barrier_dedup: bool,
    barrier_all: bool,

    // Starts false; active-only execution additionally requires it.
    // NOTE(review): presumably set once a dedup schedule exists - confirm.
    has_schedule: bool,

    mag_xyz: [[i32;3]; Q],   // per-qubit magnitude per axis, starts at 0
    base_sign: [[i32;3]; Q], // per-qubit sign per axis, starts at +1

    active_mask: [bool; Q],  // starts all true
    winner_of: [usize; Q],   // starts as the identity mapping

    initiator: [u8; Q],
    swapcount_xyz: [[u8;3]; Q],
    packed_views_u32: [[u32;3]; Q],
    packed90_u64: [[u64;2]; Q],
    commits: [[u8;32]; Q],
    tags_u64: [u64; Q],

    epoch_log: Vec<(u32,u8,u32,u32,u32)>, // (t, barrier_mode, groups, collisions, active)
}

impl ISA {
    /// Builds the initial state: zero magnitudes, all signs +1, every qubit
    /// active and its own winner, no barrier, no schedule, empty epoch log.
    fn new(mode_pair_hunt: bool, exec_active_only: bool) -> Self {
        let self_winners: [usize; Q] = core::array::from_fn(|q| q);
        Self {
            // control
            mode_pair_hunt,
            exec_active_only,
            instr_counter: 0,
            barrier_dedup: false,
            barrier_all: false,
            has_schedule: false,
            // NEC state
            mag_xyz: [[0;3]; Q],
            base_sign: [[1;3]; Q],
            // scheduling
            active_mask: [true; Q],
            winner_of: self_winners,
            // derived outputs
            initiator: [0u8; Q],
            swapcount_xyz: [[0u8;3]; Q],
            packed_views_u32: [[0u32;3]; Q],
            packed90_u64: [[0u64;2]; Q],
            commits: [[0u8;32]; Q],
            tags_u64: [0u64; Q],
            epoch_log: Vec::new(),
        }
    }

    /// True when aliased qubits may be skipped: PAIR_HUNT mode, active-only
    /// execution requested, a schedule present, and neither barrier raised.
    fn active_only_allowed(&self) -> bool {
        let barrier_raised = self.barrier_all || self.barrier_dedup;
        if barrier_raised {
            return false;
        }
        self.mode_pair_hunt && self.exec_active_only && self.has_schedule
    }
}

// ---- initiator (full array) ----
/// Picks an initiator axis (0, 1 or 2) for every qubit.
///
/// For each axis, a qubit's "count" is the number of qubits (itself included)
/// that hold the same magnitude on that axis; an axis is "shared" when the
/// count exceeds 1. The initiator is then:
/// * 1 or 3 shared axes: the first axis with the largest count;
/// * exactly 2 shared axes: the axis that is not shared;
/// * no shared axis: the axis with the largest magnitude, lower axis index
///   winning ties (via the `*1000 + tie` key).
fn initiator_from_shared_counts(mag: &[[i32;3]; Q]) -> [u8; Q] {
    // counts[q][axis] = how many qubits share q's magnitude on that axis.
    let mut counts = [[0i32;3]; Q];
    for q in 0..Q {
        for axis in 0..3 {
            let target = mag[q][axis];
            counts[q][axis] = mag.iter().filter(|row| row[axis] == target).count() as i32;
        }
    }

    // Index of the first strictly-largest element.
    let first_argmax = |vals: [i32; 3]| -> u8 {
        let mut best = 0usize;
        for a in 1..3 {
            if vals[a] > vals[best] { best = a; }
        }
        best as u8
    };

    let mut init = [0u8; Q];
    for q in 0..Q {
        let c = counts[q];
        let shared_count = c.iter().filter(|&&n| n > 1).count();

        init[q] = match shared_count {
            // Two axes shared: the odd one out is the unshared axis.
            2 => c.iter().position(|&n| n == 1).unwrap_or(0) as u8,
            // Nothing shared: rank by magnitude, favouring x over y over z.
            0 => first_argmax([
                mag[q][0]*1000 + 2,
                mag[q][1]*1000 + 1,
                mag[q][2]*1000 + 0,
            ]),
            // One or three axes shared: most-shared axis wins.
            _ => first_argmax(c),
        };
    }
    init
}

// ---- NEC -> primaries ----
/// Expands one qubit's per-axis magnitude and sign into six phase-dual primaries.
///
/// Each axis contributes two consecutive entries. For magnitude m the "real"
/// pair is [m, -m] and the "unreal" pair is [-m, m]; a negative sign puts the
/// unreal pair first, any other sign puts the real pair first.
fn build_primaries_from_nec_for_qubit(mag: [i32;3], sign: [i32;3]) -> [[f32;2]; 6] {
    let mut primaries = [[0.0f32; 2]; 6];
    for (axis, slot_pair) in primaries.chunks_exact_mut(2).enumerate() {
        let m = mag[axis] as f32;
        // Leading component of the first entry: -m when the axis sign is negative.
        let lead = if sign[axis] < 0 { -m } else { m };
        slot_pair[0] = [lead, -lead];
        slot_pair[1] = [-lead, lead];
    }
    primaries
}

/// Reorders the six primaries so the chosen axis' pair comes first.
///
/// Axis 0 leaves the order untouched, axis 1 yields entries [2,3,4,5,0,1],
/// and any other value yields [4,5,0,1,2,3]; this is a cyclic left rotation
/// by two entries per axis step.
fn permute_primaries_by_initiator(prim6: [[f32;2];6], init_axis: u8) -> [[f32;2];6] {
    let shift = match init_axis {
        0 => 0,
        1 => 2,
        _ => 4,
    };
    let mut reordered = prim6;
    reordered.rotate_left(shift);
    reordered
}

/// Component-wise sum of two phase-dual pairs.
fn add_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let [a_re, a_un] = a;
    let [b_re, b_un] = b;
    [a_re + b_re, a_un + b_un]
}
/// Component-wise difference (a minus b) of two phase-dual pairs.
fn sub_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let [a_re, a_un] = a;
    let [b_re, b_un] = b;
    [a_re - b_re, a_un - b_un]
}
/// Component-wise product of two phase-dual pairs.
fn mul_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let [a_re, a_un] = a;
    let [b_re, b_un] = b;
    [a_re * b_re, a_un * b_un]
}
/// Component-wise quotient of two phase-dual pairs with a near-zero guard.
///
/// A component whose divisor magnitude is not strictly greater than EPS
/// yields 0.0 instead of dividing.
fn div_pd(a: [f32;2], b: [f32;2]) -> [f32;2] {
    let guarded = |num: f32, den: f32| -> f32 {
        if den.abs() > EPS { num / den } else { 0.0 }
    };
    [guarded(a[0], b[0]), guarded(a[1], b[1])]
}

/// Builds the slot register for one qubit from its six (already permuted) primaries.
///
/// Slots 0..5 hold the primaries themselves. Eight operand pairs follow, each
/// filling three slots: the sum and difference (ordered so the one with the
/// larger first component comes first, the sum winning ties) and then either
/// the product (first four pairs) or the guarded quotient (last four pairs).
/// The number of pairs whose difference had to be placed first is written to
/// swap_sum_out.
fn build_register30_for_qubit(prim6: [[f32;2];6], swap_sum_out: &mut u8) -> [[f32;2]; SLOTS] {
    let [a0, a1, b0, b1, c0, c1] = prim6;

    // (left operand, right operand, use division instead of multiplication)
    let recipe: [([f32;2], [f32;2], bool); 8] = [
        (a0, b0, false), (b0, c0, false), (a0, c0, false), (a1, b1, false),
        (a0, b1, true),  (b0, c1, true),  (a1, c0, true),  (b1, c1, true),
    ];

    let mut reg = [[0.0f32;2]; SLOTS];
    reg[..6].copy_from_slice(&prim6);

    let mut swaps = 0u8;
    for (k, &(lhs, rhs, divide)) in recipe.iter().enumerate() {
        let base = 6 + 3 * k;
        let sum = add_pd(lhs, rhs);
        let diff = sub_pd(lhs, rhs);

        if diff[0] > sum[0] {
            // Difference leads: counts as one swap.
            reg[base] = diff;
            reg[base + 1] = sum;
            swaps += 1;
        } else {
            reg[base] = sum;
            reg[base + 1] = diff;
        }
        reg[base + 2] = if divide { div_pd(lhs, rhs) } else { mul_pd(lhs, rhs) };
    }

    *swap_sum_out = swaps;
    reg
}

/// Flags collapsed slots in a register (1 = collapsed, 0 = not).
///
/// A slot is flagged when either
///   * its real part is at least TAU_HI and its unreal part at most TAU_LOW, or
///   * real / unreal exceeds R_FOR_RATIO (the ratio is taken as 0.0 when the
///     unreal magnitude is not above EPS).
///
/// NOTE(review): the triplet pass below writes each slot's own flag back for
/// both uniform and mixed triplets, exactly as the previous implementation
/// did, so it does not currently change the mask. Confirm whether a different
/// scatter rule was intended.
fn detect_collapse_triplet_scatter(pairs: &[[f32;2]; SLOTS]) -> [u8; SLOTS] {
    let mut individual = [0u8; SLOTS];
    for (flag, pair) in individual.iter_mut().zip(pairs.iter()) {
        let (real, unreal) = (pair[0], pair[1]);
        let saturated = real >= TAU_HI && unreal <= TAU_LOW;
        let ratio = if unreal.abs() > EPS { real / unreal } else { 0.0 };
        let skewed = ratio > R_FOR_RATIO;
        *flag = u8::from(saturated || skewed);
    }

    let mut final_mask = individual;
    for t in 0..TRIPLETS {
        let idx = triplet_idx(t);
        // Uniform triplets share one value and mixed triplets keep their own,
        // so in both cases every member receives its individual flag.
        for k in 0..3 {
            final_mask[idx[k]] = individual[idx[k]];
        }
    }
    final_mask
}

/// Negates both components of every slot that is prime-indexed or collapsed.
///
/// A slot is affected when its PRIME_MASK_30 entry or its collapse flag is
/// positive; affected slots are multiplied by -1.0, all others by 1.0.
fn apply_parity_rotation(pairs: &[[f32;2]; SLOTS], collapse: &[u8; SLOTS]) -> [[f32;2]; SLOTS] {
    let mut rotated = [[0.0f32;2]; SLOTS];
    for (i, (dst, src)) in rotated.iter_mut().zip(pairs.iter()).enumerate() {
        let flip = PRIME_MASK_30[i] > 0 || collapse[i] > 0;
        let factor: f32 = if flip { -1.0 } else { 1.0 };
        *dst = [src[0] * factor, src[1] * factor];
    }
    rotated
}

/// Thresholds each slot's first (real) component into a bit: 1 when it is
/// strictly greater than EPS, otherwise 0.
fn bitmap(rotated: &[[f32;2]; SLOTS]) -> [u8; SLOTS] {
    let mut bits = [0u8; SLOTS];
    for (bit, pair) in bits.iter_mut().zip(rotated.iter()) {
        *bit = u8::from(pair[0] > EPS);
    }
    bits
}

/// Packs the first 30 slot bits into a u32, slot i landing in bit position i.
fn pack30_to_u32(bits: &[u8; SLOTS]) -> u32 {
    bits[..30]
        .iter()
        .enumerate()
        .fold(0u32, |word, (i, &bit)| word | (u32::from(bit) << i))
}

/// Packs three 30-bit words into a (low, high) pair of u64 values.
///
/// Only the low 30 bits of each input are used. Layout of the low word:
/// bits 0..29 = b0, bits 30..59 = b1, bits 60..63 = lowest four bits of b2.
/// The high word holds the remaining 26 bits of b2.
fn pack90(b0: u32, b1: u32, b2: u32) -> (u64,u64) {
    const MASK30: u64 = (1u64 << 30) - 1;
    let w0 = u64::from(b0) & MASK30;
    let w1 = u64::from(b1) & MASK30;
    let w2 = u64::from(b2) & MASK30;

    let low = w0 | (w1 << 30) | ((w2 & 0xF) << 60);
    (low, w2 >> 4)
}

/// Computes the 32-byte Blake2s commitment for one qubit's measurement.
///
/// The hashed byte stream is, in order: the ASCII tag NTHISA90, the
/// instruction counter, the three packed view words (all little-endian u32),
/// and finally the four bytes initiator, scx, scy, scz.
fn commit_full90(instr_counter: u32, w0: u32, w1: u32, w2: u32, initiator: u8, scx: u8, scy: u8, scz: u8) -> [u8;32] {
    let mut hasher = Blake2s256::new();
    hasher.update(b"NTHISA90");
    for word in [instr_counter, w0, w1, w2] {
        hasher.update(&word.to_le_bytes());
    }
    hasher.update(&[initiator, scx, scy, scz]);

    let digest = hasher.finalize();
    let mut commit = [0u8;32];
    commit.copy_from_slice(&digest[..]);
    commit
}

/// Derives a 64-bit tag for qubit q from its commitment.
///
/// Hashes the ASCII tag NTH_TAG0, the commitment, the qubit index truncated
/// to u16 (little-endian) and the instruction counter (little-endian), then
/// returns the first eight digest bytes read as a little-endian u64.
fn tag_u64(commit: &[u8;32], q: usize, instr_counter: u32) -> u64 {
    let qubit_index = q as u16;

    let mut hasher = Blake2s256::new();
    hasher.update(b"NTH_TAG0");
    hasher.update(commit);
    hasher.update(&qubit_index.to_le_bytes());
    hasher.update(&instr_counter.to_le_bytes());
    let digest = hasher.finalize();

    let mut head = [0u8; 8];
    head.copy_from_slice(&digest[..8]);
    u64::from_le_bytes(head)
}

fn state_groups(commits: &[[u8;32]; Q]) -> usize {
    let mut m: HashMap<[u8;32], u32> = HashMap::new();
    for q in 0..Q { *m.entry(commits[q]).or_insert(0) += 1; }
    m.len()
}

/// Groups qubits by identical commitment and elects one winner per group.
///
/// Within a group the winner is the qubit with the smallest efficiency value,
/// ties going to the smallest qubit index. Every other member becomes
/// inactive and points at the winner.
///
/// Returns (number of groups, number of non-winner qubits, active mask,
/// winner index per qubit).
fn dedup(commits: &[[u8;32]; Q], efficiency: &[u32; Q]) -> (u32,u32,[bool;Q],[usize;Q]) {
    let mut groups: HashMap<[u8;32], Vec<usize>> = HashMap::new();
    for (q, commit) in commits.iter().enumerate() {
        groups.entry(*commit).or_default().push(q);
    }

    let mut winner_of: [usize; Q] = core::array::from_fn(|i| i);
    let mut active_mask = [true; Q];
    let mut collision_qubits = 0u32;

    for members in groups.values() {
        // A group always holds at least one qubit, so this adds zero for singletons.
        collision_qubits += (members.len() as u32) - 1;

        // Lexicographic minimum over (efficiency, index) is the election rule.
        let champion = members
            .iter()
            .copied()
            .min_by_key(|&q| (efficiency[q], q))
            .unwrap_or(members[0]);

        for &q in members {
            winner_of[q] = champion;
            if q != champion {
                active_mask[q] = false;
            }
        }
    }

    (groups.len() as u32, collision_qubits, active_mask, winner_of)
}

/// Gives every inactive (aliased) qubit a new NEC state.
///
/// Pair-hunt mode: the alias copies its winner's magnitudes and signs; the
/// schedule (active mask, winners, has_schedule) is left untouched.
///
/// Free mode: the alias receives magnitudes (0..63) and signs derived from a
/// Blake2s hash of the tag NTH_FREE0, its commitment, its index as u16 and the
/// next instruction counter value; afterwards all qubits are marked active
/// and the schedule is cleared.
fn free_alias(st: &mut ISA) {
    if st.mode_pair_hunt {
        for q in 0..Q {
            if st.active_mask[q] { continue; }
            let winner = st.winner_of[q];
            st.mag_xyz[q] = st.mag_xyz[winner];
            st.base_sign[q] = st.base_sign[winner];
        }
        return;
    }

    for q in 0..Q {
        if st.active_mask[q] { continue; }

        let mut hasher = Blake2s256::new();
        hasher.update(b"NTH_FREE0");
        hasher.update(&st.commits[q]);
        hasher.update(&(q as u16).to_le_bytes());
        hasher.update(&(st.instr_counter + 1).to_le_bytes());
        let digest = hasher.finalize();

        // Bytes 0..2 give the three magnitudes, byte 3 the three sign bits.
        let sign_bits = digest[3];
        let sign_of = |bit: u8| -> i32 { if (sign_bits & bit) != 0 { 1 } else { -1 } };

        st.mag_xyz[q] = [
            (digest[0] as i32) & 63,
            (digest[1] as i32) & 63,
            (digest[2] as i32) & 63,
        ];
        st.base_sign[q] = [sign_of(1), sign_of(2), sign_of(4)];
    }

    st.active_mask = [true; Q];
    st.winner_of = core::array::from_fn(|i| i);
    st.has_schedule = false;
}

/// Measurement stage: turns every qubit's NEC state into packed views, a
/// 90-bit packing, a commitment and a tag.
///
/// Initiators are recomputed for all qubits first. When active-only execution
/// is allowed, inactive qubits are skipped in the main pass and afterwards
/// inherit their winner's views, packing, swap counts, commitment and
/// initiator; their tag is re-derived from the winner's commitment with their
/// own index.
fn stage_measure_transcode(st: &mut ISA) {
    st.initiator = initiator_from_shared_counts(&st.mag_xyz);

    let skip_aliases = st.active_only_allowed();

    // Main pass over every qubit that must be evaluated.
    for q in 0..Q {
        if skip_aliases && !st.active_mask[q] { continue; }

        let prim6 = build_primaries_from_nec_for_qubit(st.mag_xyz[q], st.base_sign[q]);

        let mut views = [0u32; 3];
        let mut swaps = [0u8; 3];
        for axis in 0..3 {
            // One view per axis: the primaries are rotated so that axis leads.
            let axis_prims = permute_primaries_by_initiator(prim6, axis as u8);
            let mut swap_sum = 0u8;
            let reg30 = build_register30_for_qubit(axis_prims, &mut swap_sum);

            let collapsed = detect_collapse_triplet_scatter(&reg30);
            let rotated = apply_parity_rotation(&reg30, &collapsed);
            let bits = bitmap(&rotated);

            swaps[axis] = swap_sum;
            views[axis] = pack30_to_u32(&bits);
        }
        st.swapcount_xyz[q] = swaps;
        st.packed_views_u32[q] = views;

        let (lo, hi) = pack90(views[0], views[1], views[2]);
        st.packed90_u64[q] = [lo, hi];

        let commit = commit_full90(
            st.instr_counter,
            views[0], views[1], views[2],
            st.initiator[q],
            swaps[0], swaps[1], swaps[2]
        );
        st.commits[q] = commit;
        st.tags_u64[q] = tag_u64(&commit, q, st.instr_counter);
    }

    // Alias pass (pair-hunt only): inactive qubits mirror their winner.
    if skip_aliases && st.mode_pair_hunt {
        for q in 0..Q {
            if st.active_mask[q] { continue; }
            let winner = st.winner_of[q];
            st.packed_views_u32[q] = st.packed_views_u32[winner];
            st.packed90_u64[q] = st.packed90_u64[winner];
            st.swapcount_xyz[q] = st.swapcount_xyz[winner];
            st.commits[q] = st.commits[winner];
            st.tags_u64[q] = tag_u64(&st.commits[winner], q, st.instr_counter);
            st.initiator[q] = st.initiator[winner];
        }
    }
}

/// Executes a single decoded instruction against the machine state.
///
/// Per opcode:
///   * NecLoad      - fills every qubit's magnitudes (0..63) and signs from a
///                    xorshift stream seeded by the instruction counter and
///                    aux32, then clears the schedule and both barrier flags.
///   * NecAlu       - mag[dst] = op(mag[dst], mag[src]) masked to six bits;
///                    imm8 selects the op (0 add, 1 sub, 2 mul, 3 div with
///                    divide-by-zero leaving the value unchanged, 4 xor,
///                    5 add imm32, anything else no change).
///   * PhaseFlip    - negates the sign on one axis for every qubit.
///   * RotAxes      - cyclically rotates the axes (direction from imm8).
///   * ShiftMag     - shifts one axis' magnitude left (imm32 >= 0) or right
///                    (imm32 < 0) and masks to six bits.
///   * BarrierDedup - raises barrier_dedup.
///   * BarrierAll   - raises barrier_all and clears the schedule.
///   * Pack         - runs the measurement stage.
///   * Dedup        - logs an epoch entry; updates the schedule only when no
///                    barrier is raised.
///   * FreeAlias    - reassigns inactive qubits.
///   * Next         - advances the instruction counter and drops both barriers.
///
/// Axis operands above 2 are clamped to 2.
fn exec_one(st: &mut ISA, ins: DecodedInstr) {
    match ins.op {
        Opcode::NecLoad => {
            let mut x = (st.instr_counter as u64) ^ (ins.aux32 as u64) ^ 0x9E3779B97F4A7C15u64;
            for q in 0..Q {
                // One xorshift step per extracted field.
                x ^= x << 13; x ^= x >> 7; x ^= x << 17;
                st.mag_xyz[q][0] = (x as i32) & 63;
                x ^= x << 13; x ^= x >> 7; x ^= x << 17;
                st.mag_xyz[q][1] = (x as i32) & 63;
                x ^= x << 13; x ^= x >> 7; x ^= x << 17;
                st.mag_xyz[q][2] = (x as i32) & 63;
                x ^= x << 13; x ^= x >> 7; x ^= x << 17;
                let sb = (x as u8) & 7;
                st.base_sign[q][0] = if (sb & 1)!=0 { 1 } else { -1 };
                st.base_sign[q][1] = if (sb & 2)!=0 { 1 } else { -1 };
                st.base_sign[q][2] = if (sb & 4)!=0 { 1 } else { -1 };
            }
            st.active_mask = [true; Q];
            st.winner_of = core::array::from_fn(|i| i);
            st.has_schedule = false;
            st.barrier_dedup = false;
            st.barrier_all = false;
        }
        Opcode::NecAlu => {
            let subop = ins.imm8;
            let dst = (ins.dst_axis as usize).min(2);
            let src = (ins.src_axis as usize).min(2);
            let imm = ins.imm32;
            for q in 0..Q {
                let a = st.mag_xyz[q][dst];
                let b = st.mag_xyz[q][src];
                let v = match subop {
                    0 => (a + b) & 63,
                    1 => (a - b) & 63,
                    2 => (a * b) & 63,
                    3 => if b==0 { a } else { (a / b) & 63 },
                    4 => (a ^ b) & 63,
                    // The immediate is an arbitrary 32-bit value from the
                    // bytecode; wrapping keeps the low six bits correct and
                    // avoids an overflow panic in checked builds.
                    5 => a.wrapping_add(imm) & 63,
                    _ => a,
                };
                st.mag_xyz[q][dst] = v;
            }
        }
        Opcode::PhaseFlip => {
            let axis = (ins.dst_axis as usize).min(2);
            for q in 0..Q { st.base_sign[q][axis] *= -1; }
        }
        Opcode::RotAxes => {
            let dir = ins.imm8;
            for q in 0..Q {
                let m = st.mag_xyz[q];
                let s = st.base_sign[q];
                if dir == 0 {
                    st.mag_xyz[q] = [m[2], m[0], m[1]];
                    st.base_sign[q] = [s[2], s[0], s[1]];
                } else {
                    st.mag_xyz[q] = [m[1], m[2], m[0]];
                    st.base_sign[q] = [s[1], s[2], s[0]];
                }
            }
        }
        Opcode::ShiftMag => {
            let axis = (ins.dst_axis as usize).min(2);
            let sh = ins.imm32;
            for q in 0..Q {
                let a = st.mag_xyz[q][axis];
                // Shift amounts of 32 or more would panic (checked builds) or
                // wrap modulo 32 (unchecked builds); the checked variants make
                // such shifts produce 0, which is what shifting a six-bit
                // magnitude that far means. unsigned_abs also handles
                // i32::MIN, whose negation overflows.
                let v = if sh >= 0 {
                    a.checked_shl(sh as u32).unwrap_or(0) & 63
                } else {
                    a.checked_shr(sh.unsigned_abs()).unwrap_or(0) & 63
                };
                st.mag_xyz[q][axis] = v;
            }
        }
        Opcode::BarrierDedup => st.barrier_dedup = true,
        Opcode::BarrierAll => {
            st.barrier_all = true;
            st.active_mask = [true; Q];
            st.winner_of = core::array::from_fn(|i| i);
            st.has_schedule = false;
        }
        Opcode::Pack => stage_measure_transcode(st),
        Opcode::Dedup => {
            // Epoch log entries are (counter, barrier mode, groups, collisions, active).
            if st.barrier_all {
                // All-exec barrier: everything is forced active, so the active
                // count is the qubit count itself.
                st.epoch_log.push((st.instr_counter, 2, 0, 0, Q as u32));
                st.active_mask = [true; Q];
                st.winner_of = core::array::from_fn(|i| i);
                st.has_schedule = false;
            } else if st.barrier_dedup {
                // Dedup barrier: keep the existing schedule, only log it.
                let active = st.active_mask.iter().filter(|&&b| b).count() as u32;
                st.epoch_log.push((st.instr_counter, 1, 0, 0, active));
            } else {
                // Efficiency of a qubit is its total swap count over the three views.
                let mut eff = [0u32; Q];
                for q in 0..Q {
                    eff[q] = (st.swapcount_xyz[q][0] as u32)
                           + (st.swapcount_xyz[q][1] as u32)
                           + (st.swapcount_xyz[q][2] as u32);
                }
                let (groups, collisions, am, wo) = dedup(&st.commits, &eff);
                st.active_mask = am;
                st.winner_of = wo;
                st.has_schedule = true;
                let active = st.active_mask.iter().filter(|&&b| b).count() as u32;
                st.epoch_log.push((st.instr_counter, 0, groups, collisions, active));
            }
        }
        Opcode::FreeAlias => free_alias(st),
        Opcode::Next => {
            st.instr_counter += 1;
            st.barrier_dedup = false;
            st.barrier_all = false;
        }
    }
}

/// Decodes and executes a bytecode buffer, one 16-byte instruction at a time.
///
/// Panics if the buffer length is not a multiple of 16.
fn exec_program(st: &mut ISA, bytecode: &[u8]) {
    assert!(bytecode.len() % 16 == 0);
    for word in bytecode.chunks_exact(16) {
        exec_one(st, decode16(word));
    }
}

/// Python entry point: runs a bytecode program on a fresh machine and returns
/// the final state as a dict.
///
/// Arguments:
///   * bytecode         - instruction stream; length must be a multiple of 16.
///   * mode             - either FREE or PAIR_HUNT.
///   * exec_active_only - forwarded to the machine constructor.
///
/// Raises ValueError for a bad bytecode length or an unknown mode.
///
/// The dict holds: instr_counter, mode_pair_hunt, state_groups, the flattened
/// packed views (3 per qubit) and 90-bit packings (2 per qubit), initiator,
/// active_mask (as 0/1), winner_of, tags_u64, the concatenated 32-byte
/// commitments, and the epoch log.
#[pyfunction]
fn run_uisa(bytecode: Vec<u8>, mode: &str, exec_active_only: bool) -> PyResult<PyObject> {
    if bytecode.len() % 16 != 0 {
        return Err(pyo3::exceptions::PyValueError::new_err("bytecode length must be multiple of 16"));
    }
    let mode_pair_hunt = match mode {
        "PAIR_HUNT" => true,
        "FREE" => false,
        _ => return Err(pyo3::exceptions::PyValueError::new_err("mode must be 'FREE' or 'PAIR_HUNT'")),
    };

    let mut st = ISA::new(mode_pair_hunt, exec_active_only);
    exec_program(&mut st, &bytecode);

    // Flatten the fixed-size per-qubit arrays into plain vectors for Python.
    let packed_views: Vec<u32> = st.packed_views_u32.iter().flatten().copied().collect();
    let packed90: Vec<u64> = st.packed90_u64.iter().flatten().copied().collect();
    let initiator: Vec<u8> = st.initiator.to_vec();
    let active: Vec<u8> = st.active_mask.iter().map(|&on| if on { 1 } else { 0 }).collect();
    let winner: Vec<u32> = st.winner_of.iter().map(|&w| w as u32).collect();
    let tags: Vec<u64> = st.tags_u64.to_vec();
    let commits: Vec<u8> = st.commits.iter().flatten().copied().collect();

    let groups = state_groups(&st.commits);

    Python::with_gil(|py| {
        let dict = PyDict::new(py);
        dict.set_item("instr_counter", st.instr_counter)?;
        dict.set_item("mode_pair_hunt", st.mode_pair_hunt)?;
        dict.set_item("state_groups", groups)?;
        dict.set_item("packed_views_u32_flat", packed_views)?;
        dict.set_item("packed90_u64_flat", packed90)?;
        dict.set_item("initiator", initiator)?;
        dict.set_item("active_mask", active)?;
        dict.set_item("winner_of", winner)?;
        dict.set_item("tags_u64", tags)?;
        dict.set_item("commits_bytes", commits)?;
        dict.set_item("epoch_log", st.epoch_log)?;
        Ok(dict.into())
    })
}

/// Module initializer for the uisa_v10 Python extension.
///
/// Registers run_uisa as the module's only function.
#[pymodule]
fn uisa_v10(_py: Python, m: &PyModule) -> PyResult<()> {
    // Makes run_uisa callable from Python as uisa_v10.run_uisa.
    m.add_function(wrap_pyfunction!(run_uisa, m)?)?;
    Ok(())
}
RS

cd uisa_v10
source $HOME/.cargo/env
maturin build -q --release
python3 -m pip -q install target/wheels/uisa_v10-*.whl
echo "Built and installed uisa_v10 v0.10.1."

Built and installed uisa_v10 v0.10.1.


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
    Updating crates.io index
     Locking 36 packages to latest compatible versions
      Adding generic-array v0.14.7 (available: v0.14.9)
      Adding pyo3 v0.21.2 (available: v0.27.2)
🔗 Found pyo3 bindings
🐍 Found CPython 3.12 at /usr/bin/python3
   --> src/lib.rs:610:28
    |
610 |         let dict = PyDict::new(py);
    |                            ^^^
    |
    = note: `#[warn(deprecated)]` on by default

   --> src/lib.rs:627:26
    |
627 | fn uisa_v10(_py: Python, m: &PyModule) -> PyResult<()> {
    |                          ^

📦 Built wheel for CPython 3.12 to /content/uisa_v10/target/wheels/uisa_v10-0.10.1-cp312-cp312-manylinux_2_34_x86_64.whl


Cell 2: Run

In [None]:
import struct
import numpy as np
import uisa_v10

def emit(op, flags=0, gpr_mask=0, dst=0, src=0, imm8=0, imm32=0, aux32=0):
    """Encode one instruction as a 16-byte word.

    Layout: bytes 0-5 hold ``op``, ``flags``, ``gpr_mask``, ``dst``, ``src``
    and ``imm8`` (one unsigned byte each); bytes 6-7 stay zero; bytes 8-11
    hold ``imm32`` as a little-endian signed 32-bit integer; bytes 12-15 hold
    ``aux32`` as a little-endian unsigned 32-bit integer.
    """
    word = bytearray(16)
    word[:6] = bytes((op, flags, gpr_mask, dst, src, imm8))
    # Both 32-bit fields are contiguous, so one pack_into call fills them.
    struct.pack_into("<iI", word, 8, imm32, aux32)
    return bytes(word)

# Opcodes: value of byte 0 of each 16-byte instruction word built by emit().
# NOTE(review): the numeric values must match the Rust decoder (decode16),
# which is not shown in this cell -- confirm against it when adding opcodes.
OP_NEC_LOAD      = 0x01
OP_NEC_ALU       = 0x02
OP_PHASE_FLIP    = 0x03
OP_ROT_AXES      = 0x04
OP_SHIFT_MAG     = 0x05
OP_BARRIER_DEDUP = 0x06
OP_BARRIER_ALL   = 0x07
OP_PACK          = 0x15
OP_DEDUP         = 0x17
OP_FREE_ALIAS    = 0x18
OP_NEXT          = 0x19

# ALU sub-operations, passed as imm8 of OP_NEC_ALU. The Rust executor maps
# 0..5 to add, sub, mul, div (divide-by-zero leaves the value unchanged), xor
# and add-immediate (imm32); results are masked to six bits.
ALU_ADD  = 0
ALU_SUB  = 1
ALU_MUL  = 2
ALU_DIV  = 3
ALU_XOR  = 4
ALU_ADDI = 5

# Demo program covering three epochs (each OP_NEXT closes an epoch and clears
# both barrier flags):
#   epoch 0: load, mutate, pack, then a normal dedup that builds the schedule;
#   epoch 1: dedup runs behind the dedup barrier, so it only logs and the
#            existing schedule is preserved (PAIR_HUNT keeps its winners);
#   epoch 2: dedup runs behind the all-exec barrier, which forces every qubit
#            active again.
bc = b"".join([
    emit(OP_NEC_LOAD, aux32=11),

    emit(OP_NEC_ALU, dst=0, src=1, imm8=ALU_ADD),
    emit(OP_NEC_ALU, dst=2, src=0, imm8=ALU_XOR),
    emit(OP_SHIFT_MAG, dst=1, imm32=1),
    emit(OP_PHASE_FLIP, dst=0),
    emit(OP_ROT_AXES, imm8=0),

    emit(OP_PACK),
    emit(OP_DEDUP),
    emit(OP_FREE_ALIAS),
    emit(OP_NEXT),

    # epoch 1: recompute and pack, then raise the dedup barrier so the
    # following OP_DEDUP leaves the schedule as it is
    emit(OP_NEC_ALU, dst=0, src=2, imm8=ALU_MUL),
    emit(OP_PACK),
    emit(OP_BARRIER_DEDUP),
    emit(OP_DEDUP),
    emit(OP_NEXT),

    # epoch 2: pack, then raise the all-exec barrier so OP_DEDUP resets
    # every qubit to active
    emit(OP_PACK),
    emit(OP_BARRIER_ALL),
    emit(OP_DEDUP),
    emit(OP_NEXT),
])

def reshape(out):
    """Convert the flat lists returned by ``run_uisa`` into per-qubit arrays.

    Returns a 7-tuple: packed views as a (64, 3) uint32 array, 90-bit packings
    as a (64, 2) uint64 array, initiator and active mask as uint8 arrays,
    winner indices as a uint32 array, tags as a uint64 array, and the
    commitments as a list of 64 ``bytes`` objects of 32 bytes each.
    """
    def column(key, dtype):
        return np.array(out[key], dtype=dtype)

    packed_views = column("packed_views_u32_flat", np.uint32).reshape(64, 3)
    packed90 = column("packed90_u64_flat", np.uint64).reshape(64, 2)
    initiator = column("initiator", np.uint8)
    active = column("active_mask", np.uint8)
    winner = column("winner_of", np.uint32)
    tags = column("tags_u64", np.uint64)

    # Commitments arrive concatenated; cut them back into 32-byte records.
    raw = bytes(out["commits_bytes"])
    commits = [raw[start:start + 32] for start in range(0, 64 * 32, 32)]
    return packed_views, packed90, initiator, active, winner, tags, commits

# Run the same program in both modes and print a short summary of each result.
for mode in ("FREE", "PAIR_HUNT"):
    out = uisa_v10.run_uisa(bytecode=list(bc), mode=mode, exec_active_only=True)
    packed_views, packed90, initiator, active, winner, tags, commits = reshape(out)
    active_count = int(active.sum())

    report = [
        ("\n=== v0.10 Rust module run ===",),
        ("mode:", mode, "instr_counter:", out["instr_counter"], "state_groups:", out["state_groups"]),
        ("packed_views_u32[0]:", packed_views[0].tolist()),
        ("packed90_u64[0]:", packed90[0].tolist()),
        ("initiator counts:", np.bincount(initiator, minlength=3).tolist()),
        ("active count:", active_count, "aliases:", 64 - active_count),
        ("winner_of[0..7]:", winner[:8].tolist()),
        ("tags_u64[0..3]:", tags[:4].tolist()),
        ("commit0:", commits[0].hex()[:32], "..."),
        # Each epoch log entry is (t, barrier_mode, groups, collisions, active),
        # with barrier_mode 0=normal, 1=dedup-skip-preserve, 2=all-exec.
        ("epoch_log:", out["epoch_log"]),
    ]
    for fields in report:
        print(*fields)


=== v0.10 Rust module run ===
mode: FREE instr_counter: 3 state_groups: 50
packed_views_u32[0]: [604514724, 538601608, 76677124]
packed90_u64[0]: [5190005092015155620, 4792320]
initiator counts: [27, 16, 21]
active count: 64 aliases: 0
winner_of[0..7]: [0, 1, 2, 3, 4, 5, 6, 7]
tags_u64[0..3]: [14082824732673922278, 7249405683105673377, 2863212038624005551, 13821119079048824300]
commit0: f8b7e123c69c2bdbcbe82277dbd1e593 ...
epoch_log: [(0, 0, 47, 17, 47), (1, 1, 0, 0, 64), (2, 2, 0, 0, 64)]

=== v0.10 Rust module run ===
mode: PAIR_HUNT instr_counter: 3 state_groups: 44
packed_views_u32[0]: [604514724, 538601608, 76677124]
packed90_u64[0]: [5190005092015155620, 4792320]
initiator counts: [17, 20, 27]
active count: 64 aliases: 0
winner_of[0..7]: [0, 1, 2, 3, 4, 5, 6, 7]
tags_u64[0..3]: [14082824732673922278, 16799423996720699811, 9032297607405725997, 13821119079048824300]
commit0: f8b7e123c69c2bdbcbe82277dbd1e593 ...
epoch_log: [(0, 0, 47, 17, 47), (1, 1, 0, 0, 47), (2, 2, 0, 0, 64)]


v0.1.1 GPR, State Select, Active/Freed Exposed Mask

Cell 1 Build

Cell 2 Run