In [None]:
import numpy as np
from numba import cuda

@cuda.jit
def conditional_kernel(A, B, C):
    """Combine A and B element-wise into C, one element per GPU thread.

    For each index ``idx`` below ``A.size``:

    * if both ``A[idx]`` and ``B[idx]`` are below 0.5, ``C[idx]`` is their sum;
    * otherwise ``C[idx]`` is the lesser value minus the greater one
      (zero when the two values are equal).

    Only ``A.size`` is checked, so ``B`` and ``C`` are assumed to hold at
    least as many elements as ``A``.
    """
    idx = cuda.grid(1)

    # Surplus threads in the last block have no element to process.
    if idx >= A.size:
        return

    a = A[idx]
    b = B[idx]

    if a < 0.5 and b < 0.5:
        # Both values are small: add them.
        C[idx] = a + b
    elif a > b:
        # a is the greater value, so lesser - greater is b - a.
        C[idx] = b - a
    else:
        # b is greater than or equal to a (or the comparison failed).
        C[idx] = a - b

def main(n: int = 1_000_000, threads_per_block: int = 256) -> None:
    """Run ``conditional_kernel`` on random inputs and print a few results.

    Two float32 arrays of ``n`` random values are copied to the GPU, the
    kernel is launched with enough blocks to cover every element, and the
    first few input/output triples are printed.

    Args:
        n: Number of elements in each array. Must be positive.
        threads_per_block: Number of threads in each CUDA block. Must be
            positive.

    Raises:
        ValueError: If ``n`` or ``threads_per_block`` is not positive.
    """
    if n <= 0:
        raise ValueError(f"n must be positive, got {n}")
    if threads_per_block <= 0:
        raise ValueError(
            f"threads_per_block must be positive, got {threads_per_block}"
        )

    # 1. Generate random values between 0 and 1
    host_a = np.random.random(n).astype(np.float32)
    host_b = np.random.random(n).astype(np.float32)

    # 2. Move inputs to GPU
    device_a = cuda.to_device(host_a)
    device_b = cuda.to_device(host_b)
    # The kernel writes every element of the output, so allocate it directly
    # on the device instead of copying a host array of zeros across.
    device_c = cuda.device_array(n, dtype=np.float32)

    # 3. Configure Grid (ceiling division so the last partial block is included)
    blocks_per_grid = (n + (threads_per_block - 1)) // threads_per_block

    # 4. Launch
    conditional_kernel[blocks_per_grid, threads_per_block](device_a, device_b, device_c)

    # 5. Bring result back
    host_c = device_c.copy_to_host()

    # Verify a few samples (never more than the arrays actually hold)
    for j in range(min(5, n)):
        print(f"A: {host_a[j]:.3f}, B: {host_b[j]:.3f} -> Result: {host_c[j]:.3f}")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()