# In [None]:
import triton
import triton.language as tl

@triton.jit
def dotproduct(a_ptr, b_ptr, result_ptr, n, BLOCK_SIZE: tl.constexpr):
    """Accumulate one block's share of the dot product of ``a`` and ``b``.

    Each program handles ``BLOCK_SIZE`` consecutive elements starting at
    ``program_id(0) * BLOCK_SIZE``, multiplies them pairwise, reduces the
    products to a single partial sum and atomically adds that partial sum
    to the scalar at ``result_ptr``.

    Args:
        a_ptr: Address of the first input vector; reinterpreted here as a
            pointer to float32.
        b_ptr: Address of the second input vector; reinterpreted here as a
            pointer to float32.
        result_ptr: Address of the float32 accumulator that receives the
            partial sums.
        n: Number of elements in each input vector.
        BLOCK_SIZE: Compile-time number of elements handled per program.

    Note:
        The kernel only ever *adds* to ``result_ptr``; it never clears it.
        Presumably the caller zero-initializes the accumulator before the
        launch -- confirm against the call site.
    """
    # The launcher passes raw addresses, so give them a float32 pointer type
    # before doing pointer arithmetic on them.
    a_ptr = a_ptr.to(tl.pointer_type(tl.float32))
    b_ptr = b_ptr.to(tl.pointer_type(tl.float32))
    result_ptr = result_ptr.to(tl.pointer_type(tl.float32))

    pid = tl.program_id(0)
    block_start = pid * BLOCK_SIZE
    offsets = block_start + tl.arange(0, BLOCK_SIZE)
    # The last block may run past the end of the vectors; mask those lanes.
    mask = offsets < n

    # `other=0.0` gives masked-off lanes a defined value. Without it their
    # contents are unspecified and would leak into the reduction below.
    a = tl.load(a_ptr + offsets, mask=mask, other=0.0)
    b = tl.load(b_ptr + offsets, mask=mask, other=0.0)
    result = a * b
    total = tl.sum(result, axis=0)

    # Every program targets the same scalar, so the update must be atomic.
    tl.atomic_add(result_ptr, total)

def solve(a_ptr: int, b_ptr: int, result_ptr: int, n: int):
    """Launch the ``dotproduct`` kernel over two length-``n`` vectors.

    The launch uses a one-dimensional grid with one program per 256-element
    chunk, rounding up so a partial final chunk still gets a program.

    Args:
        a_ptr: Address of the first input vector.
        b_ptr: Address of the second input vector.
        result_ptr: Address of the scalar the kernel accumulates into.
        n: Number of elements in each input vector.

    Note:
        The kernel adds into ``result_ptr`` rather than overwriting it, and
        nothing here clears it first -- presumably the caller supplies a
        zeroed accumulator; verify against the caller.
    """
    elements_per_program = 256
    # Ceiling division: enough programs to cover every element.
    num_programs = triton.cdiv(n, elements_per_program)
    launch = dotproduct[(num_programs,)]
    launch(a_ptr, b_ptr, result_ptr, n, BLOCK_SIZE=elements_per_program)
