In [1]:
from __future__ import print_function, absolute_import

from numba import cuda
from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32, xoroshiro128p_normal_float32
import numpy as np

@cuda.jit
def compute_pi(rng_states, iterations, out):
    """Find the maximum value in values and store in result[0]"""
    thread_id = cuda.grid(1)

    # Compute pi by drawing random (x, y) points and finding what
    # fraction lie inside a unit circle
    inside = 0
    for i in range(iterations):
        
        x = xoroshiro128p_uniform_float32(rng_states, thread_id)
        y = xoroshiro128p_normal_float32(rng_states, thread_id)
        if x**2 + y**2 <= 1.0:
            inside += 1

    out[thread_id] = 4.0 * inside / iterations

threads_per_block = 64
blocks = 24
rng_states = create_xoroshiro128p_states(threads_per_block * blocks, seed=1)
out = np.zeros(threads_per_block * blocks, dtype=np.float32)

compute_pi[blocks, threads_per_block](rng_states, 200000, out)
print('pi:', out.mean())




pi: 2.2287412




In [2]:
device = cuda.get_current_device()
print(f"Max grid dimension (x): {device.MAX_GRID_DIM_X}")
print(f"Max threads per block: {device.MAX_THREADS_PER_BLOCK}")
print(f"Max blocks per grid: {device.MAX_GRID_DIM_X * device.MAX_GRID_DIM_Y * device.MAX_GRID_DIM_Z}")

Max grid dimension (x): 2147483647
Max threads per block: 1024
Max blocks per grid: 9223090559730712575
