In [1]:
import time
from numba import cuda, float32
import numpy as np

In [6]:
def cpu_pi(points):
  """Estimate pi on the CPU with a plain Python Monte Carlo loop.

  Each point is tested against the unit circle (x**2 + y**2 <= 1); the
  estimate is 4 * (points inside) / (total points).

  Args:
    points: sized iterable of points; each item must support indexing
      with 0 and 1 (x and y coordinates).

  Returns:
    Tuple (pi_cpu, time_final): the pi estimate and the wall-clock
    duration of the counting loop in seconds.

  Raises:
    ValueError: if `points` is empty (the estimate would divide by zero).
  """
  # Use the actual number of samples rather than a global `n`, so the
  # function does not depend on notebook state defined in another cell.
  total = len(points)
  if total == 0:
    raise ValueError("points must contain at least one sample")

  # perf_counter is a monotonic, high-resolution clock intended for
  # measuring short intervals.
  time_start = time.perf_counter()
  res = 0
  for point in points:
    if point[0] ** 2 + point[1] ** 2 <= 1:
      res += 1
  time_final = time.perf_counter() - time_start

  pi_cpu = 4 * res / total

  return pi_cpu, time_final

In [7]:
@cuda.jit
def cuda_pi(points, count):
  """CUDA kernel: flag, per point, whether it lies inside the unit circle.

  For every row index i of `points` handled by this thread, writes 1 to
  count[i] when points[i, 0]**2 + points[i, 1]**2 <= 1 and 0 otherwise.

  Args:
    points: 2-D array indexed as [i, 0] (x) and [i, 1] (y).
    count: 1-D output array indexed by the same i as `points`.
  """
  total = points.shape[0]
  # Total number of threads in the 1-D launch; each thread advances by
  # this amount so any launch size covers all rows.
  stride = cuda.gridsize(1)
  # Absolute index of this thread within the 1-D grid.
  i = cuda.grid(1)

  while i < total:
    x = points[i, 0]
    y = points[i, 1]
    count[i] = 1 if x ** 2 + y ** 2 <= 1 else 0
    i += stride


In [8]:
def gpu_pi(points, n):
  """Estimate pi on the GPU by Monte Carlo sampling.

  Copies the points to the device as float32, launches `cuda_pi` to
  flag which points fall inside the unit circle, copies the flags back
  and sums them on the host.

  Args:
    points: 2-D array-like; row i holds the x and y of point i in
      columns 0 and 1.
    n: number of points; must equal the number of rows in `points`
      because it sizes the device output buffer that the kernel fills
      one entry per row.

  Returns:
    Tuple (pi_gpu, time_final): the pi estimate and the wall-clock
    duration in seconds of the transfer, kernel launch, copy-back and
    host-side reduction.

  Raises:
    ValueError: if `points` is not 2-D, if `n` is not positive, or if
      `n` does not match the number of rows in `points`.
  """
  points = np.asarray(points)
  if points.ndim != 2:
    raise ValueError("points must be a 2-D array of (x, y) rows")
  if n <= 0:
    raise ValueError("n must be a positive number of points")
  # The kernel iterates over points.shape[0] but the output buffer is
  # sized by n; a mismatch would write outside the buffer or sum
  # entries the kernel never wrote.
  if points.shape[0] != n:
    raise ValueError(
        f"n ({n}) must equal the number of points ({points.shape[0]})")

  # perf_counter is a monotonic, high-resolution clock intended for
  # measuring short intervals.
  time_start = time.perf_counter()

  d_points = cuda.to_device(points.astype(np.float32))
  d_count = cuda.device_array(n, dtype=np.int32)

  threads_per_block = 256
  # Ceiling division so every point is covered by at least one thread.
  blocks_per_grid = (n + (threads_per_block - 1)) // threads_per_block
  cuda_pi[blocks_per_grid, threads_per_block](d_points, d_count)

  hits = d_count.copy_to_host().sum()
  pi_gpu = 4.0 * hits / n

  time_final = time.perf_counter() - time_start

  return pi_gpu, time_final

In [30]:
# Benchmark: estimate pi from the same random points on GPU and CPU
# and report both timings, the speed-up and the two estimates.
n = 10 ** 4
# Seeded generator so the sampled points, and therefore both printed
# estimates, are reproducible across runs.
rng = np.random.default_rng(42)
points = rng.random((n, 2))
device = cuda.get_current_device()
# Untimed warm-up call on a single point with the same dtypes; the
# kernel is presumably JIT-compiled on its first launch, which would
# otherwise be counted in the GPU timing on a fresh kernel.
gpu_pi(points[:1], 1)
pi_gpu, time_gpu = gpu_pi(points, n)
pi_cpu, time_cpu = cpu_pi(points)
print(f"Время GPU = {time_gpu}")
print(f"Время CPU = {time_cpu}")
print(f"Ускорение = {time_cpu / time_gpu}")
print(f"GPU: pi = {pi_gpu}")
print(f"CPU: pi = {pi_cpu}")

Время GPU = 0.0013608932495117188
Время CPU = 0.007158994674682617
Ускорение = 5.260511562718991
GPU: pi = 3.1332
CPU: pi = 3.1332
