In [42]:
from datetime import datetime
import numpy as np
from numba import cuda
from tabulate import tabulate
import matplotlib.pyplot as plt
from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32

In [43]:
TPB = 16
pi_calculation_count = 16
BPG = int(pi_calculation_count/TPB)

In [44]:
def cpu_pi_calcul(pi_calculation_count, N):
    res = np.zeros(pi_calculation_count)
    for i in range(pi_calculation_count):
        x = np.random.uniform(size=N)
        y = np.random.uniform(size=N)
        z = x ** 2 + y ** 2 <= 1
        res[i] = 4.0 * sum(z) / N
    return res

In [45]:
@cuda.jit 
def pi_calcul(res, rng):
  i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
  h = 0
  # xoroshiro128p_uniform_float32 - Return a float32 in range [0.0, 1.0)
  if i < len(rng):
    x = xoroshiro128p_uniform_float32(rng, i)
    y = xoroshiro128p_uniform_float32(rng, i)
    if x ** 2 + y ** 2 <= 1.0:
      h += 1
  cuda.atomic.add(res, 0, h)

In [46]:
def gpu_pi_calcul(N):

  # create_xoroshiro128p_states - for random generated array on CUDA device
  rng_states = create_xoroshiro128p_states(N, seed=1)
  res = [0]

  dev_res = cuda.to_device(res)
  pi_calcul[N, TPB](dev_res, rng_states)
  tmp_ = dev_res.copy_to_host()
  pi_ = 4 * tmp_[0] / N
  return pi_

In [47]:
def main(iteration_count , N):
  for i in range(iteration_count):
    print("Iteration number:", i+1)
    start = datetime.now()
    cpu_pi = cpu_pi_calcul(pi_calculation_count, N).mean()
    print("CPU TIME : ", datetime.now()- start)
    print("CPU Pi : ", cpu_pi)
    start = datetime.now()
    gpu_pi = gpu_pi_calcul(N)
    print("GPU TIME :", datetime.now()- start)
    print("GPU Pi : ", gpu_pi)
    print("Number of points : ", N)
    print()
    N = N + 10000
  return None

In [48]:

main(10, 10000)

Iteration number: 1
CPU TIME :  0:00:00.323030
CPU Pi :  3.1454750000000002
GPU TIME : 0:00:00.137483
GPU Pi :  3.1552
Number of points :  10000

Iteration number: 2
CPU TIME :  0:00:00.632540
CPU Pi :  3.1444625
GPU TIME : 0:00:00.009212
GPU Pi :  3.1578
Number of points :  20000

Iteration number: 3
CPU TIME :  0:00:00.952985
CPU Pi :  3.1451833333333337
GPU TIME : 0:00:00.012079
GPU Pi :  3.1508
Number of points :  30000

Iteration number: 4
CPU TIME :  0:00:01.265431
CPU Pi :  3.1398
GPU TIME : 0:00:00.014167
GPU Pi :  3.1471
Number of points :  40000

Iteration number: 5
CPU TIME :  0:00:01.528847
CPU Pi :  3.140835
GPU TIME : 0:00:00.018084
GPU Pi :  3.14304
Number of points :  50000

Iteration number: 6
CPU TIME :  0:00:01.833783
CPU Pi :  3.1421166666666664
GPU TIME : 0:00:00.022789
GPU Pi :  3.1470666666666665
Number of points :  60000

Iteration number: 7
CPU TIME :  0:00:02.240020
CPU Pi :  3.142714285714286
GPU TIME : 0:00:00.021893
GPU Pi :  3.147542857142857
Number of poi