筛素数：用试除法统计 1000000 以内的素数个数，对比 Taichi（CPU / GPU）与纯 Python 的耗时
> 参考：https://github.com/GaleSeLee/taichi_tutorial/blob/main/example.ipynb

In [4]:
import taichi as ti
import time
# Initialise Taichi on the CPU backend with the kernel profiler enabled; the
# collected per-kernel timings are printed at the end of this cell by
# ti.profiler.print_kernel_profiler_info().
ti.init(arch=ti.cpu, kernel_profiler=True)
@ti.func
def is_prime(n: int):
    """Return True when ``n`` is prime, using trial division.

    Candidate divisors run from 2 up to and including int(n ** 0.5).
    Values below 2 (0, 1 and negative numbers) are reported as not prime.
    """
    result = True
    if n < 2:
        # Not prime by definition. Handling this case up front also keeps
        # the square root below from ever seeing a negative argument.
        result = False
    else:
        for k in range(2, int(n ** 0.5) + 1):
            if n % k == 0:
                result = False
                break  # a single divisor is enough to rule n out
    return result
@ti.kernel
def count_primes(n: int) -> int:
    """Count the primes in the half-open range [2, n).

    Each candidate is checked independently with is_prime(); the matches
    are accumulated into a single counter that is returned to Python.
    """
    total = 0
    # Outermost loop of the kernel: Taichi parallelises it, and the
    # augmented assignment on the shared counter is performed atomically.
    for candidate in range(2, n):
        if is_prime(candidate):
            total += 1
    return total
# Count the primes below one million on the backend selected by ti.init().
print(count_primes(1_000_000))
# Recorded timings for n = 1000000:
#    Taichi CPU 0.015s
#    Taichi GPU 0.025s
#    Python CPU 3.48s
# NOTE(review): these figures presumably came from bracketing the call above
# with time.time(); that manual timing is disabled here and the kernel
# profiler report below is used instead -- confirm before quoting them.
ti.profiler.print_kernel_profiler_info()

[Taichi] Starting on arch=x64
78498
Kernel Profiler(count, default) @ X64 
[      %     total   count |      min       avg       max   ] Kernel name
-------------------------------------------------------------------------
[100.00%   0.008 s      1x |    8.052     8.052     8.052 ms] count_primes_c74_0_kernel_1_range_for
[  0.00%   0.000 s      1x |    0.000     0.000     0.000 ms] count_primes_c74_0_kernel_0_serial
[  0.00%   0.000 s      1x |    0.000     0.000     0.000 ms] count_primes_c74_0_kernel_2_serial
-------------------------------------------------------------------------
[100.00%] Total execution time:   0.008 s   number of results: 3


In [10]:
import taichi as ti
import time
# Initialise Taichi with a GPU backend requested (the recorded run below
# started on CUDA) and the kernel profiler enabled, so per-kernel timings
# can be printed at the end of this cell.
ti.init(arch=ti.gpu, kernel_profiler=True)
@ti.func
def is_prime(n: int):
    """Return True when ``n`` is prime, using trial division.

    Candidate divisors run from 2 up to and including int(n ** 0.5).
    Values below 2 (0, 1 and negative numbers) are reported as not prime.
    """
    result = True
    if n < 2:
        # Not prime by definition. Handling this case up front also keeps
        # the square root below from ever seeing a negative argument.
        result = False
    else:
        for k in range(2, int(n ** 0.5) + 1):
            if n % k == 0:
                result = False
                break  # a single divisor is enough to rule n out
    return result
@ti.kernel
def count_primes(n: int) -> int:
    """Count the primes in the half-open range [2, n).

    Each candidate is checked independently with is_prime(); the matches
    are accumulated into a single counter that is returned to Python.
    """
    total = 0
    # Outermost loop of the kernel: Taichi parallelises it, and the
    # augmented assignment on the shared counter is performed atomically.
    for candidate in range(2, n):
        if is_prime(candidate):
            total += 1
    return total
# Count the primes below one million on the backend selected by ti.init().
print(count_primes(1_000_000))
# Recorded timings for n = 1000000:
#    Taichi CPU 0.015s
#    Taichi GPU 0.025s
#    Python CPU 3.48s
# NOTE(review): these figures presumably came from bracketing the call above
# with time.time(); that manual timing is disabled here and the kernel
# profiler report below is used instead -- confirm before quoting them.
ti.profiler.print_kernel_profiler_info()

[Taichi] Starting on arch=cuda
78498
Kernel Profiler(count, default) @ CUDA on NVIDIA GeForce RTX 4060 Laptop GPU
[      %     total   count |      min       avg       max   ] Kernel name
----------------------------------------------------------------------------
[ 84.65%   0.022 s      1x |   21.647    21.647    21.647 ms] count_primes_c74_0_kernel_1_range_for
[  8.42%   0.002 s      1x |    2.153     2.153     2.153 ms] runtime_initialize
[  6.55%   0.002 s      1x |    1.675     1.675     1.675 ms] runtime_initialize_rand_states_cuda
[  0.15%   0.000 s      1x |    0.038     0.038     0.038 ms] runtime_initialize_snodes
[  0.11%   0.000 s      1x |    0.028     0.028     0.028 ms] runtime_initialize_runtime_context_buffer
[  0.04%   0.000 s      1x |    0.011     0.011     0.011 ms] runtime_memory_allocate_aligned
[  0.04%   0.000 s      1x |    0.011     0.011     0.011 ms] count_primes_c74_0_kernel_0_serial
[  0.03%   0.000 s      1x |    0.008     0.008     0.008 ms] count_prime

在 WSL 中调用 GPU 出错时的解决方法，参见：
>https://github.com/taichi-dev/taichi/issues/5883

In [7]:
import time
def is_prime(n: int) -> bool:
    """Return True when ``n`` is prime, using trial division.

    Candidate divisors run from 2 up to and including int(n ** 0.5).

    Args:
        n: Integer to test.

    Returns:
        True if ``n`` is prime, otherwise False. Values below 2 (0, 1 and
        negative numbers) are never prime.
    """
    if n < 2:
        # Without this guard 0 and 1 fall through an empty loop and are
        # reported as prime, and a negative n makes ``n ** 0.5`` complex,
        # which int() rejects with a TypeError.
        return False
    for k in range(2, int(n ** 0.5) + 1):
        if n % k == 0:
            return False  # a single divisor is enough to rule n out
    return True

def count_primes(n: int) -> int:
    """Count the primes in the half-open range [2, n).

    Args:
        n: Exclusive upper bound of the range to scan.

    Returns:
        The number of integers k with 2 <= k < n for which is_prime(k)
        is truthy.
    """
    return sum(1 for candidate in range(2, n) if is_prime(candidate))
# Time the pure-Python baseline. perf_counter() is a monotonic,
# high-resolution clock meant for measuring elapsed intervals, whereas
# time.time() follows the wall clock and can jump if the system time is
# adjusted while the benchmark is running.
start_time = time.perf_counter()
print(count_primes(1000000))
end_time = time.perf_counter()

# Elapsed seconds for the whole call, including the print above.
time_cost = end_time - start_time
print(f"{time_cost} s")

78498
1.2462222576141357 s
