In [1]:
import warp as wp 
import numpy as np

In [2]:
# Initialise Warp explicitly so the start-up log (version, toolkit/driver,
# available devices, kernel cache path) is shown in this cell's output.
wp.init()

Warp 1.0.2 initialized:
   CUDA Toolkit 11.5, Driver 12.1
   Devices:
     "cpu"      : "x86_64"
     "cuda:0"   : "NVIDIA RTX A4000" (16 GiB, sm_86, mempool enabled)
   Kernel cache:
     /home/plunder/.cache/warp/1.0.2


In [3]:
# Per-point force kernel.
#
# For the point handled by each thread, walks the hash-grid candidates around
# it and sums the unit vectors pointing from every candidate within `radius`
# towards the point. The sum is written to `forces` at the thread's index.
#
#   points : positions, indexed by thread id and by hash-grid result index
#   forces : output, one vec3 per thread
#   grid   : hash-grid handle (uint64) passed to wp.hash_grid_query
#   radius : query radius and inclusive distance cut-off
@wp.kernel
def force(
    points: wp.array(dtype=wp.vec3),
    forces: wp.array(dtype=wp.vec3),
    grid: wp.uint64,
    radius: float,
):
    i = wp.tid()
    origin = points[i]

    # Running sum of unit direction vectors; starts as the default vec3.
    total = wp.vec3()

    # `candidate` receives the next point index from hash_grid_query_next.
    candidate = int(0)
    neighbours = wp.hash_grid_query(grid, origin, radius)

    while wp.hash_grid_query_next(neighbours, candidate):
        offset = origin - points[candidate]
        separation = wp.length(offset)

        # Ignore zero-distance hits (e.g. the query point itself when it is
        # stored in the grid) and candidates lying beyond the radius.
        if 0 < separation and separation <= radius:
            total += offset / separation

    forces[i] = total

In [20]:
# Open the benchmark archive; later cells read "points", "radius" and
# "gridsize" from `data`, so it is kept around as-is.
data = np.load("../example_atomic.npz")

# Reference forces the Warp result is compared against; its leading
# dimension is used as the number of points.
forces_ref = data["forces"]
num_points = len(forces_ref)

In [12]:
# Allocate every Warp object on one explicitly named device. Previously only
# the hash grid was pinned to "cuda" while the arrays used Warp's default
# device, so they matched only as long as the default happened to be CUDA.
DEVICE = "cuda"

points = wp.array(data["points"], dtype=wp.vec3, device=DEVICE)
forces = wp.zeros(num_points, dtype=wp.vec3, device=DEVICE)

# The archive stores these as NumPy objects; hand Warp plain Python scalars,
# matching the kernel's `radius: float` parameter and HashGrid's integer dims.
radius = float(data["radius"])
gridsize = data["gridsize"]

grid = wp.HashGrid(int(gridsize[0]), int(gridsize[1]), int(gridsize[2]), device=DEVICE)
grid.build(points=points, radius=radius)

In [17]:
# Display the three grid dimensions read from the archive and passed to wp.HashGrid.
gridsize

array([17, 17, 17], dtype=int32)

In [13]:
# Display the raw point coordinates stored in the archive (NumPy abbreviates the printout).
data["points"]

array([[ 15.806302  ,  75.2418    , 150.55765   ],
       [135.35553   , 197.11548   ,  41.53925   ],
       [165.93033   , 168.15778   , 144.40399   ],
       ...,
       [ 69.86019   , 101.18514   , 144.26186   ],
       [136.34462   ,  26.644041  ,   0.76220226],
       [ 92.3297    ,   3.454836  ,  20.581656  ]], dtype=float32)

In [14]:
def compute(points, forces, grid, radius):
    """Run the `force` kernel once over all points and wait for it to finish.

    Parameters
    ----------
    points : wp.array of wp.vec3
        Point positions; one kernel thread is launched per entry.
    forces : wp.array of wp.vec3
        Output buffer, written in place by the kernel.
    grid : wp.HashGrid
        Hash grid queried by the kernel (its `id` handle is passed through).
    radius : float
        Neighbour query radius / distance cut-off forwarded to the kernel.

    Returns
    -------
    None
        Results are written into `forces`.
    """
    # Launch and synchronise on the device that owns the hash grid, instead of
    # launching on the implicit default device and syncing a hard-coded "cuda".
    device = grid.device
    wp.launch(
        kernel=force,
        dim=len(points),
        inputs=[points, forces, grid.id, radius],
        device=device,
    )
    # Block until the kernel has completed so that callers (e.g. %time)
    # observe the full execution.
    wp.synchronize_device(device)

In [16]:
%time compute(points, forces, grid, radius)

CPU times: user 2.09 s, sys: 0 ns, total: 2.09 s
Wall time: 2.09 s


In [21]:
# Convert the Warp force buffer to a NumPy array so it can be compared with forces_ref.
forces_cpu = forces.numpy()

In [29]:
# Relative error of the Warp result against the reference:
# norm of the difference divided by the norm of the reference forces
# (np.linalg.norm's default for 2-D input is the Frobenius norm).
np.linalg.norm(forces_cpu - forces_ref) / np.linalg.norm(forces_ref)

0.0001573758

In [23]:
# Display the forces computed by the Warp kernel for a visual comparison with forces_ref.
forces_cpu

array([[   4.7778563,   10.151554 ,    3.032158 ],
       [ -23.255793 ,   -5.20872  ,    1.5006969],
       [  -8.961151 ,  -19.831896 ,   12.292814 ],
       ...,
       [   2.7903137,    6.4610176,   10.859381 ],
       [  -9.591168 ,  -20.324755 , -153.9948   ],
       [ -13.220029 , -143.52696  ,   12.89382  ]], dtype=float32)

In [24]:
# Display the reference forces loaded from the archive.
forces_ref

array([[   4.777826 ,   10.151538 ,    3.0321615],
       [ -23.255802 ,   -5.208677 ,    1.5006713],
       [  -8.961161 ,  -19.831894 ,   12.292823 ],
       ...,
       [   2.7902865,    6.4609957,   10.859321 ],
       [  -9.5911875,  -20.324778 , -153.99481  ],
       [ -13.220052 , -143.52702  ,   12.893774 ]], dtype=float32)