In [1]:
from fireworks.ic import ic_two_body as ic_two_body
from fireworks.ic import ic_random_uniform as ic_random_uniform

from fireworks.nbodylib import dynamics as dyn
from fireworks.nbodylib import integrators as intg

import matplotlib.pyplot as plt
import numpy as np
from typing import Optional, Tuple
import numpy.typing as npt
from fireworks.particles import Particles


import numba
from numba import prange, njit

In [2]:
mass1 = 1.0
mass2 = 1.0
rp = 1.0
e = 0.0

particles = ic_two_body(mass1, mass2, rp, e)

#particles = ic_random_uniform(100,[1,1],[1,10],[0,1])

## NUMBA

In [3]:
pos = particles.pos
N_particles = len(particles)    
mass = particles.mass
softening = 1e-3

In [4]:
@njit(parallel=True)
def p_acc_compute(mass,position_1,position_2,softening,i,N):

    """
    Fixing particle i, this computes the acceleration of particle i due to all the other particles
    """
    acc_x = 0
    acc_y = 0
    acc_z = 0

    # deleting prange(i+1,N) for parallization wouldn't compute correctly the ji part 
    for j in prange(N):
        # Compute relative acceleration given
        # position of particle i and j
        mass_2 = mass[j]
        # This may be split into x,y,z for parallelization
        position_2=pos[j,:]
    
        # Cartesian component of the i,j particles distance
        dx = position_1[0] - position_2[0]
        dy = position_1[1] - position_2[1]
        dz = position_1[2] - position_2[2]
        

        # Distance module
        r = np.sqrt(dx**2 + dy**2 + dz**2)

        # Cartesian component of the i,j acceleration
        acceleration = np.zeros(3)
        acceleration[0] = -mass_2 * (5*softening**2 + 2*r**2) * dx / (2*(r**2 + softening**2)**(5/2))
        acceleration[1] = -mass_2 * (5*softening**2 + 2*r**2) * dy / (2*(r**2 + softening**2)**(5/2))
        acceleration[2] = -mass_2 * (5*softening**2 + 2*r**2) * dz / (2*(r**2 + softening**2)**(5/2))

        acc_x += acceleration[0]
        acc_y += acceleration[1]
        acc_z += acceleration[2]

    
    return acc_x,acc_y,acc_z


In [5]:
@njit(parallel=True)
def fast_acceleration_direct(pos,mass,N,softening):
     
    jerk = None
    pot = None
   
    # acc[i,:] ax,ay,az of particle i 
    acc  = np.zeros((N,3))

    # Fix particles i and paralelize over j, since I need the final matrix to be orderered by i
    # ranging over all N, not removing the last particle, since parallization would make it hard to compute the ji part 
    # ie computing for all particles all other particles, without any sort of tricks to make less computations
    for i in range(N):
  
        position_1=pos[i,:]
        # paralellized acc computation
        acc[i,:] = p_acc_compute(mass,position_1,pos,softening,i,N)
                    
    return (acc,jerk,pot)

In [7]:
def acceleration_direct(particles: Particles, softening: float =0., softening_type: str = None, ) \
        -> Tuple[npt.NDArray[np.float64],Optional[npt.NDArray[np.float64]],Optional[npt.NDArray[np.float64]]]:
    
    def acc_2body_Dehnen_softening(position_1,position_2,mass_2, softening):
        
        """
        Implements definition of acceleration for two bodies i,j with Dehnen softening
        
        This is used in the following for loop
        """
        # Cartesian component of the i,j particles distance
        dx = position_1[0] - position_2[0]
        dy = position_1[1] - position_2[1]
        dz = position_1[2] - position_2[2]
        

        # Distance module
        r = np.sqrt(dx**2 + dy**2 + dz**2)

        # Cartesian component of the i,j force
        acceleration = np.zeros(3)
        acceleration[0] = -mass_2 * (5*softening**2 + 2*r**2) * dx / (2*(r**2 + softening**2)**(5/2))
        acceleration[1] = -mass_2 * (5*softening**2 + 2*r**2) * dy / (2*(r**2 + softening**2)**(5/2))
        acceleration[2] = -mass_2 * (5*softening**2 + 2*r**2) * dz / (2*(r**2 + softening**2)**(5/2))

        return acceleration
        
    jerk = None
    pot = None

    pos  = particles.pos
    mass = particles.mass
    N    = len(particles) 

    # acc[i,:] ax,ay,az of particle i 
    acc  = np.zeros([N,3])

    for i in range(N-1):
        for j in range(i+1,N):
            # Compute relative acceleration given
            # position of particle i and j
            mass_1 = mass[i]
            mass_2 = mass[j]
            acc_ij = acc_2body_Dehnen_softening(position_1=pos[i,:],position_2=pos[j,:],mass_2=mass_2, softening=softening)
                
            # Update array with accelerations
            acc[i,:] += acc_ij
            acc[j,:] -= mass_1 * acc_ij / mass_2 # because acc_2nbody already multiply by m[j]
        
    return (acc,jerk,pot)



In [8]:
# check if the results are the same
std_acc = acceleration_direct(particles, softening,  "Dehnen")


In [9]:
fast_acc = fast_acceleration_direct(pos,mass,N_particles,softening)

In [10]:
print("Do they give the same result?",np.allclose(std_acc[0], fast_acc[0]))

Do they give the same result? True


In [11]:
%%timeit
acceleration_direct(particles, softening,  "Dehnen")

86.6 µs ± 15.3 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [12]:
%%timeit
fast_acceleration_direct(pos,mass,N_particles,softening)

The slowest run took 4.02 times longer than the fastest. This could mean that an intermediate result is being cached.
5.97 ms ± 3.36 ms per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# Make it faster

In [13]:
@njit(parallel=True,fastmath=True)
def acc_2body_Dehnen_softening(position_1,position_2,mass_2, softening):
        
        """
        Implements definition of acceleration for two bodies i,j with Dehnen softening
        
        This is used in the following for loop
        """
        # Cartesian component of the i,j particles distance
        dx = position_1[0] - position_2[0]
        dy = position_1[1] - position_2[1]
        dz = position_1[2] - position_2[2]
        

        # Distance module
        r = np.sqrt(dx**2 + dy**2 + dz**2)

        # Cartesian component of the i,j force
        acceleration = np.zeros(3)
        acceleration[0] = -mass_2 * (5*softening**2 + 2*r**2) * dx / (2*(r**2 + softening**2)**(5/2))
        acceleration[1] = -mass_2 * (5*softening**2 + 2*r**2) * dy / (2*(r**2 + softening**2)**(5/2))
        acceleration[2] = -mass_2 * (5*softening**2 + 2*r**2) * dz / (2*(r**2 + softening**2)**(5/2))

        return acceleration

In [21]:
@njit(parallel=True,fastmath=True)
def compute_acc_parallel(N, pos, mass, softening):
    acc = np.zeros((N, 3))  # Initialize acceleration array
    for i in prange(N-1):
        for j in prange(i+1, N):
            mass_i = mass[i]
            mass_j = mass[j]
            #acc_ij = acc_2body_Dehnen_softening(pos[i], pos[j], mass_j, softening)
            # Cartesian component of the i,j particles distance
            position_1 = pos[i]
            position_2 = pos[j] 
            mass_2 = mass[j]
            dx = position_1[0] - position_2[0]
            dy = position_1[1] - position_2[1]
            dz = position_1[2] - position_2[2]
            

            # Distance module
            r = np.sqrt(dx**2 + dy**2 + dz**2)

            # Cartesian component of the i,j force
            acceleration = np.zeros(3)
            acceleration[0] = -mass_2 * (5*softening**2 + 2*r**2) * dx / (2*(r**2 + softening**2)**(5/2))
            acceleration[1] = -mass_2 * (5*softening**2 + 2*r**2) * dy / (2*(r**2 + softening**2)**(5/2))
            acceleration[2] = -mass_2 * (5*softening**2 + 2*r**2) * dz / (2*(r**2 + softening**2)**(5/2))
            acc_ij = acceleration
            acc[i] += acc_ij
            # Data dependency could slow down the parallelization
            acc[j] -= mass_i * acc_ij / mass_j
    return acc


acc = compute_acc_parallel(N_particles, pos, mass, softening)


In [22]:
compute_acc_parallel.parallel_diagnostics()

 
 Parallel Accelerator Optimizing:  Function compute_acc_parallel, 
/tmp/ipykernel_807/1387894056.py (1)  


Parallel loop listing for  Function compute_acc_parallel, /tmp/ipykernel_807/1387894056.py (1) 
-------------------------------------------------------------------------------------------------------------|loop #ID
@njit(parallel=True,fastmath=True)                                                                           | 
def compute_acc_parallel(N, pos, mass, softening):                                                           | 
    acc = np.zeros((N, 3))  # Initialize acceleration array--------------------------------------------------| #8
    for i in prange(N-1):------------------------------------------------------------------------------------| #12
        for j in prange(i+1, N):-----------------------------------------------------------------------------| #11
            mass_i = mass[i]                                                                               

In [16]:
fast_acceleration_direct.parallel_diagnostics()

 
 Parallel Accelerator Optimizing:  Function fast_acceleration_direct, 
/tmp/ipykernel_807/3704591935.py (1)  


Parallel loop listing for  Function fast_acceleration_direct, /tmp/ipykernel_807/3704591935.py (1) 
----------------------------------------------------------------------------------------------------------------------------|loop #ID
@njit(parallel=True)                                                                                                        | 
def fast_acceleration_direct(pos,mass,N,softening):                                                                         | 
                                                                                                                            | 
    jerk = None                                                                                                             | 
    pot = None                                                                                                              | 
                 

In [17]:
print("Do they give the same result?",np.allclose(std_acc[0], acc))

Do they give the same result? True


In [23]:
%%timeit
compute_acc_parallel(N_particles, pos, mass, softening)

2.15 ms ± 848 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [19]:
%%timeit
fast_acceleration_direct(pos,mass,N_particles,softening)

6.69 ms ± 348 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
%%timeit
acceleration_direct(particles, softening,  "Dehnen")

50.5 µs ± 10.9 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
