In [None]:
from fireworks.ic import ic_two_body as ic_two_body
from fireworks.ic import ic_random_uniform as ic_random_uniform

from fireworks.nbodylib import dynamics as dyn
from fireworks.nbodylib import integrators as intg


import matplotlib.pyplot as plt
import numpy as np
from typing import Optional, Tuple
import numpy.typing as npt
from fireworks.particles import Particles


import numba
from numba import prange, njit

In [None]:
mass1 = 1.0
mass2 = 1.0
rp = 1.0
e = 0.0

particles = ic_two_body(mass1, mass2, rp, e)

pos = particles.pos
vel = particles.vel
N = len(particles)    
mass = particles.mass
softening = 1e-3
tstep = 1e-3

#particles = ic_random_uniform(100,[1,1],[1,10],[0,1])

In [None]:
@njit(parallel=True,fastmath=True)
def acceleration_parallel(position_1,N,mass,i):
    # acceleration of particle i 
    acceleration = np.zeros(3)
    for j in prange(N):
        if i != j: 
            print("indii",i,j)
            # Cartesian component of the i,j particles distance
            position_2 = pos[j] 
            
            mass_2 = mass[j]
            dx = position_1[0] - position_2[0]
            dy = position_1[1] - position_2[1]
            dz = position_1[2] - position_2[2]
            
            # Distance module
            r = np.sqrt(dx**2 + dy**2 + dz**2)
            
            # Cartesian component of the i,j force
            # This shouldn't give problems during parallelization because I'm updating a specific element
            # and the order in j in which this is done is not relevant 
            
            acceleration[0] += -mass_2 * (5*softening**2 + 2*r**2) * dx / (2*(r**2 + softening**2)**(5/2))
            acceleration[1] += -mass_2 * (5*softening**2 + 2*r**2) * dy / (2*(r**2 + softening**2)**(5/2))
            acceleration[2] += -mass_2 * (5*softening**2 + 2*r**2) * dz / (2*(r**2 + softening**2)**(5/2))

    return acceleration


In [None]:
@njit(parallel=True)
def integrator_euler(pos,
                     vel,
                     acc,
                     tstep,
                     jerk=None,
                     potential=None
                     ):
    
    # removing check for external acceleration
    #
    #
    # Euler integration
    vel = vel + acc * tstep  # Update vel
    pos = pos + vel * tstep  # Update pos
    #particles.set_acc(acc)  # Set acceleration

    return (vel,pos)


In [38]:
a = np.array([1,2])
b = a[1:]
b += 1
print(a)
print(b)


[1 3]
[3]


In [None]:
@njit(parallel=True)
def full_parallel_evo(N,pos,vel,mass,tstep):
    for i in prange(N):
        position_1 = pos[i] 
        acc_i   = acceleration_parallel(position_1,N,mass,i)
        print(f"acc_{i}",acc_i,"\n")
        vel2,pos2 = integrator_euler(position_1,vel[i],acc_i,tstep)
        print(f"new position of particle {i}:",pos2)
        print("\n")
        

In [None]:
full_parallel_evo(N,pos,vel,mass,tstep)

In [None]:
full_parallel_evo.parallel_diagnostics()

In [None]:
particles2, tstep2, acc2, jerk, potential = intg.integrator_euler(particles,tstep,acceleration_estimator=dyn.acceleration_direct,softening="Dehnen")
particles2.pos

In [None]:
acc2

In [63]:
@njit(parallel=True,fastmath=True)
def full_parallel_evo(N,pos,vel,mass,tstep):
    for i in prange(N):
        position_1 = pos[i]
        acceleration = np.zeros(3)
        for j in prange(N):
            if i != j: 
      
                # Cartesian component of the i,j particles distance
                position_2 = pos[j] 
                mass_2 = mass[j]
                dx = position_1[0] - position_2[0]
                dy = position_1[1] - position_2[1]
                dz = position_1[2] - position_2[2]
                
                # Distance module
                r = np.sqrt(dx**2 + dy**2 + dz**2)
                
                # Cartesian component of the i,j force
                # This shouldn't give problems during parallelization because I'm updating a specific element
                # and the order in j in which this is done is not relevant 
                
                acceleration[0] += -mass_2 * (5*softening**2 + 2*r**2) * dx / (2*(r**2 + softening**2)**(5/2))
                acceleration[1] += -mass_2 * (5*softening**2 + 2*r**2) * dy / (2*(r**2 + softening**2)**(5/2))
                acceleration[2] += -mass_2 * (5*softening**2 + 2*r**2) * dz / (2*(r**2 + softening**2)**(5/2))


        #vel2,pos2 = integrator_euler(pos[i],vel[i],acceleration,tstep)

        vel2 = vel + acceleration * tstep  # Update vel
        pos2 = pos + vel * tstep

        #print(f"new position of particle {i}:",pos2)
        #print("\n")
        


In [64]:
%%timeit
full_parallel_evo(N,pos,vel,mass,tstep)

The slowest run took 19.04 times longer than the fastest. This could mean that an intermediate result is being cached.
50.6 µs ± 60.6 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [58]:
full_parallel_evo.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function full_parallel_evo, 
/tmp/ipykernel_78106/3995311148.py (1)  


Parallel loop listing for  Function full_parallel_evo, /tmp/ipykernel_78106/3995311148.py (1) 
------------------------------------------------------------------------------------------------------------------|loop #ID
@njit(parallel=True,fastmath=True)                                                                                | 
def full_parallel_evo(N,pos,vel,mass,tstep):                                                                      | 
    for i in prange(N):-------------------------------------------------------------------------------------------| #42
        position_1 = pos[i]                                                                                       | 
       # acc_i   = acceleration_parallel(position_1,N,mass,i)                                                     | 
                                                                                    

In [21]:
def boh():
    x = np.zeros(100)
    y = np.ones(100)
    for i in range(len(y)):
        x += y[i] 


In [22]:
%%timeit
boh()

406 µs ± 175 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [23]:
@njit
def boh():
    x = np.zeros(100)
    y = np.ones(100)
    for i in range(len(y)):
        x += y[i] 


In [24]:
%%timeit
boh()

The slowest run took 5.26 times longer than the fastest. This could mean that an intermediate result is being cached.
2.64 µs ± 2.37 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [25]:
@njit(parallel=True)
def boh():
    x = np.zeros(100)
    y = np.ones(100)
    for i in range(len(y)):
        x += y[i] 


In [26]:
%%timeit
boh()

The slowest run took 35.18 times longer than the fastest. This could mean that an intermediate result is being cached.
73.8 µs ± 108 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [27]:
boh.parallel_diagnostics()

 
 Parallel Accelerator Optimizing:  Function boh, 
/tmp/ipykernel_78106/1245845950.py (1)  


Parallel loop listing for  Function boh, /tmp/ipykernel_78106/1245845950.py (1) 
-------------------------------|loop #ID
@njit(parallel=True)           | 
def boh():                     | 
    x = np.zeros(100)----------| #14
    y = np.ones(100)-----------| #15
    for i in range(len(y)):    | 
        x += y[i]              | 
------------------------------ After Optimisation ------------------------------
Parallel region 0:
+--14 (parallel, fused with loop(s): 15)


 
Parallel region 0 (loop #14) had 1 loop(s) fused.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
 
