## This is a little notebook that can approximately say how much memory (and therefore how many GPUs) would be needed for a given scale of run

In [2]:
# Ensure that you are only using 80% of GPU memory
import numpy as np

# Specify the bounds of the simulation in osiris units (c / wpe)
x_bounds = [-3000,3000]
y_bounds = [1890, 8000]

dx = 0.3

n_cells = (x_bounds[1]-x_bounds[0])*(y_bounds[1]-y_bounds[0])/(dx**2)
print("Number of cells: ", np.format_float_scientific(n_cells))

particles_per_cell = 25
n_species = 3

n_particles = n_cells * particles_per_cell * n_species # we need to allocate for twice as many particles
n_bytes_particles = n_particles* 2 * 70 # maria says ~70 bytes per particle. I don't know if this is single or double precision

max_bytes_per_GPU = 16e9/.8 # 80% of 16GB
print("Number of particles: ", np.format_float_scientific(n_particles,3))

print("Recommended number of GPUs: ", np.ceil(n_bytes_particles/max_bytes_per_GPU))
print("Recommended number of nodes:", np.ceil(n_bytes_particles/max_bytes_per_GPU/6))

print("nx = ", round((x_bounds[1]-x_bounds[0])/dx,0))
print("ny = ", round((y_bounds[1]-y_bounds[0])/dx,0))

Number of cells:  4.073333333333334e+08
Number of particles:  3.055e+10
Recommended number of GPUs:  214.0
Recommended number of nodes: 36.0
nx =  20000.0
ny =  20367.0


## Now, in order to use the tiles algorithm or the GPU algorithm, you need to specify the number of tiles you want in each direction.

In [6]:
n_tiles_min = n_cells / 1024

print("Number of tiles in each direction ", np.ceil(np.sqrt(n_tiles_min)))

# Just keep typing in powers of two until you get n_tiles > n_tiles_min

i = 0
j = 0
while True:
    n_tiles_x = 2**i
    n_tiles_y = 2**j
    n_tiles = n_tiles_x * n_tiles_y
    if n_tiles > n_tiles_min:
        break
    i += 1
    j += 1
print("n_tiles = ", n_tiles_x * n_tiles_y)
print(f"n_tiles_x = {n_tiles_x}, n_tiles_y = {n_tiles_y}")

particles_per_tile = n_particles/n_tiles/n_species
print("Particles per tile: ", np.ceil(particles_per_tile))
print("You should set num_par_max to be 2x this: ", np.ceil(2*particles_per_tile))

Number of tiles in each direction  631.0
n_tiles =  1048576
n_tiles_x = 1024, n_tiles_y = 1024
Particles per tile:  9712.0
You should set num_par_max to be 2x this:  19424.0
