In [1]:
# See how much memory is used by OS initially.
# Then substract this value from later mem used, to obtain mem used by scripts.
import psutil
GB_UNIT = 1000*1024**2
MB_UNIT = GB_UNIT/1e3
OS_MEM = psutil.virtual_memory().used
print('OS memory in GB:', OS_MEM/GB_UNIT)


from shared.preface import *
import shared.functions as fct


# Initialize parameters and files.
PRE = PRE(
    sim='L025N752', 
    z0_snap=36, z4_snap=13, DM_lim=10000,
    sim_dir=SIM_ROOT, sim_ver=SIM_TYPE,
    phis=10, thetas=10, vels=100,
    pre_CPUs=128, sim_CPUs=128, mem_lim_GB=224
)


TEMP_DIR = f'X_tests'

# Halo parameters.
mass_gauge = 12.0
mass_range = 0.6
size = 3

hname = f'1e+{mass_gauge}_pm{mass_range}Msun'
fct.halo_batch_indices(
    PRE.Z0_STR, mass_gauge, mass_range, 'halos', size, 
    hname, PRE.SIM_DIR, TEMP_DIR
)
halo_batch_IDs = np.load(f'{TEMP_DIR}/halo_batch_{hname}_indices.npy')
halo_batch_params = np.load(f'{TEMP_DIR}/halo_batch_{hname}_params.npy')
halo_num = len(halo_batch_params)

print('********Number density band********')
print('Halo batch params (Rvir,Mvir,cNFW):')
print(halo_batch_params)
print('***********************************')

OS memory in GB: 14.97714453125
********************* Initialization *********************
# Initial conditions for neutrinos:
PHIs = 10, THETAs=10, Vs=100
Total neutrinos: 10000
# Simulation parameters:
Simulation box: L025N752
Snapshot from 0036 (z=0) to 0013 (z=4)
Pre/Sim CPUs 128/128
DM limit for cells: 10000
# File management:
Box files directory: 
 /projects/0/einf180/Tango_sims/L025N752/DMONLY/SigmaConstant00
Output directory: 
 /gpfs/home4/zimmer/neutrino_clustering_V2/L025N752/DMONLY/SigmaConstant00
**********************************************************
********Number density band********
Halo batch params (Rvir,Mvir,cNFW):
[[253.5523526   12.24058042   6.9286026 ]
 [158.62979351  11.62953177   8.03988676]
 [137.07912107  11.43929142   9.41292697]]
***********************************


In [2]:
# =============================================== #
# Run precalculations for selected halo in batch. #
# =============================================== #

halo_j = 2
halo_ID = halo_batch_IDs[halo_j]

# Generate progenitor index array for current halo.
splits = re.split('/', SIM_TYPE)
MTname = f'{PRE.SIM}_{splits[0]}_{splits[1]}'
proj_IDs = fct.read_MergerTree(PRE.OUT_DIR, MTname, halo_ID)

# Create empty arrays to save specifics of each loop.
save_GRID_L = np.zeros(len(PRE.NUMS_SNAPS))
save_num_DM = np.zeros(len(PRE.NUMS_SNAPS))

j = 0
snap = PRE.NUMS_SNAPS[::-1][0]
proj_ID = proj_IDs[0]
proj_ID = int(proj_ID)

# Output halo progress.
print(f'halo {halo_j+1}/{halo_num} ; snapshot {snap}')


# --------------------------- #
# Read and load DM positions. #
# --------------------------- #

IDname = f'origID{halo_ID}_snap_{snap}'
fct.read_DM_halo_index(
    snap, proj_ID, IDname, PRE.SIM_DIR, TEMP_DIR
)
DM_raw = np.load(f'{TEMP_DIR}/DM_pos_{IDname}.npy')
DM_particles = len(DM_raw)


# ---------------------- #
# Cell division process. #
# ---------------------- #

# Initialize grid.
snap_GRID_L = (int(np.abs(DM_raw).max()) + 1)*kpc
raw_grid = fct.grid_3D(snap_GRID_L, snap_GRID_L)
init_grid = np.expand_dims(raw_grid, axis=1)

# Prepare arrays for cell division.
DM_raw *= kpc
DM_pos = np.expand_dims(DM_raw, axis=0)
DM_pos_for_cell_division = np.repeat(DM_pos, len(init_grid), axis=0)
del DM_raw

# Cell division.
cell_division_count = fct.cell_division(
    init_grid, DM_pos_for_cell_division, snap_GRID_L, PRE.DM_LIM, None, TEMP_DIR, IDname
)
del DM_pos_for_cell_division

halo 3/3 ; snapshot 0036


### Short-range gravity.

In [3]:
def chunksize_short_range(cells, DM_tot, max_DM_lim, core_mem_MB):

    # note: mem_MB specific to peak memory usage in cell_gravity_short_range.
    # -> Peak memory after calculation of ind_2D,ind_3D,etc. sorting arrays.

    elem = 8                               # 8 bytes for standard np.float64
    mem_type0 = cells*3 * elem             # for list to ndarray of cell_coords
    mem_type1 = cells*DM_tot * elem        # for ind_2D
    mem_type2 = cells*DM_tot*3 * elem      # for DM_pos_sync, ind_3D, DM_sort
    mem_type3 = cells*max_DM_lim*3 * elem  # for DM_in

    mem_MB = (mem_type0+mem_type1+(3*mem_type2)+mem_type3)/1.e6

    batches = 1
    while mem_MB >= 0.95*core_mem_MB:
        mem_MB *= batches
        batches += 1
        mem_MB /= batches

    chunksize = math.ceil(cells/batches)

    return chunksize


def batch_generators_short_range(cell_coords, cell_gen, chunksize):

    cells = len(cell_coords)

    batches = math.ceil(cells/chunksize)
    batch_arr = np.arange(batches)

    cell_chunks = chunks(chunksize, cell_coords)
    cgen_chunks = chunks(chunksize, cell_gen)
    
    return batch_arr, cell_chunks, cgen_chunks


def cell_gravity_short_range(
    cell_coords_in, cell_gen, init_GRID_S,
    DM_pos, DM_lim, DM_sim_mass, smooth_l,
    out_dir, b_id
):

    cell_coords = np.expand_dims(np.array(cell_coords_in), axis=1)
    cell_gen = np.array(cell_gen)

    # Center all DM positions w.r.t. cell center.
    # DM_pos already in shape = (1, DM_particles, 3)
    DM_pos_sync = np.repeat(DM_pos, len(cell_coords), axis=0)
    DM_pos_sync -= cell_coords

    # Cell lengths to limit DM particles. Limit for the largest cell is 
    # GRID_S/2, not just GRID_S, therefore the cell_gen+1 !
    cell_len = np.expand_dims(init_GRID_S/(2**(np.array(cell_gen)+1)), axis=1)

    # Select DM particles inside each cell based on cube length generation.
    DM_in_cell_IDs = np.asarray(
        (np.abs(DM_pos_sync[:,:,0]) < cell_len) & 
        (np.abs(DM_pos_sync[:,:,1]) < cell_len) & 
        (np.abs(DM_pos_sync[:,:,2]) < cell_len)
    )
    del cell_gen, cell_len

    # Set DM outside cell to nan values.
    DM_pos_sync[~DM_in_cell_IDs] = np.nan
    del DM_in_cell_IDs

    # Sort all nan values to the bottom of axis 1, i.e. the DM-in-cell-X axis 
    # and truncate array based on DM_lim parameter. This simple way works since 
    # each cell cannot have more than DM_lim.
    ind_2D = DM_pos_sync[:,:,0].argsort(axis=1)
    ind_3D = np.repeat(np.expand_dims(ind_2D, axis=2), 3, axis=2)
    DM_sort = np.take_along_axis(DM_pos_sync, ind_3D, axis=1)
    DM_in = DM_sort[:,:DM_lim*SHELL_MULTIPLIERS[-1],:]

    # note: Memory peaks here, due to these arrays:
    # print(DM_pos_sync.shape, ind_2D.shape, ind_3D.shape, DM_sort.shape, DM_in.shape)
    # mem_inc = gso(cell_coords)+gso(DM_pos_sync)+gso(ind_2D)+gso(ind_3D)+gso(DM_sort)+gso(DM_in)
    # print('MEM_PEAK:', mem_inc/1e6)
    del DM_pos_sync, ind_2D, ind_3D, DM_sort

    # Calculate distances of DM and adjust array dimensionally.
    DM_dis = np.expand_dims(np.sqrt(np.sum(DM_in**2, axis=2)), axis=2)

    # Offset DM positions by smoothening length of Camila's simulations.
    eps = smooth_l / 2.
    # eps = smooth_l

    # nan values to 0 for numerator, and 1 for denominator to avoid infinities.
    quot = np.nan_to_num(cell_coords - DM_in, copy=False, nan=0.0) / \
        np.nan_to_num(
            np.power((DM_dis**2 + eps**2), 3./2.), copy=False, nan=1.0
        )
    
    # note: Minus sign, s.t. velocity changes correctly (see GoodNotes).
    derivative = -G*DM_sim_mass*np.sum(quot, axis=1)    
    np.save(f'{out_dir}/batch{b_id}_short_range.npy', derivative)

In [4]:
# Load files from cell division.
fin_grid = np.load(f'{TEMP_DIR}/fin_grid_{IDname}.npy')
DM_count = np.load(f'{TEMP_DIR}/DM_count_{IDname}.npy')
cell_com = np.load(f'{TEMP_DIR}/cell_com_{IDname}.npy')
cell_gen = np.load(f'{TEMP_DIR}/cell_gen_{IDname}.npy')


# --------------------------------------------- #
# Calculate gravity grid (in batches of cells). #
# --------------------------------------------- #
cell_coords = np.squeeze(fin_grid, axis=1)
cells = len(cell_coords)

In [5]:
# Calculate available memory per core.
mem_so_far = (psutil.virtual_memory().used - OS_MEM)/MB_UNIT
mem_left = PRE.MEM_LIM_GB*1e3 - mem_so_far
core_mem_MB = mem_left / PRE.PRE_CPUs

# Determine short-range chuncksize based on available memory and cells.
chunksize_sr = chunksize_short_range(
    cells, DM_particles, PRE.DM_LIM*SHELL_MULTIPLIERS[-1], core_mem_MB
)

# Split workload into batches (if necessary).
batch_arr, cell_chunks, cgen_chunks = batch_generators_short_range(
    cell_coords, cell_gen, chunksize_sr
)



# cell_gravity_short_range(
#     next(cell_chunks), next(cgen_chunks), snap_GRID_L, DM_pos, 
#     PRE.DM_LIM, PRE.DM_SIM_MASS, 
#     PRE.SMOOTH_L, TEMP_DIR, batch_arr[0]
# )


with ProcessPoolExecutor(PRE.PRE_CPUs) as ex:
    ex.map(
        cell_gravity_short_range, 
        cell_chunks, cgen_chunks, repeat(snap_GRID_L), repeat(DM_pos), 
        repeat(PRE.DM_LIM), repeat(PRE.DM_SIM_MASS), 
        repeat(PRE.SMOOTH_L), repeat(TEMP_DIR), batch_arr
    )

In [15]:
# Combine short-range batch files.
dPsi_short_range_batches = [
    np.load(f'{TEMP_DIR}/batch{b}_short_range.npy') for b in batch_arr
]
dPsi_short_range = np.array(
    list(chain.from_iterable(dPsi_short_range_batches))
)
np.save(
    f'{TEMP_DIR}/dPsi_short_range_{IDname}.npy', 
    dPsi_short_range
)

gravity_sr = np.load(f'{TEMP_DIR}/dPsi_short_range_{IDname}.npy')
print(gravity_sr.shape)

(183, 3)


In [25]:
mags_sr = np.sqrt(np.sum(gravity_sr**2, axis=1))
print(mags_sr.shape, np.max(mags_sr))

(183,) 4.635577750162663e-34


### Long-range gravity.

In [51]:
def chunksize_long_range(cells, core_mem_MB):
    
    # note: mem_MB specific to peak memory usage in cell_gravity_long_range.
    # -> Peak memory after calculation of derivative.

    elem = 8                          # 8 bytes for standard np.float64
    mem_type1 = 3*elem                # for derivative
    mem_type2 = cells*3*elem          # for quot
    mem_type3 = cells*elem            # for DM_count_sync

    mem_MB = (mem_type1+mem_type2+mem_type3)/1.e6

    batches = 1
    while mem_MB >= 0.95*core_mem_MB:
        mem_MB *= batches
        batches += 1
        mem_MB /= batches

    chunksize = math.ceil(cells/batches)

    return chunksize
    

def batch_generators_long_range(
    cell_coords, com_coords, DM_counts,
    chunksize 
):
    cells = len(cell_coords)
    cell_nums = np.arange(cells)

    batches = math.ceil(cells/chunksize)

    # Arrays used for naming files.
    id_arr = np.array([idx+1 for idx in cell_nums for _ in range(batches)])
    batch_arr = np.array([b+1 for _ in cell_nums for b in range(batches)])

    # Coord of cell, for which long-range gravity gets calculated.
    coord_arr = np.array([cc for cc in cell_coords for _ in range(batches)])

    # Chunks for DM_count array, as a generator for all cells.
    count_gens = (c for _ in cell_nums for c in chunks(chunksize, DM_counts))
    count_chain = chain(gen for gen in count_gens)

    # Chunks for cell_com array, as a generator for all cells.
    com_gens = (c for _ in cell_nums for c in chunks(chunksize, com_coords))
    com_chain = chain(gen for gen in com_gens)

    return id_arr, batch_arr, coord_arr, count_chain, com_chain
    

def cell_gravity_long_range(
    c_id, b_id, cellX_coords, 
    DM_count, cell_com, 
    DM_sim_mass, smooth_l, out_dir
):

    # Distances between cell centers and cell c.o.m. coords.
    com_dis = np.expand_dims(
        np.sqrt(np.sum((cellX_coords-cell_com)**2, axis=1)), axis=1
    )

    # Offset DM positions by smoothening length of Camila's simulations.
    eps = smooth_l / 2.
    # eps = smooth_l

    # Long-range gravity component for each cell (including itself for now).
    quot = (cellX_coords-cell_com)/np.power((com_dis**2 + eps**2), 3./2.)
    DM_count_sync = np.expand_dims(DM_count, axis=1)
    del com_dis

    # Set self-gravity to zero.
    # print(DM_count_sync.shape)
    # print(c_id)
    print('Before', DM_count_sync[c_id-1, 0])
    DM_count_sync[c_id-1, 0] = 0.
    print('After', DM_count_sync[c_id-1, 0])
    # print(quot.shape)

    strength = -G*DM_sim_mass*DM_count_sync*quot
    print(strength[c_id-1])
    mags = np.sqrt(np.sum(strength**2, axis=1))
    print(mags.shape, np.max(mags))

    # note: Minus sign, s.t. velocity changes correctly (see GoodNotes).
    derivative = -G*DM_sim_mass*np.sum(DM_count_sync*quot, axis=0)
    print(derivative)

    # note: Memory peaks here, due to these arrays:
    # print(quot.shape, DM_count_sync.shape, derivative.shape)
    # mem_inc = gso(quot)+gso(DM_count_sync)+gso(derivative)
    # print(mem_inc/1e6)
    del quot, DM_count_sync

    np.save(f'{out_dir}/cell{c_id}_batch{b_id}_long_range.npy', derivative)


def load_dPsi_long_range(c_id, batches, out_dir):
    
    # Load all batches for current cell.
    dPsi_raw = np.array(
        [np.load(f'{out_dir}/cell{c_id}_batch{b}_long_range.npy') for b in batches]
    )

    dPsi_for_cell = np.sum(dPsi_raw, axis=0)
    np.save(f'{out_dir}/cell{c_id}_long_range.npy', dPsi_for_cell)  


# Load files from cell division.
fin_grid = np.load(f'{TEMP_DIR}/fin_grid_{IDname}.npy')
DM_count = np.load(f'{TEMP_DIR}/DM_count_{IDname}.npy')
cell_com = np.load(f'{TEMP_DIR}/cell_com_{IDname}.npy')


# --------------------------------------------- #
# Calculate gravity grid (in batches of cells). #
# --------------------------------------------- #
cell_coords = np.squeeze(fin_grid, axis=1)
cells = len(cell_coords)

print('Division rounds:', cell_division_count)
print('SHAPES:', cell_coords.shape, DM_count.shape, cell_com.shape)
print('************')

# Calculate available memory per core.
mem_so_far = (psutil.virtual_memory().used - OS_MEM)/MB_UNIT
mem_left = PRE.MEM_LIM_GB*1e3 - mem_so_far
core_mem_MB = mem_left / PRE.PRE_CPUs

# Determine long-range chuncksize based on available memory and cells.
chunksize_lr = chunksize_long_range(cells, core_mem_MB)

# Split workload into batches (if necessary).
cell_ids, batches, coords, count_chain, com_chain = batch_generators_long_range(
    cell_coords, cell_com, DM_count, chunksize_lr
)


# cell_gravity_long_range(
#     cell_ids[0], batches[0], 
#     coords[0], next(count_chain), next(com_chain), 
#     PRE.DM_SIM_MASS, PRE.SMOOTH_L, TEMP_DIR
# )

last_count = list(count_chain)[-1]
last_com = list(com_chain)[-1]

print(cell_ids[-1])

cell_gravity_long_range(
    cell_ids[-1], batches[-1], 
    coords[-1], last_count, last_com, 
    PRE.DM_SIM_MASS, PRE.SMOOTH_L, TEMP_DIR
)


# with ProcessPoolExecutor(PRE.PRE_CPUs) as ex:
#     ex.map(
#         cell_gravity_long_range, cell_ids, batches, 
#         coords, count_chain, com_chain,
#         repeat(PRE.DM_SIM_MASS), repeat(PRE.SMOOTH_L), repeat(TEMP_DIR)
#     )

Division rounds: 4
SHAPES: (183, 3) (183,) (183, 3)
************
183
Before 697
After 0
(183, 3)
(183,) 8.250400909984426e-36
[-4.94299072e-35 -4.74802435e-35  1.53854474e-35]


In [30]:
# Combine long-range batch files.
load_batch_arr = np.unique(batches)
with ProcessPoolExecutor(PRE.PRE_CPUs) as ex:
    ex.map(
        load_dPsi_long_range, cell_ids, 
        repeat(load_batch_arr), repeat(TEMP_DIR)
    )

dPsi_long_range = np.array([
    np.load(f'{TEMP_DIR}/cell{c}_long_range.npy') for c in cell_ids
])
np.save(
    f'{TEMP_DIR}/dPsi_long_range_{IDname}.npy', 
    dPsi_long_range
)

gravity_lr = np.load(f'{TEMP_DIR}/dPsi_long_range_{IDname}.npy')
print(gravity_lr.shape, np.max(gravity_lr, axis=0))

mags_lr = np.sqrt(np.sum(gravity_lr**2, axis=1))
print(mags_lr.shape, np.max(mags_lr))

(183, 3) [3.49735510e-34 2.53571754e-34 5.40151363e-34]
(183,) 7.177394606954965e-34


In [None]:
# Combine long-range batch files.
load_batch_arr = np.unique(batches)
load_dPsi_long_range( 
    cell_ids[0], load_batch_arr, TEMP_DIR
)

dPsi_long_range = np.array([np.load(f'{TEMP_DIR}/cell1_long_range.npy')])
np.save(
    f'{TEMP_DIR}/dPsi_long_range_{IDname}.npy', 
    dPsi_long_range
)

gravity_lr = np.load(f'{TEMP_DIR}/dPsi_long_range_{IDname}.npy')
print(gravity_lr.shape)