This file extracts the particle information of the target halos from the snapshots.

## Preparations

In [None]:
import os
import h5py
import numpy as np
import logging
import shutil
import gc
import matplotlib.pyplot as plt

# Physics-related Packages
from astropy.cosmology import Planck15

### Path

In [None]:
Target_cat_name = "/users_path/merger_trace/data/tng300/tng300_targetcat/TargetHalo_Cat_092.hdf5"
SnapPath = "/users_path/merger_trace/data/tng300/tng300_snapshots/"
SavePath = "/users_path/merger_trace/data/tng300/tng300_halos/"

### Create Null HDF5 Files for Each Target Halo

#### Delete files before creation

In [None]:
# Empty the output directory so that a fresh run starts from a clean state.
directory = SavePath

if not os.path.exists(directory):
    print(f"The directory {directory} does not exist")
else:
    # Walk every entry directly inside the output directory.
    for file_or_dir in os.listdir(directory):
        file_or_dir_path = os.path.join(directory, file_or_dir)
        try:
            if os.path.isfile(file_or_dir_path) or os.path.islink(file_or_dir_path):
                # Regular files and symlinks are unlinked directly.
                os.remove(file_or_dir_path)
            elif os.path.isdir(file_or_dir_path):
                # Sub-directories are removed together with their content.
                shutil.rmtree(file_or_dir_path)
            else:
                # Anything else is left alone and not reported.
                continue
            print(f"{file_or_dir_path} has already been deleted")
        except Exception as e:
            # Best effort: report the failure and keep going with the rest.
            print(f"cannot remove {file_or_dir_path}: {e}")


#### Function for Finding Halos with Subhalos

In [None]:
def Find_HalosWithSub(Group_FirstSub):
    """
    Return the indices of the halos that host at least one subhalo.

    Parameters
    ----------
    Group_FirstSub : array_like
        One entry per halo; the value -1 marks a halo without any subhalo.

    Returns
    -------
    numpy.ndarray
        Indices (along the first axis) of the entries that are not -1.
        The array is empty when no halo has a subhalo.
    """
    # np.asarray lets plain sequences be compared element-wise as well.
    return np.where(np.asarray(Group_FirstSub) != -1)[0]

In [None]:
def Find_CenterSubhalo(HaloIDs, SubhaloGrNr, SubhaloIDs):
    """
    Locate, for every halo ID, the first subhalo that belongs to it.

    Parameters
    ----------
    HaloIDs : numpy.ndarray
        Halo IDs to look up.
    SubhaloGrNr : numpy.ndarray
        For every subhalo, the ID of the halo it belongs to.
    SubhaloIDs : numpy.ndarray
        For every subhalo, its ID (same ordering as `SubhaloGrNr`).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The subhalo IDs and the matching subhalo indices, both stored as
        float arrays with the shape of `HaloIDs`.

    Raises
    ------
    IndexError
        If a halo ID does not occur in `SubhaloGrNr`.

    Notes
    -----
    NOTE(review): the first matching subhalo is presumably the central one;
    that relies on the ordering of the catalogue - confirm.
    """
    first_member_index = np.zeros(HaloIDs.shape)
    first_member_id = np.zeros(HaloIDs.shape)

    for slot, group_number in enumerate(HaloIDs):
        # All subhalos of this halo, in catalogue order; keep the first.
        members = np.nonzero(SubhaloGrNr == group_number)[0]
        first_member_index[slot] = members[0]
        first_member_id[slot] = SubhaloIDs[int(first_member_index[slot])]

    return first_member_id, first_member_index

In [None]:
# Create the empty per-halo output files.
# Group/dataset names used here are similar to those in Illustris.
def create_hdf5_nulldict(Halo_ID):
    """Create an empty, growable HDF5 file for one FOF halo.

    The file is written to `{SavePath}snap_halo_{Halo_ID}.h5` (an existing
    file of that name is overwritten) and holds zero-length datasets that
    can later be extended along their first axis:

    * PartType0 (gas): Coordinates (N, 3) in ckpc/h, Masses (N,) in
      10^10 M_sun/h
    * PartType1 (DM):  Coordinates (N, 3) in ckpc/h

    Parameters
    ----------
    Halo_ID : int
        FOF halo ID, e.g. 0; only used to build the file name.
    """
    # Intentionally NOT created (kept here as a reminder of what could be
    # stored): gas CenterOfMass, and ParticleIDs / Potential / Velocities for
    # both gas and DM. DM masses are not stored either, since DM particles
    # have constant mass.
    #
    # Each entry: group name -> ((dataset name, trailing dimensions), ...)
    layout = (
        ('PartType0', (('Coordinates', (3,)), ('Masses', ()))),
        ('PartType1', (('Coordinates', (3,)),)),
    )

    target = f'{SavePath}snap_halo_{Halo_ID}.h5'
    with h5py.File(target, 'w') as out_file:
        for group_name, datasets in layout:
            group = out_file.create_group(group_name)
            for dataset_name, trailing_dims in datasets:
                # Start with zero rows; the first axis is unlimited so that
                # particles can be appended chunk by chunk.
                group.create_dataset(
                    dataset_name,
                    shape=(0,) + trailing_dims,
                    maxshape=(None,) + trailing_dims,
                    dtype='float64',
                )

In [None]:
# Get the halo IDs and halo information from the target catalogue.
# The catalogue is only read here, so it is opened read-only: mode 'a' would
# need write access and would silently create an empty file when the path is
# wrong, instead of failing right away.
with h5py.File(Target_cat_name, 'r') as Target_hdf:
    FOF_Halo_IDs = Target_hdf['Group/FOF_Halo_IDs'][:].astype(int)
    GroupFirstSub = Target_hdf['Group/GroupFirstSub'][:]
    Group_R_Crit200 = Target_hdf['Group/Group_R_Crit200'][:]
    GroupPos = Target_hdf['Group/GroupPos'][:]

# Only halos that host at least one subhalo are treated as targets.
Target_Halo_IDs = FOF_Halo_IDs[Find_HalosWithSub(GroupFirstSub)]

In [None]:
# Set up the log file, then (optionally) generate the empty per-halo files.

# The log is written next to the per-halo output files.
log_file = f'{SavePath}create_snap_halo.log'
os.makedirs(os.path.dirname(log_file), exist_ok=True)

# Timestamped INFO-level (and above) records go to the log file.
logging.basicConfig(
    level=logging.INFO,
    filename=log_file,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# The creation loop below is disabled (it is only a string literal).
'''
for Halo_ID in Target_Halo_IDs: # we have Halo IDs
    try:
        create_hdf5_nulldict(int(Halo_ID))
        logging.info(f'CREATED {SavePath}snap_halo_{Halo_ID}.h5')
    except Exception as e:
        logging.error(f'FAILED {SavePath}snap_halo_{Halo_ID}.h5')

# Log summary
logging.info('HDF5 creation process completed')
'''

## Sort Particles into Each Halo File

In [None]:
# get Halo IDs and halo information

In [None]:
# Loop over all the chunks and sort particles.
# The reason that we have to loop over chunks:
#   Note that the truncation of a snapshot in chunks is arbitrary,
#   thus halos may happen to be stored across multiple, subsequent chunks.
#   Similarly, the different particle types of a halo can be stored
#   in different sets of chunks.


def _read_chunk_dataset(snap_hdf, path, empty_shape):
    """Return the dataset at `path` as an array, or an empty array if absent."""
    if path in snap_hdf:
        return snap_hdf[path][:]
    # A chunk without this particle type contributes nothing, rather than
    # aborting the whole chunk.
    return np.empty(empty_shape, dtype='float64')


def _indices_in_box(coordinates, center, half_widths):
    """Return the row indices of `coordinates` inside an axis-aligned box."""
    lower = center - half_widths
    upper = center + half_widths
    inside = np.all((coordinates >= lower) & (coordinates <= upper), axis=1)
    return np.flatnonzero(inside)


def _append_rows(dataset, rows):
    """Grow `dataset` along its first axis and write `rows` at the end."""
    old_size = dataset.shape[0]
    new_size = old_size + len(rows)
    dataset.resize(new_size, axis=0)
    dataset[old_size:new_size] = rows


# sort particles in one chunk
def sort_particles(snap_number, chunk_index, Snap_Path, SavePath,
                   FOF_Halo_IDs, GroupPos, Group_R_Crit200, GroupFirstSub):
    """Append the particles of one snapshot chunk to the per-halo files.

    Reads gas coordinates/masses and DM coordinates from
    `{Snap_Path}snap_NNN.{chunk_index}.hdf5` (NNN = zero-padded snapshot
    number). For every halo that hosts a subhalo, it selects the particles
    inside a box centred on the halo position and appends them to
    `{SavePath}snap_halo_{Halo_ID}.h5`.

    The box half-width is R_Crit200 along x and y and 2 * R_Crit200 along z.
    NOTE(review): the doubled z extent looks deliberate - confirm.
    NOTE(review): plain coordinate comparisons are used, i.e. no periodic
    wrapping; presumably halos near the simulation-box edge lose particles -
    confirm.

    Parameters
    ----------
    snap_number : int
        Snapshot number, e.g. 92.
    chunk_index : int
        Index of the snapshot chunk file to read.
    Snap_Path, SavePath : str
        Directory prefixes (including the trailing separator) of the
        snapshot chunks and of the per-halo output files.
    FOF_Halo_IDs, GroupPos, Group_R_Crit200, GroupFirstSub : numpy.ndarray
        Per-halo catalogue arrays with a common first axis: halo IDs,
        positions (N, 3), radii, and the first-subhalo index (-1 = none).

    Raises
    ------
    OSError
        If the chunk file cannot be opened.
    KeyError
        If a halo file lacks the expected datasets (the halo files must have
        been created beforehand with the PartType0/PartType1 layout).
    """
    # Snapshot numbers are zero-padded to three digits in the file name.
    snap_name = f'{Snap_Path}snap_{int(snap_number):03d}.{chunk_index}.hdf5'
    with h5py.File(snap_name, 'r') as snap_hdf:
        gas_coordinates = _read_chunk_dataset(snap_hdf, 'PartType0/Coordinates', (0, 3))
        gas_masses = _read_chunk_dataset(snap_hdf, 'PartType0/Masses', (0,))
        dm_coordinates = _read_chunk_dataset(snap_hdf, 'PartType1/Coordinates', (0, 3))

    # Only halos that host a subhalo have an output file.
    Target_Halo_Indices = Find_HalosWithSub(GroupFirstSub)
    Target_Halo_IDs = FOF_Halo_IDs[Target_Halo_Indices]
    Target_Halo_R = Group_R_Crit200[Target_Halo_Indices]
    Target_Halo_Pos = GroupPos[Target_Halo_Indices]

    for Halo_ID, Halo_R, Halo_Pos in zip(Target_Halo_IDs, Target_Halo_R, Target_Halo_Pos):
        Halo_ID = int(Halo_ID)
        half_widths = np.array([Halo_R, Halo_R, 2 * Halo_R])

        gas_in_halo = _indices_in_box(gas_coordinates, Halo_Pos, half_widths)
        dm_in_halo = _indices_in_box(dm_coordinates, Halo_Pos, half_widths)

        Halo_File = f'{SavePath}snap_halo_{Halo_ID}.h5'
        with h5py.File(Halo_File, 'a') as halo_hdf:
            if gas_in_halo.size:
                print('Halo gas', Halo_ID)
                _append_rows(halo_hdf['PartType0/Coordinates'], gas_coordinates[gas_in_halo])
                _append_rows(halo_hdf['PartType0/Masses'], gas_masses[gas_in_halo])

            if dm_in_halo.size:
                print('Halo_DM', Halo_ID)
                _append_rows(halo_hdf['PartType1/Coordinates'], dm_coordinates[dm_in_halo])

    # Release the large chunk arrays before the next chunk is loaded. Only
    # names that are always bound are deleted, so an empty target list no
    # longer raises UnboundLocalError here.
    del gas_coordinates, gas_masses, dm_coordinates
    gc.collect()

In [None]:
# Get the total number of chunks. Chunk index starts from 0
# num_chunks = sim['num_files_snapshot']
# num_chunks = 3
snap_number = 92

# Loop over the chunks and sort their particles into the halo files.
# NOTE(review): only chunks 100-199 are processed here - confirm that the
# remaining chunks are handled elsewhere.
for chunk_index in range(100, 200):
    try:
        # Pass snap_number instead of a second hard-coded 92, so that the
        # processed snapshot and the logged file name cannot drift apart.
        sort_particles(snap_number, chunk_index, SnapPath, SavePath,
                       FOF_Halo_IDs, GroupPos, Group_R_Crit200, GroupFirstSub)
    except Exception:
        # Keep going with the next chunk, but record why this one failed
        # (logging.exception adds the traceback at ERROR level).
        logging.exception(f'Failed sorting {SnapPath}snap_0{snap_number}.{chunk_index}.hdf5')
    else:
        logging.info(f'sorted {SnapPath}snap_0{snap_number}.{chunk_index}.hdf5')
