## track_sfr.ipynb

This notebook calculates the average star formation rate (SFR) of all subhalos in each FOF halo and tracks the evolution of that average SFR for the primary zoom-in targets in TNG-Cluster from snapshot 72 to snapshot 99.

**Main Functions**

- Calculate the average star formation rate of each FOF halo (mean over its subhalos)  
- Send data requests to the TNG-Cluster API  
- Track the SFR evolution based on the main progenitor branch

**Input Requirements**

- Local access to:

  - TNG-Cluster merger event file (`cluster_mergers.hdf5`)
  - Sorted group catalogs from snapshot 72 to snapshot 99 (e.g., `TargetHalo_MergerCat_099.hdf5`)
  - TNG-Cluster catalog (`TNG-Cluster_Catalog.hdf5`)
  - SubLink main-progenitor-branch files, one per target subhalo (`tng_cluster_mpbs/sublink_mpb_<SubfindID at snapshot 99>.hdf5`)


In [None]:
# Basic Packages
import numpy as np
import h5py
import logging
import os
import shutil
import gc
import matplotlib.pyplot as plt

# Physics-related Packages
from astropy.cosmology import Planck15

In [None]:
# Root directory of the per-snapshot target-halo catalogs.
# This path ends with a trailing slash.
base_cat_name = '/users_path/merger_trace/data/tng_cluster/tng_cluster_targetcat/'

# Directory containing the SubLink main-progenitor-branch (MPB) files.
# NOTE(review): unlike base_cat_name, this path has NO trailing slash, so a
# path separator must be supplied when building file names from it.
mpb_base_path = '/users_path/merger_trace/data/tng_cluster/tng_cluster_mpbs'

In [None]:
# Setup for querying the TNG web API over HTTP
import requests

# Root URL of the API.
# NOTE(review): this is plain http, so the API key below may travel
# unencrypted unless the server redirects -- consider https; confirm.
baseUrl = 'http://www.tng-project.org/api/'
# Headers sent with every request; replace the "API KEY" placeholder with a
# personal API key before running.
headers = {"api-key":"API KEY"}

def get(path, params=None):
    """
    Send an HTTP GET request and return the response in a convenient form.

    The module-level `headers` dict (carrying the API key) is sent with the
    request.

    Parameters:
    - path: Full URL to request.
    - params: Optional dict of query parameters passed to `requests.get`.

    Returns:
    - The parsed JSON object if the response media type is application/json.
    - Otherwise, if the response has a content-disposition header, the body
      is written to a file in the current working directory and the file
      name (str) is returned.
    - Otherwise the raw `requests.Response` object.

    Raises:
    - requests.HTTPError: if the response status indicates an error.
    """
    # make HTTP GET request to path
    r = requests.get(path, params=params, headers=headers)

    # raise exception if response code is not HTTP SUCCESS (200)
    r.raise_for_status()

    # Compare only the media type: the header may be absent or may carry
    # parameters such as "; charset=utf-8", which an exact match would miss.
    content_type = r.headers.get('content-type', '').split(';')[0].strip().lower()
    if content_type == 'application/json':
        return r.json() # parse json responses automatically

    if 'content-disposition' in r.headers:
        # the server-suggested file name follows "filename=" in the header
        filename = r.headers['content-disposition'].split("filename=")[1]
        with open(filename, 'wb') as f:
            f.write(r.content)
        return filename # return the filename string

    return r

In [None]:
def get_subhalo_maxM(Halo_ID, Sub_GrNr, Sub_Mass):
    """
    Find the index of the most massive subhalo belonging to a given halo.

    Parameters:
    - Halo_ID: Halo identifier to match against Sub_GrNr.
    - Sub_GrNr: Array giving, for each subhalo, the halo it belongs to.
    - Sub_Mass: Array of subhalo masses, aligned with Sub_GrNr.

    Returns:
    - CenterSub_Index: 0-d numpy array holding the index (into Sub_GrNr /
      Sub_Mass) of the most massive subhalo of the halo.

    Raises:
    - IndexError: if no subhalo belongs to Halo_ID.
    """
    # Indices of all subhalos that are members of the requested halo
    find_Sub = np.where(Sub_GrNr == Halo_ID)[0]

    # Ascending sort of the member masses; the last entry is the most massive
    find_Sub_Mass_Sorted = np.argsort(Sub_Mass[find_Sub])

    # Map the position within the member list back to a global subhalo index
    CenterSub_Index = find_Sub[find_Sub_Mass_Sorted[-1]]

    return np.array(CenterSub_Index)

def Get_AvgSFR(SubhaloGrNr, SubhaloSFR, FOF_Halo_IDs):
    """
    Compute the mean star formation rate of the subhalos in each FOF halo.

    Parameters:
    - SubhaloGrNr: Array giving, for each subhalo, the FOF halo it belongs to.
    - SubhaloSFR: Array of subhalo star formation rates, aligned with
      SubhaloGrNr.
    - FOF_Halo_IDs: Array of FOF halo IDs to compute the average for.

    Returns:
    - AvgSFR: Float array with the same shape as FOF_Halo_IDs; entry i is the
      mean SFR over all subhalos whose SubhaloGrNr equals FOF_Halo_IDs[i],
      or NaN if the halo has no subhalos.
    """
    AvgSFR = np.zeros(FOF_Halo_IDs.shape)

    for i, current_fof_id in enumerate(FOF_Halo_IDs):
        subSFR_in_fof = SubhaloSFR[SubhaloGrNr == current_fof_id]

        if subSFR_in_fof.size == 0:
            # np.mean of an empty slice also yields NaN but emits a
            # RuntimeWarning; set the value explicitly instead.
            AvgSFR[i] = np.nan
        else:
            AvgSFR[i] = np.mean(subSFR_in_fof)

    return AvgSFR
    
def Get_HaloIDs(TargetHalo_cat):
    """
    Read a target-halo catalog and return per-halo and per-subhalo arrays.

    Only FOF halos that host at least one subhalo (GroupFirstSub != -1) are
    kept in the per-halo outputs.

    Parameters:
    - TargetHalo_cat: Path to the HDF5 file containing the target halo catalog.

    Returns (tuple):
    - Target_Halo_IDs: IDs of the FOF halos that have at least one subhalo.
    - Galaxy_nums: Number of subhalos of each halo in Target_Halo_IDs.
    - AvgSFR: Mean subhalo SFR of each halo in Target_Halo_IDs.
    - SubhaloGrNr: For every subhalo in the catalog, the halo it belongs to.
    - SubhaloIDs: ID of every subhalo in the catalog (aligned with
      SubhaloGrNr).

    The three per-halo arrays are aligned element by element, so a boolean
    mask built from Target_Halo_IDs can be applied to Galaxy_nums and AvgSFR.
    """

    with h5py.File(TargetHalo_cat, 'r') as Target_hdf:
        # Read FOF Halo Info
        FOF_Halo_IDs =  Target_hdf['Group/FOF_Halo_IDs'][:]
        GroupFirstSub = Target_hdf['Group/GroupFirstSub'][:]
        Group_Nsubs = Target_hdf['Group/GroupNsubs'][:]

        # Read Subhalo Info
        SubhaloGrNr = Target_hdf['Subhalo/SubhaloGrNr'][:]
        SubhaloSFR = Target_hdf['Subhalo/SubhaloSFR'][:]
        SubhaloIDs = Target_hdf['Subhalo/Subhalo_IDs'][:]

    # Keep only FOF halos that host at least one subhalo
    Indices_HaloWithSub = np.where(GroupFirstSub != -1)[0]
    Target_Halo_IDs = FOF_Halo_IDs[Indices_HaloWithSub]

    # Select by position so the result is aligned with Target_Halo_IDs
    Galaxy_nums = Group_Nsubs[Indices_HaloWithSub]

    # Average over the selected halos only: computing it for every FOF halo
    # would give an array of a different length than Target_Halo_IDs whenever
    # some halo has no subhalo, breaking masks built from Target_Halo_IDs.
    AvgSFR = Get_AvgSFR(SubhaloGrNr, SubhaloSFR, Target_Halo_IDs)

    return (Target_Halo_IDs, Galaxy_nums,AvgSFR, SubhaloGrNr, SubhaloIDs)

In [None]:
# --- TNG-Cluster catalog: two ID arrays read from 'origID' and 'haloID' ---
with h5py.File('/users_path/merger_trace/data/tng_cluster/tng_cluster_catalog/TNG-Cluster_Catalog.hdf5', 'r') as f:
    origID_parentsim = f['origID'][:]
    haloID__TNGCluster = f['haloID'][:]

# --- Snapshot-99 target-halo catalog ---
# NOTE(review): this path has no 'targethalo_cat_099/' sub-directory, unlike
# the catalog paths built from base_cat_name elsewhere -- confirm it exists.
cat_name ='/users_path/merger_trace/data/tng_cluster/tng_cluster_targetcat/TargetHalo_MergerCat_099.hdf5'
results = Get_HaloIDs(cat_name)
Target_Halo_IDs, _, _, _, _ = results

# --- Merger event file: one entry per merger event ---
with h5py.File('/users_path/merger_trace/data/tng_cluster/tng_cluster_cluster_mergers/cluster_mergers.hdf5', 'r') as f:
    HaloID = f['HaloID'][:]
    Snap_coll = f['Snap_coll'][:]
    print(f.keys())

# Distinct halo identifiers that appear in the merger file
HaloID_Mergercat = np.asarray(list(set(HaloID))).astype(int)

# Positional indices 0..N-1 into the TNG-Cluster catalog arrays
Merger_Target_Halo_ID = np.array(range(len(haloID__TNGCluster)))

# Map each TNG-Cluster halo ID to the Snap_coll values of its merger events.
# Merger-file HaloID values are matched against the positional index of the
# halo in the TNG-Cluster catalog; halos with no events get an empty array.
Snap_coll_eachhalo = {
    haloID__TNGCluster[catalog_index]: Snap_coll[HaloID == catalog_index]
    for catalog_index in Merger_Target_Halo_ID
}

In [None]:
# Track, for every target halo, the average SFR and subhalo count of the FOF
# halo hosting its main progenitor at each snapshot from 72 to 99.
# Result: halo_dict[halo_id] = {"snaps": [...], "avgsfr": [...], "galnum": [...]}
halo_dict = {}

cat_name_99 = os.path.join(base_cat_name, 'targethalo_cat_099', 'TargetHalo_MergerCat_099.hdf5')

Target_Halo_IDs_99, Galaxy_nums_99, AvgSFR_99, SubhaloGrNr_99, SubhaloIDs_99= Get_HaloIDs(cat_name_99)

# Load the main progenitor branch of every target halo once, up front; the
# branch does not depend on the snapshot being processed.
mpb_by_halo = {}
for halo_id in Snap_coll_eachhalo:
    # First subhalo listed for this halo at snapshot 99
    # (presumably the central subhalo -- TODO confirm catalog ordering)
    sub1st_at99 = int(SubhaloIDs_99[SubhaloGrNr_99==halo_id][0])

    # os.path.join supplies the separator that mpb_base_path itself lacks
    mpb_file = os.path.join(mpb_base_path, f'sublink_mpb_{sub1st_at99}.hdf5')
    with h5py.File(mpb_file, 'r') as mpb_f:
        mpb_by_halo[halo_id] = (mpb_f['SubfindID'][:], mpb_f['SnapNum'][:])

for snap in range(72, 100):

    # The snapshot catalog is the same for every halo: read it once per snapshot
    cat_name_snap = os.path.join(
        base_cat_name,
        f'targethalo_cat_{snap:03d}',
        f'TargetHalo_MergerCat_{snap:03d}.hdf5',
    )

    Target_Halo_IDs_snap, Galaxy_nums_snap, AvgSFR_snap, SubhaloGrNr_snap, SubhaloIDs_snap= Get_HaloIDs(cat_name_snap)

    for halo_id, (SubfindID, SnapNum) in mpb_by_halo.items():

        # Subhalo on the main progenitor branch at this snapshot
        progenitor_id_snap = SubfindID[SnapNum==snap]

        # The branch does not reach back to this snapshot
        if len(progenitor_id_snap)==0:
            continue

        # FOF halo hosting the progenitor at this snapshot
        host_of_progenitor = SubhaloGrNr_snap[SubhaloIDs_snap==progenitor_id_snap[0]]

        if len(host_of_progenitor)==0:
            # Progenitor is not part of this snapshot's target catalog
            print(f'skip halo {halo_id} at {snap}: progenitor subhalo {int(progenitor_id_snap[0])} not in catalog')
            continue

        halo_id_snap = int(host_of_progenitor[0])
        temp_sfr = AvgSFR_snap[Target_Halo_IDs_snap == halo_id_snap]
        temp_galnum = Galaxy_nums_snap[Target_Halo_IDs_snap == halo_id_snap]

        if len(temp_sfr) != 0 and len(temp_galnum) != 0:
            if halo_id not in halo_dict:
                halo_dict[halo_id] = {
                    "snaps": [],
                    "avgsfr": [],
                    "galnum": []
                }

            halo_dict[halo_id]["avgsfr"].append(temp_sfr[0]) 
            halo_dict[halo_id]["galnum"].append(temp_galnum[0])
            halo_dict[halo_id]["snaps"].append(snap)

            print(f'finish halo {halo_id} at {snap} where it was {halo_id_snap}')
        else:
            print(f'finish halo {halo_id} at {snap} where it was {halo_id_snap} with no results')



In [None]:
import pickle

# Save the per-halo tracking results (halo_dict) to 'sfr_tracking.pkl' in the
# current working directory; an existing file of that name is overwritten.
with open('sfr_tracking.pkl', 'wb') as f:
    pickle.dump(halo_dict, f)
