# 0 Preparations

In [None]:
# Basic Packages
import numpy as np
import h5py
import logging
import os
import shutil
import gc
import matplotlib.pyplot as plt
import time
import requests

# Physics-related Packages
from astropy.cosmology import Planck15

## 0.1 Load Paths

In [None]:
# Path of the HDF5 target-halo merger catalog; it is passed to Get_HaloIDs
# further down in this notebook.
# NOTE(review): "/users_path/..." looks like a placeholder prefix — confirm it
# points at the real catalog location before running.
Target_cat_name = "/users_path/merger_trace/tng_cluster/tng_cluster_targetcat/targethalo_cat_099/TargetHalo_MergerCat_099.hdf5"

## 0.2 Constants and Parameters

In [None]:
# Preparations for reading box info from the URL
import requests

# Root URL of the TNG web API. HTTPS is used so that the API key sent in the
# request headers does not travel in clear text.
baseUrl = 'https://www.tng-project.org/api/'

# Request headers carrying the personal API key. The key is taken from the
# TNG_API_KEY environment variable when it is set and non-empty; otherwise the
# literal placeholder below is used, exactly as before.
headers = {"api-key": os.environ.get("TNG_API_KEY") or "API KEY"}

def get(path, params=None):
    """
    Sends an HTTP GET request (with the module-level `headers`) and decodes
    the response.

    Parameters:
    - path: URL to request.
    - params: Optional dict of query-string parameters.

    Returns:
    - The parsed JSON object when the response media type is JSON.
    - The local filename (str) when the response is a file attachment; the
      payload is written to that file in the current working directory.
    - The raw `requests.Response` object otherwise.

    Raises:
    - requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
      status code.
    """
    # make HTTP GET request to path
    r = requests.get(path, params=params, headers=headers)

    # raise exception if the response carries an HTTP error status (4xx/5xx)
    r.raise_for_status()

    # Compare only the media type: the header may be missing or may carry
    # parameters such as "; charset=utf-8".
    content_type = r.headers.get('content-type', '')
    if content_type.split(';')[0].strip().lower() == 'application/json':
        return r.json()  # parse json responses automatically

    disposition = r.headers.get('content-disposition', '')
    if 'filename=' in disposition:
        # Keep only the filename value: drop trailing parameters, optional
        # quotes and any directory part so the file lands in the working
        # directory under a clean name.
        filename = disposition.split("filename=")[1].split(';')[0]
        filename = os.path.basename(filename.strip().strip('"\''))
        if filename:
            with open(filename, 'wb') as f:
                f.write(r.content)
            return filename  # return the filename string

    return r

## Get Merger Trees

In [None]:
def get_subhalo_maxM(Halo_ID, Sub_IDs, Sub_GrNr, Sub_MassType):
    """
    Finds the most massive subhalo of a single halo.

    The mass of a subhalo is the sum of columns 0 and 1 of Sub_MassType
    (presumably the gas and dark-matter components — confirm against the
    catalog definition).

    Parameters:
    - Halo_ID: ID of the halo whose subhalos are inspected.
    - Sub_IDs: Array of subhalo IDs, aligned with Sub_GrNr.
    - Sub_GrNr: Array indicating the group number each subhalo belongs to.
    - Sub_MassType: 2-D array of subhalo masses, one row per subhalo.

    Returns:
    - Tuple (Subhalo_MaxMass, Subhalo_Count, MaxSubhalo_ID):
      - Subhalo_MaxMass: Largest subhalo mass found in the halo.
      - Subhalo_Count: Number of subhalos belonging to the halo.
      - MaxSubhalo_ID: ID of the subhalo that has Subhalo_MaxMass (the first
        one in catalog order if several share the maximum).

    Raises:
    - ValueError: if no subhalo belongs to Halo_ID.
    """
    find_Sub = np.where(Sub_GrNr == Halo_ID)[0]
    if find_Sub.size == 0:
        raise ValueError(f"Halo {Halo_ID} has no subhalos in Sub_GrNr.")

    find_Sub_MassType = Sub_MassType[find_Sub, :]
    find_Sub_Mass = find_Sub_MassType[:, 0] + find_Sub_MassType[:, 1]

    # Report the ID of the subhalo that actually holds the maximum mass, not
    # simply the first subhalo of the group.
    max_pos = np.argmax(find_Sub_Mass)
    find_maxsubhalo_ID = Sub_IDs[find_Sub[max_pos]]

    return find_Sub_Mass[max_pos], find_Sub.shape[0], find_maxsubhalo_ID

def Get_HaloIDs(TargetHalo_cat):
    """
    Extracts halo IDs and their properties from the HDF5 catalog.

    Only halos that own at least one subhalo (GroupFirstSub != -1) are kept.

    Parameters:
    - TargetHalo_cat: Path to the HDF5 file containing the target halo catalog.

    Returns (tuple, in this order):
    - Target_Halo_IDs: IDs of the selected halos.
    - Subhalo_MaxMasses: Maximum subhalo mass for each selected halo.
    - Target_Halo_Rs_Crit200: Group_R_Crit200 of each selected halo.
    - Target_GroupPoses: GroupPos of each selected halo.
    - Galaxy_nums: Number of subhalos per selected halo.
    - Max_Subhalo_IDs: Subhalo ID reported by get_subhalo_maxM for each
      selected halo.
    - Target_Group_Ms_Crit200: Group_M_Crit200 of each selected halo.
    """

    # The catalog is only read here, so open it read-only: this needs no write
    # permission and cannot create an empty file when the path is mistyped.
    with h5py.File(TargetHalo_cat, 'r') as Target_hdf:
        FOF_Halo_IDs = Target_hdf['Group/FOF_Halo_IDs'][:]
        GroupFirstSub = Target_hdf['Group/GroupFirstSub'][:]
        GroupPos = Target_hdf['Group/GroupPos'][:]
        Group_R_Crit200 = Target_hdf['Group/Group_R_Crit200'][:]
        Group_M_Crit200 = Target_hdf['Group/Group_M_Crit200'][:]
        SubhaloGrNr = Target_hdf['Subhalo/SubhaloGrNr'][:]
        SubhaloMassType = Target_hdf['Subhalo/SubhaloMassType'][:]
        Sub_IDs = Target_hdf['Subhalo/Subhalo_IDs'][:]

    # Get Halo IDs and Halo Radius
    Indices_HaloWithSub = np.where(GroupFirstSub != -1)[0]
    Target_Halo_IDs = FOF_Halo_IDs[Indices_HaloWithSub]
    Target_Halo_Rs_Crit200 = Group_R_Crit200[Indices_HaloWithSub]
    Target_GroupPoses = GroupPos[Indices_HaloWithSub]
    Target_Group_Ms_Crit200 = Group_M_Crit200[Indices_HaloWithSub]

    # Get Halo Max Subhalo Mass
    # The output arrays keep NumPy's default float dtype, as before; callers
    # cast the IDs back to int where an integer is needed.
    Subhalo_MaxMasses = np.zeros(Target_Halo_IDs.shape)
    Galaxy_nums = np.zeros(Target_Halo_IDs.shape)
    Max_Subhalo_IDs = np.zeros(Target_Halo_IDs.shape)

    for i, Halo_ID in enumerate(Target_Halo_IDs):
        Subhalo_MaxMasses[i], Galaxy_nums[i], Max_Subhalo_IDs[i] = get_subhalo_maxM(
            Halo_ID, Sub_IDs, SubhaloGrNr, SubhaloMassType)

    return Target_Halo_IDs, Subhalo_MaxMasses, Target_Halo_Rs_Crit200, Target_GroupPoses, Galaxy_nums, Max_Subhalo_IDs, Target_Group_Ms_Crit200

In [None]:
# Build the catalog once and pick the needed entries from the returned tuple.
# Calling Get_HaloIDs separately for every entry would re-read the HDF5 file
# and repeat the per-halo subhalo search each time.
_target_halo_catalog = Get_HaloIDs(Target_cat_name)
Halo_IDs = _target_halo_catalog[0]                 # IDs of the selected halos
Subhalo_IDs = _target_halo_catalog[5]              # subhalo ID chosen per halo
Target_Group_Ms_Crit200 = _target_halo_catalog[6]  # Group_M_Crit200 per halo

In [None]:
# Show the selected halo IDs as the notebook cell output.
Halo_IDs

In [None]:
def get_with_retries(url, max_retries=5, wait_time=30):
    """
    Downloads `url`, retrying on HTTP 403 and on request errors.

    The request is sent with the module-level `headers` (API key) and a
    100-second timeout.

    Parameters:
    - url: URL to request.
    - max_retries: Maximum number of attempts.
    - wait_time: Seconds to wait between two attempts.

    Returns:
    - The parsed JSON object when the response media type is JSON.
    - The local filename (str) when the response is a file attachment; the
      payload is written to that file in the current working directory.
    - The raw `requests.Response` object otherwise.
    - None when every attempt failed.
    """
    for attempt in range(max_retries):
        # Waiting is pointless once no further attempt will follow.
        retries_left = attempt + 1 < max_retries

        try:
            r = requests.get(url, headers=headers, timeout=100)

            if r.status_code == 403:
                if retries_left:
                    print(f"403 Forbidden! Waiting {wait_time} seconds before retrying ({attempt+1}/{max_retries})")
                    time.sleep(wait_time)  # wait and retry
                else:
                    print(f"403 Forbidden! ({attempt+1}/{max_retries})")
                continue

            r.raise_for_status()

            # Compare only the media type: the header may be missing or may
            # carry parameters such as "; charset=utf-8".
            content_type = r.headers.get('content-type', '')
            if content_type.split(';')[0].strip().lower() == 'application/json':
                return r.json()  # parse json responses automatically

            disposition = r.headers.get('content-disposition', '')
            if 'filename=' in disposition:
                # Keep only the filename value: drop trailing parameters,
                # optional quotes and any directory part so the file lands in
                # the working directory under a clean name.
                filename = disposition.split("filename=")[1].split(';')[0]
                filename = os.path.basename(filename.strip().strip('"\''))
                if filename:
                    with open(filename, 'wb') as f:
                        f.write(r.content)
                    return filename  # return the filename string

            return r

        except requests.exceptions.RequestException as e:
            if retries_left:
                print(f"\u26a0 Request failed: {e}. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                print(f"\u26a0 Request failed: {e}.")

    print("Failed after multiple retries.")
    return None  # if fail, return None

# Download the SubLink main-progenitor-branch (mpb) file of the selected
# subhalo of every target halo, skipping files already present in the
# working directory.
for i in range(len(Halo_IDs)):
    # Subhalo_IDs is a float array, so cast back to int for the file name/URL.
    subhalo_id = int(Subhalo_IDs[i])
    Halo_M_Crit200 = Target_Group_Ms_Crit200[i]  # not used in the visible part of this loop
    # Expected local file name of the download.
    # NOTE(review): get_with_retries saves under the server-supplied file name;
    # presumably it matches this pattern — verify.
    sub_mpb = f'./sublink_mpb_{subhalo_id}.hdf5'
    
    if os.path.exists(sub_mpb):
        print(f"File {sub_mpb} already exists. Skipping download.")
    else:
        print(f"File {sub_mpb} not found. Downloading from Illustris API...")
        # NOTE(review): this URL targets simulation TNG300-1 on
        # illustris-project.org over http, while the catalog path mentions
        # tng_cluster and baseUrl points at tng-project.org — confirm the
        # intended simulation and host.
        mpb_url = f"http://www.illustris-project.org/api/TNG300-1/snapshots/99/subhalos/{subhalo_id}/sublink/mpb.hdf5"
        
        # get files
        # NOTE(review): get_with_retries returns None when all attempts fail;
        # sub_mpb is not checked afterwards in the visible code.
        sub_mpb = get_with_retries(mpb_url)
        
        # Optional throttle between requests (currently disabled):
        # time.sleep(1)