# In [None]:
import numpy as np
import os
import h5py
import matplotlib.pyplot as plt
from scipy.spatial import KDTree
from scipy.spatial import cKDTree


# Directory holding the HDF5 files, and the filename pattern whose
# placeholder is filled in with the file number.
base_path = '/home/benidav/Illustris-3/TNG50/'
file_template = 'fof_subhalo_tab_099.{}.hdf5'

# Accumulators keyed by HDF5 dataset path. Every key starts with its own
# empty list, to which one array per file is appended later.
halo_data = {
    dataset: []
    for dataset in (
        'Group/GroupPos',
        'Group/GroupVel',
        'Group/Group_M_Crit200',
    )
}

subhalo_data = {
    dataset: []
    for dataset in (
        'Subhalo/SubhaloPos',
        'Subhalo/SubhaloVel',
        'Subhalo/SubhaloMass',
    )
}

# Visit the files numbered 0 through 127 and collect every requested dataset
for file_number in range(128):
    file_path = os.path.join(base_path, file_template.format(file_number))

    # A file that is not on disk is reported and passed over
    if not os.path.exists(file_path):
        print(f"File {file_path} does not exist. Skipping...")
        continue

    with h5py.File(file_path, 'r') as catalog:
        print(f"Processing {file_path}...")

        # Halo datasets are handled before subhalo datasets so that any
        # warnings are printed in that order.
        for store in (halo_data, subhalo_data):
            for dataset, pieces in store.items():
                if dataset not in catalog:
                    print(f"Warning: {dataset} not found in {file_path}")
                    continue
                # Read the full dataset into memory and keep it for later merging
                pieces.append(catalog[dataset][:])

# Join the per-file pieces of each dataset into a single numpy array; a
# dataset for which nothing was collected becomes an empty array.
for table in (halo_data, subhalo_data):
    for name, pieces in table.items():
        if pieces:
            table[name] = np.concatenate(pieces)
        else:
            table[name] = np.array([])

# Report the shape of every merged dataset, or 'Empty' when it has no elements
print("\nSummary of extracted data:")
for heading, table in (("Halo data:", halo_data), ("\nSubhalo data:", subhalo_data)):
    print(heading)
    for name, array in table.items():
        shape_text = array.shape if array.size > 0 else 'Empty'
        print(f"- {name}: {shape_text}")