In [33]:
from astropy.table import Table
import healpy as hp
import numpy as np
from multiprocessing import Pool
import h5py
from tqdm import tqdm
import os



Found 60 unique HEALPix indices


Processing HEALPix indices: 100%|██████████| 60/60 [00:06<00:00,  8.59it/s]


In [34]:
def _as_hdf5_array(column):
    """Return the column values as an array that h5py can store.

    h5py has no conversion path for NumPy fixed-width unicode (``<U``) arrays,
    so those are encoded to UTF-8 byte strings; every other dtype is passed
    through unchanged.
    """
    values = np.asarray(column)
    if values.dtype.kind == 'U':
        values = np.char.encode(values, 'utf-8')
    return values


def save_in_standard_format(input_path, output_dir, nside=16):
    """Split a catalog into one HDF5 file per HEALPix pixel.

    The table at ``input_path`` is read with ``Table.read``; each row is
    assigned a HEALPix pixel (NESTED ordering) from its ``RA``/``DEC`` columns,
    which are handed to healpy with ``lonlat=True`` (longitude/latitude in
    degrees). The rows of every populated pixel are written to
    ``<output_dir>/healpix=<pixel>/001-of-001.h5`` with one dataset per input
    column, plus an ``object_id`` dataset copied from ``TARGETID``.

    Args:
        input_path: Path of the input table.
        output_dir: Directory under which the ``healpix=<pixel>``
            sub-directories are created.
        nside: HEALPix resolution parameter used for the partitioning.
            Defaults to 16, the value that was previously hard-coded.

    Raises:
        KeyError: If the input table lacks ``RA``, ``DEC`` or ``TARGETID``.
    """
    data = Table.read(input_path)

    # Fail before touching the filesystem instead of after a partial write.
    missing = [name for name in ('RA', 'DEC', 'TARGETID') if name not in data.colnames]
    if missing:
        raise KeyError(f"Input table is missing required column(s): {missing}")

    healpix_indices = np.asarray(
        hp.ang2pix(nside, data['RA'], data['DEC'], lonlat=True, nest=True)
    )

    # Group rows by pixel with a single stable sort rather than rebuilding a
    # full-length boolean mask for every pixel. The stable sort keeps the rows
    # of each pixel in their original relative order.
    order = np.argsort(healpix_indices, kind='stable')
    unique_indices, starts = np.unique(healpix_indices[order], return_index=True)
    row_groups = np.split(order, starts[1:])

    print(f"Found {len(unique_indices)} unique HEALPix indices")

    # Only add the alias when the input does not already provide the column;
    # creating the same dataset name twice makes h5py raise.
    add_object_id = 'object_id' not in data.colnames

    for index, rows in tqdm(zip(unique_indices, row_groups),
                            total=len(unique_indices),
                            desc="Processing HEALPix indices"):
        grouped_data = data[rows]

        output_subdir = os.path.join(output_dir, f'healpix={index}')
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_subdir, exist_ok=True)

        output_path = os.path.join(output_subdir, '001-of-001.h5')

        with h5py.File(output_path, 'w') as output_file:
            for key in data.colnames:
                output_file.create_dataset(key, data=_as_hdf5_array(grouped_data[key]))
            if add_object_id:
                output_file.create_dataset(
                    'object_id', data=_as_hdf5_array(grouped_data['TARGETID'])
                )

def main(args):
    """Write the HEALPix-partitioned dataset under ``<args.output_dir>/datafiles``.

    Args:
        args: Object exposing ``output_dir`` and ``input_path`` attributes
            (presumably an argparse namespace; confirm against the caller).
    """
    output_dir = os.path.join(args.output_dir, 'datafiles')
    # exist_ok replaces the racy exists()-then-makedirs() sequence.
    os.makedirs(output_dir, exist_ok=True)
    save_in_standard_format(args.input_path, output_dir)