In [None]:
import pandas as pd
import numpy as np
from astropy.coordinates import SkyCoord
from astropy import units as u
from scipy.spatial.distance import cdist
import warnings
warnings.filterwarnings('ignore')
from pyarrow import ArrowInvalid
from tqdm import tqdm

In [None]:
# Load the two source catalogs that are cross-matched below
# (the 'nd' file is bound to the NUV name, the 'fd' file to the FUV name).
nuv_catalog = pd.read_parquet(
    path='data/e23456/e23456-nd-0-catalog.parquet',
)
fuv_catalog = pd.read_parquet(
    path='data/e23456/e23456-fd-0-catalog.parquet',
)

In [None]:
def compute_separation_matrix(ra1, dec1, ra2, dec2):
    """
    Pairwise on-sky angular separations between two coordinate lists.

    Parameters
    ----------
    ra1, dec1 : array-like
        Right ascension and declination of the first set. The values are
        multiplied by ``u.degree``, so plain numbers are taken as degrees.
    ra2, dec2 : array-like
        Right ascension and declination of the second set, same convention.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(ra1), len(ra2)); element [i, j] is the separation
        in arcseconds between point i of the first set and point j of the
        second set.

    Notes
    -----
    The full (N, M) grid is materialised in one vectorized astropy call, so
    memory use grows with the product of the two input lengths.
    """
    first_set = SkyCoord(ra=ra1 * u.degree, dec=dec1 * u.degree)
    second_set = SkyCoord(ra=ra2 * u.degree, dec=dec2 * u.degree)

    # A column of shape (N, 1) against a row of shape (1, M) broadcasts
    # to every (i, j) pairing.
    pairwise = first_set[:, np.newaxis].separation(second_set[np.newaxis, :])
    return pairwise.arcsec

# compute_separation_matrix(
#     nuv_catalog['RA'].values, nuv_catalog['DEC'].values,
#     fuv_catalog['RA'].values, fuv_catalog['DEC'].values
# )



In [None]:
def compute_crossmatch(nuv_catalog,fuv_catalog,max_separation_arcsec=3.0):
    """
    Find mutual nearest-neighbour matches between the NUV and FUV catalogs.

    A NUV source i and a FUV source j are reported as a match only when
    j is the closest FUV source to i, i is the closest NUV source to j,
    and their separation does not exceed ``max_separation_arcsec``.

    Parameters
    ----------
    nuv_catalog, fuv_catalog : pandas.DataFrame
        Catalogs with 'RA' and 'DEC' columns (passed to
        ``compute_separation_matrix``, which treats them as degrees).
    max_separation_arcsec : float, default 3.0
        Largest separation, in arcseconds, accepted as a match (inclusive).

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_matches, 3) with columns
        (NUV row position, FUV row position, separation in arcsec).
        Row positions are 0-based offsets into the catalogs, not index
        labels. The shape is (0, 3) when nothing matches, so callers can
        always slice columns with ``result[:, k]``.
    """
    no_matches = np.empty((0, 3), dtype=float)

    # Nothing to pair up; also avoids argmin over an empty axis below.
    if len(nuv_catalog) == 0 or len(fuv_catalog) == 0:
        return no_matches

    separations = np.asarray(
        compute_separation_matrix(
            nuv_catalog['RA'].values, nuv_catalog['DEC'].values,
            fuv_catalog['RA'].values, fuv_catalog['DEC'].values
        ),
        dtype=float,
    )

    print(f"Finding matches within {max_separation_arcsec}\"...")

    # np.argmin treats NaN as the minimum, so a single source with missing
    # coordinates would become everybody's "nearest" neighbour. Push such
    # pairs infinitely far away instead so they can never be selected as a
    # valid match.
    separations = np.where(np.isnan(separations), np.inf, separations)

    nuv_positions = np.arange(separations.shape[0])
    # Closest FUV source for every NUV source (first minimum wins on ties).
    nearest_fuv = np.argmin(separations, axis=1)
    # Closest NUV source for every FUV source (first minimum wins on ties).
    nearest_nuv = np.argmin(separations, axis=0)
    nearest_sep = separations[nuv_positions, nearest_fuv]

    # Keep a pair only if it is within range and the two sources point at
    # each other across catalogs.
    is_mutual = (
        (nearest_sep <= max_separation_arcsec)
        & (nearest_nuv[nearest_fuv] == nuv_positions)
    )
    if not np.any(is_mutual):
        return no_matches

    return np.column_stack((
        nuv_positions[is_mutual],
        nearest_fuv[is_mutual],
        nearest_sep[is_mutual],
    )).astype(float)


In [None]:
def _scatter_matches(n_rows, positions, values):
    """Return a float array of length n_rows that is NaN except at `positions`."""
    column = np.full(n_rows, np.nan)
    column[positions] = values
    return column


def crossmatch(nuv_catalog,fuv_catalog,max_separation_arcsec=3.0):
    """
    Cross-match the two catalogs and record the result as new columns.

    Adds 'FUV_MATCH_INDEX' / 'FUV_MATCH_SEP' to ``nuv_catalog`` and
    'NUV_MATCH_INDEX' / 'NUV_MATCH_SEP' to ``fuv_catalog``. The *_INDEX
    columns hold the 0-based row position of the partner in the other
    catalog; the *_SEP columns hold the separation in arcseconds rounded
    to 6 decimals. Rows without a partner are NaN in both columns.

    Parameters
    ----------
    nuv_catalog, fuv_catalog : pandas.DataFrame
        Catalogs to match; both are modified in place.
    max_separation_arcsec : float, default 3.0
        Matching radius forwarded to ``compute_crossmatch``.

    Returns
    -------
    (nuv_catalog, fuv_catalog)
        The same two DataFrame objects that were passed in.
    """
    # Normalise to a (k, 3) float array so the "no matches" case (which may
    # arrive as an empty 1-D array) can still be sliced by column.
    cross_matches = np.asarray(
        compute_crossmatch(nuv_catalog, fuv_catalog,
                           max_separation_arcsec=max_separation_arcsec),
        dtype=float,
    ).reshape(-1, 3)

    nuv_positions = cross_matches[:, 0].astype(int)
    fuv_positions = cross_matches[:, 1].astype(int)
    separation = np.round(cross_matches[:, 2], 6)

    # The match array stores row positions, not index labels, so the columns
    # are built as plain arrays and assigned whole. This stays correct when a
    # catalog's index is not the default 0..n-1 range.

    # add FUV crossmatches to NUV catalog
    nuv_catalog['FUV_MATCH_INDEX'] = _scatter_matches(len(nuv_catalog), nuv_positions, fuv_positions)
    nuv_catalog['FUV_MATCH_SEP'] = _scatter_matches(len(nuv_catalog), nuv_positions, separation)

    # add NUV crossmatches to FUV table
    fuv_catalog['NUV_MATCH_INDEX'] = _scatter_matches(len(fuv_catalog), fuv_positions, nuv_positions)
    fuv_catalog['NUV_MATCH_SEP'] = _scatter_matches(len(fuv_catalog), fuv_positions, separation)

    return nuv_catalog, fuv_catalog

In [None]:
# Annotate both catalogs with their mutual nearest-neighbour matches.
# The matching radius is spelled out here; 3.0 arcsec is the function default.
nuv_catalog, fuv_catalog = crossmatch(
    nuv_catalog,
    fuv_catalog,
    max_separation_arcsec=3.0,
)

In [None]:
# Display the NUV catalog; after the crossmatch call it carries the
# FUV_MATCH_INDEX and FUV_MATCH_SEP columns.
nuv_catalog