#  Matching Catalogs by Proximity with NumCosmo

---
**License**

 Matching Catalogs by Proximity with NumCosmo

 Mon Dec 2 18:30:00 2024\
 Copyright  2024\
 Cinthia Nunes de Lima <cinthia.n.lima@uel.br> \ Sandro Dias Pinto Vitenti <vitenti@uel.br>

---
---
 matching_by_proximity\
 Copyright (C) 2024 Cinthia Nunes de Lima <cinthia.n.lima@uel.br>, Sandro Dias Pinto Vitenti <vitenti@uel.br>

 numcosmo is free software: you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
 Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 numcosmo is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 See the GNU General Public License for more details.

 You should have received a copy of the GNU General Public License along
 with this program.  If not, see <http://www.gnu.org/licenses/>.
 
---

## Libraries & Configs

In [1]:
import math
import sys

import os
import numpy as np
import matplotlib.pyplot as plt

from astropy.io import fits
from astropy.table import Table
import pandas as pd

sys.path.insert(0, "/global/homes/c/cinlima/gcrcatalogs-new/gcr-catalogs")
sys.path.insert(0, "/global/homes/c/cinlima/")
sys.path.insert(0, "/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/MachingCatalogs")


import GCRCatalogs
#Put nersc if using nersc
GCRCatalogs.set_root_dir_by_site("nersc")

print('GCRCatalogs =', GCRCatalogs.__version__, '|' ,'GCR =', GCRCatalogs.GCR.__version__)


from numcosmo_py import Nc, Ncm

import time

# GCRCatalogs.get_available_catalog_names()


GCRCatalogs = 1.9.0 | GCR = 0.9.2


## Catalogs

In [2]:
#Cosmo DC2
cosmodc2= GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_image')

#Cosmo DC2 + RedMaPPer
catalog_dc2_redmapper = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_redmapper_v0.8.1')

In [3]:
# Restrictions on richness, mass and redshift
min_richness = 5
min_halo_mass = 1e13 #Msun
redshift_max = 1.2

#cDC2 + RM data
quantities = catalog_dc2_redmapper.list_all_quantities()
cluster_quantities = [q for q in quantities if 'member' not in q]
member_quantities = [q for q in quantities if 'member' in q]
    
query = GCRCatalogs.GCRQuery('(richness > ' + str(min_richness) +')')
cluster_data = Table(catalog_dc2_redmapper.get_quantities(cluster_quantities, [query])) # cDC2 + RM cluster data
member_data = Table(catalog_dc2_redmapper.get_quantities(member_quantities)) # cDC2 + RM member data

#cDC2 data
quantities_wanted = ['redshift','halo_mass','halo_id','galaxy_id','ra','dec', 'is_central']
query = GCRCatalogs.GCRQuery('(halo_mass > ' + str(min_halo_mass) +') & (redshift < ' + str(redshift_max) +') & (is_central < ' + str(redshift_max) +') ')
truth_data = Table(cosmodc2.get_quantities(quantities_wanted, [query]))

In [4]:
truth_data[truth_data['is_central'] == True]

halo_id,redshift,galaxy_id,dec,halo_mass,ra,is_central
int64,float64,int64,float64,float64,float64,bool
4500151475,0.04818676962281132,9437500128,-40.400894272468605,16891355712901.41,49.6366034309591,True
7300151475,0.04825926322238838,9437500172,-41.3447799174979,74621860826501.4,50.957598308861314,True
13700151475,0.04554516035339984,9437500288,-40.832338501224754,33506664194163.383,51.00887984172898,True
1100151464,0.0643846735498792,9437500798,-41.15855360868742,17911434075583.098,50.57641425057562,True
18300151442,0.10736840205357634,9437504902,-39.168679326038024,13042641686354.93,50.77601162448169,True
36400151442,0.12743613052494784,9437505130,-39.336294511446724,48388878878287.33,50.60510200022939,True
43700151442,0.11375834498747261,9437505232,-40.23977306100707,69213477135143.664,49.50475455575477,True
52300151442,0.1259639067546261,9437505371,-39.64548257286864,62974036412214.086,51.070122482361214,True
1200151432,0.12775858760889358,9437509536,-39.09754001831282,34125997485791.55,50.36982165635327,True
...,...,...,...,...,...,...


In [5]:
cluster_data

ra_cen_4,p_cen_2,id_cen_1,redshift_true_cg,id_cen_3,richness,ra,dec_cen_1,ra_cen_3,p_cen_3,dec_cen_4,dec_cen_0,scaleval,p_cen_0,redshift_err,p_cen_4,ra_cen_2,id_cen_0,richness_err,cluster_id,p_cen_1,redshift,dec,maskfrac,ra_cen_1,ra_cen_0,id_cen_2,dec_cen_3,id_cen_4,dec_cen_2
float64,float32,int64,float32,int64,float32,float64,float64,float64,float32,float64,float64,float32,float32,float32,float32,float64,int64,float32,int32,float32,float32,float64,float32,float64,float64,int64,float64,int64,float64
52.51910078836549,0.00010373697,1312932492,0.51072353,1312945763,190.70665,52.520436430812914,-25.71783277825776,52.527338368381535,1.5151493e-06,-25.727465056757442,-25.717996124671657,1.0001404,0.79138356,0.0027861558,3.0111536e-07,52.50825021974284,1312945563,3.7469666,10,0.20851088,0.5135129,-25.717996124671657,0.0,52.519490104517175,52.520436430812914,1312932494,-25.72165959330821,1313029113,-25.716281990601974
56.92602537669649,1.8312696e-06,2000524824,0.5740297,2000536980,179.12813,56.920376528125075,-27.199286148618782,56.92348931320997,5.984015e-08,-27.211258176404048,-27.20434071042304,1.0005391,0.99996513,0.0032183963,1.7345036e-08,56.94124732225573,2000524808,3.6934361,18,3.297315e-05,0.5712697,-27.20434071042304,0.0,56.927010885055374,56.920376528125075,2000525108,-27.20003879579277,2000536915,-27.204497548758997
56.999685738888104,0.026989846,2002535425,1.0347759,2002541935,124.02715,56.99849698116052,-26.718600369742887,56.98989225804766,0.0052658287,-26.72317780680594,-26.7172081871169,1.000476,0.93527913,0.0065176566,0.00042643907,57.00496745151956,2002535198,2.4172914,24,0.03203878,1.040338,-26.7172081871169,0.0,57.0029430173563,56.99849698116052,2002535403,-26.719138984756995,2002535397,-26.707287415263906
58.54795919291436,0.0005243642,2000760286,0.6426174,2000760140,167.33603,58.56362402841767,-27.440237841615975,58.56448660673249,1.3199483e-05,-27.441554361218707,-27.429961828331713,1.0015866,0.9958548,0.0060331975,6.2297656e-10,58.57655423797249,2000759779,3.6777163,30,0.0036076254,0.63518524,-27.429961828331713,0.0,58.55617219161547,58.56362402841767,2000760010,-27.44671248175573,2000760130,-27.4342933293566
58.34775850906022,0.008745701,2000292501,0.43919736,2000292504,111.9147,58.35519782866568,-26.866270557202213,58.35345231858996,4.2903914e-05,-26.873768459561727,-26.867687128773035,1.0000473,0.7297119,0.003674398,3.132921e-05,58.358819114610974,2000289335,3.366554,71,0.2614682,0.44230303,-26.867687128773035,0.0,58.34992458608299,58.35519782866568,2000292571,-26.864486047315793,2000292608,-26.87437529777802
57.78520845355392,6.477703e-05,2000290835,0.4680865,2000291009,104.439766,57.76575707007693,-28.13711378351597,57.77313021619973,5.2879543e-05,-28.130550627706278,-28.119101936484828,1.0014223,0.99970347,0.0035005552,8.6784746e-07,57.76312485832799,2000290819,2.9570596,73,0.00017803152,0.46697217,-28.119101936484828,0.0,57.75748991205223,57.76575707007693,2000290930,-28.112182213542138,2000290977,-28.123262961841775
58.49500042467478,3.0540557e-09,2000286723,0.44349608,2000286566,103.49396,58.509466974185045,-27.01259056457905,58.50636920080823,2.2677467e-09,-27.003486551712935,-26.989676182075844,1.0021384,0.99999905,0.0036729097,1.5482234e-09,58.4977149963539,2000286447,3.3524604,109,9.2802935e-07,0.4413634,-26.989676182075844,0.0,58.48648688545692,58.509466974185045,2000286516,-26.99741400320372,2000286533,-27.00358318558357
54.498303753402574,0.0004447735,1940324773,1.0758829,1940324997,76.85877,54.494027152041674,-27.89888605332762,54.47973899232569,4.1882024e-05,-27.89828451222208,-27.896589555863095,1.0017645,0.6544826,0.0070620407,1.6846216e-05,54.48695011233644,1940324795,2.2175314,116,0.34501392,1.0629631,-27.896589555863095,0.0,54.496631285075914,54.494027152041674,1940324810,-27.890609698478475,1940324995,-27.90097379323573
58.116146694872974,0.00023012425,2002528410,1.0311339,2002528366,75.89771,58.137368555958155,-27.200526363363345,58.135413610969586,9.39317e-05,-27.206664475172484,-27.199784884537003,1.00349,0.991116,0.0069939015,2.9116934e-05,58.13909799735227,2002528355,2.26323,133,0.008530826,1.037697,-27.199784884537003,0.0,58.14009283286545,58.137368555958155,2002528530,-27.20713970237154,2002528509,-27.196502840091497
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


## Matching by Proximity

In [6]:
halo_data = truth_data[truth_data['is_central'] == True]
z1 = halo_data['redshift']
theta1= np.radians(halo_data['dec'])
phi1 = np.radians(halo_data['ra'])

z2 = cluster_data['redshift']
theta2 = cluster_data['dec']
phi2 = cluster_data['ra']


In [25]:
import tqdm

snn = Ncm.SphereNN()
# cosmo = Nc.HICosmoDEXcdm()
# dist = Nc.Distance.new(3.0)
# dist.prepare(cosmo)

# r_a = np.array([dist.comoving(cosmo, z) for z in z1])

r_a = np.ones(len(theta1))
snn.insert_array(r_a, theta1, phi1)

snn.rebuild()

    
# for theta, phi in zip(theta2, phi2):
#     snn.knn_search(theta, phi, 10)

# for r, theta, phi in zip(r_a, theta2, phi2):
#         snn.insert(r, theta, phi)

    # Do the above using tqdm

for i, (theta, phi, z) in tqdm.tqdm(
    enumerate(zip(theta2, phi2, z2)), total=len(theta2)
    ):
    # r = dist.comoving(cosmo, z)
    indices = np.array(snn.knn_search(1, theta, phi, 10))

100%|██████████| 45081/45081 [00:15<00:00, 2871.78it/s]


In [26]:
indices

array([161807, 161912, 240757, 240759, 161190, 161812, 160631, 161847,
       160881, 240461])