In [9]:
import sys
sys.path.append('../')

In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from scripts.utils import set_mpl, data_path
from scripts.cross_match_scripts import cross_match_data_frames,  add_separation_columns, desi_reliable_magnitudes
set_mpl()

%matplotlib inline
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy import coordinates
import skymapper as skm
import healpy as hp


matplotlib settings set


In [11]:
def find_secure_ctps(desi_x, x_prefix='csc', rmin=10, rmax=30):
    """Select secure optical counterparts and secure field objects from a cross-match table.

    Parameters
    ----------
    desi_x : pandas.DataFrame
        One row per (X-ray source, DESI object) pair. The columns read here are
        ``brick_primary``, ``flux_g``, ``ra``, ``dec``, ``dist_arcsec``, ``desi_id``
        and, for the given prefix, ``{x_prefix}_name``, ``{x_prefix}_ra``,
        ``{x_prefix}_dec`` and ``{x_prefix}_r_98``. The input frame is not modified.
    x_prefix : str
        Prefix of the X-ray catalogue columns.
    rmin, rmax : float
        Inner and outer radius (arcsec) of the annulus used both to estimate the
        local DESI source density and to select field objects.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the counterpart rows (``is_counterpart == True``) and the
        field rows (``is_counterpart == False``), with the added columns
        ``{x_prefix}_r_false``, ``second_closest_dist`` and ``closest_dist``.

    Notes
    -----
    For an X-ray source with a single DESI candidate ``second_closest_dist`` equals
    ``closest_dist``, so the counterpart condition (closest inside r_false, second
    closest outside r_false) cannot be met for such sources.
    """
    x_name = f'{x_prefix}_name'
    x_ra_name = f'{x_prefix}_ra'
    x_dec_name = f'{x_prefix}_dec'
    x_err_name = f'{x_prefix}_r_98'
    x_false_name = f'{x_prefix}_r_false'

    # remove DESI LIS duplicates
    # (query returns a new frame, so the caller's frame is never touched and no
    # explicit copy of the full table is needed)
    desi_x = desi_x.query('brick_primary == True')
    print('desi reliable catalog:', len(desi_x))
    # Sort so that, among rows duplicated in position, the one with the largest
    # flux_g comes first and is the one kept by drop_duplicates.
    desi_x = desi_x.sort_values(
        by=[x_name, x_ra_name, x_dec_name, 'flux_g'],
        ascending=[True, True, True, False],
    )
    desi_x = desi_x.drop_duplicates(
        subset=[x_name, x_ra_name, x_dec_name, 'ra', 'dec'], keep='first'
    )
    print('desi reliable catalog after non-primary brick and duplicates removal:', len(desi_x))

    # calculate r_false from the number of DESI objects in the (rmin, rmax] annulus
    annulus_bin = pd.cut(desi_x.dist_arcsec, [rmin, rmax])
    # observed=False is spelled out because the default of this option differs
    # between pandas versions; it keeps X-ray sources whose annulus is empty
    # (count 0) in the table instead of dropping them in the inner merge below.
    num_src = desi_x.groupby(by=[x_name, annulus_bin], observed=False).size().unstack()
    src_dens = num_src / annuli_area_deg2(rmin, rmax)
    r_falses = r_false(src_dens)
    r_falses.columns = [x_false_name]  # single column: pd.cut was given a single bin
    desi_x = desi_x.merge(r_falses, left_on=x_name, right_index=True)
    desi_x = desi_x.sort_values(by=[x_name, 'dist_arcsec'])

    # find closest and second closest distances
    gr = desi_x.groupby(x_name)['dist_arcsec']
    desi_x['second_closest_dist'] = gr.transform(lambda x: x.nsmallest(2).max())
    desi_x['closest_dist'] = gr.transform('min')

    # counterparts filter: closest desi objects, but only if they are closer than
    # r_false and r_98. In addition, we demand that there are no other objects
    # closer than r_false
    ctps = desi_x.query(f"(dist_arcsec==closest_dist) & (closest_dist<{x_false_name} & closest_dist<{x_err_name}) & (second_closest_dist>{x_false_name})")
    print('delete ', ctps.duplicated(subset='desi_id').sum(), ' duplicated desi_id')
    ctps = ctps.drop_duplicates(subset='desi_id')
    print('total number of ctps:', len(ctps))
    # assign() builds a new frame, avoiding a chained assignment on a filtered view
    ctps = ctps.assign(is_counterpart=True)

    # field sources filter: all desi objects within rmin-rmax arcsec which are
    # linked to only ONE X-ray source
    field = desi_x.query("dist_arcsec >= @rmin & dist_arcsec <= @rmax")
    print('number of possible field sources: ', len(field))

    n_xray_per_desi = field.groupby(by='desi_id')[x_name].count()
    clear_field_ids = n_xray_per_desi.index[n_xray_per_desi == 1]
    field = field[field.desi_id.isin(clear_field_ids)].assign(is_counterpart=False)

    print('Final number of secure field sources: ', len(field))

    return pd.concat([ctps, field])



def annuli_area_deg2(r_in_arcsec, r_out_arcsec):
    """Return the area, in square degrees, of a flat annulus.

    Both radii are given in arcseconds; the area is pi * (r_out^2 - r_in^2)
    evaluated after converting each radius to degrees.
    """
    arcsec_per_deg = 3600
    inner_deg, outer_deg = (
        radius / arcsec_per_deg for radius in (r_in_arcsec, r_out_arcsec)
    )
    return np.pi * (outer_deg**2 - inner_deg**2)

def r_false(desi_rho_deg2, thresh=0.03):
    """Radius (arcsec) from a source density, following Belvedersky+ 2022.

    The density, given per square degree, is converted to per square arcsecond
    and the function returns sqrt(-ln(1 - thresh) / (pi * rho)). Works
    element-wise on scalars, arrays and pandas objects.
    """
    arcsec2_per_deg2 = 3600**2
    rho_arcsec2 = desi_rho_deg2 / arcsec2_per_deg2
    numerator = -np.log(1 - thresh)
    denominator = np.pi * rho_arcsec2
    return np.sqrt(numerator / denominator)


# Load CSC data (all sky, matched with DESI)

The data were obtained with notebooks from the `all_sky_crossmatch` repository; see the `2_all-sky-training/0_catalog-download.ipynb` file.

In [12]:
# X-ray catalogue: every column gets the 'csc_' prefix.
csc = pd.read_pickle(data_path+'csc_all_sky.pkl').add_prefix('csc_')

# Cross-match table: strip the 'desi_' prefix from column names, then restore
# the identifier column name ('desi_id' -> 'id' -> 'desi_id').
csc_desi = pd.read_pickle(data_path+'csc_all_sky_desi_matched.gz_pkl', compression='gzip')
csc_desi = (
    csc_desi
    .rename(columns=lambda name: name.replace('desi_', ''))
    .rename(columns={'id': 'desi_id'})
)

# Attach the remaining catalogue columns by source name. Columns whose name
# contains 'ra' or 'dec' are left out: ra or decs are by 1e-5 per cent
# different in two dataframes in some rows.
cols = [col for col in csc.columns if 'ra' not in col and 'dec' not in col]
csc_desi = pd.merge(csc_desi, csc[cols], on=['csc_name'], how='left')
# should be 4 for LH paper, also LH coords should be excluded
csc_desi = desi_reliable_magnitudes(csc_desi, s_n_threshold=4)

# Bounding box (degrees) of the LH region; rows inside it are removed.
ra_min = 154.763934
ra_max = 167.084550
dec_min = 54.124219
dec_max = 61.236185
in_lh_mask = (
    csc_desi['csc_ra'].between(ra_min, ra_max)
    & csc_desi['csc_dec'].between(dec_min, dec_max)
)
csc_desi = csc_desi[~in_lh_mask]


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [13]:
# Number of distinct X-ray source names and distinct DESI ids in the matched table.
print(csc_desi['csc_name'].nunique())
print(csc_desi['desi_id'].nunique())


157958
3142317


In [5]:
# Build the labelled sample (counterparts + field objects) from the matched table.
csc_training = find_secure_ctps(desi_x=csc_desi, x_prefix='csc')


desi reliable catalog: 3726091
desi reliable catalog after non-primary brick and duplicates removal: 3726084
delete  92  duplicated desi_id
total number of ctps: 80056
number of possible field sources:  3187061
Final number of secure field sources:  2411902


In [6]:
# Counts of counterpart / field rows, split by the catalogue's csc_secure flag.
csc_training.groupby('csc_secure').is_counterpart.value_counts()

csc_secure  is_counterpart
False       False              276734
            True                 8063
True        False             2135168
            True                71993
Name: is_counterpart, dtype: int64

----

In [7]:
# Keep only rows flagged csc_secure and save that catalogue as a gzip-compressed pickle.
csc_training_secure = csc_training.loc[csc_training['csc_secure'] == True]
csc_training_secure.to_pickle(
    data_path+'csc_allsky_desi_r30_gaia_dered_training.gz_pkl',
    compression='gzip',
)

In [8]:
# Display the saved table (the notebook shows a truncated view: first and last rows).
csc_training_secure

Unnamed: 0,csc_name,csc_ra,csc_dec,release,objid,brickid,ra,dec,brick_primary,flux_g,flux_r,flux_z,flux_w1,flux_w2,flux_w3,flux_w4,flux_ivar_g,flux_ivar_r,flux_ivar_z,flux_ivar_w1,flux_ivar_w2,flux_ivar_w3,flux_ivar_w4,dered_mag_g,dered_mag_r,dered_mag_z,dered_mag_w1,dered_mag_w2,dered_mag_w3,dered_mag_w4,snr_g,snr_r,snr_z,snr_w1,snr_w2,snr_w3,snr_w4,parallax,parallax_ivar,pmra,pmra_ivar,pmdec,pmdec_ivar,dist_arcsec,desi_id,csc_err_ellipse_r0,csc_err_ellipse_r1,csc_err_ellipse_ang,csc_significance,csc_likelihood,csc_likelihood_class,csc_conf_flag,csc_dither_warning_flag,csc_extent_flag,csc_pileup_flag,csc_sat_src_flag,csc_streak_src_flag,csc_var_flag,csc_flux_aper_s,csc_flux_aper_lolim_s,csc_flux_aper_hilim_s,csc_flux_aper_m,csc_flux_aper_lolim_m,csc_flux_aper_hilim_m,csc_r_98,csc_flux_05_2,csc_flux_05_2_err,csc_secure,all_mag_g,rel_mag_g,rel_dered_mag_g,rel_flux_corr_g,all_mag_r,rel_mag_r,rel_dered_mag_r,all_mag_z,rel_mag_z,rel_dered_mag_z,all_mag_w1,rel_mag_w1,rel_dered_mag_w1,vega_mag_w1,all_mag_w2,rel_mag_w2,rel_dered_mag_w2,vega_mag_w2,all_mag_w3,rel_mag_w3,rel_dered_mag_w3,vega_mag_w3,all_mag_w4,rel_mag_w4,rel_dered_mag_w4,vega_mag_w4,rel_flux_corr_r,rel_flux_corr_z,rel_dered_g_r,rel_dered_g_z,rel_dered_r_z,rel_dered_z_w1,rel_dered_r_w2,rel_dered_w1_w2,rel_dered_z_w3,rel_dered_r_w4,rel_dered_w3_w4,csc_r_false,second_closest_dist,closest_dist,is_counterpart
158,2CXO J000001.4+004822,0.006048,0.806176,9010,236,334688,0.006153,0.806308,1,0.308400,0.731828,1.758477,5.784090,5.948933,72.709250,-3279.48140,1682.19000,948.46300,129.734910,2.735729,0.612379,0.000688,0.000010,23.690054,22.780264,21.854317,20.589422,20.560839,17.845373,,12.648855,22.538210,20.029266,9.566907,4.655316,1.906998,-10.234280,0.0,0.0,0.0,0.0,0.0,0.0,0.607102,9010_334688_236,3.726590,3.135535,46.707312,2.971429,17.027296,TRUE,False,False,False,False,False,False,False,1.384965e-15,3.462414e-16,2.423690e-15,1.626135e-15,8.905024e-16,2.361767e-15,5.565419,3.011100e-15,1.800057e-15,True,23.777214,23.777214,23.690054,1.925701e-15,22.838977,22.838977,22.780264,21.887158,21.887158,21.854317,20.594412,20.594412,20.589422,17.895412,20.563902,20.563902,20.560839,17.224902,17.846026,,,,,,,,2.700685e-15,3.010245e-15,0.909790,1.835737,0.925947,1.264895,2.219425,0.028583,,,,0.916654,3.528270,0.607102,True
205,2CXO J000001.7+003824,0.007351,0.640170,9010,289,334688,0.007209,0.640172,1,0.446684,0.791833,0.833604,2.084917,-1.714462,-19.942390,-1472.02190,3490.00300,2314.69970,349.076630,3.086889,0.657554,0.000768,0.000011,23.288145,22.694910,22.664875,21.697306,,,,26.388424,38.096140,15.574721,3.663103,-1.390252,-0.552538,-4.863450,0.0,0.0,0.0,0.0,0.0,0.0,0.510857,9010_334688_289,1.093394,0.871849,104.849964,5.485207,110.292372,TRUE,False,False,False,False,False,False,False,2.565676e-15,1.782927e-15,3.348424e-15,2.359721e-15,1.587449e-15,3.131993e-15,1.598058,4.925396e-15,1.555056e-15,True,23.374999,23.374999,23.288145,2.788383e-15,22.753416,22.753416,22.694910,22.697601,22.697601,22.664875,21.702278,,,,,,,,,,,,,,,,2.921564e-15,1.426851e-15,0.593235,0.623270,0.030035,,,,,,,0.744180,4.253834,0.510857,True
247,2CXO J000001.7-500850,0.007301,-50.147369,9010,148,76434,0.007012,-50.147418,1,0.284839,0.432803,0.867132,6.043220,7.337446,32.474842,-380.95505,3240.27600,2658.35080,174.380580,4.183603,1.071669,0.001249,0.000015,23.822926,23.381940,22.639498,20.544506,20.334711,18.720829,,16.214005,22.314981,11.450764,12.360716,7.595829,1.147492,-1.469903,0.0,0.0,0.0,0.0,0.0,0.0,0.687582,9010_76434_148,0.743637,0.740034,136.172581,5.772718,368.270366,TRUE,False,False,False,False,False,False,True,1.081031e-15,7.265947e-16,1.417746e-15,9.389929e-16,6.843508e-16,1.193635e-15,1.198873,2.020024e-15,6.071961e-16,True,23.863501,23.863501,23.822926,1.703884e-15,23.409274,23.409274,23.381940,22.654787,22.654787,22.639498,20.546829,20.546829,20.544506,17.847829,20.336138,20.336138,20.334711,16.997138,18.721132,,,,,,,,1.551685e-15,1.460594e-15,0.440986,1.183428,0.742442,2.094992,3.047229,0.209795,,,,0.968094,3.970588,0.687582,True
421,2CXO J000002.0-500535,0.008631,-50.093100,9010,223,77358,0.008449,-50.093190,1,0.088156,0.094312,0.334148,6.392944,5.055263,99.258520,3299.90000,3502.84230,2313.42650,192.323900,4.329451,1.102292,0.000996,0.000012,25.098747,25.037910,23.675789,20.483566,20.739300,17.507795,13.703640,5.217513,4.536207,4.633996,13.302012,5.307524,3.131895,11.487512,0.0,0.0,0.0,0.0,0.0,0.0,0.531147,9010_77358_223,0.830563,0.793760,126.993730,3.888889,53.681204,TRUE,False,False,False,False,False,False,False,,,,4.377979e-16,2.334922e-16,6.323748e-16,1.312859,,,True,25.136870,25.136870,25.098747,5.261525e-16,25.063583,25.063583,25.037910,23.690153,23.690153,23.675789,20.485748,20.485748,20.483566,17.786748,20.740641,20.740641,20.739300,17.401641,17.508081,,,,13.703748,13.703748,13.70364,7.083748,3.376101e-16,5.623582e-16,0.060837,1.422958,1.362121,3.192223,4.298610,-0.255734,,11.33427,,0.859305,3.252260,0.531147,True
467,2CXO J000002.2+003739,0.009569,0.627607,9010,393,334688,0.009488,0.627559,1,16.645277,41.143196,73.185360,96.355280,94.243515,471.945130,-10288.22500,500.42862,307.93832,51.844925,1.312389,0.357400,0.000570,0.000009,19.360170,18.405918,17.806309,17.535353,17.561327,15.814622,,372.359200,721.987850,526.959660,110.384155,56.341510,11.263417,-31.390830,0.0,0.0,0.0,0.0,0.0,0.0,0.341151,9010_334688_393,1.798867,1.079022,107.781279,4.000000,42.163113,TRUE,False,False,False,False,False,False,False,0.000000e+00,0.000000e+00,2.822468e-16,0.000000e+00,0.000000e+00,4.158212e-16,2.397099,0.000000e+00,5.025640e-16,True,19.446772,19.446772,19.360170,1.038825e-13,18.464255,18.464255,18.405918,17.838939,17.838939,17.806309,17.540311,17.540311,17.535353,14.841311,17.564371,17.564371,17.561327,14.225371,15.815271,15.815271,15.814622,10.641271,,,,,1.517792e-13,1.252579e-13,0.954252,1.553861,0.599609,0.270956,0.844591,-0.025974,1.991687,,,0.834392,2.315897,0.341151,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3759218,2CXO J235959.9-093415,359.999961,-9.570990,9010,126,275866,0.006568,-9.573772,1,0.174951,0.146608,0.480867,-0.749147,-0.790885,42.077930,268.48700,1168.01050,536.03150,46.908684,3.353836,0.693480,0.000758,0.000011,24.280678,24.509144,23.252726,,,18.439024,16.427374,5.979166,3.394312,3.293454,-1.371950,-0.658613,1.158537,0.889006,0.0,0.0,0.0,0.0,0.0,0.0,25.504534,9010_275866_126,1.398715,0.892273,125.934170,2.914286,32.507210,TRUE,False,False,False,False,False,False,False,,,,0.000000e+00,0.000000e+00,1.789800e-15,1.895908,,,True,24.392709,24.392709,24.280678,1.117736e-15,24.584606,,,23.294938,,,,,,,,,,,18.439864,,,,16.427692,,,,,,,,,,,,,,,1.274555,1.995042,0.429794,False
3759222,2CXO J235959.9-093415,359.999961,-9.570990,9010,77,275866,0.004126,-9.565175,1,0.320409,0.458374,0.564584,5.146428,5.210894,10.651087,-130.34381,1124.89280,516.53644,46.864258,3.236276,0.678429,0.000800,0.000011,23.623716,23.271490,23.078470,20.714823,20.703781,19.930676,,10.746337,10.417668,3.864998,9.258245,4.292045,0.301216,-0.432286,0.0,0.0,0.0,0.0,0.0,0.0,25.630295,9010_275866_77,1.398715,0.892273,125.934170,2.914286,32.507210,TRUE,False,False,False,False,False,False,False,,,,0.000000e+00,0.000000e+00,1.789800e-15,1.895908,,,True,23.735738,23.735738,23.623716,2.047029e-15,23.346950,23.346950,23.271490,23.120679,,,20.721235,20.721235,20.714823,18.022235,20.707719,20.707719,20.703781,17.368719,19.931515,,,,,,,,1.717843e-15,,0.352226,,,,2.567709,0.011042,,,,1.274555,1.995042,0.429794,False
3759215,2CXO J235959.9-093415,359.999961,-9.570990,9010,16,275866,0.001308,-9.578183,1,0.054978,0.174795,0.810630,2.560212,1.461625,-80.834130,546.94330,1666.53700,666.82610,94.697340,3.335823,0.690665,0.000758,0.000011,25.537966,24.318521,22.685905,21.472923,22.083988,,15.654828,2.244368,4.513738,7.888446,4.676032,1.214702,-2.225830,1.780722,0.0,0.0,0.0,0.0,0.0,0.0,26.331482,9010_275866_16,1.398715,0.892273,125.934170,2.914286,32.507210,TRUE,False,False,False,False,False,False,False,,,,0.000000e+00,0.000000e+00,1.789800e-15,1.895908,,,True,25.649528,,,,24.393677,24.393677,24.318521,22.727943,22.727943,22.685905,21.479310,21.479310,21.472923,18.780310,22.087910,,,,,,,,15.655144,,,,6.548941e-16,1.399480e-15,,,1.632616,1.212982,,,,,,1.274555,1.995042,0.429794,False
3759204,2CXO J235959.9-093415,359.999961,-9.570990,9010,4750,277287,359.992227,-9.570943,1,0.510895,0.845306,2.383923,9.835017,9.684865,-43.605340,-521.79517,296.89722,150.29869,10.622804,2.799752,0.596954,0.000740,0.000011,23.118132,22.607666,21.514930,20.011705,20.030863,,,8.803079,10.363147,7.769834,16.456402,7.482797,-1.186442,-1.703733,0.0,0.0,0.0,0.0,0.0,0.0,27.455064,9010_277287_4750,1.398715,0.892273,125.934170,2.914286,32.507210,TRUE,False,False,False,False,False,False,False,,,,0.000000e+00,0.000000e+00,1.789800e-15,1.895908,,,True,23.229171,23.229171,23.118132,3.261052e-15,22.682465,22.682465,22.607666,21.556769,21.556769,21.514930,20.018062,20.018062,20.011705,17.319062,20.034766,20.034766,20.030863,16.695766,,,,,,,,,3.166016e-15,4.114875e-15,0.510466,1.603202,1.092736,1.503225,2.576803,-0.019158,,,,1.274555,1.995042,0.429794,False
