# CSC/XMM validation catalogs, XMM/CSC hostless catalog

In [166]:
import sys
sys.path.append('../')

matplotlib settings set


In [167]:
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np

import matplotlib.pyplot as plt
# plt.style.use('seaborn')

import seaborn as sns

import json

from scripts.utils import set_mpl, data_path
from scripts.cross_match_scripts import cross_match_data_frames, fits_to_pandas, add_separation_columns
set_mpl()

%matplotlib inline
from astropy.table import Table

matplotlib settings set


In [168]:
def csc_error_converter(df: pd.DataFrame,
                        r0_colname='err_ellipse_r0',
                        r1_colname='err_ellipse_r1') -> pd.DataFrame:
    """
    The function converts default radii `r0_colname`
    and `r1_colname` to the one-sigma error.

    Args:
        df (pd.DataFrame): DataFrame with `r0_colname` and
        `r1_colname` columns.
        
        r0_colname (str): major radius of the 95% confidence
        level position error ellipse.
        Defaults to 'err_ellipse_r0'.
        
        r1_colname (str): minor radius of the 95% confidence
        level position error ellipse.
        Defaults to 'err_ellipse_r1'.

    Returns:
        pd.DataFrame: one-sigma error in arcseconds.
    """

    # Conversion coefficient
    csc_sigma_coeff = np.sqrt(-(1 / (2 * np.log(1 - .95))))

    err_r1 = df[r0_colname]
    err_r2 = df[r1_colname]
    # Effective error
    csc_err_eff = np.sqrt(err_r1 ** 2 + err_r2 ** 2)

    csc_1sigma = csc_sigma_coeff * csc_err_eff

    return csc_1sigma


def vot2pd_csc(csc_cat_path: str,
               colnames: list,
               save_coords: bool = False, 
               radec_fits_name: str = 'cscresults_name_radec') -> pd.DataFrame:
    """
    The function converts the votable file to pandas DataFrame.

    Optionally saves the separate file with the coordinates and names of
    the CSC sources to the FITS file.

    Args:
        csc_cat_path (str): the path to the votable file.
        colnames (list): column names of the votable file.
        save_coords (bool): if True, saves the separate file with the coordinates
        and names of the CSC sources to the FITS file.
        radec_fits_name (str): name of the FITS file with
        the coordinates and names of the CSC sources to be saved.

    Returns:
        pd.DataFrame: converted catalogue.
    """
    
    vot_table = Table.read(csc_cat_path, format='votable')
    
    df = vot_table.to_pandas()
    df.columns = colnames

    df = df.assign(flux_csc_05_2 = lambda x: x.flux_aper_s + x.flux_aper_m)

    one_sigma_errors = csc_error_converter(df)

    df = df.assign(radec_err_csc = one_sigma_errors)

    df['r_98_csc'] = tsource_r(sigma=df['radec_err_csc'], t_thresh=.02)

    if save_coords:

        df_init_coords = df[['name', 'ra', 'dec']]
        Table.from_pandas(df_init_coords).write(f'data/{radec_fits_name}.fits', format='fits')

    return df


def tsource_r(sigma: float, t_thresh: float) -> float:
    """
    Calculates the radius of circle which contains the probability `t_thresh`
    NOT to find a counterpart for a source with localization error `sigma`.

    Args:
        sigma (float): localization error in arcsec.
        t_thresh (float): probability to NOT find a counterpart.

    Returns:
        float: radius of circle in arcsec.
    """
    
    rsearch = sigma * np.sqrt(-2 * np.log(t_thresh))
    
    return rsearch


def fsource_r(rho: float, f_thresh: float) -> float:
    """
    The function calculates the radius which corresponds to the probability `f_thresh`
    to FIND one or more false sources. 

    Args:
        rho (float): the density of the false sources (in arcmin^{-2}).
        f_thresh (float): probability to FIND one or more false sources.

    Returns:
        float: radius (in arcsec).
    """

    rho = rho / 3600 # arcmin^{-2} -> arcsec^{-2}
    pf_r = np.sqrt(-1 / (rho * np.pi) * np.log(1 - f_thresh))
    
    return pf_r


def poserr2sigma_coeff(conf_level: float) -> float:
    """
    Calculates convertion coefficient to go from
    positional error to sigma given confidence level.

    For details see:
    https://www.notion.so/Theory-ca6e7795b40c43b4ba6d96bc59727efa#b4d9fc11ff8243a3834e9eeba08c2273
    """
    coeff = (-2 * np.log(1 - conf_level)) ** -0.5
    
    return coeff


def pos_r(sigma: float, conf_level: float) -> float:
    """
    Calculates the radius of circle which contains the probability `conf_level`
    of finding (opposite to tsource_r()) a counterpart for a source with
    localization error `sigma`.

    For details see:
    https://www.notion.so/Theory-ca6e7795b40c43b4ba6d96bc59727efa#0ba88df64d2d4d9583f93d63dbe1b927

    Args:
        sigma (float): localization error in arcsec.
        conf_level (float): probability of finding a counterpart.

    Returns:
        float: radius of circle in arcsec.
    """
    
    r_pos = sigma * np.sqrt(-2 * np.log(1 - conf_level))

    return r_pos


# def only_reliable_xmm(df: pd.DataFrame) -> pd.DataFrame:
#     """
#     Filters out unreliable XMM sources.
#     """
#     reliable_df = df[
#                     ((df['xmm_SC_SUM_FLAG'] == 0) |
#                     (df['xmm_SC_SUM_FLAG'] == 1)) &
#                     (df['xmm_SC_DET_ML'] > 10) &
#                     ~(df['xmm_SC_VAR_FLAG'] == True) &
#                     (df['xmm_SC_EXTENT'] == 0) &
#                     (df['xmm_CONFUSED'] == False)
#                     ]

#     return reliable_df


def only_reliable_xmm(df: pd.DataFrame) -> pd.DataFrame:
    """
    Filters out unreliable XMM sources.
    """
    reliable_df = df[
                    ((df['SC_SUM_FLAG'] == 0) |
                    (df['SC_SUM_FLAG'] == 1)) &
                    (df['SC_DET_ML'] > 10) &
                    ~(df['SC_VAR_FLAG'] == True) &
                    (df['SC_EXTENT'] == 0) &
                    (df['CONFUSED'] == False)
                    ]

    return reliable_df




def xray_filtration(df: pd.DataFrame,
                    DL_thresh: float = 6,
                    EL_thresh: float = 6,
                    verbouse=True) -> pd.DataFrame:
    """
    Filters X-ray sources.
    TODO: remake processing of duplicates
    """
    
    if verbouse:
        print(f'DET_LIKE_0 > {DL_thresh}')
        print(f'EXT_LIKE < {EL_thresh}')
        print()

        print(f'Before X-ray source filters: {len(df)}')

    df = df[(df['DET_LIKE_0'] > DL_thresh)&
            (df['EXT_LIKE'] < EL_thresh)]

    if verbouse:
        print(f'After X-ray source filters: {len(df)}')
        print()


    # Manually get rid of faint sources in duplicated pairs
    df = df[~((df['srcname_fin']=='SRGe J104659.3+573056')&(df['DET_LIKE_0'] < 20))]
    df = df[~((df['srcname_fin'] == 'SRGe J104700.7+574558')&(df['DET_LIKE_0'] < 20))]
    print('Weak ERO duplicates removed (temporary measure)')
    print()

    return df



srg_names = {
            'id_src_name': 'srcname_fin',  # Индексы рентгеновских источников
            'x_ra_name': 'RA_fin',  # Координаты рентгеновских источников
            'x_dec_name': 'DEC_fin',
            'dl_name': 'DET_LIKE_0',  # Detection Likelihood
            'x_flux_name': 'flux_05-20',
            'ext_name': 'EXT_LIKE',  # Протяженность рентгеновских источников
            'ls_ra_name': 'ra',  # Координаты источников DESI
            'ls_dec_name': 'dec',
            'r_98_name': 'pos_r98',  # Позиционная ошибка
            'sigma_2d_name': 'pos_sigma_2d'
            }


***

In [169]:
# Каталог ERO (9500)
ero_df = pd.read_pickle(data_path+'ERO_lhpv_03_23_sd01_a15_g14.pkl')
ero_df = xray_filtration(ero_df, DL_thresh=6, EL_thresh=6)
ero_df.sample(5)

DET_LIKE_0 > 6
EXT_LIKE < 6

Before X-ray source filters: 9215
After X-ray source filters: 9215

Weak ERO duplicates removed (temporary measure)



Unnamed: 0,srcname_fin,RA_fin,DEC_fin,GLON,GLAT,pos_r98,DET_LIKE_0,ML_FLUX_0,ML_FLUX_ERR_0,ML_CTS_0,ML_CTS_ERR_0,ML_EXP_1,EXT,EXT_LIKE,EXT_ERR,ID_SRC,ID_CLUSTER,RA,DEC,RADEC_ERR,DIST_NN,SRCDENS,TSTART,TSTOP,EXT_LOWERR,EXT_UPERR,ML_RATE_0,ML_RATE_ERR_0,ML_BKG_0,RA_corr,DEC_corr,g_id,g_d2d,g_nsrc,g_s,g_gmag,g_maxLx,g_b,s_id,s_z,s_otype,s_d2d,s_nsrc,flag_xray,flag_radio,flag_agn_wise,w1,w2,w3,w1snr,w2snr,w3snr,w_nsrc,sdss_nsrc,sdss_p,sdss_id,sdss_sp,sdss_d2d,hpidx,RADEC_ERR_fin,pos_sigma_2d,pos_r68,pos_r95,ELON,ELAT,flux_05-20,NH
100,SRGe J104833.7+600846,162.140374,60.146233,146.724178,50.926237,2.454162,1249.321655,1.118977e-13,5.439437e-15,483.172333,23.487394,3944.830811,0.0,0.0,0.0,111,90,162.142382,60.144943,0.688018,114.855286,8e-06,626841300.0,627070100.0,0.0,0.0,0.122482,0.005954,16.488338,162.140374,60.146233,855664859545785088,5.21986,1,0,18.330484,-1.0,0,4C 60.15,1.72192,QSO,5.219486,1,0,1,1,14.921,13.862,10.917,33.6,31.7,11.8,1,1,1,1237653617471455280,631677714998257664,5.207364,54714948779,0.688018,0.87738,1.324485,2.147603,134.32728,47.307597,8.610965e-14,7.69179e+19
7969,SRGe J104358.5+591602,160.993868,59.26733,148.374146,51.097318,10.803185,6.497766,3.995153e-15,1.422697e-15,18.637232,6.636824,4261.82373,0.0,0.0,0.0,9283,8127,160.995877,59.26604,4.758516,88.625595,5e-06,626841300.0,627070100.0,0.0,0.0,0.004373,0.001557,18.791277,160.993868,59.26733,-1,-1.0,0,-1,,-1.0,-1,[OM2008] 71,0.64,Galaxy,9.946543,1,0,1,-1,,,,,,,0,0,-1,0,0,,57905412116,4.758516,3.862212,5.830365,9.453717,134.329979,46.255524,3.074426e-15,8.622823e+19
7306,SRGe J104840.0+592044,162.166524,59.345599,147.636734,51.486891,8.973891,6.127536,4.500355e-15,1.601998e-15,21.436777,7.630881,4351.712891,0.0,0.0,0.0,5270,4302,162.168533,59.34431,3.923913,31.47052,6e-06,626841300.0,627070100.0,0.0,0.0,0.004926,0.001754,77.086388,162.166524,59.345599,-1,-1.0,0,-1,,-1.0,-1,,,,-1.0,0,0,0,-1,,,,,,,0,1,0,1237653616397583066,0,7.014286,57617870015,3.923913,3.208227,4.843114,7.852928,134.994616,46.64889,3.463199e-15,8.114063e+19
1749,SRGe J105137.3+594503,162.905428,59.750873,146.750842,51.476515,4.249956,97.413376,1.714708e-14,2.267027e-15,80.576065,10.653011,4293.023926,0.0,0.0,0.0,2034,1596,162.907437,59.749583,1.698468,106.778435,4e-06,626841300.0,627070100.0,0.0,0.0,0.018769,0.002481,19.432869,162.905428,59.750873,861405723288256128,3.649846,1,0,20.007261,-1.0,0,,,,-1.0,0,0,0,1,16.801001,15.403,12.483,12.7,13.0,2.8,1,1,1,1237655368746402112,0,3.670234,56139941539,1.698468,1.519388,2.293656,3.719077,135.12085,47.193909,1.319535e-14,7.694348e+19
2840,SRGe J102955.5+584612,157.481109,58.770098,150.776974,50.008371,10.284125,11.767325,1.167201e-14,3.596838e-15,18.897161,5.823334,1479.102783,0.0,0.0,0.0,8040,6897,157.483118,58.768808,4.522289,156.027771,1.2e-05,626841300.0,627070100.0,0.0,0.0,0.012776,0.003937,7.542814,157.481109,58.770098,-1,-1.0,0,-1,,-1.0,-1,,,,-1.0,0,0,1,-1,,,,,,,0,0,-1,0,0,,59747287953,4.522289,3.676645,5.550234,8.999495,132.519257,44.87375,8.982069e-15,8.098706e+19


In [170]:
# desi_lh.gz_pkl number of sources and area
false_dens_arcmin = 2418574 / (41.729 * 3600)
print(false_dens_arcmin)
false_dens_arcsec = false_dens_arcmin / 3600

print(f'{ false_dens_arcsec:.3f}')

FALSE_PROB = .03

r_false = fsource_r(false_dens_arcmin, FALSE_PROB)

print(f'Радиус для {FALSE_PROB:.0%} вероятности найти ложный: {r_false:.2f}"')

desi = pd.read_pickle(data_path+'desi_lh.gz_pkl', compression='gzip')
desi.sample(5)

16.099741453452303
0.004
Радиус для 3% вероятности найти ложный: 1.47"


Unnamed: 0,release,objid,brickid,ra,dec,flux_g,flux_r,flux_z,flux_w1,flux_w2,flux_w3,flux_w4,flux_ivar_g,flux_ivar_r,flux_ivar_z,flux_ivar_w1,flux_ivar_w2,flux_ivar_w3,flux_ivar_w4,dered_mag_g,dered_mag_r,dered_mag_z,dered_mag_w1,dered_mag_w2,dered_mag_w3,dered_mag_w4,snr_g,snr_r,snr_z,snr_w1,snr_w2,snr_w3,snr_w4,type,parallax,parallax_ivar,pmra,pmra_ivar,pmdec,pmdec_ivar,ref_cat,ref_id,mjd_max,mjd_min,iso_max,iso_min,desi_id
2417730,9011,1713,623750,161.583514,62.351654,0.251578,0.417521,0.776225,1.383439,-0.588176,-18.02381,-492.36343,511.0062,192.73701,85.14385,4.780471,1.162203,0.00139,1.7e-05,23.965834,23.426422,22.76279,22.14574,,,,5.687028,5.796428,7.162492,3.024791,-0.634087,-0.671974,-2.020457,REX,0.0,0.0,0.0,0.0,0.0,0.0,,0,58245.1833,57821.352569,2018-05-07 04:23:57.150,2017-03-09 08:27:42.000,9011_623750_1713
2110043,9011,1896,608366,154.627907,57.006773,0.237964,0.798586,1.517187,6.640104,8.046729,11.121643,873.9268,189.44673,44.098568,9.888788,3.648615,0.908066,0.001316,1.8e-05,24.02612,22.722233,22.035116,20.442696,20.234806,19.884333,15.14622,3.275322,5.303148,4.771015,12.683494,7.667931,0.403487,3.666525,REX,0.0,0.0,0.0,0.0,0.0,0.0,,0,57897.157003,57824.396701,2017-05-24 03:46:05.070,2017-03-12 09:31:15.000,9011_608366_1896
450933,9011,1921,615992,159.166055,59.521332,0.07048,0.230029,0.553417,1.839941,0.207213,-13.406902,-220.3335,754.8913,299.18396,109.664505,4.671866,1.116583,0.001297,1.6e-05,25.3526,24.077194,23.132103,21.83643,24.207998,,,1.936445,3.978789,5.795433,3.976941,0.218959,-0.48292,-0.894629,PSF,0.0,0.0,0.0,0.0,0.0,0.0,,0,58194.411692,57842.282463,2018-03-17 09:52:50.192,2017-03-30 06:46:44.821,9011_615992_1921
1569929,9011,426,605194,164.897744,56.016441,0.188612,0.721398,0.970778,1.607445,-0.032889,-26.13183,-122.266365,331.07498,138.05817,52.0315,4.913017,1.206946,0.001182,1.5e-05,24.27522,22.830412,22.51869,21.982607,,,,3.431892,8.476291,7.002503,3.562953,-0.036133,-0.898581,-0.46693,REX,0.0,0.0,0.0,0.0,0.0,0.0,,0,58155.236609,57519.173241,2018-02-06 05:40:43.000,2016-05-11 04:09:28.000,9011_605194_426
661860,9011,116,613777,165.610744,58.743334,0.044361,0.095096,0.515483,5.781983,4.251467,19.067614,-271.8659,753.51044,172.88173,107.28798,4.496749,1.100075,0.001056,1.3e-05,25.85156,25.033754,23.20781,20.593037,20.927565,19.299026,,1.217718,1.250366,5.339364,12.261007,4.459127,0.619548,-0.971739,PSF,0.0,0.0,0.0,0.0,0.0,0.0,,0,57916.225903,57809.421548,2017-06-12 05:25:18.000,2017-02-25 10:07:01.770,9011_613777_116


# CSC

In [171]:
# CSC table conversion
csc_columns = ['name', 'ra', 'dec', 'err_ellipse_r0', 'err_ellipse_r1', 'err_ellipse_ang',
               'significance', 'likelihood', 'likelihood_class', 'conf_flag', 'dither_warning_flag',
               'extent_flag', 'pileup_flag', 'sat_src_flag', 'streak_src_flag', 'var_flag',
               'flux_aper_s', 'flux_aper_lolim_s', 'flux_aper_hilim_s', 'flux_aper_m',
               'flux_aper_lolim_m', 'flux_aper_hilim_m']

# Filtration
# data/cscresults.vot obtained via CSCview software
csc_init_df = vot2pd_csc(csc_cat_path=data_path+'cscresults.vot', colnames=csc_columns)

# CSC catalogue filtering
csc_df = csc_init_df[(csc_init_df['conf_flag'] == False) &
                              (csc_init_df['extent_flag'] == False) &
                              (csc_init_df['sat_src_flag'] == False) &
                              (csc_init_df['streak_src_flag'] == False) &
                              (csc_init_df['pileup_flag'] == False) &
                              (csc_init_df['dither_warning_flag'] == False)]


csc_df = csc_df[(csc_df['likelihood'] > 10) &
                                  (csc_df['likelihood'] < 10 ** 10)]


csc_df = csc_df.assign(flux_05_2 = lambda x: x.flux_aper_s + x.flux_aper_m)

# Ошибки на поток 0.5-2 кэВ
s_up = csc_df.flux_aper_hilim_s - csc_df.flux_aper_s
s_down = csc_df.flux_aper_s - csc_df.flux_aper_lolim_s
s_err = np.sqrt(s_up ** 2 + s_down ** 2)
m_up = csc_df.flux_aper_hilim_m - csc_df.flux_aper_m
m_down = csc_df.flux_aper_m - csc_df.flux_aper_lolim_m
m_err = np.sqrt(m_up ** 2 + m_down ** 2)

sm_err = np.sqrt(s_err ** 2 + m_err ** 2).values
csc_df['flux_aper_sm_err'] = sm_err


print(f'CSC Sources: {csc_df.shape[0]}')
csc_df.sample(5)

CSC Sources: 267265


Unnamed: 0,name,ra,dec,err_ellipse_r0,err_ellipse_r1,err_ellipse_ang,significance,likelihood,likelihood_class,conf_flag,dither_warning_flag,extent_flag,pileup_flag,sat_src_flag,streak_src_flag,var_flag,flux_aper_s,flux_aper_lolim_s,flux_aper_hilim_s,flux_aper_m,flux_aper_lolim_m,flux_aper_hilim_m,flux_csc_05_2,radec_err_csc,r_98_csc,flux_05_2,flux_aper_sm_err
295902,2CXO J212920.6-073624,322.336008,-7.60694,1.757718,1.324313,91.624972,4.722222,53.325252,TRUE,False,False,False,False,False,False,False,4.02802e-15,2.364273e-15,5.691768e-15,3.152772e-15,2.000797e-15,4.244116e-15,7.180792e-15,0.899099,2.514915,7.180792e-15,2.83799e-15
218581,2CXO J163534.1-471125,248.892101,-47.190548,2.168761,2.168758,0.0,2.210526,19.067098,TRUE,False,False,False,False,False,False,False,1.857474e-15,8.03232e-16,2.861514e-15,0.0,0.0,4.7169e-16,1.857474e-15,1.253025,3.5049,1.857474e-15,1.530364e-15
313035,2CXO J232952.9-000216,352.470618,-0.037977,2.785942,1.532515,126.951676,2.918919,26.247303,TRUE,False,False,False,False,False,False,False,5.306255e-15,2.195692e-15,8.416819e-15,2.42606e-15,6.9316e-16,4.043433e-15,7.732315e-15,1.299005,3.63351,7.732315e-15,4.997004e-15
49377,2CXO J040632.2-481311,61.63435,-48.219929,5.499777,5.190597,149.724394,2.486486,11.307976,MARGINAL,False,False,False,False,False,False,False,7.428962e-15,3.018016e-15,1.183991e-14,3.693696e-16,1.6789530000000002e-17,7.219496e-16,7.798332e-15,3.089534,8.641888,7.798332e-15,6.257917e-15
180204,2CXO J135316.5+332405,208.318973,33.401548,0.945767,0.814971,55.982974,8.315803,255.945802,TRUE,False,False,False,False,False,False,False,6.266473e-15,4.897933e-15,7.562985e-15,5.865245e-15,4.776794e-15,6.953697e-15,1.213172e-14,0.510045,1.426671,1.213172e-14,2.433783e-15


## CSC x ERO cross-match

In [172]:
ero_csc = cross_match_data_frames(ero_df, csc_df,
 'RA_fin', 'DEC_fin', 'ra', 'dec', match_radius = 30, df_prefix = 'csc')
ero_csc = ero_csc.query("csc_n_near==1 & csc_n_matches==1")
print('cross-matches: only one csc within 30'' and it is unique', ero_csc.shape[0])
assert len(ero_csc) == ero_csc.csc_name.nunique()

ero_csc.sample(5)

cross-match radius 30 arcsec
total matches: 739 out of 9215 x 267265
	 total unique pairs: 692
	 total non-unique pairs (duplicates in df2): 47
cross-matches: only one csc within 30 and it is unique 566


Unnamed: 0,srcname_fin,RA_fin,DEC_fin,GLON,GLAT,pos_r98,DET_LIKE_0,ML_FLUX_0,ML_FLUX_ERR_0,ML_CTS_0,ML_CTS_ERR_0,ML_EXP_1,EXT,EXT_LIKE,EXT_ERR,ID_SRC,ID_CLUSTER,RA,DEC,RADEC_ERR,DIST_NN,SRCDENS,TSTART,TSTOP,EXT_LOWERR,EXT_UPERR,ML_RATE_0,ML_RATE_ERR_0,ML_BKG_0,RA_corr,DEC_corr,g_id,g_d2d,g_nsrc,g_s,g_gmag,g_maxLx,g_b,s_id,s_z,s_otype,s_d2d,s_nsrc,flag_xray,flag_radio,flag_agn_wise,w1,w2,w3,w1snr,w2snr,w3snr,w_nsrc,sdss_nsrc,sdss_p,sdss_id,sdss_sp,sdss_d2d,hpidx,RADEC_ERR_fin,pos_sigma_2d,pos_r68,pos_r95,ELON,ELAT,flux_05-20,NH,csc_name,csc_ra,csc_dec,csc_err_ellipse_r0,csc_err_ellipse_r1,csc_err_ellipse_ang,csc_significance,csc_likelihood,csc_likelihood_class,csc_conf_flag,csc_dither_warning_flag,csc_extent_flag,csc_pileup_flag,csc_sat_src_flag,csc_streak_src_flag,csc_var_flag,csc_flux_aper_s,csc_flux_aper_lolim_s,csc_flux_aper_hilim_s,csc_flux_aper_m,csc_flux_aper_lolim_m,csc_flux_aper_hilim_m,csc_flux_csc_05_2,csc_radec_err_csc,csc_r_98_csc,csc_flux_05_2,csc_flux_aper_sm_err,csc_sep,csc_n_near,csc_n_matches
277,SRGe J104629.0+584350,161.620911,58.73063,148.67171,51.695545,5.179706,45.472172,1.046158e-14,1.81921e-15,48.873528,8.498833,4267.996582,0.0,0.0,0.0,2406,1902,161.62292,58.72934,2.154263,41.585388,7e-06,626841300.0,627070100.0,0.0,0.0,0.011451,0.001991,17.750046,161.620911,58.73063,-1,-1.0,0,-1,,-1.0,-1,CLANS 453,2.07,QSO,4.775476,1,1,0,-1,,,,,,,0,1,0,1237655109446533488,0,4.30849,59894646278,2.154263,1.85178,2.795433,4.532689,135.141916,45.980592,8.050591e-15,7.909173e+19,2CXO J104629.4+584350,161.622831,58.730594,0.769258,0.72689,3.040887,8.149069,446.307967,True,False,False,False,False,False,False,False,3.86777e-15,3.111999e-15,4.579084e-15,2.26885e-15,1.809678e-15,2.701012e-15,6.136621e-15,0.432381,1.209435,6.136621e-15,1.214397e-15,3.589409,1,1
540,SRGe J105126.5+571133,162.860576,57.192536,149.827767,53.212727,10.191352,12.794593,5.457357e-15,1.856992e-15,25.448132,8.659314,4260.111816,0.0,0.0,0.0,8379,7232,162.862585,57.191246,4.480025,162.477661,9e-06,626841300.0,627070100.0,0.0,0.0,0.005974,0.002033,19.162315,162.860576,57.192536,854176228177285760,7.100457,1,1,17.395212,-1.0,0,RDS 61B,0.592,AGN,6.371191,1,1,1,0,15.534,15.115,12.664,28.0,15.5,0.1,2,3,1,1237655107299049570,0,7.128704,65766180604,4.480025,3.643477,5.500165,8.91831,137.117915,45.042081,4.199649e-15,6.616957e+19,2CXO J105126.2+571130,162.859225,57.191932,2.100941,2.100939,0.0,2.666667,84.723298,True,False,False,False,False,False,False,False,4.11558e-15,1.815697e-15,6.415464e-15,2.232178e-15,5.357227e-16,3.928633e-15,6.347758e-15,1.213842,3.395298,6.347758e-15,4.041639e-15,3.415473,1,1
234,SRGe J104604.6+584424,161.519056,58.739927,148.717363,51.649946,5.80456,53.263432,1.268957e-14,2.087466e-15,60.201984,9.903375,4334.224121,0.0,0.0,0.0,2454,1939,161.521064,58.738638,2.452672,131.308472,6e-06,626841300.0,627070100.0,0.0,0.0,0.01389,0.002285,18.390734,161.519056,58.739927,855293400709705088,9.511575,1,0,21.244097,-1.0,0,CLANS 397,1.81,QSO,9.511712,1,1,0,1,17.410999,15.957,12.295,7.4,7.7,0.5,1,2,0,1237655109446467838,0,8.503062,59860040492,2.452672,2.07517,3.13266,5.07949,135.070783,45.959697,9.76512e-15,7.857937e+19,2CXO J104604.0+584424,161.516774,58.740109,0.865831,0.776664,15.442971,10.266414,543.170066,True,False,False,False,False,False,False,False,6.272374e-15,5.22116e-15,7.265187e-15,4.759875e-15,4.055126e-15,5.464624e-15,1.103225e-14,0.475184,1.32916,1.103225e-14,1.756152e-15,4.312858,1,1
424,SRGe J105302.4+573758,163.259801,57.632862,149.044304,53.073864,6.903723,19.078884,6.723091e-15,1.63616e-15,31.813745,7.742331,4323.080566,0.0,0.0,0.0,5639,4635,163.26181,57.631573,2.968746,134.703964,4e-06,626841300.0,627070100.0,0.0,0.0,0.007359,0.001791,20.686918,163.259801,57.632862,-1,-1.0,0,-1,,-1.0,-1,2XMM J105302.4+573756,1.88558,Seyfert_1,4.641915,1,1,1,0,17.493,16.995001,12.762,7.9,1.4,0.2,1,1,1,1237658302742790493,9215494160714584064,4.639355,64059772633,2.968746,2.468128,3.725867,6.041352,137.03648,45.528738,5.173681e-15,6.943349e+19,2CXO J105302.4+573756,163.260051,57.63247,0.884681,0.791848,74.00019,15.202441,794.057273,True,False,False,False,False,False,False,True,,,,,,,,0.485059,1.356782,,,1.491775,1,1
143,SRGe J103331.3+581945,158.380436,58.329291,150.885329,50.648443,4.661411,93.368805,1.818996e-14,2.408294e-15,83.606071,11.069179,4199.073242,0.0,0.0,0.0,1692,1334,158.382445,58.328002,1.902422,125.421326,8e-06,626841300.0,627070100.0,0.0,0.0,0.019911,0.002636,18.616566,158.380436,58.329291,855041749986966784,4.902095,1,0,21.106846,-1.0,0,,,,-1.0,0,1,0,0,15.86,15.147,12.72,24.3,15.1,2.3,1,2,0,1237658304889291585,0,5.282011,61402652957,1.902422,1.666486,2.515714,4.079136,133.407158,44.740913,1.399789e-14,6.231856e+19,2CXO J103331.3+581943,158.380536,58.328846,0.954585,0.790875,7.807112,8.246487,287.954529,True,False,False,False,False,False,False,False,1.851919e-14,1.388939e-14,2.314898e-14,1.594231e-14,1.275385e-14,1.913077e-14,3.44615e-14,0.506443,1.416596,3.44615e-14,7.950007e-15,1.614607,1,1


## CSC x DESI cross-match

In [173]:
csc_desi = cross_match_data_frames(ero_csc, desi, colname_ra1 = 'csc_ra',colname_dec1 = 'csc_dec' , colname_ra2 = 'ra', colname_dec2=  'dec', match_radius = 15, df_prefix = 'desi')

cross-match radius 15 arcsec
total matches: 2543 out of 566 x 2418574
	 total unique pairs: 2543
	 total non-unique pairs (duplicates in df2): 0


In [174]:
csc_desi_closest = (csc_desi
    .loc[csc_desi.groupby('csc_name')['desi_sep'].idxmin()]
    )


csc_hostless = (csc_desi_closest[
    csc_desi_closest['desi_sep'] > 2 * csc_desi_closest['csc_r_98_csc']
    ])
csc_hostless = csc_hostless.query('csc_sep<20')
csc_hostless['csc_ero_flux_ratio'] = csc_hostless['csc_flux_csc_05_2'] / csc_hostless['flux_05-20']

print('hostless csc near chandra sources', csc_hostless.shape[0])
csc_hostless = csc_hostless[['srcname_fin',	'RA_fin',	'DEC_fin', 'pos_r98', 'flux_05-20', 'csc_name', 'csc_ra',	'csc_dec', 'csc_r_98_csc', 'csc_flux_05_2', 'csc_sep', 'desi_desi_id', 'desi_ra', 'desi_dec', 'desi_sep', 'csc_ero_flux_ratio']]

csc_hostless.rename(columns = {'csc_sep':'ero_csc_sep', 'desi_sep':'desi_sep_minimal'}, inplace = True)

csc_hostless = add_separation_columns(csc_hostless, 'RA_fin', 'DEC_fin', 'desi_ra', 'desi_dec', 'desi_ero_sep')

assert csc_hostless.srcname_fin.nunique()==csc_hostless.csc_name.nunique()
assert csc_hostless.srcname_fin.nunique()==csc_hostless.shape[0]
csc_hostless.sample(7)

hostless csc near chandra sources 29


Unnamed: 0,srcname_fin,RA_fin,DEC_fin,pos_r98,flux_05-20,csc_name,csc_ra,csc_dec,csc_r_98_csc,csc_flux_05_2,ero_csc_sep,desi_desi_id,desi_ra,desi_dec,desi_sep_minimal,csc_ero_flux_ratio,desi_ero_sep
1790,SRGe J103121.9+573134,157.841091,57.526169,8.679921,4.350933e-15,2CXO J103121.9+573134,157.841331,57.526115,2.33175,3.862979e-15,0.504317,9011_609939_482,157.839339,57.524106,8.192553,0.887851,8.162275
2344,SRGe J104410.7+585421,161.04443,58.905714,10.203106,3.030344e-15,2CXO J104411.1+585424,161.046365,58.906818,1.16178,6.099061e-15,5.36133,9011_614515_2457,161.0457,58.906094,2.884135,2.012663,2.729407
1495,SRGe J104854.4+573926,162.226706,57.657139,8.499,5.270004e-15,2CXO J104853.5+573920,162.222929,57.655774,2.009572,7.151569e-15,8.777265,9011_610723_3980,162.217314,57.657324,12.169596,1.357033,18.101054
1363,SRGe J105118.0+552353,162.824877,55.398127,10.75316,6.044226e-15,2CXO J105117.8+552354,162.82426,55.398573,2.010836,3.740731e-15,2.045384,9011_603560_4390,162.820078,55.397071,10.114483,0.618893,10.520464
870,SRGe J103220.4+573211,158.084926,57.53645,5.714911,9.351694e-15,2CXO J103220.2+573211,158.084192,57.536421,1.498974,4.85286e-15,1.42118,9011_609939_2890,158.088107,57.538123,9.735829,0.518928,8.606599
1362,SRGe J103302.4+580241,158.260049,58.044662,6.663467,6.085609e-15,2CXO J103302.7+580240,158.261655,58.044693,2.090497,5.264955e-15,3.061539,9011_611484_2685,158.264079,58.04597,6.516825,0.865148,9.006889
2380,SRGe J104706.2+585144,161.77586,58.862345,10.398229,2.946539e-15,2CXO J104706.8+585149,161.778743,58.863727,2.237806,1.771158e-15,7.317357,9011_613769_176,161.776663,58.864456,4.675892,0.601098,7.74247


***

# 4XMM DR10

## XMM x EROSITA (done in topcat)

In [175]:
ero_xmm = pd.read_csv(data_path+'xmm_allsky_full_ero_slim_point_30sec.csv')
ero_xmm = only_reliable_xmm(ero_xmm) #TODO CHECK IF XMM IS RELIABLE

print(f'Reliable XMM Sources within 30 arcsec from eROSITA: {ero_xmm.shape[0]}')

# Поток 0.5-2 кэВ
ero_xmm = ero_xmm.assign(flux_05_2 = lambda x: x.SC_EP_2_FLUX + x.SC_EP_3_FLUX)
# Ошибки на поток 0.5-2 кэВ
ero_xmm = ero_xmm.assign(flux_05_2_err = lambda x: np.sqrt(x.SC_EP_2_FLUX_ERR ** 2 + x.SC_EP_3_FLUX_ERR ** 2))

xmm_err = ero_xmm['SC_POSERR']
xmm_sigma_coeff = poserr2sigma_coeff(.63)
xmm_sigma = xmm_sigma_coeff * xmm_err
ero_xmm.insert(11, 'sigma', xmm_sigma)
# pos_r98
xmm_r98 = pos_r(xmm_sigma, .98)
ero_xmm.insert(11, 'xmm_pos_r98', xmm_r98)


ero_xmm['xmm_ero_flux_ratio'] = ero_xmm['flux_05_2']\
                                                / ero_xmm['flux_05-20']



ero_xmm['GroupSize'] = ero_xmm['GroupSize'].fillna(1)
ero_xmm = ero_xmm[ero_xmm['GroupSize']==1]

print(f'XMM Sources after filters (only one within 30 arcsec): {ero_xmm.shape[0]}')



ero_xmm.sample(10)

Reliable XMM Sources within 30 arcsec from eROSITA: 844
XMM Sources after filters (only one within 30 arcsec): 740


Unnamed: 0,srcname_fin,RA_fin,DEC_fin,flux_05-20,pos_sigma_2d,pos_r98,DET_LIKE_0,EXT_LIKE,pos_r98_corr,SRCID,IAUNAME,xmm_pos_r98,sigma,SC_RA,SC_DEC,SC_POSERR,SC_DET_ML,SC_EP_1_FLUX,SC_EP_1_FLUX_ERR,SC_EP_2_FLUX,SC_EP_2_FLUX_ERR,SC_EP_3_FLUX,SC_EP_3_FLUX_ERR,SC_EP_4_FLUX,SC_EP_4_FLUX_ERR,SC_EP_5_FLUX,SC_EP_5_FLUX_ERR,SC_EP_8_FLUX,SC_EP_8_FLUX_ERR,SC_EP_9_FLUX,SC_EP_9_FLUX_ERR,SC_HR1,SC_HR1_ERR,SC_HR2,SC_HR2_ERR,SC_HR3,SC_HR3_ERR,SC_HR4,SC_HR4_ERR,SC_EXTENT,SC_EXT_ERR,SC_EXT_ML,SC_CHI2PROB,SC_FVAR,SC_FVARERR,SC_VAR_FLAG,SC_SUM_FLAG,SC_EP_8_FMIN,SC_EP_8_FMIN_ERR,SC_EP_8_FMAX,SC_EP_8_FMAX_ERR,MJD_FIRST,MJD_LAST,N_DETECTIONS,CONFUSED,WEBPAGE_URL,GroupID,GroupSize,Separation,flux_05_2,flux_05_2_err,xmm_ero_flux_ratio
192,SRGe J104727.7+571831,161.865499,57.308498,1.650853e-14,1.509007,4.220919,127.0903,0.0,5.0,205562116010025,4XMM J104727.0+571831,3.514886,1.256596,161.862696,57.308639,1.77198,38.1592,5.04562e-15,1.68423e-15,9.83083e-15,2.67397e-15,4.02108e-15,1.95185e-15,6.05475e-15,5.0233e-15,6.92729e-14,5.39908e-14,1.06807e-13,5.47217e-14,2.57787e-14,6.03272e-15,0.299822,0.179718,-0.580108,0.161433,-0.340555,0.360543,0.47877,0.355806,0.0,,3.07447,,,,,1,1.06807e-13,5.47217e-14,1.06807e-13,5.47217e-14,54799.044537,54799.125949,1,False,http://xmm-catalog.irap.omp.eu/source/20556211...,,1.0,5.474113,1.385191e-14,3.310564e-15,0.839076
11,SRGe J104540.1+584254,161.417075,58.715136,8.319905e-14,0.886571,2.479872,1246.3405,0.0,5.0,205541207010003,4XMM J104540.1+584253,2.37313,0.84841,161.417391,58.714982,1.19638,360.053,3.79218e-14,4.33132e-15,2.81271e-14,3.87794e-15,2.14672e-14,4.32305e-15,4.67099e-14,1.22581e-14,4.18768e-14,5.57458e-14,1.76103e-13,5.75356e-14,1.03201e-13,1.08375e-14,-0.178359,0.086671,-0.282508,0.112288,-0.157503,0.161293,-0.577401,0.45223,0.0,,-2.37222,0.606038,,,False,0,1.76103e-13,5.75356e-14,1.76103e-13,5.75356e-14,54932.398796,54932.883727,1,False,http://xmm-catalog.irap.omp.eu/source/20554120...,,1.0,0.810623,4.95943e-14,5.807511e-15,0.596092
904,SRGe J105535.0+573333,163.895645,57.559164,3.096694e-15,3.627613,10.146975,7.414317,0.0,11.161673,206060301010023,4XMM J105534.7+573336,1.299189,0.464469,163.894949,57.560197,0.654968,91.1849,3.05648e-15,3.86485e-16,2.91881e-15,3.9588e-16,1.55264e-15,3.90875e-16,1.28573e-15,6.61351e-16,2.03598e-15,3.72558e-15,1.33802e-14,3.9723e-15,9.12586e-15,9.76773e-16,-0.099226,0.08577,-0.268188,0.108322,-0.697995,0.115629,0.187763,0.288304,0.0,,-0.883701,0.581626,,,False,0,1.18943e-14,4.5836e-15,2.12634e-14,1.02301e-14,55121.968056,55299.898669,3,False,http://xmm-catalog.irap.omp.eu/source/20606030...,,1.0,3.952828,4.47145e-15,5.563311e-16,1.443943
93,SRGe J104416.0+590102,161.066806,59.017117,2.8941e-14,1.16418,3.256386,302.54837,0.0,5.0,205541201010004,4XMM J104415.8+590101,0.730295,0.261085,161.065835,59.017058,0.368168,472.002,5.46281e-15,4.6787e-16,9.78667e-15,5.9789e-16,1.05723e-14,6.95325e-16,1.69607e-14,1.59385e-15,1.96464e-14,6.65734e-15,6.87327e-14,7.06312e-15,3.80461e-14,1.54797e-15,0.1257,0.043891,0.008448,0.042135,-0.299467,0.051007,-0.418552,0.104595,0.0,,-1.40459,0.057877,0.370453,0.17538,False,0,6.17245e-14,1.52347e-14,7.47402e-14,1.41461e-14,54750.880949,54932.883727,4,False,http://xmm-catalog.irap.omp.eu/source/20554120...,,1.0,1.812011,2.035897e-14,9.170329e-16,0.703465
215,SRGe J104804.9+572145,162.020426,57.3625,1.469379e-14,1.58439,4.431776,100.17839,0.0,5.0,205562116010044,4XMM J104805.1+572145,3.624023,1.295613,162.021274,57.362676,1.827,14.5161,4.54447e-15,1.63968e-15,3.53518e-15,1.75388e-15,3.92042e-15,2.09035e-15,2.38105e-15,3.64417e-15,2.33793e-14,3.23123e-14,4.57863e-14,3.29544e-14,1.69876e-14,5.01e-15,-0.125809,0.28132,-0.081899,0.338075,-0.26956,0.333089,0.11723,0.46587,0.0,,-0.614068,,,,,0,4.57863e-14,3.29544e-14,4.57863e-14,3.29544e-14,54799.044537,54799.125949,1,False,http://xmm-catalog.irap.omp.eu/source/20556211...,,1.0,1.765344,7.4556e-15,2.728673e-15,0.507398
857,SRGe J104245.8+593123,160.690774,59.523013,3.386523e-15,3.19332,8.932193,11.159896,0.0,9.825412,205562129010020,4XMM J104245.0+593119,2.479034,0.886272,160.687915,59.52217,1.24977,30.6547,1.2756e-15,7.65782e-16,3.12644e-15,1.05726e-15,4.87854e-15,1.39186e-15,2.48926e-15,2.30361e-15,1.19496e-14,1.70238e-14,3.17461e-14,1.74823e-14,1.60136e-14,3.03443e-15,0.166724,0.186343,0.153452,0.170789,-0.470065,0.202179,0.235316,0.427163,0.0,,0.243181,,,,,1,2.45478e-14,1.89452e-14,7.303e-14,4.53707e-14,54797.077187,54803.128889,2,False,http://xmm-catalog.irap.omp.eu/source/20556212...,,1.0,6.036838,8.00498e-15,1.747877e-15,2.363776
9,SRGe J103353.1+584655,158.471063,58.781974,8.768102e-14,0.88595,2.478135,1224.6747,0.0,5.0,201429701010001,4XMM J103352.8+584654,0.962738,0.344186,158.47022,58.781891,0.485351,3168.61,2.43582e-14,2.16011e-15,7.5247e-14,3.8112e-15,6.5123e-14,3.72756e-15,5.76498e-14,6.25233e-15,1.14464e-13,1.8937e-14,3.40889e-13,2.08305e-14,2.35879e-13,8.43776e-15,0.488579,0.038732,-0.126684,0.037394,-0.525395,0.044345,-0.35815,0.085928,0.0,,-2.98315,0.004792,0.26695,0.073049,False,0,3.40889e-13,2.08305e-14,3.40889e-13,2.08305e-14,52930.032037,52930.306516,1,False,http://xmm-catalog.irap.omp.eu/source/20142970...,,1.0,1.603017,1.4037e-13,5.331036e-15,1.600917
672,SRGe J103254.1+574157,158.225618,57.699169,5.032301e-15,3.093531,8.653069,19.769396,0.0,9.518376,203032602010042,4XMM J103253.7+574150,1.598694,0.571544,158.224119,57.697247,0.805959,113.598,5.64419e-16,1.56795e-16,1.17641e-15,2.2666e-16,2.52084e-15,3.52483e-16,2.94225e-15,6.69507e-16,9.29807e-16,2.35796e-15,8.48202e-15,2.52708e-15,6.44968e-15,6.53531e-16,0.33515,0.118183,0.25544,0.105224,-0.427975,0.105995,-0.688542,0.315602,0.0,,2.61715,0.969394,,,False,0,7.56177e-15,2.83769e-15,1.20088e-14,5.55525e-15,53467.914178,53510.606285,2,False,http://xmm-catalog.irap.omp.eu/source/20303260...,,1.0,7.494595,3.69725e-15,4.190692e-16,0.734704
171,SRGe J103907.5+574921,159.781398,57.822516,1.794553e-14,1.437737,4.021565,150.6285,0.0,5.0,205562109010013,4XMM J103907.5+574922,3.366692,1.203615,159.781384,57.822871,1.69727,40.3367,1.89374e-15,1.56595e-15,8.74417e-15,2.86161e-15,1.71327e-14,4.36496e-15,4.98503e-15,5.37443e-15,4.14559e-15,2.10154e-14,3.82476e-14,2.268e-14,4.12548e-14,8.05812e-15,0.586013,0.269717,0.280933,0.188096,-0.828983,0.174685,-0.668078,0.923749,0.0,,-1.145,,,,,0,3.82476e-14,2.268e-14,3.82476e-14,2.268e-14,54619.54919,54619.63059,1,False,http://xmm-catalog.irap.omp.eu/source/20556210...,,1.0,1.280218,2.587687e-14,5.219357e-15,1.441967
214,SRGe J103426.3+563938,158.609475,56.660434,1.494682e-14,1.747588,4.888264,91.72974,0.0,5.37709,202025203010021,4XMM J103426.4+563936,3.642788,1.302321,158.610002,56.660128,1.83646,13.6573,2.09916e-15,1.78275e-15,4.4276e-15,2.53488e-15,6.31438e-15,3.52375e-15,2.14136e-15,3.87434e-15,6.48921e-14,5.12372e-14,9.23737e-14,5.2057e-14,2.11451e-14,6.60706e-15,0.47347,0.26547,0.20407,0.315969,-0.872804,0.269365,0.914148,0.202599,0.0,,-0.664011,,,,,0,9.23737e-14,5.2057e-14,9.23737e-14,5.2057e-14,53299.251389,53299.570845,1,False,http://xmm-catalog.irap.omp.eu/source/20202520...,,1.0,1.516631,1.074198e-14,4.340787e-15,0.71868


In [176]:
xmm_desi = cross_match_data_frames(ero_xmm, desi, colname_ra1 = 'SC_RA',colname_dec1 = 'SC_DEC' , colname_ra2 = 'ra', colname_dec2=  'dec', match_radius = 15, df_prefix = 'desi')

cross-match radius 15 arcsec
total matches: 3284 out of 740 x 2418574
	 total unique pairs: 3284
	 total non-unique pairs (duplicates in df2): 0


In [177]:
xmm_desi_closest = (xmm_desi
    .loc[xmm_desi.groupby('IAUNAME')['desi_sep'].idxmin()]
    )


xmm_hostless = (xmm_desi_closest[
    xmm_desi_closest['desi_sep'] > 2 * xmm_desi_closest['xmm_pos_r98']
    ])
xmm_hostless = xmm_hostless.query('Separation<20')

print('hostless xmm near chandra sources', xmm_hostless.shape[0])
xmm_hostless = xmm_hostless[['srcname_fin',	'RA_fin',	'DEC_fin', 'pos_r98', 'flux_05-20', 'IAUNAME', 'SC_RA',	'SC_DEC', 'xmm_pos_r98', 'flux_05_2', 'Separation', 'desi_desi_id', 'desi_ra', 'desi_dec', 'desi_sep', 'xmm_ero_flux_ratio']]

xmm_hostless.rename(columns = {'Separation':'ero_xmm_sep', 'desi_sep':'desi_sep_minimal'}, inplace = True)

xmm_hostless = add_separation_columns(xmm_hostless, 'RA_fin', 'DEC_fin', 'desi_ra', 'desi_dec', 'desi_ero_sep')

assert xmm_hostless.srcname_fin.nunique()==xmm_hostless.IAUNAME.nunique()
assert xmm_hostless.srcname_fin.nunique()==xmm_hostless.shape[0]
xmm_hostless.sample(7)

hostless xmm near chandra sources 38


Unnamed: 0,srcname_fin,RA_fin,DEC_fin,pos_r98,flux_05-20,IAUNAME,SC_RA,SC_DEC,xmm_pos_r98,flux_05_2,ero_xmm_sep,desi_desi_id,desi_ra,desi_dec,desi_sep_minimal,xmm_ero_flux_ratio,desi_ero_sep
1015,SRGe J104656.6+572859,161.735777,57.482989,5.116249,1.195571e-14,4XMM J104656.4+572900,161.735285,57.483419,2.233941,1.568075e-14,1.819102,9011_609947_2169,161.732737,57.484218,5.707308,1.31157,7.361642
2088,SRGe J105228.9+574333,163.120323,57.725792,6.977685,5.856936e-15,4XMM J105229.0+574331,163.1212,57.725425,2.956029,6.24327e-15,2.140836,9011_610725_3071,163.116521,57.726276,9.50271,1.065962,7.515293
178,SRGe J105336.4+573800,163.401649,57.633145,3.102382,3.871008e-14,4XMM J105336.3+573800,163.401438,57.633437,0.226514,4.79094e-14,1.127205,9011_610726_1583,163.401652,57.633288,0.676023,1.237647,0.515835
1857,SRGe J105640.4+573203,164.168282,57.534091,6.063802,6.609721e-15,4XMM J105640.2+573204,164.167672,57.534513,1.420083,3.377601e-15,1.921706,9011_609952_2647,164.167686,57.536689,7.833919,0.511005,9.422389
3155,SRGe J104932.9+572948,162.387049,57.496591,9.021607,2.838786e-15,4XMM J104933.1+572944,162.388154,57.495784,3.423898,1.267586e-16,3.607264,9011_609948_3971,162.388586,57.497958,7.870032,0.044652,5.748276
2550,SRGe J104122.8+590252,160.344824,59.047725,8.90271,4.231633e-15,4XMM J104122.6+590255,160.344377,59.0488,3.592702,2.114995e-15,3.958391,9011_614514_966,160.344296,59.051217,8.701604,0.499806,12.60907
3074,SRGe J104410.7+585421,161.04443,58.905714,10.203106,3.030344e-15,4XMM J104411.1+585424,161.046321,58.906725,1.148625,7.38478e-15,5.060578,9011_614515_2457,161.0457,58.906094,2.547293,2.436944,2.729407


# Joining CSC and XMM hostless candidates

In [178]:
csc_ctps = csc_hostless[['srcname_fin', 'RA_fin','DEC_fin', 'pos_r98', 'flux_05-20', 'ero_csc_sep', 'csc_name',   'csc_ra', 'csc_dec', 'csc_flux_05_2', 'csc_r_98_csc', 'desi_sep_minimal', 'desi_desi_id',  'desi_ra', 'desi_dec', 'desi_ero_sep', 'csc_ero_flux_ratio']]

xmm_ctps = xmm_hostless[['srcname_fin', 'RA_fin','DEC_fin', 'pos_r98', 'flux_05-20', 'ero_xmm_sep','IAUNAME', 'SC_RA','SC_DEC',  'flux_05_2', 'xmm_pos_r98', 'desi_sep_minimal', 'desi_desi_id',  'desi_ra', 'desi_dec', 'desi_ero_sep',  'xmm_ero_flux_ratio']]

csc_ctps.rename(columns = {'desi_desi_id':'desi_id'}, inplace = True)
xmm_ctps.rename(columns = {'desi_desi_id':'desi_id'}, inplace = True)

In [179]:
xmm_ctps = xmm_ctps[xmm_ctps['xmm_ero_flux_ratio'] <  5 ]
xmm_ctps = xmm_ctps[xmm_ctps['xmm_ero_flux_ratio'] >  1/5 ]

csc_ctps = csc_ctps[csc_ctps['csc_ero_flux_ratio'] <  5 ]
csc_ctps = csc_ctps[csc_ctps['csc_ero_flux_ratio'] >  1/5 ]

In [180]:
xmm_ctps['x_ray_det'] = 'xmm'
csc_ctps['x_ray_det'] = 'csc'
print('XMM companions', xmm_ctps.shape[0])
print('CSC companions', csc_ctps.shape[0])
print('Total companions possible', xmm_ctps.shape[0] + csc_ctps.shape[0])

XMM companions 37
CSC companions 21
Total companions possible 58


In [181]:
final_ctps = pd.concat([xmm_ctps, csc_ctps])
final_ctps.sort_values(by='srcname_fin', inplace=True)
final_ctps.reset_index(drop=True, inplace=True)


n_ctps = final_ctps.groupby('srcname_fin')['x_ray_det'].transform(lambda x: len(x.unique())) 
final_ctps['n_x_ray_det'] = n_ctps


n_desi_ctps = final_ctps.groupby('srcname_fin')['desi_id'].transform(lambda x: len(x.unique()))
final_ctps['n_desi_ctps'] = n_desi_ctps


final_ctps = final_ctps.query('( n_x_ray_det == 1 ) | ( n_x_ray_det==2 & n_desi_ctps == 1 )')
## final_ctps.query('( n_x_ray_det == 1 ) | ( n_x_ray_det==2 & n_desi_ctps == 1 )') - opposite of this


final_ctps.drop_duplicates(subset=['srcname_fin', 'desi_id'], inplace=True)
print('Final number of hostless', final_ctps.shape[0])
final_ctps.reset_index()



Final number of hostless 54


Unnamed: 0,index,srcname_fin,RA_fin,DEC_fin,pos_r98,flux_05-20,ero_xmm_sep,IAUNAME,SC_RA,SC_DEC,flux_05_2,xmm_pos_r98,desi_sep_minimal,desi_id,desi_ra,desi_dec,desi_ero_sep,xmm_ero_flux_ratio,x_ray_det,ero_csc_sep,csc_name,csc_ra,csc_dec,csc_flux_05_2,csc_r_98_csc,csc_ero_flux_ratio,n_x_ray_det,n_desi_ctps
0,0,SRGe J103121.9+573134,157.841091,57.526169,8.679921,4.350933e-15,,,,,,,8.192553,9011_609939_482,157.839339,57.524106,8.162275,,csc,0.504317,2CXO J103121.9+573134,157.841331,57.526115,3.862979e-15,2.33175,0.887851,1,1
1,1,SRGe J103158.3+573841,157.993098,57.644831,11.907729,3.019065e-15,,,,,,,8.059029,9011_610714_3345,157.981929,57.64706,22.964569,,csc,14.930985,2CXO J103156.5+573846,157.985747,57.646145,1.189964e-14,1.155268,3.941499,1,1
2,2,SRGe J103220.4+573211,158.084926,57.53645,5.714911,9.351694e-15,,,,,,,9.735829,9011_609939_2890,158.088107,57.538123,8.606599,,csc,1.42118,2CXO J103220.2+573211,158.084192,57.536421,4.85286e-15,1.498974,0.518928,1,1
3,3,SRGe J103239.4+574033,158.163965,57.675913,6.556224,8.782485e-15,3.16925,4XMM J103239.1+574036,158.163236,57.676703,5.20523e-15,1.764312,11.35396,9011_610715_599,158.169132,57.676603,10.251895,0.592683,xmm,,,,,,,,2,1
4,5,SRGe J103251.8+574550,158.215643,57.763855,11.176683,2.601944e-15,4.011813,4XMM J103252.0+574546,158.216941,57.762982,2.69919e-15,1.462532,8.608715,9011_610715_1014,158.221411,57.763161,11.355228,1.037374,xmm,,,,,,,,2,1
5,7,SRGe J103302.4+580241,158.260049,58.044662,6.663467,6.085609e-15,,,,,,,6.516825,9011_611484_2685,158.264079,58.04597,9.006889,,csc,3.061539,2CXO J103302.7+580240,158.261655,58.044693,5.264955e-15,2.090497,0.865148,1,1
6,8,SRGe J103349.3+584441,158.455573,58.744828,6.972946,5.515691e-15,0.782359,4XMM J103349.2+584440,158.455253,58.744687,9.50664e-15,3.907439,12.20385,9011_613762_405,158.448776,58.745133,12.743523,1.723563,xmm,,,,,,,,1,1
7,9,SRGe J103409.4+574725,158.539363,57.790228,7.180546,4.495352e-15,,,,,,,6.80841,9011_610715_3565,158.543036,57.792069,9.673252,,csc,3.623997,2CXO J103409.5+574728,158.539889,57.791195,2.186806e-15,1.230499,0.486459,1,1
8,10,SRGe J103518.6+562405,158.827436,56.401399,6.233959,8.711545e-15,5.63875,4XMM J103518.0+562408,158.825121,56.4023,9.7702e-15,3.030869,6.995479,9011_606788_302,158.827919,56.401126,1.375083,1.121523,xmm,,,,,,,,1,1
9,11,SRGe J103520.2+573355,158.834253,57.565203,8.727764,3.591921e-15,,,,,,,6.01555,9011_609941_914,158.835675,57.562204,11.139738,,csc,7.821427,2CXO J103520.9+573349,158.83714,57.563679,4.241985e-15,2.384348,1.18098,1,1


In [182]:
# #save final_ctps_csv to csv
final_ctps_csv_coords = final_ctps[['srcname_fin', 'RA_fin',	'DEC_fin',	'pos_r98']]
final_ctps_csv_coords_matched = cross_match_data_frames(final_ctps_csv_coords, desi[['desi_id', 'ra', 'dec', 'type']], 'RA_fin', 'DEC_fin', 'ra', 'dec', closest=True, match_radius=30)
final_ctps_csv_coords_matched

cross-match radius 30 arcsec
total matches: 684 out of 54 x 2418574
	 total unique pairs: 684
	 total non-unique pairs (duplicates in df2): 0
total closest matches: 54


Unnamed: 0,srcname_fin,RA_fin,DEC_fin,pos_r98,desi_id,ra,dec,type,sep,n_near,n_matches
12,SRGe J103121.9+573134,157.841091,57.526169,8.679921,9011_609939_482,157.839339,57.524106,REX,8.162275,17,1
25,SRGe J103158.3+573841,157.993098,57.644831,11.907729,9011_610714_3400,157.9888,57.643752,PSF,9.144548,12,1
30,SRGe J103220.4+573211,158.084926,57.53645,5.714911,9011_609939_2890,158.088107,57.538123,PSF,8.606599,12,1
43,SRGe J103239.4+574033,158.163965,57.675913,6.556224,9011_610715_599,158.169132,57.676603,DEV,10.251895,16,1
61,SRGe J103251.8+574550,158.215643,57.763855,11.176683,9011_610715_1014,158.221411,57.763161,REX,11.355228,12,1
72,SRGe J103302.4+580241,158.260049,58.044662,6.663467,9011_611484_2685,158.264079,58.04597,REX,9.006889,5,1
81,SRGe J103349.3+584441,158.455573,58.744828,6.972946,9011_613762_405,158.448776,58.745133,PSF,12.743523,11,1
97,SRGe J103409.4+574725,158.539363,57.790228,7.180546,9011_610715_3500,158.536213,57.788361,REX,9.038309,15,1
105,SRGe J103518.6+562405,158.827436,56.401399,6.233959,9011_606788_302,158.827919,56.401126,REX,1.375083,18,1
121,SRGe J103520.2+573355,158.834253,57.565203,8.727764,9011_609941_904,158.834278,57.566603,REX,5.04188,15,1


In [183]:
final_ctps_csv_coords_matched.query('sep>pos_r98 & type=="PSF"')

Unnamed: 0,srcname_fin,RA_fin,DEC_fin,pos_r98,desi_id,ra,dec,type,sep,n_near,n_matches
30,SRGe J103220.4+573211,158.084926,57.53645,5.714911,9011_609939_2890,158.088107,57.538123,PSF,8.606599,12,1
81,SRGe J103349.3+584441,158.455573,58.744828,6.972946,9011_613762_405,158.448776,58.745133,PSF,12.743523,11,1
137,SRGe J103525.8+595331,158.857628,59.891915,8.250369,9011_617451_1827,158.86191,59.888438,PSF,14.712714,8,1
165,SRGe J103748.7+585641,159.452829,58.944819,5.360453,9011_614512_1573,159.448476,58.943895,PSF,8.741244,22,1
243,SRGe J104122.8+590252,160.344824,59.047725,8.90271,9011_614514_927,160.339396,59.048743,PSF,10.696558,15,1
280,SRGe J104341.0+590023,160.920944,59.006447,10.333953,9011_614515_1534,160.919953,59.00942,PSF,10.858004,14,1
301,SRGe J104453.1+585450,161.221287,58.913756,8.352394,9011_614516_382,161.22691,58.914631,PSF,10.915065,8,1
395,SRGe J104656.6+572859,161.735777,57.482989,5.116249,9011_609947_2233,161.73915,57.482406,PSF,6.8549,12,1
662,SRGe J105640.4+573203,164.168282,57.534091,6.063802,9011_609952_2647,164.167686,57.536689,PSF,9.422389,14,1


In [184]:
# #save final_ctps_csv to csv
final_ctps_csv = final_ctps[['srcname_fin', 'desi_id']]
final_ctps_csv.rename(columns={'srcname_fin':'ID'}, inplace=True)
final_ctps_csv.rename(columns={'desi_id':'desi_id_true_ctp'}, inplace=True)
final_ctps_csv.desi_id_true_ctp = 'hostless'
final_ctps_csv.ID = final_ctps_csv.ID.str.encode('utf-8')
final_ctps_csv.desi_id_true_ctp = final_ctps_csv.desi_id_true_ctp.str.encode('utf-8')
final_ctps_csv.to_pickle(data_path+'validation_ctps_ero_desi_lh_hostless.pkl')
final_ctps_csv

Unnamed: 0,ID,desi_id_true_ctp
0,b'SRGe J103121.9+573134',b'hostless'
1,b'SRGe J103158.3+573841',b'hostless'
2,b'SRGe J103220.4+573211',b'hostless'
3,b'SRGe J103239.4+574033',b'hostless'
5,b'SRGe J103251.8+574550',b'hostless'
7,b'SRGe J103302.4+580241',b'hostless'
8,b'SRGe J103349.3+584441',b'hostless'
9,b'SRGe J103409.4+574725',b'hostless'
10,b'SRGe J103518.6+562405',b'hostless'
11,b'SRGe J103520.2+573355',b'hostless'


# Combine non-hostless and hostless catalogs to produce final validation sample

In [185]:
df_ctps = pd.read_pickle(data_path+'validation_ctps_ero_desi_lh_no_hostless.pkl')
df_hostless = pd.read_pickle(data_path+'validation_ctps_ero_desi_lh_hostless.pkl')
df_ctps = pd.concat([df_ctps, df_hostless])

df_ctps.to_pickle(data_path+'validation_ctps_ero_desi_lh.pkl')
df_ctps.sample(10)

Unnamed: 0,ID,desi_id_true_ctp
776,b'SRGe J110542.4+590043',b'9011_614526_3153'
48,b'SRGe J103308.8+573832',b'9011_610715_1492'
727,b'SRGe J105520.3+574804',b'9011_610727_980'
110,b'SRGe J103419.7+574450',b'9011_610716_254'
17,b'SRGe J104020.7+594143',b'hostless'
550,b'SRGe J104934.6+591956',b'9011_615261_2917'
469,b'SRGe J104757.3+573451',b'9011_609948_337'
221,b'SRGe J103953.2+574056',b'9011_610719_174'
720,b'SRGe J105503.7+572342',b'9011_609951_3368'
686,b'SRGe J105329.0+572104',b'9011_609170_3546'
