# CSC/XMM validation catalogs, XMM hostless catalog

In [1]:
import sys
sys.path.append('../')

In [2]:
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np

import matplotlib.pyplot as plt
# plt.style.use('seaborn')

import seaborn as sns

import json

from scripts.utils import set_mpl, data_path
from scripts.cross_match_scripts import cross_match_data_frames, fits_to_pandas
set_mpl()

%matplotlib inline
from astropy.table import Table

matplotlib settings set










matplotlib settings set
matplotlib settings set


***

> pos_r_correction() is outdated, as well as processing of ERO duplicates in xray_filtration()

In [3]:
def csc_error_converter(df: pd.DataFrame,
                        r0_colname='err_ellipse_r0',
                        r1_colname='err_ellipse_r1') -> pd.DataFrame:
    """
    The function converts default radii `r0_colname`
    and `r1_colname` to the one-sigma error.

    Args:
        df (pd.DataFrame): DataFrame with `r0_colname` and
        `r1_colname` columns.
        
        r0_colname (str): major radius of the 95% confidence
        level position error ellipse.
        Defaults to 'err_ellipse_r0'.
        
        r1_colname (str): minor radius of the 95% confidence
        level position error ellipse.
        Defaults to 'err_ellipse_r1'.

    Returns:
        pd.DataFrame: one-sigma error in arcseconds.
    """

    # Conversion coefficient
    csc_sigma_coeff = np.sqrt(-(1 / (2 * np.log(1 - .95))))

    err_r1 = df[r0_colname]
    err_r2 = df[r1_colname]
    # Effective error
    csc_err_eff = np.sqrt(err_r1 ** 2 + err_r2 ** 2)

    csc_1sigma = csc_sigma_coeff * csc_err_eff

    return csc_1sigma


def vot2pd_csc(csc_cat_path: str,
               colnames: list,
               save_coords: bool = False, 
               radec_fits_name: str = 'cscresults_name_radec') -> pd.DataFrame:
    """
    The function converts the votable file to pandas DataFrame.

    Optionally saves the separate file with the coordinates and names of
    the CSC sources to the FITS file.

    Args:
        csc_cat_path (str): the path to the votable file.
        colnames (list): column names of the votable file.
        save_coords (bool): if True, saves the separate file with the coordinates
        and names of the CSC sources to the FITS file.
        radec_fits_name (str): name of the FITS file with
        the coordinates and names of the CSC sources to be saved.

    Returns:
        pd.DataFrame: converted catalogue.
    """
    
    vot_table = Table.read(csc_cat_path, format='votable')
    
    df = vot_table.to_pandas()
    df.columns = colnames

    df = df.assign(flux_csc_05_2 = lambda x: x.flux_aper_s + x.flux_aper_m)

    one_sigma_errors = csc_error_converter(df)

    df = df.assign(radec_err_csc = one_sigma_errors)

    df['r_98_csc'] = tsource_r(sigma=df['radec_err_csc'], t_thresh=.02)

    if save_coords:

        df_init_coords = df[['name', 'ra', 'dec']]
        Table.from_pandas(df_init_coords).write(f'data/{radec_fits_name}.fits', format='fits')

    return df


def tsource_r(sigma: float, t_thresh: float) -> float:
    """
    Calculates the radius of circle which contains the probability `t_thresh`
    NOT to find a counterpart for a source with localization error `sigma`.

    Args:
        sigma (float): localization error in arcsec.
        t_thresh (float): probability to NOT find a counterpart.

    Returns:
        float: radius of circle in arcsec.
    """
    
    rsearch = sigma * np.sqrt(-2 * np.log(t_thresh))
    
    return rsearch


def poserr2sigma_coeff(conf_level: float) -> float:
    """
    Calculates convertion coefficient to go from
    positional error to sigma given confidence level.

    For details see:
    https://www.notion.so/Theory-ca6e7795b40c43b4ba6d96bc59727efa#b4d9fc11ff8243a3834e9eeba08c2273
    """
    coeff = (-2 * np.log(1 - conf_level)) ** -0.5
    
    return coeff


def fsource_r(rho: float, f_thresh: float) -> float:
    """
    The function calculates the radius which corresponds to the probability `f_thresh`
    to FIND one or more false sources. 

    Args:
        rho (float): the density of the false sources (in arcmin^{-2}).
        f_thresh (float): probability to FIND one or more false sources.

    Returns:
        float: radius (in arcsec).
    """

    rho = rho / 3600 # arcmin^{-2} -> arcsec^{-2}
    pf_r = np.sqrt(-1 / (rho * np.pi) * np.log(1 - f_thresh))
    
    return pf_r


def pos_r(sigma: float, conf_level: float) -> float:
    """
    Calculates the radius of circle which contains the probability `conf_level`
    of finding (opposite to tsource_r()) a counterpart for a source with
    localization error `sigma`.

    For details see:
    https://www.notion.so/Theory-ca6e7795b40c43b4ba6d96bc59727efa#0ba88df64d2d4d9583f93d63dbe1b927

    Args:
        sigma (float): localization error in arcsec.
        conf_level (float): probability of finding a counterpart.

    Returns:
        float: radius of circle in arcsec.
    """
    
    r_pos = sigma * np.sqrt(-2 * np.log(1 - conf_level))

    return r_pos


# def only_reliable_xmm(df: pd.DataFrame) -> pd.DataFrame:
#     """
#     Filters out unreliable XMM sources.
#     """
#     reliable_df = df[
#                     ((df['xmm_SC_SUM_FLAG'] == 0) |
#                     (df['xmm_SC_SUM_FLAG'] == 1)) &
#                     (df['xmm_SC_DET_ML'] > 10) &
#                     ~(df['xmm_SC_VAR_FLAG'] == True) &
#                     (df['xmm_SC_EXTENT'] == 0) &
#                     (df['xmm_CONFUSED'] == False)
#                     ]

#     return reliable_df


def only_reliable_xmm(df: pd.DataFrame) -> pd.DataFrame:
    """
    Filters out unreliable XMM sources.
    """
    reliable_df = df[
                    ((df['SC_SUM_FLAG'] == 0) |
                    (df['SC_SUM_FLAG'] == 1)) &
                    (df['SC_DET_ML'] > 10) &
                    ~(df['SC_VAR_FLAG'] == True) &
                    (df['SC_EXTENT'] == 0) &
                    (df['CONFUSED'] == False)
                    ]

    return reliable_df




def xray_filtration(df: pd.DataFrame,
                    DL_thresh: float = 6,
                    EL_thresh: float = 6,
                    verbouse=True) -> pd.DataFrame:
    """
    Filters X-ray sources.
    TODO: remake processing of duplicates
    """
    
    if verbouse:
        print(f'DET_LIKE_0 > {DL_thresh}')
        print(f'EXT_LIKE < {EL_thresh}')
        print()

        print(f'Before X-ray source filters: {len(df)}')

    df = df[(df['DET_LIKE_0'] > DL_thresh)&
            (df['EXT_LIKE'] < EL_thresh)]

    if verbouse:
        print(f'After X-ray source filters: {len(df)}')
        print()


    # Manually get rid of faint sources in duplicated pairs
    df = df[~((df['srcname_fin']=='SRGe J104659.3+573056')&(df['DET_LIKE_0'] < 20))]
    df = df[~((df['srcname_fin'] == 'SRGe J104700.7+574558')&(df['DET_LIKE_0'] < 20))]
    print('Weak ERO duplicates removed (temporary measure)')
    print()

    return df


def colnames():

    # Словарь для унификации имен колонок

    srg_names = {
                'id_src_name': 'srcname_fin',  # Индексы рентгеновских источников
                'x_ra_name': 'RA_fin',  # Координаты рентгеновских источников
                'x_dec_name': 'DEC_fin',
                'dl_name': 'DET_LIKE_0',  # Detection Likelihood
                'x_flux_name': 'flux_05-20',
                'ext_name': 'EXT_LIKE',  # Протяженность рентгеновских источников
                'ls_ra_name': 'ra',  # Координаты источников DESI
                'ls_dec_name': 'dec',
                'r_98_name': 'pos_r98',  # Позиционная ошибка
                'sigma_2d_name': 'pos_sigma_2d'
                }
    return srg_names

***

## Предобработка

### CSC

In [4]:
# CSC table conversion
csc_columns = ['name', 'ra', 'dec', 'err_ellipse_r0', 'err_ellipse_r1', 'err_ellipse_ang',
               'significance', 'likelihood', 'likelihood_class', 'conf_flag', 'dither_warning_flag',
               'extent_flag', 'pileup_flag', 'sat_src_flag', 'streak_src_flag', 'var_flag',
               'flux_aper_s', 'flux_aper_lolim_s', 'flux_aper_hilim_s', 'flux_aper_m',
               'flux_aper_lolim_m', 'flux_aper_hilim_m']

# Filtration
# data/cscresults.vot obtained via CSCview software
csc_init_df = vot2pd_csc(csc_cat_path=data_path+'cscresults.vot', colnames=csc_columns)

# CSC catalogue filtering
csc_filtered_df = csc_init_df[(csc_init_df['conf_flag'] == False) &
                              (csc_init_df['extent_flag'] == False) &
                              (csc_init_df['sat_src_flag'] == False) &
                              (csc_init_df['streak_src_flag'] == False) &
                              (csc_init_df['pileup_flag'] == False) &
                              (csc_init_df['dither_warning_flag'] == False)]


csc_filtered_df = csc_filtered_df[(csc_filtered_df['likelihood'] > 10) &
                                  (csc_filtered_df['likelihood'] < 10 ** 10)]

data_loss = 1 - csc_filtered_df.shape[0] / csc_init_df.shape[0]

print('*' * 10)
print(f'Data loss due to the source flags and likelihood filtration: {data_loss:.0%}')
print('*' * 10)

print(f'Sources left: {csc_filtered_df.shape[0]}')
# csc_filtered_df.to_csv('data/LH/csc_allsky_filtered.csv', index=False)


**********
Data loss due to the source flags and likelihood filtration: 16%
**********
Sources left: 267265


***

In [5]:
# Каталог ERO (9500)
ero_df = pd.read_pickle(data_path+'ERO_lhpv_03_23_sd01_a15_g14_orig.pkl')

# Ибранные колонки ERO для корреляции с CSC/XMM в TOPCAT
ero_columns = ['srcname_fin', 'RA_fin', 'DEC_fin', 'flux_05-20',
               'pos_sigma_2d', 'pos_r98', 'DET_LIKE_0', 'EXT_LIKE']

ero_slim_point_filtered = xray_filtration(ero_df[ero_columns], DL_thresh=6, EL_thresh=6)

# ero_slim_point_filtered.to_csv('data/LH/ero_slim_point_filtered.csv', index=False)

DET_LIKE_0 > 6
EXT_LIKE < 6

Before X-ray source filters: 9500
After X-ray source filters: 9228

Weak ERO duplicates removed (temporary measure)



In [6]:
srg_names = colnames()

srg_names

{'id_src_name': 'srcname_fin',
 'x_ra_name': 'RA_fin',
 'x_dec_name': 'DEC_fin',
 'dl_name': 'DET_LIKE_0',
 'x_flux_name': 'flux_05-20',
 'ext_name': 'EXT_LIKE',
 'ls_ra_name': 'ra',
 'ls_dec_name': 'dec',
 'r_98_name': 'pos_r98',
 'sigma_2d_name': 'pos_sigma_2d'}

***

In [7]:
# desi_lh.gz_pkl number of sources and area
false_dens_arcmin = 2418574 / (41.729 * 3600)
print(false_dens_arcmin)
false_dens_arcsec = false_dens_arcmin / 3600

print(f'{ false_dens_arcsec:.3f}')

FALSE_PROB = .03

r_false = fsource_r(false_dens_arcmin, FALSE_PROB)

print(f'Радиус для {FALSE_PROB:.0%} вероятности найти ложный: {r_false:.2f}"')

16.099741453452303
0.004
Радиус для 3% вероятности найти ложный: 1.47"


***

## Topcat ER0-CSC: all in 30 sec <a class="anchor" id="section_2"></a>

Файл `csc_allsky_ero_slim_point_30sec` получен через TOPCAT: `ero_slim_point_filtered.csv` ⤫ `csc_filtered_df.csv`, все источники в 30"

In [44]:
from scripts.cross_match_scripts import cross_match_data_frames
ero_csc = cross_match_data_frames(ero_slim_point_filtered, csc_filtered_df,
 'RA_fin', 'DEC_fin', 'ra', 'dec', match_radius = 30)
ero_csc

matplotlib settings set
cross-match radius 30 arcsec
total matches: 742 out of 9226 x 267265
	 total unique pairs: 689
	 total non-unique pairs (duplicates in df2): 53


Unnamed: 0,srcname_fin,RA_fin,DEC_fin,flux_05-20,pos_sigma_2d,pos_r98,DET_LIKE_0,EXT_LIKE,sep,name,ra,dec,err_ellipse_r0,err_ellipse_r1,err_ellipse_ang,significance,likelihood,likelihood_class,conf_flag,dither_warning_flag,extent_flag,pileup_flag,sat_src_flag,streak_src_flag,var_flag,flux_aper_s,flux_aper_lolim_s,flux_aper_hilim_s,flux_aper_m,flux_aper_lolim_m,flux_aper_hilim_m,flux_csc_05_2,radec_err_csc,r_98_csc,n_near,n_matches
0,SRGe J110242.6+594120,165.677384,59.688978,1.420726e-13,2.736290,7.653812,211.181183,0.0,2.779278,2CXO J110242.8+594122,165.678600,59.689447,2.291189,2.008557,31.648612,14.313177,629.754672,TRUE,False,False,False,False,False,False,False,1.247590e-13,1.097446e-13,1.397733e-13,9.484644e-14,8.480775e-14,1.048851e-13,2.196054e-13,1.244794,3.481875,1,1
1,SRGe J105130.7+573439,162.877967,57.577478,1.338790e-13,0.807089,2.257549,2212.055908,0.0,2.202561,2CXO J105130.8+573440,162.878645,57.577971,2.091168,1.732183,155.008478,4.833333,99.885284,TRUE,False,False,False,False,False,False,False,,,,,,,,1.109350,3.103018,1,2
2,SRGe J105316.7+573551,163.319421,57.597379,1.327770e-13,0.813768,2.276231,2174.864502,0.0,0.970884,2CXO J105316.7+573550,163.319906,57.597453,0.712676,0.711335,57.707508,44.571519,15055.956674,TRUE,False,False,False,False,False,False,False,,,,,,,,0.411369,1.150662,1,1
3,SRGe J105300.9+574208,163.253903,57.702143,1.266503e-13,0.816646,2.284281,2098.832275,0.0,1.147400,2CXO J105301.0+574208,163.254465,57.702249,0.726467,0.720211,169.452033,7.219485,381.913259,TRUE,False,False,False,False,False,False,False,,,,,,,,0.417921,1.168988,1,1
4,SRGe J104144.5+594258,160.435424,59.716091,1.125101e-13,0.835654,2.337449,1862.223511,0.0,2.516922,2CXO J104144.8+594258,160.436809,59.716113,6.180927,6.180927,0.000000,5.411765,104.364506,TRUE,False,False,False,False,False,False,False,8.864593e-14,6.113513e-14,1.146284e-13,6.073510e-14,4.220575e-14,7.823504e-14,1.493810e-13,3.571101,9.988904,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
737,SRGe J104353.8+584042,160.974258,58.678243,2.384449e-15,3.260011,9.118739,7.483704,0.0,21.954072,2CXO J104356.2+584052,160.984561,58.681160,1.518464,1.334340,0.283260,2.914286,22.027993,TRUE,False,False,False,False,False,False,False,0.000000e+00,0.000000e+00,3.343202e-16,5.459398e-16,2.339742e-16,8.579054e-16,5.459398e-16,0.825835,2.309983,2,1
738,SRGe J104730.2+584301,161.875925,58.716881,2.380455e-15,3.949338,11.046889,6.768798,0.0,1.225655,2CXO J104730.0+584301,161.875400,58.717085,1.497509,1.052945,106.765793,6.358832,108.524810,TRUE,False,False,False,False,False,False,False,2.731383e-15,1.837476e-15,3.575628e-15,1.870078e-15,1.409751e-15,2.330404e-15,4.601460e-15,0.747886,2.091949,1,1
740,SRGe J104624.2+584333,161.600662,58.725841,2.220139e-15,3.562497,9.964838,6.153393,0.0,4.155011,2CXO J104624.0+584328,161.600304,58.724701,0.933511,0.878197,18.479683,4.486486,67.095119,TRUE,False,False,False,False,False,False,False,6.391524e-16,3.669208e-16,8.995479e-16,1.154755e-15,8.042044e-16,1.505306e-15,1.793907e-15,0.523611,1.464619,2,1
739,SRGe J104624.2+584333,161.600662,58.725841,2.220139e-15,3.562497,9.964838,6.153393,0.0,23.924891,2CXO J104622.1+584315,161.592238,58.720836,1.642532,1.251769,171.757215,2.421053,16.040531,MARGINAL,False,False,False,False,False,False,False,2.798061e-16,0.000000e+00,5.596122e-16,2.750883e-16,1.192049e-16,4.309717e-16,5.548945e-16,0.843694,2.359937,2,1


In [35]:

# Сюда буду сохранять статистику фильтрации источников CSC
csc_stat = []

ero_csc = ero_csc.assign(flux_05_2 = lambda x: x.flux_aper_s + x.flux_aper_m)

# Ошибки на поток 0.5-2 кэВ
s_up = ero_csc.flux_aper_hilim_s - ero_csc.flux_aper_s
s_down = ero_csc.flux_aper_s - ero_csc.flux_aper_lolim_s
s_err = np.sqrt(s_up ** 2 + s_down ** 2)

m_up = ero_csc.flux_aper_hilim_m - ero_csc.flux_aper_m
m_down = ero_csc.flux_aper_m - ero_csc.flux_aper_lolim_m
m_err = np.sqrt(m_up ** 2 + m_down ** 2)

sm_err = np.sqrt(s_err ** 2 + m_err ** 2).values

ero_csc['flux_aper_sm_err'] = sm_err

err_r1 = ero_csc['err_ellipse_r0']
err_r2 = ero_csc['err_ellipse_r1']
csc_err_eff = np.sqrt(err_r1 ** 2 + err_r2 ** 2)

# Коэффициент для перехода к ошибке в sigma для csc
csc_sigma_coeff = poserr2sigma_coeff(.95)
csc_sigma = csc_sigma_coeff * csc_err_eff
ero_csc.insert(8, 'sigma', csc_sigma)

# pos_r98
csc_r98 = pos_r(csc_sigma, .98)
ero_csc.insert(8, 'csc_pos_r98', csc_r98)

csc_useful_cols = [srg_names['id_src_name'], 'name', 'ra', 'dec',
                   'err_ellipse_r0', 'err_ellipse_r1', 'sigma', 'csc_pos_r98', 'flux_05_2',
                   'flux_aper_sm_err', 'likelihood', 'likelihood_class', 'conf_flag', 'n_near', 'n_matches', 'sep']

# Добавляю префикс `csc_` ко всем колонкам и часть переименовываю
ero_csc = (ero_csc[csc_useful_cols].add_prefix('csc_')
           .rename(columns={'csc_' + srg_names['id_src_name']: srg_names['id_src_name'],
                            'csc_Separation': 'sep_ero_csc',
                            'csc_csc_pos_r98': 'csc_pos_r98'}))

ero_csc['csc_n_near'] = ero_csc['csc_n_near'].fillna(1)

print(f'Всего источников CSC в 30"-окружении источников ERO LH: {ero_csc.shape[0]}')

ero_csc_total = len(ero_csc[srg_names['id_src_name']].unique())

csc_stat_1 = f'Источники ERO, в 30" от которых есть CSC/XMM: {ero_csc_total}'
csc_stat.append(csc_stat_1)

ero_csc_single = ero_csc[ero_csc['csc_n_near']==1]
ero_csc_single = ero_csc_single[ero_csc_single['csc_n_matches']==1]

csc_stat_2 = f'Источники ERO, в 30" от которых CSC/XMM единственный: {ero_csc_single.shape[0]}'
csc_stat_2 = csc_stat_2 + f' ({ero_csc_single.shape[0] / ero_csc_total :.0%} от {ero_csc_total})'
csc_stat.append(csc_stat_2)

csc_no_flux = ero_csc_single['csc_flux_05_2'].isna().sum()

csc_stat_3 = f'Из них поток отсутствует для {csc_no_flux} CSC ({csc_no_flux / ero_csc_single.shape[0] :.0%})'
# csc_stat.append(csc_stat_3)

csc_stat

Всего источников CSC в 30"-окружении источников ERO LH: 742


['Источники ERO, в 30" от которых есть CSC/XMM: 652',
 'Источники ERO, в 30" от которых CSC/XMM единственный: 563 (86% от 652)']

In [68]:
ero_csc

Unnamed: 0,srcname_fin,csc_name,csc_ra,csc_dec,csc_err_ellipse_r0,csc_err_ellipse_r1,csc_sigma,csc_pos_r98,csc_flux_05_2,csc_flux_aper_sm_err,csc_likelihood,csc_likelihood_class,csc_conf_flag,csc_n_near,csc_sep
0,SRGe J110242.6+594120,2CXO J110242.8+594122,165.678600,59.689447,2.291189,2.008557,1.244794,3.481875,2.196054e-13,2.554235e-14,629.754672,TRUE,False,1,2.779278
1,SRGe J105130.7+573439,2CXO J105130.8+573440,162.878645,57.577971,2.091168,1.732183,1.109350,3.103018,,,99.885284,TRUE,False,1,2.202561
2,SRGe J105316.7+573551,2CXO J105316.7+573550,163.319906,57.597453,0.712676,0.711335,0.411369,1.150662,,,15055.956674,TRUE,False,1,0.970884
3,SRGe J105300.9+574208,2CXO J105301.0+574208,163.254465,57.702249,0.726467,0.720211,0.417921,1.168988,,,381.913259,TRUE,False,1,1.147400
4,SRGe J104144.5+594258,2CXO J104144.8+594258,160.436809,59.716113,6.180927,6.180927,3.571101,9.988904,1.493810e-13,4.562363e-14,104.364506,TRUE,False,1,2.516922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
737,SRGe J104353.8+584042,2CXO J104356.2+584052,160.984561,58.681160,1.518464,1.334340,0.825835,2.309983,5.459398e-16,5.535477e-16,22.027993,TRUE,False,2,21.954072
738,SRGe J104730.2+584301,2CXO J104730.0+584301,161.875400,58.717085,1.497509,1.052945,0.747886,2.091949,4.601460e-15,1.391266e-15,108.524810,TRUE,False,1,1.225655
740,SRGe J104624.2+584333,2CXO J104624.0+584328,161.600304,58.724701,0.933511,0.878197,0.523611,1.464619,1.793907e-15,6.226454e-16,67.095119,TRUE,False,2,4.155011
739,SRGe J104624.2+584333,2CXO J104622.1+584315,161.592238,58.720836,1.642532,1.251769,0.843694,2.359937,5.548945e-16,4.529704e-16,16.040531,MARGINAL,False,2,23.924891


In [69]:
# Список полей, которые могут вернуться в рассмотрение, если в них окажется
# единственный источник XMM (потребуется выкинуть их снова)

csc_crowded_fields = ero_csc[ero_csc['csc_n_near'] > 1][srg_names['id_src_name']].unique()

len(csc_crowded_fields)

73

Файл `csc_single_desi_15sec` получен через TOPCAT: `csc_single_30sec_ero.csv` ⤫ `desi_lh.csv`, все источники в 15"

In [86]:
desi = pd.read_pickle(data_path+'desi_lh.gz_pkl', compression='gzip')

In [94]:
csc_desi = cross_match_data_frames(ero_csc, desi, colname_ra1 = 'csc_ra',colname_dec1 = 'csc_dec' , colname_ra2 = 'ra', colname_dec2=  'dec', match_radius = 15)

matplotlib settings set
cross-match radius 15 arcsec
total matches: 3336 out of 742 x 2418574


In [96]:
csc_desi

Unnamed: 0,srcname_fin,csc_name,csc_ra,csc_dec,csc_err_ellipse_r0,csc_err_ellipse_r1,csc_sigma,csc_pos_r98,csc_flux_05_2,csc_flux_aper_sm_err,csc_likelihood,csc_likelihood_class,csc_conf_flag,csc_n_near,csc_sep,sep,release,objid,brickid,ra,dec,flux_g,flux_r,flux_z,flux_w1,flux_w2,flux_w3,flux_w4,flux_ivar_g,flux_ivar_r,flux_ivar_z,flux_ivar_w1,flux_ivar_w2,flux_ivar_w3,flux_ivar_w4,dered_mag_g,dered_mag_r,dered_mag_z,dered_mag_w1,dered_mag_w2,dered_mag_w3,dered_mag_w4,snr_g,snr_r,snr_z,snr_w1,snr_w2,snr_w3,snr_w4,type,parallax,parallax_ivar,pmra,pmra_ivar,pmdec,pmdec_ivar,ref_cat,ref_id,mjd_max,mjd_min,iso_max,iso_min,desi_id,n_near
2,SRGe J110242.6+594120,2CXO J110242.8+594122,165.678600,59.689447,2.291189,2.008557,1.244794,3.481875,2.196054e-13,2.554235e-14,629.754672,TRUE,False,1,2.779278,2.540902,9011,4152,616737,165.678178,59.688774,123.783880,140.066150,194.811190,287.176500,388.446700,1089.463000,2591.550300,53.469475,22.336110,17.090487,1.092097,0.326052,0.001556,0.000019,17.247953,17.120436,16.768284,16.353460,16.025955,14.906816,13.966043,905.142700,661.967900,805.361940,300.109280,221.806760,42.980423,11.174195,PSF,-0.023313,140.88054,-0.008907,94.32313,0.07499,50.308243,G2,860692071522063104,58194.477145,57813.360879,2018-03-17 11:27:05.369,2017-03-01 08:39:39.978,9011_616737_4152,7
1,SRGe J110242.6+594120,2CXO J110242.8+594122,165.678600,59.689447,2.291189,2.008557,1.244794,3.481875,2.196054e-13,2.554235e-14,629.754672,TRUE,False,1,2.779278,6.681360,9011,4143,616737,165.676791,59.691063,-0.015934,0.072536,0.596225,3.677433,6.544262,-131.121550,-1138.750900,749.922000,238.926740,137.933670,3.421677,0.924410,0.001541,0.000018,,25.335003,23.053860,21.084980,20.459639,,,-0.436357,1.121207,7.002371,6.802433,6.292063,-5.147129,-4.897227,PSF,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,,0,58194.477145,57813.360879,2018-03-17 11:27:05.369,2017-03-01 08:39:39.978,9011_616737_4143,7
0,SRGe J110242.6+594120,2CXO J110242.8+594122,165.678600,59.689447,2.291189,2.008557,1.244794,3.481875,2.196054e-13,2.554235e-14,629.754672,TRUE,False,1,2.779278,7.921696,9011,4145,616737,165.677359,59.691556,0.195642,1.009438,5.025369,20.147530,8.601536,132.703260,256.155700,533.070430,142.571530,69.601860,3.423772,0.957840,0.001511,0.000018,24.251139,22.476190,20.739468,19.238289,20.162850,17.192644,16.478683,4.517050,12.053024,41.925510,37.279858,8.418263,5.158134,1.097786,EXP,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,,0,58194.477145,57813.360879,2018-03-17 11:27:05.369,2017-03-01 08:39:39.978,9011_616737_4145,7
6,SRGe J110242.6+594120,2CXO J110242.8+594122,165.678600,59.689447,2.291189,2.008557,1.244794,3.481875,2.196054e-13,2.554235e-14,629.754672,TRUE,False,1,2.779278,9.511581,9011,4159,616737,165.679651,59.686858,0.197431,1.328728,7.968437,33.717575,22.626148,-38.689648,42.789257,582.890400,156.068020,76.894550,2.705871,0.784038,0.001532,0.000018,24.240913,22.177567,20.238825,18.679182,19.112751,,18.421604,4.766596,16.599430,69.874855,55.463833,20.034536,-1.514373,0.183683,EXP,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,,0,58194.477145,57813.360879,2018-03-17 11:27:05.369,2017-03-01 08:39:39.978,9011_616737_4159,7
3,SRGe J110242.6+594120,2CXO J110242.8+594122,165.678600,59.689447,2.291189,2.008557,1.244794,3.481875,2.196054e-13,2.554235e-14,629.754672,TRUE,False,1,2.779278,13.151090,9011,4215,616737,165.685667,59.688658,5.722161,12.874525,22.458698,14.679907,16.607830,57.873173,1718.805700,199.995600,48.588654,26.819610,3.107258,0.849036,0.001320,0.000017,20.585281,19.711649,19.113695,19.582000,19.448486,18.093650,14.411874,80.922690,89.742600,116.308330,25.876883,15.302977,2.102330,7.133257,DEV,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,,0,58194.477145,57813.360879,2018-03-17 11:27:05.369,2017-03-01 08:39:39.978,9011_616737_4215,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3331,SRGe J104624.2+584333,2CXO J104622.1+584315,161.592238,58.720836,1.642532,1.251769,0.843694,2.359937,5.548945e-16,4.529704e-16,16.040531,MARGINAL,False,2,23.924891,12.531251,9011,2172,613768,161.592149,58.724317,0.235341,0.478851,0.893515,0.676668,1.652257,29.815556,-222.128890,349.123260,107.748440,72.110374,4.914209,1.160982,0.001128,0.000014,24.042963,23.280777,22.611773,22.922470,21.953829,18.813684,,4.397306,4.970564,7.587535,1.500038,1.780288,1.001377,-0.821895,REX,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,,0,57919.193160,57851.323866,2017-06-15 04:38:09.000,2017-04-08 07:46:22.000,9011_613768_2172,2
3332,SRGe J104624.2+584333,2CXO J104622.1+584315,161.592238,58.720836,1.642532,1.251769,0.843694,2.359937,5.548945e-16,4.529704e-16,16.040531,MARGINAL,False,2,23.924891,13.570143,9011,2145,613768,161.588842,58.717505,0.110059,0.690946,1.887229,3.640805,5.885767,-12.176176,-393.020540,387.096000,128.416430,92.206800,4.718281,1.116598,0.001129,0.000014,24.868761,22.883083,21.800200,21.095451,20.574537,,,2.165379,7.829862,18.121998,7.908412,6.219443,-0.409115,-1.460077,REX,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,,0,57919.193160,57851.323866,2017-06-15 04:38:09.000,2017-04-08 07:46:22.000,9011_613768_2145,2
3334,SRGe J104241.6+591357,2CXO J104241.3+591356,160.672263,59.232319,2.365459,1.212545,1.085950,3.037565,1.898607e-15,8.883926e-16,36.298269,TRUE,False,1,2.230656,7.710835,9011,1096,615258,160.668637,59.233390,1.436928,5.050992,12.627905,35.036650,33.209656,289.941440,1063.597000,597.126300,214.927890,44.564034,3.310745,0.929113,0.000901,0.000012,22.081808,20.724985,19.737402,18.637285,18.695974,16.344040,14.932987,35.113018,74.049650,84.299220,63.750774,32.010950,8.702892,3.741132,REX,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,,0,58194.402846,57851.286979,2018-03-17 09:40:05.868,2017-04-08 06:53:15.000,9011_615258_1096,3
3333,SRGe J104241.6+591357,2CXO J104241.3+591356,160.672263,59.232319,2.365459,1.212545,1.085950,3.037565,1.898607e-15,8.883926e-16,36.298269,TRUE,False,1,2.230656,10.079974,9011,1140,615258,160.673198,59.235078,0.088126,0.295803,0.542876,-0.230369,0.240904,-10.851795,377.481350,656.092830,370.002100,81.150406,4.240592,1.092400,0.000973,0.000012,25.112806,23.806034,23.154041,,24.044530,,16.057692,2.257282,5.689897,4.890414,-0.474393,0.251788,-0.338502,1.307389,PSF,0.000000,0.00000,0.000000,0.00000,0.00000,0.000000,,0,58194.402846,57851.286979,2018-03-17 09:40:05.868,2017-04-08 06:53:15.000,9011_615258_1140,3


In [97]:
# Таблица CSC - DESI

csc_desi.rename(columns={'sep': 'sep_csc_desi'}, inplace=True)

csc_desi_target_colnames = ['csc_name', 'sep_csc_desi',
                            'release', 'brickid', 'objid',
                            'ra','dec']

csc_desi = csc_desi[csc_desi_target_colnames]

# Избавляюсь от наложений площадок
# csc_desi = csc_desi[csc_desi['brick_primary']==True]

# Уникальный индекс источников DESI
csc_desi['desi_id'] = csc_desi['release'].astype('str') + '_' + \
                      csc_desi['brickid'].astype('str') + '_' + \
                      csc_desi['objid'].astype('str')

# Ограничиваю расстояние до ложных через r_false
csc_desi = csc_desi[csc_desi.sep_csc_desi < r_false]

# Исправляю артефакт getaround.py
# csc_desi['csc_name'] = csc_desi.csc_name.astype(str)
# csc_desi['csc_name'] = csc_desi.csc_name.str[2:-1]

# Индексы csc, в r_false от которых содержится не более одного DESI
group = csc_desi.groupby('csc_name')['desi_id'].count()
single_idxes = group[group == 1].index

# Тройки ERO - CSC - DESI, соответствующие индексам из ячейки выше
csc_desi_final = csc_desi[csc_desi['csc_name'].isin(single_idxes)]

csc_with_desi_fraction = csc_desi_final.shape[0] / ero_csc_single.shape[0]

csc_desi_stat = f'CSC/XMM, в r_false {FALSE_PROB:.0%} ({r_false:.2f}") от которых'
csc_desi_stat = csc_desi_stat + f' наблюдается источник DESI: {csc_desi_final.shape[0]}'
csc_desi_stat = csc_desi_stat + f' ({csc_with_desi_fraction :.0%} от {ero_csc_single.shape[0]})'

csc_stat.append(csc_desi_stat)

csc_stat

['Источники ERO, в 30" от которых есть CSC/XMM: 652',
 'Источники ERO, в 30" от которых CSC/XMM единственный: 580 (89% от 652)',
 'CSC/XMM, в r_false 3% (1.47") от которых наблюдается источник DESI: 510 (88% от 580)']

## Компаньоны ERO, согласно данным CSC

In [100]:
# Объединяю таблицы ERO - CSC и CSC - DESI
csc_singles_f03_desi = ero_csc_single.merge(csc_desi_final, on='csc_name')

csc_singles_f03_desi['IDSRC_desi_id'] = csc_singles_f03_desi[srg_names['id_src_name']].astype('str') + '_' + \
                                          csc_singles_f03_desi['release'].astype('str') + '_' + \
                                          csc_singles_f03_desi['brickid'].astype('str') + '_' + \
                                          csc_singles_f03_desi['objid'].astype('str')

# Добавляю потоки ERO (из каталога LH, не обрезанного по ярким полям)
ero_flux_df = (ero_df[[srg_names['id_src_name'],
                       srg_names['x_flux_name'],
                       srg_names['r_98_name'],
                       srg_names['sigma_2d_name'],
                       srg_names['x_ra_name'],
                       srg_names['x_dec_name'],
                       srg_names['dl_name']]]
               .drop_duplicates().reset_index(drop=True).add_prefix('ero_'))

csc_singles_f03_desi = (csc_singles_f03_desi.merge(ero_flux_df, left_on=srg_names['id_src_name'],
                                                   right_on='ero_' + srg_names['id_src_name']))

drop_columns = ['ero_srcname_fin', 'csc_conf_flag', 'csc_n_near',]
csc_singles_f03_desi = csc_singles_f03_desi.drop(columns=drop_columns)

csc_singles_f03_desi['csc_ero_flux_ratio'] = csc_singles_f03_desi['csc_flux_05_2'] / csc_singles_f03_desi['ero_' + srg_names['x_flux_name']]

no_csc_flux_fraction = csc_singles_f03_desi.csc_flux_05_2.isna().sum() / csc_singles_f03_desi.shape[0]

(f'Поток csc отсутсвует для {no_csc_flux_fraction :.0%} источников ({csc_singles_f03_desi.csc_flux_05_2.isna().sum()})')

'Поток csc отсутсвует для 14% источников (62)'

In [16]:
# csc_singles_f03_desi.to_csv('data/LH/result/ero_csc_desi.csv', index=False)

### Фильтрация по разнице потоков

In [17]:
# # Эти источники нужно вернуть в выборку после фильтрации по разнице потока
# # no_cscflux_df = csc_singles_f03_desi[csc_singles_f03_desi['csc_flux_05_2'].isna()]

# # Потоки CSC и ERO отличаются не более чем в 5 раз
# flux_ratio_thold = 5
# csc_singles_f03_desi_flux_ratio = csc_singles_f03_desi[(csc_singles_f03_desi['csc_flux / ero_flux'] > 1 / flux_ratio_thold) &
#                                                        (csc_singles_f03_desi['csc_flux / ero_flux'] < flux_ratio_thold)]

# flux_ratio_fraction = csc_singles_f03_desi_flux_ratio.shape[0] / csc_singles_f03_desi.shape[0]
# csc_total_fraction = csc_singles_f03_desi_flux_ratio.shape[0] / ero_csc_total

# # Возвращаю источники с отсутсвующими потоками в выборку
# # csc_singles_f03_desi_flux_ratio = csc_singles_f03_desi_flux_ratio.append(no_cscflux_df)

# csc_flux_ratio_stat = f'Пары CSC/XMM-ERO, потоки в которых отличаются менее чем в {flux_ratio_thold} раз:'
# csc_flux_ratio_stat = csc_flux_ratio_stat + f' {csc_singles_f03_desi_flux_ratio.shape[0]}'
# csc_flux_ratio_stat = csc_flux_ratio_stat + f' ({flux_ratio_fraction:.0%} от {csc_singles_f03_desi.shape[0]}'
# csc_flux_ratio_stat = csc_flux_ratio_stat + f', {csc_total_fraction:.0%} от {ero_csc_total})'

# print('ERO - CSC - DESI')
# csc_stat.append(csc_flux_ratio_stat)
# csc_stat

***

## Topcat ER0-XMM: all in 30 sec <a class="anchor" id="section_3"></a>

In [18]:
# https://www.notion.so/LH-data-95f7ad4a14cc4b2d8ef4e3a3237bd29b#7a59406933d843c1a953f81b3ed12c4a
# Table.read('/Users/mike/Repos/pos_prob/data/xmm_csc/4XMM_DR10cat_slim_v1.0.fits').columns

Файл [`xmm_allsky_ero_slim_point_30sec`](https://www.notion.so/LH-data-95f7ad4a14cc4b2d8ef4e3a3237bd29b#0b2a31dad6d2470ea349adaee29ee516) получен через TOPCAT: [`ero_slim_point_filtered.csv`](https://www.notion.so/LH-data-95f7ad4a14cc4b2d8ef4e3a3237bd29b#aeb003b6fc9b4541b5c31955f93a761d) ⤫ [`4XMM_DR10_useful_cols.fits`](https://www.notion.so/LH-data-95f7ad4a14cc4b2d8ef4e3a3237bd29b#eb1f12b9b3aa48928867538944a5f5d0), все источники в 30"

`4XMM_DR10_useful_cols.fits` содержит избранные колонки из [4XMM_DR10cat_slim_v1.0.fits](http://xmmssc.irap.omp.eu/Catalogue/4XMM-DR10/4XMM_DR10.html). Дополнительной фильтрации по "флагам надежности" (в отличие от каталогоа CSC) не подвергался.

> Для избавления от ненадежных источников XMM требуется применять функцию `only_reliable_xmm()`. Все каталоги уже были проматчены с источниками XMM без фильтров, не хотелось заново все перематчивать.

In [118]:

def only_reliable_xmm(df: pd.DataFrame) -> pd.DataFrame:
    """
    Filters out unreliable XMM sources.
    """
    # reliable_df = df[
    #                 ((df['SC_SUM_FLAG'] == 0) |
    #                 (df['SC_SUM_FLAG'] == 1)) &
    #                 (df['SC_DET_ML'] > 10) &
    #                 ~(df['SC_VAR_FLAG'] == True) &
    #                 (df['SC_EXTENT'] == 0) &
    #                 (df['CONFUSED'] == False)
    #                 ]
    reliable_df = df[
                    ((df['SC_SUM_FLAG'] == 0) |
                    (df['SC_SUM_FLAG'] == 1)) &
                    (df['SC_DET_ML'] > 10) &
                    (df['CONFUSED'] == False)
                    ]

    return reliable_df


#TODO get all columns from XMM catalog

xmm = fits_to_pandas('4XMM_DR10_useful_cols.fits')
xmm_filtered = only_reliable_xmm(xmm)

In [119]:
ero_xmm = cross_match_data_frames(ero_slim_point_filtered, xmm_filtered,
 'RA_fin', 'DEC_fin', 'SC_RA', 'SC_DEC', match_radius = 30)
ero_xmm

cross-match radius 30 arcsec
total matches: 868 out of 9226 x 404281


Unnamed: 0,srcname_fin,RA_fin,DEC_fin,flux_05-20,pos_sigma_2d,pos_r98,DET_LIKE_0,EXT_LIKE,sep,4XMM-DR10,SRCID,SC_RA,SC_DEC,SC_POSERR,SC_DET_ML,SC_EP_2_FLUX,SC_EP_2_FLUX_ERR,SC_EP_3_FLUX,SC_EP_3_FLUX_ERR,SC_SUM_FLAG,CONFUSED,n_near
0,SRGe J104117.7+584929,160.323629,58.824642,1.817271e-13,0.774481,2.166339,3391.888916,0.0,2.015272,249579,205562112010003,160.324685,58.824762,0.817968,742.578003,4.008580e-14,4.210150e-15,4.761820e-14,4.978040e-15,0,False,1
1,SRGe J103618.3+581246,159.076205,58.212688,1.704142e-13,0.774599,2.166670,3020.239258,0.0,1.987860,247226,205562110010001,159.076917,58.213093,0.568896,1899.140015,1.193020e-13,6.654160e-15,4.842640e-14,4.819030e-15,0,False,1
2,SRGe J105130.7+573439,162.877967,57.577478,1.338790e-13,0.807089,2.257549,2212.055908,0.0,1.884348,254656,201237001010006,162.878878,57.577667,0.088446,29813.400391,7.388300e-14,5.480390e-16,3.313780e-14,4.006350e-16,1,False,1
3,SRGe J104827.7+593300,162.115455,59.550001,1.330816e-13,0.807671,2.259178,2195.570801,0.0,1.510640,253019,205562114010004,162.114828,59.549727,0.715736,861.070984,8.107030e-14,7.783980e-15,8.171100e-14,9.464120e-15,0,False,1
4,SRGe J105316.7+573551,163.319421,57.597379,1.327770e-13,0.813768,2.276231,2174.864502,0.0,1.045047,255526,201237001010003,163.319955,57.597429,0.077942,20363.300781,4.408280e-14,3.926690e-16,5.085020e-14,4.400550e-16,1,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
863,SRGe J104437.2+593126,161.154875,59.523770,2.422125e-15,3.094132,8.654750,6.360213,0.0,4.815319,251026,205562129010017,161.156622,59.522768,1.410030,38.172199,3.519570e-15,1.047860e-15,2.810970e-15,1.082730e-15,0,False,1
864,SRGe J104614.0+585942,161.558429,58.995032,2.399448e-15,4.287310,11.992247,6.614729,0.0,4.079490,252289,205541201010010,161.556281,58.994787,0.338216,487.102997,1.575080e-15,1.956650e-16,7.949230e-15,4.277440e-16,1,False,1
865,SRGe J103429.8+575220,158.624361,57.872160,2.389110e-15,3.696288,10.339070,6.083099,0.0,4.414215,246654,203032602010056,158.622144,57.871823,0.835653,59.839298,1.409920e-15,2.783360e-16,1.141200e-15,2.934840e-16,0,False,1
866,SRGe J104939.7+590758,162.415404,59.132889,2.343886e-15,3.250494,9.092117,7.194517,0.0,7.846452,253584,205562114010057,162.417503,59.134784,2.503000,12.233300,1.989860e-15,1.062740e-15,1.135800e-15,1.163260e-15,0,False,1


In [120]:

# Сюда буду сохранять статистику фильтрации источников XMM
xmm_stat = []

ero_xmm_total = len(ero_xmm[srg_names['id_src_name']].unique())

# Поток 0.5-2 кэВ
ero_xmm = ero_xmm.assign(flux_05_2 = lambda x: x.SC_EP_2_FLUX + x.SC_EP_3_FLUX)
# Ошибки на поток 0.5-2 кэВ
ero_xmm = ero_xmm.assign(flux_05_2_err = lambda x: np.sqrt(x.SC_EP_2_FLUX_ERR ** 2 + x.SC_EP_3_FLUX_ERR ** 2))

xmm_err = ero_xmm['SC_POSERR']

xmm_sigma_coeff = poserr2sigma_coeff(.63)
xmm_sigma = xmm_sigma_coeff * xmm_err
ero_xmm.insert(11, 'sigma', xmm_sigma)

# pos_r98
xmm_r98 = pos_r(xmm_sigma, .98)
ero_xmm.insert(11, 'xmm_pos_r98', xmm_r98)

# xmm_useful_cols = [srg_names['id_src_name'], 'SRCID', 'SC_RA', 'SC_DEC', 'SC_POSERR',
#                    'sigma', 'xmm_pos_r98', 'flux_05_2', 'flux_05_2_err',
#                    'SC_DET_ML', 'SC_VAR_FLAG', 'SC_SUM_FLAG',
#                    'SC_EXT_ML', 'SC_EXTENT', 'SC_EXT_ERR', 'SC_FVAR', 'SC_FVARERR',
#                    'CONFUSED', 'n_near', 'sep']

xmm_useful_cols = [srg_names['id_src_name'], 'SRCID', 'SC_RA', 'SC_DEC', 'SC_POSERR',
                   'sigma', 'xmm_pos_r98', 'flux_05_2', 'flux_05_2_err',
                   'SC_DET_ML', 'SC_SUM_FLAG',
                   'CONFUSED', 'n_near', 'sep']

  

#"['SC_VAR_FLAG', 'SC_EXT_ML', 'SC_EXTENT', 'SC_EXT_ERR', 'SC_FVAR', 'SC_FVARERR'] not in index"

# Добавляю префикс `xmm_` ко всем колонкам и часть переименовываю
ero_xmm = (ero_xmm[xmm_useful_cols].add_prefix('xmm_')
           .rename(columns={'xmm_' + srg_names['id_src_name']: srg_names['id_src_name'],
                            'xmm_sep': 'sep_ero_xmm',
                            'xmm_xmm_pos_r98': 'xmm_pos_r98'}))

ero_xmm['xmm_n_near'] = ero_xmm['xmm_n_near'].fillna(1)

print(f'Всего источников XMM в 30"-окружении источников ERO LH: {ero_xmm.shape[0]}')

ero_xmm_total = len(ero_xmm[srg_names['id_src_name']].unique())

xmm_stat_1 = f'Источники ERO, в 30" от которых есть XMM: {ero_xmm_total}'
xmm_stat.append(xmm_stat_1)

ero_xmm_single = ero_xmm[ero_xmm['xmm_n_near']==1]

xmm_stat_2 = f'Источники ERO, в 30" от которых XMM единственный: {ero_xmm_single.shape[0]}'
xmm_stat_2 = xmm_stat_2 + f' ({ero_xmm_single.shape[0] / ero_xmm_total :.0%} от {ero_xmm_total})'
xmm_stat.append(xmm_stat_2)

# # Файл с координатами XMM для корреляции с источниками DESI
# ero_xmm_single[['xmm_SRCID', 'xmm_SC_RA', 'xmm_SC_DEC']].to_csv('data/LH/correlated/xmm_single_30sec_ero.csv', index=False)

Всего источников XMM в 30"-окружении источников ERO LH: 868


In [124]:

def only_reliable_xmm2(df: pd.DataFrame) -> pd.DataFrame:
    """
    Filters out unreliable XMM sources.
    """
    reliable_df = df[
                    ((df['xmm_SC_SUM_FLAG'] == 0) |
                    (df['xmm_SC_SUM_FLAG'] == 1)) &
                    (df['xmm_SC_DET_ML'] > 10) &
                    (df['xmm_CONFUSED'] == False)
                    ]


    return reliable_df

# Каталог единственных источников XMM в 30"-окружении источников ERO LH
# ненадежные объекты исключаются здесь и на фианльном этапе (особенность пайплайна для XMM)
ero_xmm_single_reliable_df = only_reliable_xmm2(ero_xmm_single)

ero_xmm_single_extented = ero_slim_point_filtered.merge(
    ero_xmm_single_reliable_df, on=srg_names['id_src_name'], how='right'
    )

ero_xmm_single_extented['ero2xmm_flux_ratio'] = ero_xmm_single_extented['flux_05-20']\
                                                / ero_xmm_single_extented['xmm_flux_05_2']

# xmm_single_savepath = 'data/LH/singles_30_sec/xmm_single_30sec_ero_reliable.csv'
# ero_xmm_single_extented.to_csv(xmm_single_savepath, index=False)

In [126]:
# Список полей, которые могут вернуться в рассмотрение, если в них окажется
# единственный источник CSC (потребуется выкинуть их снова)

xmm_crowded_fields = ero_xmm[ero_xmm['xmm_n_near'] > 1][srg_names['id_src_name']].unique()

len(xmm_crowded_fields)

46

Файл `xmm_single_desi_15sec` получен через TOPCAT: `xmm_single_30sec_ero.csv` ⤫ `desi_lh.csv`, все источники в 15"

In [129]:
xmm_desi = cross_match_data_frames(ero_xmm_single_extented, desi, colname_ra1 = 'xmm_SC_RA',colname_dec1 = 'xmm_SC_DEC' , colname_ra2 = 'ra', colname_dec2=  'dec', match_radius = 15)

cross-match radius 15 arcsec
total matches: 3450 out of 772 x 2418574


In [130]:
# Таблица XMM - DESI

xmm_desi.rename(columns={'sep': 'sep_xmm_desi'}, inplace=True)

xmm_desi_target_colnames = ['xmm_SRCID', 'sep_xmm_desi', 'release',
                            'brickid', 'objid', 'ra', 'dec']

xmm_desi = xmm_desi[xmm_desi_target_colnames]

# xmm_desi.rename(columns={srg_names['desi_sep_name']: 'sep_xmm_desi'}, inplace=True)

# Избавляюсь от наложений площадок
# xmm_desi = xmm_desi[xmm_desi['brick_primary']==True]

# Уникальный индекс источников DESI
xmm_desi['desi_id'] = xmm_desi['release'].astype('str') + '_' + \
                      xmm_desi['brickid'].astype('str') + '_' + \
                      xmm_desi['objid'].astype('str')

# Ограничиваю расстояние до ложных через r_false
xmm_desi_rfalse = xmm_desi[xmm_desi.sep_xmm_desi < r_false]

# Индексы XMM, в r_false от которых содержится не более одного DESI
group = xmm_desi_rfalse.groupby('xmm_SRCID')['desi_id'].count()

single_idxes = group[group == 1].index

# Тройки ERO - XMM - DESI, соответствующие индексам из ячейки выше
xmm_desi_final = xmm_desi_rfalse[xmm_desi_rfalse['xmm_SRCID'].isin(single_idxes)]
xmm_with_desi_fraction = csc_desi_final.shape[0] / ero_csc_single.shape[0]

# xmm_desi_stat = f'Объект DESI в r_false {r_false:.2f}" наблюдается для {xmm_desi_final.shape[0]} источников XMM'
# xmm_desi_stat = xmm_desi_stat + f' ({xmm_with_desi_fraction :.0%} от {ero_xmm_single.shape[0]})'

xmm_desi_stat = f'Источники XMM, в r_false {FALSE_PROB:.0%} ({r_false:.2f}") от которых'
xmm_desi_stat = xmm_desi_stat + f' наблюдается источник DESI: {xmm_desi_final.shape[0]}'
xmm_desi_stat = xmm_desi_stat + f' ({xmm_with_desi_fraction :.0%} от {ero_xmm_single.shape[0]})'

xmm_stat.append(xmm_desi_stat)

xmm_stat

# print(f'Объект DESI в r_false наблюдается для {xmm_desi_final.shape[0]} источников XMM')
# print(f'(для{xmm_desi_final.shape[0] / ero_xmm_single.shape[0] : .0%} из всех подходящих объектов XMM)')

['Источники ERO, в 30" от которых есть XMM: 818',
 'Источники ERO, в 30" от которых XMM единственный: 772 (94% от 818)',
 'Источники XMM, в r_false 3% (1.47") от которых наблюдается источник DESI: 494 (88% от 772)']

### Hostless

In [132]:
# Add XMM features
all_xmm_ero_desi_df = ero_xmm_single.merge(xmm_desi, on='xmm_SRCID')

# Leave only closest DESI source in every xmm_SRCID group
closest_xmm2desi_df = (
    all_xmm_ero_desi_df
    .loc[all_xmm_ero_desi_df.groupby('xmm_SRCID')['sep_xmm_desi'].idxmin()]
    )

xmm_hostless_df = (closest_xmm2desi_df[
    closest_xmm2desi_df['sep_xmm_desi'] > 1.5 * closest_xmm2desi_df['xmm_pos_r98']
    ])

# Filter out unreliable XMM
#xmm_hostless_df = only_reliable_xmm(xmm_hostless_df)

# Add ERO features
xmm_hostless_extendet_df = ero_slim_point_filtered.merge(
    xmm_hostless_df, on=srg_names['id_src_name'], how='right'
    )

xmm_hostless_final_df = xmm_hostless_extendet_df.query('sep_ero_xmm < pos_r98')

len(xmm_hostless_final_df)

54

In [24]:
# xmm_hostless_final_df.to_csv('data/xmm_hostless_final_df.csv', index=False)

## Компаньоны ERO, согласно данным XMM <a class="anchor" id="section_4"></a>

In [133]:
# Объединяю таблицы ERO - XMM и XMM - DESI
xmm_singles_f03_desi = ero_xmm_single.merge(xmm_desi_final, on='xmm_SRCID')

# Unique ERO - DESI pairs
xmm_singles_f03_desi['IDSRC_desi_id'] = xmm_singles_f03_desi[srg_names['id_src_name']].astype('str') + '_' + \
                                        xmm_singles_f03_desi['release'].astype('str') + '_' + \
                                        xmm_singles_f03_desi['brickid'].astype('str') + '_' + \
                                        xmm_singles_f03_desi['objid'].astype('str')

xmm_singles_f03_desi

Unnamed: 0,srcname_fin,xmm_SRCID,xmm_SC_RA,xmm_SC_DEC,xmm_SC_POSERR,xmm_sigma,xmm_pos_r98,xmm_flux_05_2,xmm_flux_05_2_err,xmm_SC_DET_ML,xmm_SC_SUM_FLAG,xmm_CONFUSED,xmm_n_near,sep_ero_xmm,sep_xmm_desi,release,brickid,objid,ra,dec,desi_id,IDSRC_desi_id
0,SRGe J104117.7+584929,205562112010003,160.324685,58.824762,0.817968,0.580060,1.622515,8.770400e-14,6.519682e-15,742.578003,0,False,1,2.015272,0.975167,9011,613766,43,160.324219,58.824639,9011_613766_43,SRGe J104117.7+584929_9011_613766_43
1,SRGe J103618.3+581246,205562110010001,159.076917,58.213093,0.568896,0.403431,1.128458,1.677284e-13,8.215893e-15,1899.140015,0,False,1,1.987860,1.384893,9011,612250,2743,159.076381,58.212831,9011_612250_2743,SRGe J103618.3+581246_9011_612250_2743
2,SRGe J105316.7+573551,201237001010003,163.319955,57.597429,0.077942,0.055272,0.154605,9.493300e-14,5.897773e-16,20363.300781,1,False,1,1.045047,0.199473,9011,609950,3809,163.319852,57.597425,9011_609950_3809,SRGe J105316.7+573551_9011_609950_3809
3,SRGe J105300.9+574208,201237001010007,163.254014,57.702248,0.117241,0.083141,0.232558,1.417091e-13,1.175553e-15,7033.089844,1,False,1,0.433385,0.912156,9011,610726,375,163.254487,57.702268,9011_610726_375,SRGe J105300.9+574208_9011_610726_375
4,SRGe J104144.5+594258,205562117010001,160.435401,59.715994,0.594453,0.421555,1.179152,1.324608e-13,6.995086e-15,2003.729980,0,False,1,0.351432,0.180411,9011,616727,1391,160.435399,59.715943,9011_616727_1391,SRGe J104144.5+594258_9011_616727_1391
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
489,SRGe J105300.0+573153,201237002010141,163.249346,57.531882,0.226701,0.160765,0.449682,2.533720e-15,1.031501e-16,524.494019,1,False,1,2.236481,0.187163,9011,609950,3207,163.249275,57.531846,9011_609950_3207,SRGe J105300.0+573153_9011_609950_3207
490,SRGe J103318.9+575125,203032602010037,158.328992,57.857805,0.746229,0.529187,1.480214,3.660140e-15,4.082850e-16,134.804001,1,False,1,3.548774,0.848630,9011,610715,1858,158.329187,57.857594,9011_610715_1858,SRGe J103318.9+575125_9011_610715_1858
491,SRGe J105457.9+573855,201475112010065,163.745627,57.649104,0.587004,0.416273,1.164376,3.758242e-15,4.471437e-16,97.123802,0,False,1,8.732354,1.429558,9011,610727,413,163.746126,57.648810,9011_610727_413,SRGe J105457.9+573855_9011_610727_413
492,SRGe J104614.0+585942,205541201010010,161.556281,58.994787,0.338216,0.239845,0.670883,9.524310e-15,4.703719e-16,487.102997,1,False,1,4.079490,0.280728,9011,614516,2658,161.556353,58.994856,9011_614516_2658,SRGe J104614.0+585942_9011_614516_2658


### Фильтрация по разнице потоков

In [135]:
# # Добавляю потоки ERO (из каталога LH, не обрезанного по ярким полям)
# ero_flux_df = (ero_df[[srg_names['id_src_name'], srg_names['x_flux_name']]]
#                .drop_duplicates().reset_index(drop=True).add_prefix('ero_'))

xmm_singles_f03_desi = xmm_singles_f03_desi.merge(ero_flux_df, left_on=srg_names['id_src_name'],
                                                  right_on='ero_' + srg_names['id_src_name'])

drop_columns = ['ero_srcname_fin', 'xmm_n_near', ]
xmm_singles_f03_desi = xmm_singles_f03_desi.drop(columns=drop_columns)

# Отношение потоков XMM и ERO
xmm_singles_f03_desi['xmm_ero_flux_ratio'] = (xmm_singles_f03_desi['xmm_flux_05_2'] /
                                              xmm_singles_f03_desi['ero_' + srg_names['x_flux_name']])

# flux_ratio_thold = 5

# # Потоки xmm и ERO отличаются не более чем в 5 раз
# xmm_singles_f03_desi_flux_ratio = xmm_singles_f03_desi[(xmm_singles_f03_desi['xmm_flux / ero_flux'] > 1 / flux_ratio_thold) &
#                                                        (xmm_singles_f03_desi['xmm_flux / ero_flux'] < flux_ratio_thold)]


# flux_ratio_fraction = xmm_singles_f03_desi_flux_ratio.shape[0] / xmm_singles_f03_desi.shape[0]
# xmm_total_fraction = xmm_singles_f03_desi_flux_ratio.shape[0] / ero_xmm_total

# xmm_flux_ratio_stat = f'Пары XMM-ERO, потоки в которых отличаются менее чем в {flux_ratio_thold} раз:'
# xmm_flux_ratio_stat = xmm_flux_ratio_stat + f' {xmm_singles_f03_desi_flux_ratio.shape[0]}'
# xmm_flux_ratio_stat = xmm_flux_ratio_stat + f' ({flux_ratio_fraction:.0%} от {xmm_singles_f03_desi.shape[0]}'
# xmm_flux_ratio_stat = xmm_flux_ratio_stat + f', {xmm_total_fraction:.0%} от {ero_xmm_total})'

# xmm_stat.append(xmm_flux_ratio_stat)
# xmm_stat

In [137]:
# Reliability flags

# XMM catalogue filtering
#mm_final_df = only_reliable_xmm(xmm_singles_f03_desi)
xmm_final_df  = xmm_singles_f03_desi 
xmm_final_df

Unnamed: 0,srcname_fin,xmm_SRCID,xmm_SC_RA,xmm_SC_DEC,xmm_SC_POSERR,xmm_sigma,xmm_pos_r98,xmm_flux_05_2,xmm_flux_05_2_err,xmm_SC_DET_ML,xmm_SC_SUM_FLAG,xmm_CONFUSED,sep_ero_xmm,sep_xmm_desi,release,brickid,objid,ra,dec,desi_id,IDSRC_desi_id,ero_flux_05-20,ero_pos_r98,ero_pos_sigma_2d,ero_RA_fin,ero_DEC_fin,ero_DET_LIKE_0,xmm_ero_flux_ratio
0,SRGe J104117.7+584929,205562112010003,160.324685,58.824762,0.817968,0.580060,1.622515,8.770400e-14,6.519682e-15,742.578003,0,False,2.015272,0.975167,9011,613766,43,160.324219,58.824639,9011_613766_43,SRGe J104117.7+584929_9011_613766_43,1.817271e-13,2.166339,0.774481,160.323629,58.824642,3391.888916,0.482614
1,SRGe J103618.3+581246,205562110010001,159.076917,58.213093,0.568896,0.403431,1.128458,1.677284e-13,8.215893e-15,1899.140015,0,False,1.987860,1.384893,9011,612250,2743,159.076381,58.212831,9011_612250_2743,SRGe J103618.3+581246_9011_612250_2743,1.704142e-13,2.166670,0.774599,159.076205,58.212688,3020.239258,0.984240
2,SRGe J105316.7+573551,201237001010003,163.319955,57.597429,0.077942,0.055272,0.154605,9.493300e-14,5.897773e-16,20363.300781,1,False,1.045047,0.199473,9011,609950,3809,163.319852,57.597425,9011_609950_3809,SRGe J105316.7+573551_9011_609950_3809,1.327770e-13,2.276231,0.813768,163.319421,57.597379,2174.864502,0.714981
3,SRGe J105300.9+574208,201237001010007,163.254014,57.702248,0.117241,0.083141,0.232558,1.417091e-13,1.175553e-15,7033.089844,1,False,0.433385,0.912156,9011,610726,375,163.254487,57.702268,9011_610726_375,SRGe J105300.9+574208_9011_610726_375,1.266503e-13,2.284281,0.816646,163.253903,57.702143,2098.832275,1.118901
4,SRGe J104144.5+594258,205562117010001,160.435401,59.715994,0.594453,0.421555,1.179152,1.324608e-13,6.995086e-15,2003.729980,0,False,0.351432,0.180411,9011,616727,1391,160.435399,59.715943,9011_616727_1391,SRGe J104144.5+594258_9011_616727_1391,1.125101e-13,2.337449,0.835654,160.435424,59.716091,1862.223511,1.177323
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
490,SRGe J105300.0+573153,201237002010141,163.249346,57.531882,0.226701,0.160765,0.449682,2.533720e-15,1.031501e-16,524.494019,1,False,2.236481,0.187163,9011,609950,3207,163.249275,57.531846,9011_609950_3207,SRGe J105300.0+573153_9011_609950_3207,2.613640e-15,10.116030,3.616549,163.249738,57.531297,6.420947,0.969422
491,SRGe J103318.9+575125,203032602010037,158.328992,57.857805,0.746229,0.529187,1.480214,3.660140e-15,4.082850e-16,134.804001,1,False,3.548774,0.848630,9011,610715,1858,158.329187,57.857594,9011_610715_1858,SRGe J103318.9+575125_9011_610715_1858,2.526363e-15,9.949962,3.557179,158.328724,57.856830,7.128131,1.448779
492,SRGe J105457.9+573855,201475112010065,163.745627,57.649104,0.587004,0.416273,1.164376,3.758242e-15,4.471437e-16,97.123802,0,False,8.732354,1.429558,9011,610727,413,163.746126,57.648810,9011_610727_413,SRGe J105457.9+573855_9011_610727_413,2.521052e-15,10.676488,3.816917,163.741165,57.648678,8.032424,1.490744
493,SRGe J104614.0+585942,205541201010010,161.556281,58.994787,0.338216,0.239845,0.670883,9.524310e-15,4.703719e-16,487.102997,1,False,4.079490,0.280728,9011,614516,2658,161.556353,58.994856,9011_614516_2658,SRGe J104614.0+585942_9011_614516_2658,2.399448e-15,11.992247,4.287310,161.558429,58.995032,6.614729,3.969376


In [138]:
frac_xmm_unreliable = (len(xmm_singles_f03_desi) - len(xmm_final_df)) / len(xmm_singles_f03_desi)

print(f'Отсеяно {frac_xmm_unreliable:.0%} ненадежных источников XMM')

Отсеяно 0% ненадежных источников XMM


In [30]:
# # Save hostless XMM catalogue
# csc_final_df.to_csv('data/LH/result/ero_xmm_desi.csv', index=False)